Improve the abstract syntax for tokens #307

Merged · 2 commits · Sep 20, 2024
24 changes: 11 additions & 13 deletions lib/backend-glr/src/Happy/Backend/GLR/ProduceCode.lhs
@@ -19,6 +19,7 @@ This module is designed as an extension to the Haskell parser generator Happy.

> import Paths_happy_lib ( version )
> import Happy.Grammar
> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole )
> import Happy.Tabular.LALR
> import Data.Array ( Array, (!), array, assocs )
> import Data.Char ( isSpace, isAlphaNum )
@@ -273,9 +274,9 @@ that will be used for them in the GLR parser.
> | (i,tok) <- token_specs g ] -- Tokens (terminals)
> ++ [(eof_term g,"HappyEOF")] -- EOF symbol (internal terminal)
> where
> mkMatch tok = case mapDollarDollar tok of
> Nothing -> tok
> Just fn -> fn "_"
> mkMatch tok = case tok of
> TokenFixed t -> t
> TokenWithValue e -> substExpressionWithHole e "_"

> toGSym :: [(Int, String)] -> Int -> String
> toGSym gsMap i
@@ -314,10 +315,7 @@ It also shares identical reduction values as CAFs
> where
> startLine
> = unwords [ name , show_st exts state, "(" , getTok , ") =" ]
> getTok = let tok = toGSym gsMap symInt
> in case mapDollarDollar tok of
> Nothing -> tok
> Just f -> f "_"
> getTok = toGSym gsMap symInt
> mkAct act
> = case act of
> LR'Shift newSt _ -> "Shift " ++ show newSt ++ " []"
@@ -421,7 +419,7 @@ Creating a type for storing semantic rules
use in later stages.

> type SemInfo
> = [(String, String, [Int], [((Int,Int), ([(Int,String)],String), [Int])])]
> = [(String, String, [Int], [((Int, Int), ([(Int, TokenSpec)], String), [Int])])]

> mkGSemType :: Options -> Grammar String -> Pragmas -> (ShowS, SemInfo)
> mkGSemType (TreeDecode,_,_) g pragmas
@@ -566,17 +564,17 @@ Creates the appropriate semantic values.
> nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask
> ]

> mk_lambda :: [(Int, String)] -> Int -> String -> String
> mk_lambda :: [(Int, TokenSpec)] -> Int -> String -> String
> mk_lambda pats v
> = (\s -> "\\" ++ s ++ " -> ") . mk_binder id pats v

> mk_binder :: (String -> String) -> [(Int, String)] -> Int -> String -> String
> mk_binder :: (String -> String) -> [(Int, TokenSpec)] -> Int -> String -> String
> mk_binder wrap pats v
> = case lookup v pats of
> Nothing -> mkHappyVar v
> Just p -> case mapDollarDollar p of
> Nothing -> wrap . mkHappyVar v . showChar '@' . brack p
> Just fn -> wrap . brack' (fn . mkHappyVar v)
> Just p -> case p of
> TokenFixed p' -> wrap . mkHappyVar v . showChar '@' . brack p'
> TokenWithValue e -> wrap . brack' (substExpressionWithHole e . mkHappyVar v)


---
15 changes: 7 additions & 8 deletions lib/backend-lalr/src/Happy/Backend/LALR/ProduceCode.lhs
@@ -9,6 +9,7 @@ The code generator.
> import Paths_happy_lib ( version )
> import Data.Version ( showVersion )
> import Happy.Grammar
> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole )
> import Happy.Tabular.LALR

> import Data.Maybe ( isNothing, fromMaybe )
@@ -417,19 +418,17 @@ The token conversion function.
Use a variable rather than '_' to replace '$$', so we can use it on
the left hand side of '@'.

> removeDollarDollar xs = case mapDollarDollar xs of
> Nothing -> xs
> Just fn -> fn "happy_dollar_dollar"
> removeDollarDollar tok = case tok of
> TokenFixed t -> t
> TokenWithValue e -> substExpressionWithHole e "happy_dollar_dollar"

> mkHappyTerminalVar :: Int -> Int -> String -> String
> mkHappyTerminalVar i t =
> case tok_str_fn of
> case lookup t token_rep of
> Nothing -> pat
> Just fn -> brack (fn (pat []))
> Just (TokenFixed _) -> pat
> Just (TokenWithValue e) -> brack $ substExpressionWithHole e $ pat []
> where
> tok_str_fn = case lookup t token_rep of
> Nothing -> Nothing
> Just str' -> mapDollarDollar str'
> pat = mkHappyVar i

> tokIndex i = i - n_nonterminals - n_starts - 2
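
As a quick illustration of that substitution, here is a minimal, self-contained sketch of the new removeDollarDollar clause. The grammar-side spec `qvarid { TokenQVarId $$ }` and the helper name removeDollarDollar' are hypothetical, used only for this example; the types and substExpressionWithHole come from the modules added in this PR.

```haskell
import Happy.Grammar (TokenSpec (..))
import Happy.Grammar.ExpressionWithHole

-- Sketch of the new clause: '$$' is replaced by a named variable, not '_'.
removeDollarDollar' :: TokenSpec -> String
removeDollarDollar' (TokenFixed t)     = t
removeDollarDollar' (TokenWithValue e) = substExpressionWithHole e "happy_dollar_dollar"

main :: IO ()
main =
  -- Prints "TokenQVarId happy_dollar_dollar": a pattern that binds the lexed
  -- value by name rather than discarding it with '_'.
  putStrLn (removeDollarDollar' (TokenWithValue (ExpressionWithHole "TokenQVarId " "")))
```
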
6 changes: 3 additions & 3 deletions lib/frontend/boot-src/Parser.ly
@@ -132,12 +132,12 @@ The parser.
> : id { Just $1 }
> | {- nothing -} { Nothing }

> tokenSpecs :: { [(String,String)] }
> tokenSpecs :: { [(String, TokenSpec)] }
> : tokenSpec tokenSpecs { $1:$2 }
> | tokenSpec { [$1] }

> tokenSpec :: { (String,String) }
> : id code { ($1,$2) }
> tokenSpec :: { (String, TokenSpec) }
> : id code { ($1, parseTokenSpec $2) }

> ids :: { [String] }
> : id ids { $1 : $2 }
38 changes: 34 additions & 4 deletions lib/frontend/src/Happy/Frontend/AbsSyn.lhs
@@ -13,10 +13,18 @@ Here is the abstract syntax of the language we parse.
> getImportedIdentity, getMonad, getError,
> getPrios, getPrioNames, getExpect, getErrorHandlerType,
> getAttributes, getAttributetype, getAttributeGrammarExtras,
> Rule(..), Prod(..), Term(..), Prec(..)
> parseTokenSpec,
> Rule(..), Prod(..), Term(..), Prec(..),
> TokenSpec(..) -- reexport
> ) where

> import Happy.Grammar (ErrorHandlerType(..), AttributeGrammarExtras(..))
> import Data.Char (isAlphaNum)
> import Happy.Grammar
> ( ErrorHandlerType(..)
> , TokenSpec(..)
> , AttributeGrammarExtras(..)
> )
> import Happy.Grammar.ExpressionWithHole

> data BookendedAbsSyn
> = BookendedAbsSyn
@@ -63,7 +71,7 @@ generate some error messages.
>
> data Directive a
> = TokenType String -- %tokentype
> | TokenSpec [(a,String)] -- %token
> | TokenSpec [(a, TokenSpec)] -- %token
> | TokenName String (Maybe String) Bool -- %name/%partial (True <=> %partial)
> | TokenLexer String String -- %lexer
> | TokenErrorHandlerType String -- %errorhandlertype
@@ -109,7 +117,7 @@ generate some error messages.
> [] -> (False,"()","HappyIdentity","Prelude.>>=","Prelude.return")
> _ -> error "multiple monad directives"

> getTokenSpec :: [Directive t] -> [(t, String)]
> getTokenSpec :: [Directive t] -> [(t, TokenSpec)]
> getTokenSpec ds = concat [ t | (TokenSpec t) <- ds ]

> getPrios :: [Directive t] -> [Directive t]
@@ -170,3 +178,25 @@ generate some error messages.
> attributetype = at
> }
> (_ : _, Nothing) -> error "attributes found without attribute type directive"

> -- | Parse a token spec.
> --
> -- The first occurrence of '$$' marks an expression in which the '$$' will
> -- be replaced by the actual lexed token. A '$$' inside a string or
> -- character literal ('".."' or '\'.\''), however, does not count.
> parseTokenSpec :: String -> TokenSpec
> parseTokenSpec code0 = go code0 ""
> where go code acc =
> case code of
> [] -> TokenFixed code0
>
> '"' :r -> case reads code :: [(String,String)] of
> [] -> go r ('"':acc)
> (s,r'):_ -> go r' (reverse (show s) ++ acc)
> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc)
> '\'' :r -> case reads code :: [(Char,String)] of
> [] -> go r ('\'':acc)
> (c,r'):_ -> go r' (reverse (show c) ++ acc)
> '\\':'$':r -> go r ('$':acc)
> '$':'$':r -> TokenWithValue $ ExpressionWithHole (reverse acc) r
> c:r -> go r (c:acc)
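
To make the new abstract syntax concrete, here is a small sketch of what parseTokenSpec returns for a few representative token specs. The specs TokenPlus, TokenInt and TokenSym are invented for illustration; the function and types are the ones added in this PR.

```haskell
import Happy.Frontend.AbsSyn (parseTokenSpec)

main :: IO ()
main = do
  -- No '$$': the spec is a fixed token and is kept verbatim.
  print (parseTokenSpec "TokenPlus")
  -- TokenFixed "TokenPlus"

  -- A '$$' hole: the surrounding text is captured as an ExpressionWithHole.
  print (parseTokenSpec "TokenInt $$")
  -- TokenWithValue (ExpressionWithHole {exprLeft = "TokenInt ", exprRight = ""})

  -- A '$$' inside a string literal does not count as a hole.
  print (parseTokenSpec "TokenSym \"$$\"")
  -- TokenFixed "TokenSym \"$$\""
```
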
10 changes: 5 additions & 5 deletions lib/frontend/src/Happy/Frontend/Parser.hs
@@ -135,15 +135,15 @@ happyIn19 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap19 x)
happyOut19 :: (HappyAbsSyn ) -> HappyWrap19
happyOut19 x = Happy_GHC_Exts.unsafeCoerce# x
{-# INLINE happyOut19 #-}
newtype HappyWrap20 = HappyWrap20 ([(String,String)])
happyIn20 :: ([(String,String)]) -> (HappyAbsSyn )
newtype HappyWrap20 = HappyWrap20 ([(String, TokenSpec)])
happyIn20 :: ([(String, TokenSpec)]) -> (HappyAbsSyn )
happyIn20 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap20 x)
{-# INLINE happyIn20 #-}
happyOut20 :: (HappyAbsSyn ) -> HappyWrap20
happyOut20 x = Happy_GHC_Exts.unsafeCoerce# x
{-# INLINE happyOut20 #-}
newtype HappyWrap21 = HappyWrap21 ((String,String))
happyIn21 :: ((String,String)) -> (HappyAbsSyn )
newtype HappyWrap21 = HappyWrap21 ((String, TokenSpec))
happyIn21 :: ((String, TokenSpec)) -> (HappyAbsSyn )
happyIn21 x = Happy_GHC_Exts.unsafeCoerce# (HappyWrap21 x)
{-# INLINE happyIn21 #-}
happyOut21 :: (HappyAbsSyn ) -> HappyWrap21
@@ -768,7 +768,7 @@ happyReduction_51 happy_x_2
= case happyOutTok happy_x_1 of { (TokenInfo happy_var_1 TokId) ->
case happyOutTok happy_x_2 of { (TokenInfo happy_var_2 TokCodeQuote) ->
happyIn21
((happy_var_1,happy_var_2)
((happy_var_1, parseTokenSpec happy_var_2)
)}}

happyReduce_52 :: () => Happy_GHC_Exts.Int# -> Token -> Happy_GHC_Exts.Int# -> Happy_IntList -> HappyStk (HappyAbsSyn ) -> P (HappyAbsSyn )
41 changes: 16 additions & 25 deletions lib/grammar/src/Happy/Grammar.lhs
@@ -8,32 +8,43 @@ The Grammar data type.
> module Happy.Grammar (
> Name,
>
> Production(..), Grammar(..),
> Production(..),
> TokenSpec(..),
> Grammar(..),
> AttributeGrammarExtras(..),
> Priority(..),
> Assoc(..),
> Pragmas(..), ErrorHandlerType(..),
>
> errorName, errorTok, startName, dummyName, firstStartTok, dummyTok,
> eofName, epsilonTok,
>
> mapDollarDollar
> ) where

> import Data.Array
> import Data.Char (isAlphaNum)
> import Happy.Grammar.ExpressionWithHole (ExpressionWithHole)

> type Name = Int

> data Production eliminator
> = Production Name [Name] (eliminator,[Int]) Priority
> deriving Show

> data TokenSpec
>
> -- | The token is just a fixed value
> = TokenFixed String
>
> -- | The token is an expression involving the value of the lexed token.
> | TokenWithValue ExpressionWithHole
>
> deriving (Eq, Show)

> data Grammar eliminator
> = Grammar {
> productions :: [Production eliminator],
> lookupProdNo :: Int -> Production eliminator,
> lookupProdsOfName :: Name -> [Int],
> token_specs :: [(Name,String)],
> token_specs :: [(Name, TokenSpec)],
> terminals :: [Name],
> non_terminals :: [Name],
> starts :: [(String,Name,Name,Bool)],
@@ -145,23 +156,3 @@ For array-based parsers, see the note in Tabular/LALR.lhs.
> dummyTok = 2
> errorTok = 1
> epsilonTok = 0

-----------------------------------------------------------------------------
Replace $$ with an arbitrary string, being careful to avoid ".." and '.'.

> mapDollarDollar :: String -> Maybe (String -> String)
> mapDollarDollar code0 = go code0 ""
> where go code acc =
> case code of
> [] -> Nothing
>
> '"' :r -> case reads code :: [(String,String)] of
> [] -> go r ('"':acc)
> (s,r'):_ -> go r' (reverse (show s) ++ acc)
> a:'\'' :r | isAlphaNum a -> go r ('\'':a:acc)
> '\'' :r -> case reads code :: [(Char,String)] of
> [] -> go r ('\'':acc)
> (c,r'):_ -> go r' (reverse (show c) ++ acc)
> '\\':'$':r -> go r ('$':acc)
> '$':'$':r -> Just (\repl -> reverse acc ++ repl ++ r)
> c:r -> go r (c:acc)
13 changes: 13 additions & 0 deletions lib/grammar/src/Happy/Grammar/ExpressionWithHole.hs
@@ -0,0 +1,13 @@
module Happy.Grammar.ExpressionWithHole where

-- | The overall expression is
-- 'exprLeft ++ substitutedForHole ++ exprRight'.
data ExpressionWithHole
= ExpressionWithHole {
exprLeft :: String,
exprRight :: String
}
deriving (Eq, Show)

substExpressionWithHole :: ExpressionWithHole -> String -> String
substExpressionWithHole (ExpressionWithHole l r) = \repr -> l ++ repr ++ r
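
A tiny usage sketch of the new module (the example values are made up): substExpressionWithHole simply fills the hole, and filling it with the literal "$$" reconstructs the spec as written in the grammar, which is what the Info backend below does when printing token specs.

```haskell
import Happy.Grammar.ExpressionWithHole

main :: IO ()
main = do
  let e = ExpressionWithHole { exprLeft = "TokenInt ", exprRight = "" }
  -- Fill the hole with a concrete variable name ...
  putStrLn (substExpressionWithHole e "happy_dollar_dollar")  -- TokenInt happy_dollar_dollar
  -- ... or put "$$" back to recover the original spec text.
  putStrLn (substExpressionWithHole e "$$")                   -- TokenInt $$
```
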
5 changes: 4 additions & 1 deletion lib/happy-lib.cabal
@@ -75,7 +75,9 @@ common common-stanza
library grammar
import: common-stanza
hs-source-dirs: grammar/src
exposed-modules: Happy.Grammar
exposed-modules:
Happy.Grammar
Happy.Grammar.ExpressionWithHole
build-depends: base < 5, array

library frontend
@@ -129,6 +131,7 @@ library backend-glr
library
import: common-stanza
reexported-modules: Happy.Grammar,
Happy.Grammar.ExpressionWithHole,
Happy.Frontend,
Happy.Frontend.AbsSyn,
Happy.Frontend.Mangler,
6 changes: 5 additions & 1 deletion lib/tabular/src/Happy/Tabular/Info.lhs
@@ -9,6 +9,7 @@ Generating info files.
> import Data.Set ( Set )
> import qualified Data.Set as Set hiding ( Set )
> import Happy.Grammar
> import Happy.Grammar.ExpressionWithHole ( substExpressionWithHole )
> import Happy.Tabular.LALR ( Lr0Item(..), LRAction(..), Goto(..), GotoTable, ActionTable )

> import Data.Array
@@ -182,7 +183,10 @@ Produce a file of parser information, useful for debugging the parser.
> showTerminal (t,s)
> = str "\t"
> . showJName 15 t
> . str "{ " . str s . str " }"
> . str "{ " . showToken s . str " }"

> showToken (TokenFixed s) = str s
> showToken (TokenWithValue e) = str $ substExpressionWithHole e "$$"

> showNonTerminals
> = banner "Non-terminals"