Skip to content

Commit

Permalink
Merge pull request #155 from andreasabel/issue71
Browse files Browse the repository at this point in the history
[ #71 ] warn about nullable regexs in the absence of start codes
  • Loading branch information
simonmar authored Feb 24, 2020
2 parents 25c8c6d + 564f22a commit 60d5932
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 44 deletions.
29 changes: 20 additions & 9 deletions src/AbsSyn.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ module AbsSyn (
wrapperName,
Scanner(..),
RECtx(..),
RExp(..),
RExp(..), nullable,
DFA(..), State(..), SNum, StartCode, Accept(..),
RightContext(..), showRCtx, strtype,
encodeStartCodes, extractActions,
Expand Down Expand Up @@ -188,21 +188,21 @@ usesPreds dfa
-- Regular expressions

-- `RExp' provides an abstract syntax for regular expressions. `Eps' will
-- match empty strings; `Ch p' matches strings containinng a single character
-- match empty strings; `Ch p' matches strings containing a single character
-- `c' if `p c' is true; `re1 :%% re2' matches a string if `re1' matches one of
-- its prefixes and `re2' matches the rest; `re1 :| re2' matches a string if
-- `re1' or `re2' matches it; `Star re', `Plus re' and `Ques re' can be
-- expressed in terms of the other operators. See the definitions of `ARexp'
-- for a formal definition of the semantics of these operators.

data RExp
= Eps
| Ch CharSet
| RExp :%% RExp
| RExp :| RExp
| Star RExp
| Plus RExp
| Ques RExp
= Eps -- ^ Empty.
| Ch CharSet -- ^ Singleton.
| RExp :%% RExp -- ^ Sequence.
| RExp :| RExp -- ^ Alternative.
| Star RExp -- ^ Zero or more repetitions.
| Plus RExp -- ^ One or more repetitions.
| Ques RExp -- ^ Zero or one repetitions.

instance Show RExp where
showsPrec _ Eps = showString "()"
Expand All @@ -213,6 +213,17 @@ instance Show RExp where
showsPrec _ (Plus r) = shows r . ('+':)
showsPrec _ (Ques r) = shows r . ('?':)

-- | A regular expression is nullable if it matches the empty string.
nullable :: RExp -> Bool
nullable Eps = True
nullable Ch{} = False
nullable (l :%% r) = nullable l && nullable r
nullable (l :| r) = nullable l || nullable r
nullable Star{} = True
nullable (Plus r) = nullable r
nullable Ques{} = True


{------------------------------------------------------------------------------
Abstract Regular Expression
------------------------------------------------------------------------------}
Expand Down
13 changes: 11 additions & 2 deletions src/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import Info
import Map ( Map )
import qualified Map hiding ( Map )
import Output
import ParseMonad ( runP )
import ParseMonad ( runP, Warning(..) )
import Parser
import Scan
import Util ( hline )
Expand Down Expand Up @@ -114,11 +114,20 @@ parseScript file prg =
Left (Nothing, err) ->
die (file ++ ": " ++ err ++ "\n")

Right script@(_, _, scanner, _) -> do
Right (warnings, script@(_, _, scanner, _)) -> do
-- issue 46: give proper error when lexer definition is empty
when (null $ scannerTokens scanner) $
dieAlex $ file ++ " contains no lexer rules\n"
-- issue 71: warn about nullable regular expressions
mapM_ printWarning warnings
return script
where
printWarning (WarnNullableRExp (AlexPn _ line col) msg) =
hPutStrLn stderr $ concat
[ "Warning: "
, file , ":", show line , ":" , show col , ": "
, msg
]

alex :: [CLIFlags]
-> FilePath
Expand Down
83 changes: 60 additions & 23 deletions src/ParseMonad.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- -----------------------------------------------------------------------------
--
--
-- ParseMonad.hs, part of Alex
--
-- (c) Simon Marlow 2003
Expand All @@ -9,7 +9,7 @@
module ParseMonad (
AlexInput, alexInputPrevChar, alexGetChar, alexGetByte,
AlexPosn(..), alexStartPos,

Warning(..), warnIfNullable,
P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac,
setStartCode, getStartCode, getInput, setInput,
) where
Expand All @@ -23,7 +23,7 @@ import UTF8
#if __GLASGOW_HASKELL__ < 710
import Control.Applicative ( Applicative(..) )
#endif
import Control.Monad ( liftM, ap )
import Control.Monad ( liftM, ap, when )
import Data.Word (Word8)
-- -----------------------------------------------------------------------------
-- The input type
Expand All @@ -49,15 +49,15 @@ alexGetChar (_, _ ,_ : _, _) = undefined -- hide compiler warning
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
alexGetByte (_,_,[],[]) = Nothing
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
(b:bs) = UTF8.encode c
in p' `seq` Just (b, (p', c, bs, s))

-- -----------------------------------------------------------------------------
-- Token positions

-- `Posn' records the location of a token in the input text. It has three
-- fields: the address (number of chacaters preceding the token), line number
-- fields: the address (number of charaters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
Expand All @@ -77,15 +77,22 @@ alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
-- -----------------------------------------------------------------------------
-- Alex lexing/parsing monad

data Warning
= WarnNullableRExp
{ _warnPos :: AlexPosn -- ^ The position of the code following the regex.
, _warnText :: String -- ^ Warning text.
}

type ParseError = (Maybe AlexPosn, String)
type StartCode = Int

data PState = PState {
smac_env :: Map String CharSet,
rmac_env :: Map String RExp,
startcode :: Int,
input :: AlexInput
}
data PState = PState
{ warnings :: [Warning] -- ^ Stack of warnings, top = last warning.
, smac_env :: Map String CharSet
, rmac_env :: Map String RExp
, startcode :: Int
, input :: AlexInput
}

newtype P a = P { unP :: PState -> Either ParseError (PState,a) }

Expand All @@ -102,15 +109,27 @@ instance Monad P where
Right (env',ok) -> unP (k ok) env'
return = pure

runP :: String -> (Map String CharSet, Map String RExp)
-> P a -> Either ParseError a
runP str (senv,renv) (P p)
-- | Run the parser on given input.
runP :: String
-- ^ Input string.
-> (Map String CharSet, Map String RExp)
-- ^ Character set and regex definitions.
-> P a
-- ^ Parsing computation.
-> Either ParseError ([Warning], a)
-- ^ List of warnings in first-to-last order, result.
runP str (senv,renv) (P p)
= case p initial_state of
Left err -> Left err
Right (_,a) -> Right a
where initial_state =
PState{ smac_env=senv, rmac_env=renv,
startcode = 0, input=(alexStartPos,'\n',[],str) }
Right (s, a) -> Right (reverse (warnings s), a)
where
initial_state = PState
{ warnings = []
, smac_env = senv
, rmac_env = renv
, startcode = 0
, input = (alexStartPos, '\n', [], str)
}

failP :: String -> P a
failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)
Expand All @@ -121,24 +140,24 @@ failP str = P $ \PState{ input = (p,_,_,_) } -> Left (Just p,str)

lookupSMac :: (AlexPosn,String) -> P CharSet
lookupSMac (posn,smac)
= P $ \s@PState{ smac_env = senv } ->
= P $ \s@PState{ smac_env = senv } ->
case Map.lookup smac senv of
Just ok -> Right (s,ok)
Nothing -> Left (Just posn, "unknown set macro: $" ++ smac)

lookupRMac :: String -> P RExp
lookupRMac rmac
= P $ \s@PState{ rmac_env = renv } ->
lookupRMac rmac
= P $ \s@PState{ rmac_env = renv } ->
case Map.lookup rmac renv of
Just ok -> Right (s,ok)
Nothing -> Left (Nothing, "unknown regex macro: %" ++ rmac)

newSMac :: String -> CharSet -> P ()
newSMac smac set
newSMac smac set
= P $ \s -> Right (s{smac_env = Map.insert smac set (smac_env s)}, ())

newRMac :: String -> RExp -> P ()
newRMac rmac rexp
newRMac rmac rexp
= P $ \s -> Right (s{rmac_env = Map.insert rmac rexp (rmac_env s)}, ())

setStartCode :: StartCode -> P ()
Expand All @@ -152,3 +171,21 @@ getInput = P $ \s -> Right (s, input s)

setInput :: AlexInput -> P ()
setInput inp = P $ \s -> Right (s{ input = inp }, ())

-- | Add a warning if given regular expression is nullable
-- unless the user wrote the regex 'Eps'.
warnIfNullable
:: RExp -- ^ Regular expression.
-> AlexPosn -- ^ Position associated to regular expression.
-> P ()
-- If the user wrote @()@, they wanted to match the empty sequence!
-- Thus, skip the warning then.
warnIfNullable Eps _ = return ()
warnIfNullable r pos = when (nullable r) $ P $ \ s ->
Right (s{ warnings = WarnNullableRExp pos w : warnings s}, ())
where
w = unwords
[ "Regular expression"
, show r
, "matches the empty string."
]
27 changes: 17 additions & 10 deletions src/Parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,23 @@ tokendefs :: { [RECtx] }
| {- empty -} { [] }
tokendef :: { [RECtx] }
: startcodes rule { [ replaceCodes $1 $2 ] }
: startcodes rule { [ replaceCodes $1 (snd $2) ] }
| startcodes '{' rules '}' { map (replaceCodes $1) $3 }
| rule { [ $1 ] }
rule :: { RECtx }
: context rhs { let (l,e,r) = $1 in
RECtx [] l e r $2 }
| rule {% do
let (pos, res@(RECtx _ _ e _ _)) = $1
warnIfNullable e pos
return [ res ]
}
rule :: { (AlexPosn, RECtx) }
: context rhs { let
(l, e, r) = $1
(pos, code) = $2
in (pos, RECtx [] l e r code)
}
rules :: { [RECtx] }
: rule rules { $1 : $2 }
: rule rules { snd $1 : $2 }
| {- empty -} { [] }
startcodes :: { [(String,StartCode)] }
Expand All @@ -132,9 +139,9 @@ startcode :: { String }
: ZERO { "0" }
| ID { $1 }
rhs :: { Maybe Code }
: CODE { case $1 of T _ (CodeT code) -> Just code }
| ';' { Nothing }
rhs :: { (AlexPosn, Maybe Code) }
: CODE { case $1 of T pos (CodeT code) -> (pos, Just code) }
| ';' { (tokPosn $1, Nothing) }
context :: { Maybe CharSet, RExp, RightContext RExp }
: left_ctx rexp right_ctx { (Just $1,$2,$3) }
Expand Down

0 comments on commit 60d5932

Please sign in to comment.