diff --git a/alex.cabal b/alex.cabal index 8eb501a1..9948b6d5 100644 --- a/alex.cabal +++ b/alex.cabal @@ -92,6 +92,9 @@ extra-source-files: tests/issue_119.x tests/issue_141.x tests/issue_197.x + tests/strict_text_typeclass.x + tests/posn_typeclass_strict_text.x + tests/tokens_monadUserState_strict_text.x source-repository head type: git diff --git a/data/AlexWrappers.hs b/data/AlexWrappers.hs index 955b27e9..0aa90bde 100644 --- a/data/AlexWrappers.hs +++ b/data/AlexWrappers.hs @@ -4,11 +4,16 @@ -- This code is in the PUBLIC DOMAIN; you may copy it freely and use -- it for any purpose whatsoever. -#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) +#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_MONAD_STRICT_TEXT) import Control.Applicative as App (Applicative (..)) #endif +#if defined(ALEX_STRICT_TEXT) || defined (ALEX_POSN_STRICT_TEXT) || defined(ALEX_MONAD_STRICT_TEXT) +import qualified Data.Text +#endif + import Data.Word (Word8) + #if defined(ALEX_BASIC_BYTESTRING) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) import Data.Int (Int64) @@ -82,6 +87,50 @@ alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c (b, bs) -> p' `seq` Just (b, (p', c, bs, s)) #endif +#if defined (ALEX_STRICT_TEXT) +type AlexInput = (Char, -- previous char + [Byte], -- pending bytes on current char + Data.Text.Text) -- current input string + +ignorePendingBytes :: AlexInput -> AlexInput +ignorePendingBytes (c,_ps,s) = (c,[],s) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (c,_bs,_s) = c + +alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) +alexGetByte (c,(b:bs),s) = Just (b,(c,bs,s)) +alexGetByte (_,[],s) = case Data.Text.uncons s of + Just (c, cs) -> + case utf8Encode' c of + (b, bs) -> Just (b, (c, bs, cs)) + Nothing -> + Nothing +#endif + +#if defined (ALEX_POSN_STRICT_TEXT) || defined(ALEX_MONAD_STRICT_TEXT) +type AlexInput = (AlexPosn, -- current position, + Char, -- previous char + [Byte], -- pending bytes on current char + Data.Text.Text) -- current input string + +ignorePendingBytes :: AlexInput -> AlexInput +ignorePendingBytes (p,c,_ps,s) = (p,c,[],s) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_p,c,_bs,_s) = c + +alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) +alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) +alexGetByte (p,_,[],s) = case Data.Text.uncons s of + Just (c, cs) -> + let p' = alexMove p c + in case utf8Encode' c of + (b, bs) -> p' `seq` Just (b, (p', c, bs, cs)) + Nothing -> + Nothing +#endif + #if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) type AlexInput = (AlexPosn, -- current position, Char, -- previous char @@ -151,7 +200,7 @@ alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) = -- `move_pos' calculates the new position after traversing a given character, -- assuming the usual eight character tab stops. -#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) +#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) || defined (ALEX_POSN_STRICT_TEXT) || defined(ALEX_MONAD_STRICT_TEXT) data AlexPosn = AlexPn !Int !Int !Int deriving (Eq, Show, Ord) @@ -167,14 +216,20 @@ alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1) -- ----------------------------------------------------------------------------- -- Monad (default and with ByteString input) -#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) +#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_MONAD_STRICT_TEXT) data AlexState = AlexState { alex_pos :: !AlexPosn, -- position at current input location -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD_STRICT_TEXT + alex_inp :: Data.Text.Text, + alex_chr :: !Char, + alex_bytes :: [Byte], +#endif /* ALEX_MONAD_STRICT_TEXT */ +#ifdef ALEX_MONAD alex_inp :: String, -- the current input alex_chr :: !Char, -- the character before the input alex_bytes :: [Byte], -#else /* ALEX_MONAD_BYTESTRING */ +#endif /* ALEX_MONAD */ +#ifdef ALEX_MONAD_BYTESTRING alex_bpos:: !Int64, -- bytes consumed so far alex_inp :: ByteString.ByteString, -- the current input alex_chr :: !Char, -- the character before the input @@ -187,15 +242,24 @@ data AlexState = AlexState { -- Compile with -funbox-strict-fields for best results! -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD runAlex :: String -> Alex a -> Either String a runAlex input__ (Alex f) = case f (AlexState {alex_bytes = [], -#else /* ALEX_MONAD_BYTESTRING */ + alex_pos = alexStartPos, + alex_inp = input__, + alex_chr = '\n', +#ifdef ALEX_MONAD_USER_STATE + alex_ust = alexInitUserState, +#endif + alex_scd = 0}) of Left msg -> Left msg + Right ( _, a ) -> Right a +#endif + +#ifdef ALEX_MONAD_BYTESTRING runAlex :: ByteString.ByteString -> Alex a -> Either String a runAlex input__ (Alex f) = case f (AlexState {alex_bpos = 0, -#endif /* ALEX_MONAD_BYTESTRING */ alex_pos = alexStartPos, alex_inp = input__, alex_chr = '\n', @@ -204,6 +268,21 @@ runAlex input__ (Alex f) #endif alex_scd = 0}) of Left msg -> Left msg Right ( _, a ) -> Right a +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +runAlex :: Data.Text.Text -> Alex a -> Either String a +runAlex input__ (Alex f) + = case f (AlexState {alex_bytes = [], + alex_pos = alexStartPos, + alex_inp = input__, + alex_chr = '\n', +#ifdef ALEX_MONAD_USER_STATE + alex_ust = alexInitUserState, +#endif + alex_scd = 0}) of Left msg -> Left msg + Right ( _, a ) -> Right a +#endif newtype Alex a = Alex { unAlex :: AlexState -> Either String (AlexState, a) } @@ -226,28 +305,51 @@ instance Monad Alex where Right (s',a) -> unAlex (k a) s' return = App.pure + +#ifdef ALEX_MONAD alexGetInput :: Alex AlexInput alexGetInput -#ifndef ALEX_MONAD_BYTESTRING = Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} -> Right (s, (pos,c,bs,inp__)) -#else /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_BYTESTRING +alexGetInput :: Alex AlexInput +alexGetInput = Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} -> Right (s, (pos,c,inp__,bpos)) -#endif /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +alexGetInput :: Alex AlexInput +alexGetInput + = Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} -> + Right (s, (pos,c,bs,inp__)) +#endif +#ifdef ALEX_MONAD alexSetInput :: AlexInput -> Alex () -#ifndef ALEX_MONAD_BYTESTRING alexSetInput (pos,c,bs,inp__) = Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of -#else /* ALEX_MONAD_BYTESTRING */ + state__@(AlexState{}) -> Right (state__, ()) +#endif + +#ifdef ALEX_MONAD_BYTESTRING +alexSetInput :: AlexInput -> Alex () alexSetInput (pos,c,inp__,bpos) = Alex $ \s -> case s{alex_pos=pos, alex_bpos=bpos, alex_chr=c, alex_inp=inp__} of -#endif /* ALEX_MONAD_BYTESTRING */ - state__@(AlexState{}) -> Right (state__, ()) + state__@(AlexState{}) -> Right (state__, ()) +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +alexSetInput :: AlexInput -> Alex () +alexSetInput (pos,c,bs,inp__) + = Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of + state__@(AlexState{}) -> Right (state__, ()) +#endif alexError :: String -> Alex a alexError message = Alex $ const $ Left message @@ -266,12 +368,9 @@ alexSetUserState :: AlexUserState -> Alex () alexSetUserState ss = Alex $ \s -> Right (s{alex_ust=ss}, ()) #endif /* !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE) */ +#ifdef ALEX_MONAD alexMonadScan = do -#ifndef ALEX_MONAD_BYTESTRING inp__ <- alexGetInput -#else /* ALEX_MONAD_BYTESTRING */ - inp__@(_,_,_,n) <- alexGetInput -#endif /* ALEX_MONAD_BYTESTRING */ sc <- alexGetStartCode case alexScan inp__ sc of AlexEOF -> alexEOF @@ -279,22 +378,55 @@ alexMonadScan = do AlexSkip inp__' _len -> do alexSetInput inp__' alexMonadScan -#ifndef ALEX_MONAD_BYTESTRING AlexToken inp__' len action -> do -#else /* ALEX_MONAD_BYTESTRING */ + alexSetInput inp__' + action (ignorePendingBytes inp__) len +#endif + +#ifdef ALEX_MONAD_BYTESTRING +alexMonadScan = do + inp__@(_,_,_,n) <- alexGetInput + sc <- alexGetStartCode + case alexScan inp__ sc of + AlexEOF -> alexEOF + AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) + AlexSkip inp__' _len -> do + alexSetInput inp__' + alexMonadScan AlexToken inp__'@(_,_,_,n') _ action -> let len = n'-n in do -#endif /* ALEX_MONAD_BYTESTRING */ alexSetInput inp__' action (ignorePendingBytes inp__) len +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +alexMonadScan = do + inp__ <- alexGetInput + sc <- alexGetStartCode + case alexScan inp__ sc of + AlexEOF -> alexEOF + AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) + AlexSkip inp__' _len -> do + alexSetInput inp__' + alexMonadScan + AlexToken inp__' len action -> do + alexSetInput inp__' + action (ignorePendingBytes inp__) len +#endif -- ----------------------------------------------------------------------------- -- Useful token actions -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD type AlexAction result = AlexInput -> Int -> Alex result -#else /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_BYTESTRING type AlexAction result = AlexInput -> Int64 -> Alex result -#endif /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +type AlexAction result = AlexInput -> Int -> Alex result +#endif -- just ignore this token and scan another one -- skip :: AlexAction result @@ -310,14 +442,22 @@ andBegin :: AlexAction result -> Int -> AlexAction result alexSetStartCode code action input__ len -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD token :: (AlexInput -> Int -> token) -> AlexAction token -#else /* ALEX_MONAD_BYTESTRING */ +token t input__ len = return (t input__ len) +#endif + +#ifdef ALEX_MONAD_BYTESTRING token :: (AlexInput -> Int64 -> token) -> AlexAction token -#endif /* ALEX_MONAD_BYTESTRING */ token t input__ len = return (t input__ len) -#endif /* defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) */ +#endif +#ifdef ALEX_MONAD_STRICT_TEXT +token :: (AlexInput -> Int -> token) -> AlexAction token +token t input__ len = return (t input__ len) +#endif + +#endif /* defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_MONAD_STRICT_TEXT) */ -- ----------------------------------------------------------------------------- -- Basic wrapper @@ -378,6 +518,28 @@ alexScanTokens str = go (AlexInput '\n' str 0) #endif +#ifdef ALEX_STRICT_TEXT +-- alexScanTokens :: Data.Text.Text -> [token] +alexScanTokens str = go ('\n',[],str) + where go inp__@(_,_bs,s) = + case alexScan inp__ 0 of + AlexEOF -> [] + AlexError _ -> error "lexical error" + AlexSkip inp__' _len -> go inp__' + AlexToken inp__' len act -> act (Data.Text.take len s) : go inp__' +#endif + +#ifdef ALEX_POSN_STRICT_TEXT +-- alexScanTokens :: Data.Text.Text -> [token] +alexScanTokens str = go (alexStartPos,'\n',[],str) + where go inp__@(pos,_,_bs,s) = + case alexScan inp__ 0 of + AlexEOF -> [] + AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) + AlexSkip inp__' _len -> go inp__' + AlexToken inp__' len act -> act pos (Data.Text.take len s) : go inp__' +#endif + -- ----------------------------------------------------------------------------- -- Posn wrapper diff --git a/doc/api.rst b/doc/api.rst index a47bc275..0827c30b 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -577,6 +577,143 @@ The ``monadUserState-bytestring`` wrapper is the same as the ``monadUserState`` All of the actions in your lexical specification have the same type as in the ``monadUserState`` wrapper. It is only the types of the function to run the monad and the type of the ``token`` function that change. +The text wrappers +~~~~~~~~~~~~~~~~~ + +The ``strict-text``, ``posn-strict-text`` and ``monad-strict-text`` wrappers are variations on the ``basic``, ``posn`` and ``monad`` wrappers that use strict ``Text``\ s as the input and token types instead of an ordinary ``String``. + +The point of using these wrappers is that ``Text``\ s provide a more memory efficient representation of an input stream. +They can also be somewhat faster to process. +Note that using these wrappers adds a dependency on the ``Data.Text`` modules, which live in the ``text`` package. + +Note also that by default tokens share space with the input ``Text`` which has the advantage that it does not need to make a copy but it also prevents the input from being garbage collected. +It may make sense in some applications to use ``Text``'s ``copy`` function to unshare tokens that will be kept for a long time, to allow the original input to be collected. + +The "strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``strict-text`` wrapper is the same as the ``basic`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + type AlexInput = + ( Char -- previous char + , [Byte] -- rest of the bytes for the current char + , Text -- rest of the input Text + ) + + alexGetByte :: AlexInput -> Maybe (Char, AlexInput) + + alexInputPrevChar :: AlexInput -> Char + + -- alexScanTokens :: Text -> [token] + +All of the actions in your lexical specification should have type: + +.. code-block:: haskell + + { ... } :: Text -> token + +for some type ``token``. + +The "posn-strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``posn-strict-text`` wrapper is the same as the ``posn`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + type AlexInput = + ( AlexPosn -- current position, + , Char -- previous char + , [Byte] -- rest of the bytes for the current char + , Text -- current input Text + ) + + -- alexScanTokens :: Text -> [token] + +All of the actions in your lexical specification should have type: + +.. code-block:: haskell + + { ... } :: AlexPosn -> Text -> token + +for some type ``token``. + +The "monad-strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``monad-strict-text`` wrapper is the same as the ``monad`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + data AlexState = AlexState + { alex_pos :: !AlexPosn -- position at current input location + , alex_inp :: Text -- the current input + , alex_chr :: !Char -- the character before the input + , alex_bytes :: [Byte] -- rest of the bytes for the current char + , alex_scd :: !Int -- the current startcode + } + + newtype Alex a = Alex { unAlex :: AlexState + -> Either String (AlexState, a) } + + instance Functor Alex where ... + instance Applicative Alex where ... + instance Monad Alex where ... + + runAlex :: Text -> Alex a -> Either String a + + type AlexInput = + ( AlexPosn -- current position, + , Char -- previous char + , [Byte] -- rest of the bytes for the current char + , Text -- current input string + ) + + -- token :: (AlexInput -> Int -> token) -> AlexAction token + +All of the actions in your lexical specification have the same type as in the ``monad`` wrapper. +It is only the types of the function to run the monad and the type of the ``token`` function that change. + +The "monadUserState-strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``monadUserState-strict-text`` wrapper is the same as the ``monadUserState`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + data AlexState = AlexState + { alex_pos :: !AlexPosn -- position at current input location + , alex_inp :: Text -- the current input + , alex_chr :: !Char -- the character before the input + , alex_bytes :: [Byte] -- rest of the bytes for the current char + , alex_scd :: !Int -- the current startcode + , alex_ust :: AlexUserState -- AlexUserState will be defined in the user program + } + + newtype Alex a = Alex { unAlex :: AlexState + -> Either String (AlexState, a) } + + runAlex :: Text -> Alex a -> Either String a + + -- token :: (AlexInput -> Int -> token) -> AlexAction token + +All of the actions in your lexical specification have the same type as in the ``monadUserState`` wrapper. +It is only the types of the function to run the monad and the type of the ``token`` function that change. + .. _types: Type Signatures and Typeclasses diff --git a/src/AbsSyn.hs b/src/AbsSyn.hs index 2e993d91..fd189398 100644 --- a/src/AbsSyn.hs +++ b/src/AbsSyn.hs @@ -16,7 +16,7 @@ module AbsSyn ( RECtx(..), RExp(..), nullable, DFA(..), State(..), SNum, StartCode, Accept(..), - RightContext(..), showRCtx, strtype, + RightContext(..), showRCtx, encodeStartCodes, extractActions, Target(..), UsesPreds(..), usesPreds, @@ -49,44 +49,53 @@ data Directive | TokenType String deriving Show -data StrType = Str | Lazy | Strict +data StrType = Str | Lazy | Strict | StrictText + deriving Eq instance Show StrType where show Str = "String" show Lazy = "ByteString.ByteString" show Strict = "ByteString.ByteString" + show StrictText = "Data.Text.Text" data Scheme = Default { defaultTypeInfo :: Maybe (Maybe String, String) } | GScan { gscanTypeInfo :: Maybe (Maybe String, String) } | Basic { basicStrType :: StrType, basicTypeInfo :: Maybe (Maybe String, String) } - | Posn { posnByteString :: Bool, + | Posn { posnStrType :: StrType, posnTypeInfo :: Maybe (Maybe String, String) } - | Monad { monadByteString :: Bool, monadUserState :: Bool, + | Monad { monadStrType :: StrType, + monadUserState :: Bool, monadTypeInfo :: Maybe (Maybe String, String) } -strtype :: Bool -> String -strtype True = "ByteString.ByteString" -strtype False = "String" - - wrapperCppDefs :: Scheme -> Maybe [String] wrapperCppDefs Default {} = Nothing wrapperCppDefs GScan {} = Just ["ALEX_GSCAN"] wrapperCppDefs Basic { basicStrType = Str } = Just ["ALEX_BASIC"] wrapperCppDefs Basic { basicStrType = Lazy } = Just ["ALEX_BASIC_BYTESTRING"] wrapperCppDefs Basic { basicStrType = Strict } = Just ["ALEX_STRICT_BYTESTRING"] -wrapperCppDefs Posn { posnByteString = False } = Just ["ALEX_POSN"] -wrapperCppDefs Posn { posnByteString = True } = Just ["ALEX_POSN_BYTESTRING"] -wrapperCppDefs Monad { monadByteString = False, +wrapperCppDefs Basic { basicStrType = StrictText } = Just ["ALEX_STRICT_TEXT"] +wrapperCppDefs Posn { posnStrType = Str } = Just ["ALEX_POSN"] +wrapperCppDefs Posn { posnStrType = Lazy } = Just ["ALEX_POSN_BYTESTRING"] +wrapperCppDefs Posn { posnStrType = Strict } = Just ["ALEX_POSN_BYTESTRING"] +wrapperCppDefs Posn { posnStrType = StrictText } = Just ["ALEX_POSN_STRICT_TEXT"] +wrapperCppDefs Monad { monadStrType = Str, monadUserState = False } = Just ["ALEX_MONAD"] -wrapperCppDefs Monad { monadByteString = True, +wrapperCppDefs Monad { monadStrType = Strict, monadUserState = False } = Just ["ALEX_MONAD_BYTESTRING"] -wrapperCppDefs Monad { monadByteString = False, +wrapperCppDefs Monad { monadStrType = Lazy, + monadUserState = False } = Just ["ALEX_MONAD_BYTESTRING"] +wrapperCppDefs Monad { monadStrType = StrictText, + monadUserState = False } = Just ["ALEX_MONAD_STRICT_TEXT"] +wrapperCppDefs Monad { monadStrType = Str, monadUserState = True } = Just ["ALEX_MONAD", "ALEX_MONAD_USER_STATE"] -wrapperCppDefs Monad { monadByteString = True, +wrapperCppDefs Monad { monadStrType = Strict, + monadUserState = True } = Just ["ALEX_MONAD_BYTESTRING", "ALEX_MONAD_USER_STATE"] +wrapperCppDefs Monad { monadStrType = Lazy, monadUserState = True } = Just ["ALEX_MONAD_BYTESTRING", "ALEX_MONAD_USER_STATE"] +wrapperCppDefs Monad { monadStrType = StrictText, + monadUserState = True } = Just ["ALEX_MONAD_STRICT_TEXT", "ALEX_MONAD_USER_STATE"] -- TODO: update this comment -- @@ -369,25 +378,25 @@ extractActions scheme scanner = (scanner{scannerTokens = new_tokens}, decl_str . basicTypeInfo = Just (Just tyclasses, tokenty) } -> nl . str fun . str " :: (" . str tyclasses . str ") => " . str (show strty) . str " -> " . str tokenty . nl - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, tokenty) } -> nl . - str fun . str " :: AlexPosn -> " . str (strtype isByteString) . str " -> " + str fun . str " :: AlexPosn -> " . str (show strty) . str " -> " . str tokenty . nl - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Just tyclasses, tokenty) } -> nl . str fun . str " :: (" . str tyclasses . str ") => AlexPosn -> " . - str (strtype isByteString) . str " -> " . str tokenty . nl - Monad { monadByteString = isByteString, + str (show strty) . str " -> " . str tokenty . nl + Monad { monadStrType = strty, monadTypeInfo = Just (Nothing, tokenty) } -> nl . let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str fun . str " :: AlexInput -> " . str actintty . str " -> Alex (" . str tokenty . str ")" . nl - Monad { monadByteString = isByteString, + Monad { monadStrType = strty, monadTypeInfo = Just (Just tyclasses, tokenty) } -> nl . let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str fun . str " :: (" . str tyclasses . str ") =>" . str " AlexInput -> " . str actintty diff --git a/src/Main.hs b/src/Main.hs index 558332cd..1de73e65 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -290,12 +290,13 @@ getScheme directives = (Just _, Nothing, Nothing) -> dieAlex "%typeclass directive without %token directive" | single == "basic" || single == "basic-bytestring" || - single == "strict-bytestring" -> + single == "strict-bytestring" || single == "strict-text" -> let strty = case single of "basic" -> Str "basic-bytestring" -> Lazy "strict-bytestring" -> Strict + "strict-text" -> StrictText _ -> error "Impossible case" in case (typeclass, token, action) of (Nothing, Nothing, Nothing) -> @@ -311,42 +312,50 @@ getScheme directives = dieAlex "%action directive not allowed with a wrapper" (Just _, Nothing, Nothing) -> dieAlex "%typeclass directive without %token directive" - | single == "posn" || single == "posn-bytestring" -> + | single == "posn" || single == "posn-bytestring" || single == "posn-strict-text" -> let - isByteString = single == "posn-bytestring" + strty = case single of + "posn" -> Str + "posn-bytestring" -> Strict + "posn-strict-text" -> StrictText + _ -> error "invalid str type for posn" in case (typeclass, token, action) of (Nothing, Nothing, Nothing) -> - return Posn { posnByteString = isByteString, + return Posn { posnStrType = strty, posnTypeInfo = Nothing } (Nothing, Just tokenty, Nothing) -> - return Posn { posnByteString = isByteString, + return Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, tokenty) } (Just _, Just tokenty, Nothing) -> - return Posn { posnByteString = isByteString, + return Posn { posnStrType = strty, posnTypeInfo = Just (typeclass, tokenty) } (_, _, Just _) -> dieAlex "%action directive not allowed with a wrapper" (Just _, Nothing, Nothing) -> dieAlex "%typeclass directive without %token directive" - | single == "monad" || single == "monad-bytestring" || + | single == "monad" || single == "monad-bytestring" || single == "monad-strict-text" || single == "monadUserState" || - single == "monadUserState-bytestring" -> + single == "monadUserState-bytestring" || + single == "monadUserState-strict-text" -> let + isText = single == "monad-strict-text" || + single == "monadUserState-strict-text" isByteString = single == "monad-bytestring" || single == "monadUserState-bytestring" userState = single == "monadUserState" || - single == "monadUserState-bytestring" + single == "monadUserState-bytestring" || + single == "monadUserState-strict-text" in case (typeclass, token, action) of (Nothing, Nothing, Nothing) -> - return Monad { monadByteString = isByteString, + return Monad { monadStrType = if isByteString then Lazy else if isText then StrictText else Str, monadUserState = userState, monadTypeInfo = Nothing } (Nothing, Just tokenty, Nothing) -> - return Monad { monadByteString = isByteString, + return Monad { monadStrType = if isByteString then Lazy else if isText then StrictText else Str, monadUserState = userState, monadTypeInfo = Just (Nothing, tokenty) } (Just _, Just tokenty, Nothing) -> - return Monad { monadByteString = isByteString, + return Monad { monadStrType = if isByteString then Lazy else if isText then StrictText else Str, monadUserState = userState, monadTypeInfo = Just (typeclass, tokenty) } (_, _, Just _) -> diff --git a/src/Output.hs b/src/Output.hs index 42c44625..563fd2e9 100644 --- a/src/Output.hs +++ b/src/Output.hs @@ -122,28 +122,28 @@ outputDFA target _ _ scheme dfa . str ") => Array Int (" . str (show strty) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, toktype) } -> str actions_nm . str " :: Array Int (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype + . str (show strty) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Just tyclasses, toktype) } -> str actions_nm . str " :: (" . str tyclasses . str ") => Array Int (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype + . str (show strty) . str " -> " . str toktype . str ")\n" - Monad { monadByteString = isByteString, + Monad { monadStrType = strty, monadTypeInfo = Just (Nothing, toktype) } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str actions_nm . str " :: Array Int (AlexInput -> " . str actintty . str " -> Alex(" . str toktype . str "))\n" - Monad { monadByteString = isByteString, + Monad { monadStrType = strty, monadTypeInfo = Just (Just tyclasses, toktype) } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str actions_nm . str " :: (" . str tyclasses . str ") => Array Int (AlexInput -> " @@ -212,31 +212,31 @@ outputDFA target _ _ scheme dfa . str "alexScan :: (" . str tyclasses . str ") => AlexInput -> Int -> AlexReturn (" . str (show strty) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, toktype) } -> str "alex_scan_tkn :: () -> AlexInput -> " . str intty . str " -> " . str "AlexInput -> " . str intty . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" + . str (show strty) . str " -> " . str toktype . str ")\n" . str "alexScan :: AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + . str (show strty) . str " -> " . str toktype . str ")\n" + Posn { posnStrType = strty, posnTypeInfo = Just (Just tyclasses, toktype) } -> str "alex_scan_tkn :: () -> AlexInput -> " . str intty . str " -> " . str "AlexInput -> " . str intty . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" . str "alexScanUser :: (" . str tyclasses . str ") => () -> AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" + . str (show strty) . str " -> " . str toktype . str ")\n" . str "alexScan :: (" . str tyclasses . str ") => AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" + . str (show strty) . str " -> " . str toktype . str ")\n" Monad { monadTypeInfo = Just (Nothing, toktype), - monadByteString = isByteString, + monadStrType = strty, monadUserState = userState } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" userStateTy | userState = "AlexUserState" | otherwise = "()" in @@ -253,10 +253,10 @@ outputDFA target _ _ scheme dfa . str " -> Alex (" . str toktype . str "))\n" . str "alexMonadScan :: Alex (" . str toktype . str ")\n" Monad { monadTypeInfo = Just (Just tyclasses, toktype), - monadByteString = isByteString, + monadStrType = strty, monadUserState = userState } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" userStateTy | userState = "AlexUserState" | otherwise = "()" in diff --git a/tests/Makefile b/tests/Makefile index b0a90c3d..2618dff1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -32,6 +32,8 @@ GHC_VERSION:=$(shell $(HC) --numeric-version) GHC_VERSION_WORDS=$(subst ., ,$(GHC_VERSION)) GHC_MAJOR_VERSION=$(word 1,$(GHC_VERSION_WORDS)) GHC_MINOR_VERSION=$(word 2,$(GHC_VERSION_WORDS)) +# Text dependency comes with GHC from 8.4 onwards +GHC_SHIPS_WITH_TEXT:=$(shell if [ $(GHC_MAJOR_VERSION) -gt 8 -o $(GHC_MAJOR_VERSION) -ge 8 -a $(GHC_MINOR_VERSION) -ge 4 ]; then echo "yes"; else echo "no"; fi) # -fwarn-incomplete-uni-patterns only from 7.4 WARNS_FOR_GHC_GTEQ_7_4=-fwarn-incomplete-uni-patterns WARNS_FOR_GHC_LT_7_4=-fno-warn-lazy-unlifted-bindings @@ -77,6 +79,19 @@ TESTS = \ tokens_strict_bytestring.x \ unicode.x +ifeq "$(GHC_SHIPS_WITH_TEXT)" "yes" +TEXT_DEP = -package text + +TEXT_TESTS = \ + strict_text_typeclass.x \ + posn_typeclass_strict_text.x \ + tokens_monadUserState_strict_text.x +else +TEXT_DEP = + +TEXT_TESTS = +endif + # NOTE: `cabal` will set the `alex_datadir` env-var accordingly before invoking the test-suite #TEST_ALEX_OPTS = --template=../data/ TEST_ALEX_OPTS= @@ -89,7 +104,7 @@ TEST_ALEX_OPTS= CLEAN_FILES += *.n.hs *.g.hs *.info *.hi *.o *.bin *.exe -ALL_TEST_HS = $(shell echo $(TESTS) | sed -e 's/\([^\. ]*\)\.\(l\)\{0,1\}x/\1.n.hs \1.g.hs/g') +ALL_TEST_HS = $(shell echo $(TESTS) $(TEXT_TESTS) | sed -e 's/\([^\. ]*\)\.\(l\)\{0,1\}x/\1.n.hs \1.g.hs/g') ALL_TESTS = $(patsubst %.hs, %.run, $(ALL_TEST_HS)) @@ -97,7 +112,7 @@ ALL_TESTS = $(patsubst %.hs, %.run, $(ALL_TEST_HS)) ./$< %$(HS_PROG_EXT) : %.hs - $(HC) $(HC_OPTS) -package array -package bytestring $($*_LD_OPTS) $< -o $@ + $(HC) $(HC_OPTS) -package array -package bytestring $(TEXT_DEP) $($*_LD_OPTS) $< -o $@ all :: $(ALL_TESTS) diff --git a/tests/posn_typeclass_strict_text.x b/tests/posn_typeclass_strict_text.x new file mode 100644 index 00000000..5039dd5a --- /dev/null +++ b/tests/posn_typeclass_strict_text.x @@ -0,0 +1,79 @@ +{ + +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where +import System.Exit +import Prelude hiding (lex) + +import qualified Data.Text as Text + +} + +%wrapper "posn-strict-text" +%token "Token s" +%typeclass "Read s" + +tokens :- + +[a-b]+$ { idtoken 0 } +[c-d]+/"." { idtoken 1 } +[e-f]+/{ tokpred } { idtoken 2 } +^[g-h]+$ { idtoken 3 } +^[i-j]+/"." { idtoken 4 } +^[k-l]+/{ tokpred } { idtoken 5 } +[m-n]+$ { idtoken 6 } +[o-p]+/"." { idtoken 7 } +[q-r]+/{ tokpred } { idtoken 8 } +[0-1]^[s-t]+$ { idtoken 9 } +[2-3]^[u-v]+/"." { idtoken 10 } +[4-5]^[w-x]+/{ tokpred } { idtoken 11 } +[y-z]+ { idtoken 12 } +[A-B]+$ ; +[C-D]+/"." ; +[E-F]+/{ tokpred } ; +^[G-H]+$ ; +^[I-J]+/"." ; +^[K-L]+/{ tokpred } ; +[M-N]+$ ; +[O-P]+/"." ; +[Q-R]+/{ tokpred } ; +[0-1]^[S-T]+$ ; +[2-3]^[U-V]+/"." ; +[4-5]^[W-X]+/{ tokpred } ; +[Y-Z]+ ; +\. ; +[ \n\t\r]+ ; +[0-9] ; + +{ + +tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool +tokpred _ _ _ _ = True + +idtoken :: Read s => Int -> AlexPosn -> Text.Text -> Token s +idtoken n _ s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) + +data Token s = Id Int s + deriving (Show, Ord, Eq) + +lex :: Read s => Text.Text -> [Token s] +lex = alexScanTokens + +input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" + +tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", + Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", + Id 10 "uuvu", Id 11 "xxw"] + +main :: IO () +main = + let + result :: [Token String] + result = lex input + in do + if result /= tokens + then exitFailure + else exitWith ExitSuccess + +} diff --git a/tests/strict_text_typeclass.x b/tests/strict_text_typeclass.x new file mode 100644 index 00000000..9027953c --- /dev/null +++ b/tests/strict_text_typeclass.x @@ -0,0 +1,79 @@ +{ + +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where +import System.Exit +import Prelude hiding (lex) + +import qualified Data.Text as Text + +} + +%wrapper "strict-text" +%token "Token s" +%typeclass "Read s" + +tokens :- + +[a-b]+$ { idtoken 0 } +[c-d]+/"." { idtoken 1 } +[e-f]+/{ tokpred } { idtoken 2 } +^[g-h]+$ { idtoken 3 } +^[i-j]+/"." { idtoken 4 } +^[k-l]+/{ tokpred } { idtoken 5 } +[m-n]+$ { idtoken 6 } +[o-p]+/"." { idtoken 7 } +[q-r]+/{ tokpred } { idtoken 8 } +[0-1]^[s-t]+$ { idtoken 9 } +[2-3]^[u-v]+/"." { idtoken 10 } +[4-5]^[w-x]+/{ tokpred } { idtoken 11 } +[y-z]+ { idtoken 12 } +[A-B]+$ ; +[C-D]+/"." ; +[E-F]+/{ tokpred } ; +^[G-H]+$ ; +^[I-J]+/"." ; +^[K-L]+/{ tokpred } ; +[M-N]+$ ; +[O-P]+/"." ; +[Q-R]+/{ tokpred } ; +[0-1]^[S-T]+$ ; +[2-3]^[U-V]+/"." ; +[4-5]^[W-X]+/{ tokpred } ; +[Y-Z]+ ; +\. ; +[ \n\t\r]+ ; +[0-9] ; + +{ + +tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool +tokpred _ _ _ _ = True + +idtoken :: Read s => Int -> Text.Text -> Token s +idtoken n s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) + +data Token s = Id Int s + deriving (Show, Ord, Eq) + +lex :: Read s => Text.Text -> [Token s] +lex = alexScanTokens + +input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" + +tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", + Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", + Id 10 "uuvu", Id 11 "xxw"] + +main :: IO () +main = + let + result :: [Token String] + result = lex input + in do + if result /= tokens + then exitFailure + else exitWith ExitSuccess + +} diff --git a/tests/tokens_monadUserState_strict_text.x b/tests/tokens_monadUserState_strict_text.x new file mode 100644 index 00000000..f8726331 --- /dev/null +++ b/tests/tokens_monadUserState_strict_text.x @@ -0,0 +1,64 @@ +{ +{-# LANGUAGE OverloadedStrings #-} +module Main (main) where +import System.Exit +import qualified Data.Text +} + +%wrapper "monadUserState-strict-text" +%encoding "iso-8859-1" + +$digit = 0-9 -- digits +$alpha = [a-zA-Z] -- alphabetic characters + +tokens :- + + $white+ ; + "--".* ; + let { tok (\p _ -> Let p) } + in { tok (\p _ -> In p) } + $digit+ { tok (\p s -> Int p (read (Data.Text.unpack s))) } + [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (Data.Text.unpack s))) } + $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (Data.Text.unpack s)) } + +{ +-- Each right-hand side has type :: AlexPosn -> Data.Text.Text -> Token + +-- Some action helpers: +tok f (p,_,_,input) len = return (f p (Data.Text.take (fromIntegral len) input)) + +-- The token type: +data Token = + Let AlexPosn | + In AlexPosn | + Sym AlexPosn Char | + Var AlexPosn String | + Int AlexPosn Int | + Err AlexPosn | + EOF + deriving (Eq,Show) + +alexEOF = return EOF + +main = if test1 /= result1 then do print test1; exitFailure + else exitWith ExitSuccess + +type AlexUserState = () +alexInitUserState = () + +scanner str = runAlex str $ do + let loop = do tk <- alexMonadScan + if tk == EOF + then return [tk] + else do toks <- loop + return (tk:toks) + loop + +test1 = case scanner " let in 012334\n=+*foo bar__'" of + Left err -> error err + Right toks -> toks + +result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] + + +}