From aeb8c62e0547d49e326dc00623fe4299bdba74b7 Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Mon, 12 Jun 2023 08:25:39 +0200 Subject: [PATCH 01/10] Introduce strict-text wrapper --- data/AlexWrappers.hs | 40 +++++++++++++++++- src/AbsSyn.hs | 4 +- src/Main.hs | 3 +- tests/Makefile | 3 +- tests/strict_text_typeclass.x | 79 +++++++++++++++++++++++++++++++++++ 5 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 tests/strict_text_typeclass.x diff --git a/data/AlexWrappers.hs b/data/AlexWrappers.hs index 955b27e9..b95ed9e9 100644 --- a/data/AlexWrappers.hs +++ b/data/AlexWrappers.hs @@ -8,7 +8,12 @@ import Control.Applicative as App (Applicative (..)) #endif +#if defined(ALEX_STRICT_TEXT) +import qualified Data.Text +#endif + import Data.Word (Word8) + #if defined(ALEX_BASIC_BYTESTRING) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) import Data.Int (Int64) @@ -82,6 +87,29 @@ alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c (b, bs) -> p' `seq` Just (b, (p', c, bs, s)) #endif +#if defined (ALEX_STRICT_TEXT) +type AlexInput = (AlexPosn, -- current position, + Char, -- previous char + [Byte], -- pending bytes on current char + Data.Text.Text) -- current input string + +ignorePendingBytes :: AlexInput -> AlexInput +ignorePendingBytes (p,c,_ps,s) = (p,c,[],s) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_p,c,_bs,_s) = c + +alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) +alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) +alexGetByte (p,_,[],s) = case Data.Text.uncons s of + Just (c, cs) -> + let p' = alexMove p c + in case utf8Encode' c of + (b, bs) -> p' `seq` Just (b, (p', c, bs, cs)) + Nothing -> + Nothing +#endif + #if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) type AlexInput = (AlexPosn, -- current position, Char, -- previous char @@ -151,7 +179,7 @@ alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) = -- `move_pos' calculates the new position after traversing a given character, -- assuming the usual eight character tab stops. -#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) +#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) || defined (ALEX_STRICT_TEXT) data AlexPosn = AlexPn !Int !Int !Int deriving (Eq, Show, Ord) @@ -378,6 +406,16 @@ alexScanTokens str = go (AlexInput '\n' str 0) #endif +#ifdef ALEX_STRICT_TEXT +-- alexScanTokens :: Data.Text.Text -> [token] +alexScanTokens str = go (alexStartPos,'\n',[],str) + where go inp__@(_,_,_bs,s) = + case alexScan inp__ 0 of + AlexEOF -> [] + AlexError _ -> error "lexical error" + AlexSkip inp__' _len -> go inp__' + AlexToken inp__' len act -> act (Data.Text.take len s) : go inp__' +#endif -- ----------------------------------------------------------------------------- -- Posn wrapper diff --git a/src/AbsSyn.hs b/src/AbsSyn.hs index 2e993d91..43fa76d0 100644 --- a/src/AbsSyn.hs +++ b/src/AbsSyn.hs @@ -49,12 +49,13 @@ data Directive | TokenType String deriving Show -data StrType = Str | Lazy | Strict +data StrType = Str | Lazy | Strict | StrictText instance Show StrType where show Str = "String" show Lazy = "ByteString.ByteString" show Strict = "ByteString.ByteString" + show StrictText = "Data.Text.Text" data Scheme = Default { defaultTypeInfo :: Maybe (Maybe String, String) } @@ -77,6 +78,7 @@ wrapperCppDefs GScan {} = Just ["ALEX_GSCAN"] wrapperCppDefs Basic { basicStrType = Str } = Just ["ALEX_BASIC"] wrapperCppDefs Basic { basicStrType = Lazy } = Just ["ALEX_BASIC_BYTESTRING"] wrapperCppDefs Basic { basicStrType = Strict } = Just ["ALEX_STRICT_BYTESTRING"] +wrapperCppDefs Basic { basicStrType = StrictText } = Just ["ALEX_STRICT_TEXT"] wrapperCppDefs Posn { posnByteString = False } = Just ["ALEX_POSN"] wrapperCppDefs Posn { posnByteString = True } = Just ["ALEX_POSN_BYTESTRING"] wrapperCppDefs Monad { monadByteString = False, diff --git a/src/Main.hs b/src/Main.hs index 558332cd..091f1e7b 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -290,12 +290,13 @@ getScheme directives = (Just _, Nothing, Nothing) -> dieAlex "%typeclass directive without %token directive" | single == "basic" || single == "basic-bytestring" || - single == "strict-bytestring" -> + single == "strict-bytestring" || single == "strict-text" -> let strty = case single of "basic" -> Str "basic-bytestring" -> Lazy "strict-bytestring" -> Strict + "strict-text" -> StrictText _ -> error "Impossible case" in case (typeclass, token, action) of (Nothing, Nothing, Nothing) -> diff --git a/tests/Makefile b/tests/Makefile index b0a90c3d..add9eece 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -75,7 +75,8 @@ TESTS = \ tokens_posn_bytestring.x \ tokens_scan_user.x \ tokens_strict_bytestring.x \ - unicode.x + unicode.x \ + strict_text_typeclass.x # NOTE: `cabal` will set the `alex_datadir` env-var accordingly before invoking the test-suite #TEST_ALEX_OPTS = --template=../data/ diff --git a/tests/strict_text_typeclass.x b/tests/strict_text_typeclass.x new file mode 100644 index 00000000..9027953c --- /dev/null +++ b/tests/strict_text_typeclass.x @@ -0,0 +1,79 @@ +{ + +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where +import System.Exit +import Prelude hiding (lex) + +import qualified Data.Text as Text + +} + +%wrapper "strict-text" +%token "Token s" +%typeclass "Read s" + +tokens :- + +[a-b]+$ { idtoken 0 } +[c-d]+/"." { idtoken 1 } +[e-f]+/{ tokpred } { idtoken 2 } +^[g-h]+$ { idtoken 3 } +^[i-j]+/"." { idtoken 4 } +^[k-l]+/{ tokpred } { idtoken 5 } +[m-n]+$ { idtoken 6 } +[o-p]+/"." { idtoken 7 } +[q-r]+/{ tokpred } { idtoken 8 } +[0-1]^[s-t]+$ { idtoken 9 } +[2-3]^[u-v]+/"." { idtoken 10 } +[4-5]^[w-x]+/{ tokpred } { idtoken 11 } +[y-z]+ { idtoken 12 } +[A-B]+$ ; +[C-D]+/"." ; +[E-F]+/{ tokpred } ; +^[G-H]+$ ; +^[I-J]+/"." ; +^[K-L]+/{ tokpred } ; +[M-N]+$ ; +[O-P]+/"." ; +[Q-R]+/{ tokpred } ; +[0-1]^[S-T]+$ ; +[2-3]^[U-V]+/"." ; +[4-5]^[W-X]+/{ tokpred } ; +[Y-Z]+ ; +\. ; +[ \n\t\r]+ ; +[0-9] ; + +{ + +tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool +tokpred _ _ _ _ = True + +idtoken :: Read s => Int -> Text.Text -> Token s +idtoken n s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) + +data Token s = Id Int s + deriving (Show, Ord, Eq) + +lex :: Read s => Text.Text -> [Token s] +lex = alexScanTokens + +input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" + +tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", + Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", + Id 10 "uuvu", Id 11 "xxw"] + +main :: IO () +main = + let + result :: [Token String] + result = lex input + in do + if result /= tokens + then exitFailure + else exitWith ExitSuccess + +} From 67f15a5a3f1a5e3d495bff0a262b4291210d5662 Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Mon, 12 Jun 2023 08:45:32 +0200 Subject: [PATCH 02/10] Generalize posn string type --- src/AbsSyn.hs | 23 ++++++++++------------- src/Main.hs | 14 +++++++++----- src/Output.hs | 20 ++++++++++---------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/AbsSyn.hs b/src/AbsSyn.hs index 43fa76d0..08a310a7 100644 --- a/src/AbsSyn.hs +++ b/src/AbsSyn.hs @@ -16,7 +16,7 @@ module AbsSyn ( RECtx(..), RExp(..), nullable, DFA(..), State(..), SNum, StartCode, Accept(..), - RightContext(..), showRCtx, strtype, + RightContext(..), showRCtx, encodeStartCodes, extractActions, Target(..), UsesPreds(..), usesPreds, @@ -62,16 +62,11 @@ data Scheme | GScan { gscanTypeInfo :: Maybe (Maybe String, String) } | Basic { basicStrType :: StrType, basicTypeInfo :: Maybe (Maybe String, String) } - | Posn { posnByteString :: Bool, + | Posn { posnStrType :: StrType, posnTypeInfo :: Maybe (Maybe String, String) } | Monad { monadByteString :: Bool, monadUserState :: Bool, monadTypeInfo :: Maybe (Maybe String, String) } -strtype :: Bool -> String -strtype True = "ByteString.ByteString" -strtype False = "String" - - wrapperCppDefs :: Scheme -> Maybe [String] wrapperCppDefs Default {} = Nothing wrapperCppDefs GScan {} = Just ["ALEX_GSCAN"] @@ -79,8 +74,10 @@ wrapperCppDefs Basic { basicStrType = Str } = Just ["ALEX_BASIC"] wrapperCppDefs Basic { basicStrType = Lazy } = Just ["ALEX_BASIC_BYTESTRING"] wrapperCppDefs Basic { basicStrType = Strict } = Just ["ALEX_STRICT_BYTESTRING"] wrapperCppDefs Basic { basicStrType = StrictText } = Just ["ALEX_STRICT_TEXT"] -wrapperCppDefs Posn { posnByteString = False } = Just ["ALEX_POSN"] -wrapperCppDefs Posn { posnByteString = True } = Just ["ALEX_POSN_BYTESTRING"] +wrapperCppDefs Posn { posnStrType = Str } = Just ["ALEX_POSN"] +wrapperCppDefs Posn { posnStrType = Lazy } = Just ["ALEX_POSN_BYTESTRING"] +wrapperCppDefs Posn { posnStrType = Strict } = Just ["ALEX_POSN_BYTESTRING"] +wrapperCppDefs Posn { posnStrType = StrictText } = Just ["ALEX_POSN_STRICT_TEXT"] wrapperCppDefs Monad { monadByteString = False, monadUserState = False } = Just ["ALEX_MONAD"] wrapperCppDefs Monad { monadByteString = True, @@ -371,14 +368,14 @@ extractActions scheme scanner = (scanner{scannerTokens = new_tokens}, decl_str . basicTypeInfo = Just (Just tyclasses, tokenty) } -> nl . str fun . str " :: (" . str tyclasses . str ") => " . str (show strty) . str " -> " . str tokenty . nl - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, tokenty) } -> nl . - str fun . str " :: AlexPosn -> " . str (strtype isByteString) . str " -> " + str fun . str " :: AlexPosn -> " . str (show strty) . str " -> " . str tokenty . nl - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Just tyclasses, tokenty) } -> nl . str fun . str " :: (" . str tyclasses . str ") => AlexPosn -> " . - str (strtype isByteString) . str " -> " . str tokenty . nl + str (show strty) . str " -> " . str tokenty . nl Monad { monadByteString = isByteString, monadTypeInfo = Just (Nothing, tokenty) } -> nl . let diff --git a/src/Main.hs b/src/Main.hs index 091f1e7b..b3442b6c 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -312,18 +312,22 @@ getScheme directives = dieAlex "%action directive not allowed with a wrapper" (Just _, Nothing, Nothing) -> dieAlex "%typeclass directive without %token directive" - | single == "posn" || single == "posn-bytestring" -> + | single == "posn" || single == "posn-bytestring" || single == "posn-strict-text" -> let - isByteString = single == "posn-bytestring" + strty = case single of + "posn" -> Str + "posn-bytestring" -> Strict + "posn-strict-text" -> StrictText + _ -> error "invalid str type for posn" in case (typeclass, token, action) of (Nothing, Nothing, Nothing) -> - return Posn { posnByteString = isByteString, + return Posn { posnStrType = strty, posnTypeInfo = Nothing } (Nothing, Just tokenty, Nothing) -> - return Posn { posnByteString = isByteString, + return Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, tokenty) } (Just _, Just tokenty, Nothing) -> - return Posn { posnByteString = isByteString, + return Posn { posnStrType = strty, posnTypeInfo = Just (typeclass, tokenty) } (_, _, Just _) -> dieAlex "%action directive not allowed with a wrapper" diff --git a/src/Output.hs b/src/Output.hs index 42c44625..09221f7a 100644 --- a/src/Output.hs +++ b/src/Output.hs @@ -122,16 +122,16 @@ outputDFA target _ _ scheme dfa . str ") => Array Int (" . str (show strty) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, toktype) } -> str actions_nm . str " :: Array Int (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype + . str (show strty) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Just tyclasses, toktype) } -> str actions_nm . str " :: (" . str tyclasses . str ") => Array Int (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype + . str (show strty) . str " -> " . str toktype . str ")\n" Monad { monadByteString = isByteString, monadTypeInfo = Just (Nothing, toktype) } -> @@ -212,26 +212,26 @@ outputDFA target _ _ scheme dfa . str "alexScan :: (" . str tyclasses . str ") => AlexInput -> Int -> AlexReturn (" . str (show strty) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + Posn { posnStrType = strty, posnTypeInfo = Just (Nothing, toktype) } -> str "alex_scan_tkn :: () -> AlexInput -> " . str intty . str " -> " . str "AlexInput -> " . str intty . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" + . str (show strty) . str " -> " . str toktype . str ")\n" . str "alexScan :: AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" - Posn { posnByteString = isByteString, + . str (show strty) . str " -> " . str toktype . str ")\n" + Posn { posnStrType = strty, posnTypeInfo = Just (Just tyclasses, toktype) } -> str "alex_scan_tkn :: () -> AlexInput -> " . str intty . str " -> " . str "AlexInput -> " . str intty . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" . str "alexScanUser :: (" . str tyclasses . str ") => () -> AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" + . str (show strty) . str " -> " . str toktype . str ")\n" . str "alexScan :: (" . str tyclasses . str ") => AlexInput -> Int -> AlexReturn (AlexPosn -> " - . str (strtype isByteString) . str " -> " . str toktype . str ")\n" + . str (show strty) . str " -> " . str toktype . str ")\n" Monad { monadTypeInfo = Just (Nothing, toktype), monadByteString = isByteString, monadUserState = userState } -> From 9e2b44563eddb35cedfb3fdedf09a29abf7b0e74 Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Mon, 12 Jun 2023 08:54:43 +0200 Subject: [PATCH 03/10] Introduce posn-strict-text wrapper --- data/AlexWrappers.hs | 41 ++++++++++++++-- tests/Makefile | 3 +- tests/posn_typeclass_strict_text.x | 79 ++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 tests/posn_typeclass_strict_text.x diff --git a/data/AlexWrappers.hs b/data/AlexWrappers.hs index b95ed9e9..2c84db01 100644 --- a/data/AlexWrappers.hs +++ b/data/AlexWrappers.hs @@ -8,7 +8,7 @@ import Control.Applicative as App (Applicative (..)) #endif -#if defined(ALEX_STRICT_TEXT) +#if defined(ALEX_STRICT_TEXT) || defined (ALEX_POSN_STRICT_TEXT) import qualified Data.Text #endif @@ -88,6 +88,27 @@ alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c #endif #if defined (ALEX_STRICT_TEXT) +type AlexInput = (Char, -- previous char + [Byte], -- pending bytes on current char + Data.Text.Text) -- current input string + +ignorePendingBytes :: AlexInput -> AlexInput +ignorePendingBytes (c,_ps,s) = (c,[],s) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (c,_bs,_s) = c + +alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) +alexGetByte (c,(b:bs),s) = Just (b,(c,bs,s)) +alexGetByte (_,[],s) = case Data.Text.uncons s of + Just (c, cs) -> + case utf8Encode' c of + (b, bs) -> Just (b, (c, bs, cs)) + Nothing -> + Nothing +#endif + +#if defined (ALEX_POSN_STRICT_TEXT) type AlexInput = (AlexPosn, -- current position, Char, -- previous char [Byte], -- pending bytes on current char @@ -179,7 +200,7 @@ alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) = -- `move_pos' calculates the new position after traversing a given character, -- assuming the usual eight character tab stops. -#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) || defined (ALEX_STRICT_TEXT) +#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) || defined (ALEX_POSN_STRICT_TEXT) data AlexPosn = AlexPn !Int !Int !Int deriving (Eq, Show, Ord) @@ -408,8 +429,8 @@ alexScanTokens str = go (AlexInput '\n' str 0) #ifdef ALEX_STRICT_TEXT -- alexScanTokens :: Data.Text.Text -> [token] -alexScanTokens str = go (alexStartPos,'\n',[],str) - where go inp__@(_,_,_bs,s) = +alexScanTokens str = go ('\n',[],str) + where go inp__@(_,_bs,s) = case alexScan inp__ 0 of AlexEOF -> [] AlexError _ -> error "lexical error" @@ -417,6 +438,18 @@ alexScanTokens str = go (alexStartPos,'\n',[],str) AlexToken inp__' len act -> act (Data.Text.take len s) : go inp__' #endif +#ifdef ALEX_POSN_STRICT_TEXT +-- alexScanTokens :: Data.Text.Text -> [token] +alexScanTokens str = go (alexStartPos,'\n',[],str) + where go inp__@(pos,_,_bs,s) = + case alexScan inp__ 0 of + AlexEOF -> [] + AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) + AlexSkip inp__' _len -> go inp__' + AlexToken inp__' len act -> act pos (Data.Text.take len s) : go inp__' +#endif + + -- ----------------------------------------------------------------------------- -- Posn wrapper diff --git a/tests/Makefile b/tests/Makefile index add9eece..765a2eae 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -76,7 +76,8 @@ TESTS = \ tokens_scan_user.x \ tokens_strict_bytestring.x \ unicode.x \ - strict_text_typeclass.x + strict_text_typeclass.x \ + posn_typeclass_strict_text.x # NOTE: `cabal` will set the `alex_datadir` env-var accordingly before invoking the test-suite #TEST_ALEX_OPTS = --template=../data/ diff --git a/tests/posn_typeclass_strict_text.x b/tests/posn_typeclass_strict_text.x new file mode 100644 index 00000000..5039dd5a --- /dev/null +++ b/tests/posn_typeclass_strict_text.x @@ -0,0 +1,79 @@ +{ + +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where +import System.Exit +import Prelude hiding (lex) + +import qualified Data.Text as Text + +} + +%wrapper "posn-strict-text" +%token "Token s" +%typeclass "Read s" + +tokens :- + +[a-b]+$ { idtoken 0 } +[c-d]+/"." { idtoken 1 } +[e-f]+/{ tokpred } { idtoken 2 } +^[g-h]+$ { idtoken 3 } +^[i-j]+/"." { idtoken 4 } +^[k-l]+/{ tokpred } { idtoken 5 } +[m-n]+$ { idtoken 6 } +[o-p]+/"." { idtoken 7 } +[q-r]+/{ tokpred } { idtoken 8 } +[0-1]^[s-t]+$ { idtoken 9 } +[2-3]^[u-v]+/"." { idtoken 10 } +[4-5]^[w-x]+/{ tokpred } { idtoken 11 } +[y-z]+ { idtoken 12 } +[A-B]+$ ; +[C-D]+/"." ; +[E-F]+/{ tokpred } ; +^[G-H]+$ ; +^[I-J]+/"." ; +^[K-L]+/{ tokpred } ; +[M-N]+$ ; +[O-P]+/"." ; +[Q-R]+/{ tokpred } ; +[0-1]^[S-T]+$ ; +[2-3]^[U-V]+/"." ; +[4-5]^[W-X]+/{ tokpred } ; +[Y-Z]+ ; +\. ; +[ \n\t\r]+ ; +[0-9] ; + +{ + +tokpred :: () -> AlexInput -> Int -> AlexInput -> Bool +tokpred _ _ _ _ = True + +idtoken :: Read s => Int -> AlexPosn -> Text.Text -> Token s +idtoken n _ s = Id n (read ("\"" ++ (Text.unpack s) ++ "\"")) + +data Token s = Id Int s + deriving (Show, Ord, Eq) + +lex :: Read s => Text.Text -> [Token s] +lex = alexScanTokens + +input = "abab\ndddc.fff\ngh\nijji.\nllmnm\noop.rq0tsst\n3uuvu.5xxw" + +tokens = [ Id 0 "abab", Id 1 "dddc", Id 2 "fff", Id 3 "gh", Id 4 "ijji", + Id 5 "ll", Id 6 "mnm", Id 7 "oop", Id 8 "rq", Id 9 "tsst", + Id 10 "uuvu", Id 11 "xxw"] + +main :: IO () +main = + let + result :: [Token String] + result = lex input + in do + if result /= tokens + then exitFailure + else exitWith ExitSuccess + +} From 32664b3b57275a08ecd9a1a64b54a6f621be8c79 Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Mon, 12 Jun 2023 09:07:17 +0200 Subject: [PATCH 04/10] Add tests to extra-source-files --- alex.cabal | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alex.cabal b/alex.cabal index 8eb501a1..0d297ef8 100644 --- a/alex.cabal +++ b/alex.cabal @@ -92,6 +92,8 @@ extra-source-files: tests/issue_119.x tests/issue_141.x tests/issue_197.x + tests/strict_text_typeclass.x + tests/posn_typeclass_strict_text.x source-repository head type: git From 2af8b4b0be45f9081ef9a6d3e7cfbdf6d853651c Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Mon, 12 Jun 2023 11:27:36 +0200 Subject: [PATCH 05/10] Require text dependency for tests --- tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Makefile b/tests/Makefile index 765a2eae..02a99fe9 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -99,7 +99,7 @@ ALL_TESTS = $(patsubst %.hs, %.run, $(ALL_TEST_HS)) ./$< %$(HS_PROG_EXT) : %.hs - $(HC) $(HC_OPTS) -package array -package bytestring $($*_LD_OPTS) $< -o $@ + $(HC) $(HC_OPTS) -package array -package bytestring -package text $($*_LD_OPTS) $< -o $@ all :: $(ALL_TESTS) From b1d0de46e71c4e6bcdc3c1ee82018cdeda83b20a Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Fri, 16 Jun 2023 11:42:05 +0200 Subject: [PATCH 06/10] Don't run tests that require text dependency on older GHCs --- tests/Makefile | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 02a99fe9..dd51b7d2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -32,6 +32,8 @@ GHC_VERSION:=$(shell $(HC) --numeric-version) GHC_VERSION_WORDS=$(subst ., ,$(GHC_VERSION)) GHC_MAJOR_VERSION=$(word 1,$(GHC_VERSION_WORDS)) GHC_MINOR_VERSION=$(word 2,$(GHC_VERSION_WORDS)) +# Text dependency comes with GHC from 8.4 onwards +GHC_SHIPS_WITH_TEXT:=$(shell if [ $(GHC_MAJOR_VERSION) -gt 8 -o $(GHC_MAJOR_VERSION) -ge 8 -a $(GHC_MINOR_VERSION) -ge 4 ]; then echo "yes"; else echo "no"; fi) # -fwarn-incomplete-uni-patterns only from 7.4 WARNS_FOR_GHC_GTEQ_7_4=-fwarn-incomplete-uni-patterns WARNS_FOR_GHC_LT_7_4=-fno-warn-lazy-unlifted-bindings @@ -75,9 +77,19 @@ TESTS = \ tokens_posn_bytestring.x \ tokens_scan_user.x \ tokens_strict_bytestring.x \ - unicode.x \ - strict_text_typeclass.x \ - posn_typeclass_strict_text.x + unicode.x + +ifeq "$(GHC_SHIPS_WITH_TEXT)" "yes" +TEXT_DEP = -package text + +TEXT_TESTS = \ + strict_text_typeclass.x \ + posn_typeclass_strict_text.x +else +TEXT_DEP = + +TEXT_TESTS = +endif # NOTE: `cabal` will set the `alex_datadir` env-var accordingly before invoking the test-suite #TEST_ALEX_OPTS = --template=../data/ @@ -91,7 +103,7 @@ TEST_ALEX_OPTS= CLEAN_FILES += *.n.hs *.g.hs *.info *.hi *.o *.bin *.exe -ALL_TEST_HS = $(shell echo $(TESTS) | sed -e 's/\([^\. ]*\)\.\(l\)\{0,1\}x/\1.n.hs \1.g.hs/g') +ALL_TEST_HS = $(shell echo $(TESTS) $(TEXT_TESTS) | sed -e 's/\([^\. ]*\)\.\(l\)\{0,1\}x/\1.n.hs \1.g.hs/g') ALL_TESTS = $(patsubst %.hs, %.run, $(ALL_TEST_HS)) @@ -99,7 +111,7 @@ ALL_TESTS = $(patsubst %.hs, %.run, $(ALL_TEST_HS)) ./$< %$(HS_PROG_EXT) : %.hs - $(HC) $(HC_OPTS) -package array -package bytestring -package text $($*_LD_OPTS) $< -o $@ + $(HC) $(HC_OPTS) -package array -package bytestring $(TEXT_DEP) $($*_LD_OPTS) $< -o $@ all :: $(ALL_TESTS) From 5a0a38c90d9935ae6caee5dfc882864a4988ef5a Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Sat, 17 Jun 2023 15:13:56 +0200 Subject: [PATCH 07/10] Introduce monadUserState-strict-text wrapper --- alex.cabal | 1 + data/AlexWrappers.hs | 153 +++++++++++++++++----- src/AbsSyn.hs | 28 ++-- src/Main.hs | 16 ++- src/Output.hs | 16 +-- tests/Makefile | 3 +- tests/tokens_monadUserState_strict_text.x | 64 +++++++++ 7 files changed, 226 insertions(+), 55 deletions(-) create mode 100644 tests/tokens_monadUserState_strict_text.x diff --git a/alex.cabal b/alex.cabal index 0d297ef8..9948b6d5 100644 --- a/alex.cabal +++ b/alex.cabal @@ -94,6 +94,7 @@ extra-source-files: tests/issue_197.x tests/strict_text_typeclass.x tests/posn_typeclass_strict_text.x + tests/tokens_monadUserState_strict_text.x source-repository head type: git diff --git a/data/AlexWrappers.hs b/data/AlexWrappers.hs index 2c84db01..3a3b9b0c 100644 --- a/data/AlexWrappers.hs +++ b/data/AlexWrappers.hs @@ -4,11 +4,11 @@ -- This code is in the PUBLIC DOMAIN; you may copy it freely and use -- it for any purpose whatsoever. -#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) +#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_MONAD_STRICT_TEXT) import Control.Applicative as App (Applicative (..)) #endif -#if defined(ALEX_STRICT_TEXT) || defined (ALEX_POSN_STRICT_TEXT) +#if defined(ALEX_STRICT_TEXT) || defined (ALEX_POSN_STRICT_TEXT) || defined(ALEX_MONAD_STRICT_TEXT) import qualified Data.Text #endif @@ -108,7 +108,7 @@ alexGetByte (_,[],s) = case Data.Text.uncons s of Nothing #endif -#if defined (ALEX_POSN_STRICT_TEXT) +#if defined (ALEX_POSN_STRICT_TEXT) || defined(ALEX_MONAD_STRICT_TEXT) type AlexInput = (AlexPosn, -- current position, Char, -- previous char [Byte], -- pending bytes on current char @@ -200,7 +200,7 @@ alexGetByte (AlexInput {alexStr=cs,alexBytePos=n}) = -- `move_pos' calculates the new position after traversing a given character, -- assuming the usual eight character tab stops. -#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) || defined (ALEX_POSN_STRICT_TEXT) +#if defined(ALEX_POSN) || defined(ALEX_MONAD) || defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_GSCAN) || defined (ALEX_POSN_STRICT_TEXT) || defined(ALEX_MONAD_STRICT_TEXT) data AlexPosn = AlexPn !Int !Int !Int deriving (Eq, Show, Ord) @@ -216,14 +216,20 @@ alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1) -- ----------------------------------------------------------------------------- -- Monad (default and with ByteString input) -#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) +#if defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_MONAD_STRICT_TEXT) data AlexState = AlexState { alex_pos :: !AlexPosn, -- position at current input location -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD_STRICT_TEXT + alex_inp :: Data.Text.Text, + alex_chr :: !Char, + alex_bytes :: [Byte], +#endif /* ALEX_MONAD_STRICT_TEXT */ +#ifdef ALEX_MONAD alex_inp :: String, -- the current input alex_chr :: !Char, -- the character before the input alex_bytes :: [Byte], -#else /* ALEX_MONAD_BYTESTRING */ +#endif /* ALEX_MONAD */ +#ifdef ALEX_MONAD_BYTESTRING alex_bpos:: !Int64, -- bytes consumed so far alex_inp :: ByteString.ByteString, -- the current input alex_chr :: !Char, -- the character before the input @@ -236,15 +242,24 @@ data AlexState = AlexState { -- Compile with -funbox-strict-fields for best results! -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD runAlex :: String -> Alex a -> Either String a runAlex input__ (Alex f) = case f (AlexState {alex_bytes = [], -#else /* ALEX_MONAD_BYTESTRING */ + alex_pos = alexStartPos, + alex_inp = input__, + alex_chr = '\n', +#ifdef ALEX_MONAD_USER_STATE + alex_ust = alexInitUserState, +#endif + alex_scd = 0}) of Left msg -> Left msg + Right ( _, a ) -> Right a +#endif + +#ifdef ALEX_MONAD_BYTESTRING runAlex :: ByteString.ByteString -> Alex a -> Either String a runAlex input__ (Alex f) = case f (AlexState {alex_bpos = 0, -#endif /* ALEX_MONAD_BYTESTRING */ alex_pos = alexStartPos, alex_inp = input__, alex_chr = '\n', @@ -253,6 +268,21 @@ runAlex input__ (Alex f) #endif alex_scd = 0}) of Left msg -> Left msg Right ( _, a ) -> Right a +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +runAlex :: Data.Text.Text -> Alex a -> Either String a +runAlex input__ (Alex f) + = case f (AlexState {alex_bytes = [], + alex_pos = alexStartPos, + alex_inp = input__, + alex_chr = '\n', +#ifdef ALEX_MONAD_USER_STATE + alex_ust = alexInitUserState, +#endif + alex_scd = 0}) of Left msg -> Left msg + Right ( _, a ) -> Right a +#endif newtype Alex a = Alex { unAlex :: AlexState -> Either String (AlexState, a) } @@ -275,28 +305,51 @@ instance Monad Alex where Right (s',a) -> unAlex (k a) s' return = App.pure + +#ifdef ALEX_MONAD alexGetInput :: Alex AlexInput alexGetInput -#ifndef ALEX_MONAD_BYTESTRING = Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} -> Right (s, (pos,c,bs,inp__)) -#else /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_BYTESTRING +alexGetInput :: Alex AlexInput +alexGetInput = Alex $ \s@AlexState{alex_pos=pos,alex_bpos=bpos,alex_chr=c,alex_inp=inp__} -> Right (s, (pos,c,inp__,bpos)) -#endif /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +alexGetInput :: Alex AlexInput +alexGetInput + = Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} -> + Right (s, (pos,c,bs,inp__)) +#endif +#ifdef ALEX_MONAD alexSetInput :: AlexInput -> Alex () -#ifndef ALEX_MONAD_BYTESTRING alexSetInput (pos,c,bs,inp__) = Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of -#else /* ALEX_MONAD_BYTESTRING */ + state__@(AlexState{}) -> Right (state__, ()) +#endif + +#ifdef ALEX_MONAD_BYTESTRING +alexSetInput :: AlexInput -> Alex () alexSetInput (pos,c,inp__,bpos) = Alex $ \s -> case s{alex_pos=pos, alex_bpos=bpos, alex_chr=c, alex_inp=inp__} of -#endif /* ALEX_MONAD_BYTESTRING */ - state__@(AlexState{}) -> Right (state__, ()) + state__@(AlexState{}) -> Right (state__, ()) +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +alexSetInput :: AlexInput -> Alex () +alexSetInput (pos,c,bs,inp__) + = Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of + state__@(AlexState{}) -> Right (state__, ()) +#endif alexError :: String -> Alex a alexError message = Alex $ const $ Left message @@ -315,12 +368,9 @@ alexSetUserState :: AlexUserState -> Alex () alexSetUserState ss = Alex $ \s -> Right (s{alex_ust=ss}, ()) #endif /* !defined(ALEX_MONAD_BYTESTRING) && defined(ALEX_MONAD_USER_STATE) */ +#ifdef ALEX_MONAD alexMonadScan = do -#ifndef ALEX_MONAD_BYTESTRING inp__ <- alexGetInput -#else /* ALEX_MONAD_BYTESTRING */ - inp__@(_,_,_,n) <- alexGetInput -#endif /* ALEX_MONAD_BYTESTRING */ sc <- alexGetStartCode case alexScan inp__ sc of AlexEOF -> alexEOF @@ -328,22 +378,55 @@ alexMonadScan = do AlexSkip inp__' _len -> do alexSetInput inp__' alexMonadScan -#ifndef ALEX_MONAD_BYTESTRING AlexToken inp__' len action -> do -#else /* ALEX_MONAD_BYTESTRING */ + alexSetInput inp__' + action (ignorePendingBytes inp__) len +#endif + +#ifdef ALEX_MONAD_BYTESTRING +alexMonadScan = do + inp__@(_,_,_,n) <- alexGetInput + sc <- alexGetStartCode + case alexScan inp__ sc of + AlexEOF -> alexEOF + AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) + AlexSkip inp__' _len -> do + alexSetInput inp__' + alexMonadScan AlexToken inp__'@(_,_,_,n') _ action -> let len = n'-n in do -#endif /* ALEX_MONAD_BYTESTRING */ alexSetInput inp__' action (ignorePendingBytes inp__) len +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +alexMonadScan = do + inp__ <- alexGetInput + sc <- alexGetStartCode + case alexScan inp__ sc of + AlexEOF -> alexEOF + AlexError ((AlexPn _ line column),_,_,_) -> alexError $ "lexical error at line " ++ (show line) ++ ", column " ++ (show column) + AlexSkip inp__' _len -> do + alexSetInput inp__' + alexMonadScan + AlexToken inp__' len action -> do + alexSetInput inp__' + action (ignorePendingBytes inp__) len +#endif -- ----------------------------------------------------------------------------- -- Useful token actions -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD type AlexAction result = AlexInput -> Int -> Alex result -#else /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_BYTESTRING type AlexAction result = AlexInput -> Int64 -> Alex result -#endif /* ALEX_MONAD_BYTESTRING */ +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +type AlexAction result = AlexInput -> Int -> Alex result +#endif -- just ignore this token and scan another one -- skip :: AlexAction result @@ -359,14 +442,22 @@ andBegin :: AlexAction result -> Int -> AlexAction result alexSetStartCode code action input__ len -#ifndef ALEX_MONAD_BYTESTRING +#ifdef ALEX_MONAD token :: (AlexInput -> Int -> token) -> AlexAction token -#else /* ALEX_MONAD_BYTESTRING */ +token t input__ len = return (t input__ len) +#endif + +#ifdef ALEX_MONAD_BYTESTRING token :: (AlexInput -> Int64 -> token) -> AlexAction token -#endif /* ALEX_MONAD_BYTESTRING */ token t input__ len = return (t input__ len) -#endif /* defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) */ +#endif + +#ifdef ALEX_MONAD_STRICT_TEXT +token :: (AlexInput -> Int -> token) -> AlexAction token +token t input__ len = return (t input__ len) +#endif +#endif /* defined(ALEX_MONAD) || defined(ALEX_MONAD_BYTESTRING) || defined(ALEX_MONAD_STRICT_TEXT) */ -- ----------------------------------------------------------------------------- -- Basic wrapper diff --git a/src/AbsSyn.hs b/src/AbsSyn.hs index 08a310a7..fd189398 100644 --- a/src/AbsSyn.hs +++ b/src/AbsSyn.hs @@ -50,6 +50,7 @@ data Directive deriving Show data StrType = Str | Lazy | Strict | StrictText + deriving Eq instance Show StrType where show Str = "String" @@ -64,7 +65,8 @@ data Scheme basicTypeInfo :: Maybe (Maybe String, String) } | Posn { posnStrType :: StrType, posnTypeInfo :: Maybe (Maybe String, String) } - | Monad { monadByteString :: Bool, monadUserState :: Bool, + | Monad { monadStrType :: StrType, + monadUserState :: Bool, monadTypeInfo :: Maybe (Maybe String, String) } wrapperCppDefs :: Scheme -> Maybe [String] @@ -78,14 +80,22 @@ wrapperCppDefs Posn { posnStrType = Str } = Just ["ALEX_POSN"] wrapperCppDefs Posn { posnStrType = Lazy } = Just ["ALEX_POSN_BYTESTRING"] wrapperCppDefs Posn { posnStrType = Strict } = Just ["ALEX_POSN_BYTESTRING"] wrapperCppDefs Posn { posnStrType = StrictText } = Just ["ALEX_POSN_STRICT_TEXT"] -wrapperCppDefs Monad { monadByteString = False, +wrapperCppDefs Monad { monadStrType = Str, monadUserState = False } = Just ["ALEX_MONAD"] -wrapperCppDefs Monad { monadByteString = True, +wrapperCppDefs Monad { monadStrType = Strict, + monadUserState = False } = Just ["ALEX_MONAD_BYTESTRING"] +wrapperCppDefs Monad { monadStrType = Lazy, monadUserState = False } = Just ["ALEX_MONAD_BYTESTRING"] -wrapperCppDefs Monad { monadByteString = False, +wrapperCppDefs Monad { monadStrType = StrictText, + monadUserState = False } = Just ["ALEX_MONAD_STRICT_TEXT"] +wrapperCppDefs Monad { monadStrType = Str, monadUserState = True } = Just ["ALEX_MONAD", "ALEX_MONAD_USER_STATE"] -wrapperCppDefs Monad { monadByteString = True, +wrapperCppDefs Monad { monadStrType = Strict, + monadUserState = True } = Just ["ALEX_MONAD_BYTESTRING", "ALEX_MONAD_USER_STATE"] +wrapperCppDefs Monad { monadStrType = Lazy, monadUserState = True } = Just ["ALEX_MONAD_BYTESTRING", "ALEX_MONAD_USER_STATE"] +wrapperCppDefs Monad { monadStrType = StrictText, + monadUserState = True } = Just ["ALEX_MONAD_STRICT_TEXT", "ALEX_MONAD_USER_STATE"] -- TODO: update this comment -- @@ -376,17 +386,17 @@ extractActions scheme scanner = (scanner{scannerTokens = new_tokens}, decl_str . posnTypeInfo = Just (Just tyclasses, tokenty) } -> nl . str fun . str " :: (" . str tyclasses . str ") => AlexPosn -> " . str (show strty) . str " -> " . str tokenty . nl - Monad { monadByteString = isByteString, + Monad { monadStrType = strty, monadTypeInfo = Just (Nothing, tokenty) } -> nl . let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str fun . str " :: AlexInput -> " . str actintty . str " -> Alex (" . str tokenty . str ")" . nl - Monad { monadByteString = isByteString, + Monad { monadStrType = strty, monadTypeInfo = Just (Just tyclasses, tokenty) } -> nl . let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str fun . str " :: (" . str tyclasses . str ") =>" . str " AlexInput -> " . str actintty diff --git a/src/Main.hs b/src/Main.hs index b3442b6c..852e008c 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -333,25 +333,29 @@ getScheme directives = dieAlex "%action directive not allowed with a wrapper" (Just _, Nothing, Nothing) -> dieAlex "%typeclass directive without %token directive" - | single == "monad" || single == "monad-bytestring" || + | single == "monad" || single == "monad-bytestring" || single == "monad-strict-text" || single == "monadUserState" || - single == "monadUserState-bytestring" -> + single == "monadUserState-bytestring" || + single == "monadUserState-strict-text" -> let + isText = single == "monad-strict-text" || + single == "monadUserState-strict-text" isByteString = single == "monad-bytestring" || single == "monadUserState-bytestring" userState = single == "monadUserState" || - single == "monadUserState-bytestring" + single == "monadUserState-bytestring" || + single == "monadUserState-strict-text" in case (typeclass, token, action) of (Nothing, Nothing, Nothing) -> - return Monad { monadByteString = isByteString, + return Monad { monadStrType = if isByteString then Lazy else if isText then StrictText else Str, monadUserState = userState, monadTypeInfo = Nothing } (Nothing, Just tokenty, Nothing) -> - return Monad { monadByteString = isByteString, + return Monad { monadStrType = if isByteString then Lazy else if isText then StrictText else Str, monadUserState = userState, monadTypeInfo = Just (Nothing, tokenty) } (Just _, Just tokenty, Nothing) -> - return Monad { monadByteString = isByteString, + return Monad { monadStrType = if isByteString then Lazy else if isText then StrictText else Str, monadUserState = userState, monadTypeInfo = Just (typeclass, tokenty) } (_, _, Just _) -> diff --git a/src/Output.hs b/src/Output.hs index 09221f7a..563fd2e9 100644 --- a/src/Output.hs +++ b/src/Output.hs @@ -133,17 +133,17 @@ outputDFA target _ _ scheme dfa . str ") => Array Int (AlexPosn -> " . str (show strty) . str " -> " . str toktype . str ")\n" - Monad { monadByteString = isByteString, + Monad { monadStrType = strty, monadTypeInfo = Just (Nothing, toktype) } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str actions_nm . str " :: Array Int (AlexInput -> " . str actintty . str " -> Alex(" . str toktype . str "))\n" - Monad { monadByteString = isByteString, + Monad { monadStrType = strty, monadTypeInfo = Just (Just tyclasses, toktype) } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" in str actions_nm . str " :: (" . str tyclasses . str ") => Array Int (AlexInput -> " @@ -233,10 +233,10 @@ outputDFA target _ _ scheme dfa . str ") => AlexInput -> Int -> AlexReturn (AlexPosn -> " . str (show strty) . str " -> " . str toktype . str ")\n" Monad { monadTypeInfo = Just (Nothing, toktype), - monadByteString = isByteString, + monadStrType = strty, monadUserState = userState } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" userStateTy | userState = "AlexUserState" | otherwise = "()" in @@ -253,10 +253,10 @@ outputDFA target _ _ scheme dfa . str " -> Alex (" . str toktype . str "))\n" . str "alexMonadScan :: Alex (" . str toktype . str ")\n" Monad { monadTypeInfo = Just (Just tyclasses, toktype), - monadByteString = isByteString, + monadStrType = strty, monadUserState = userState } -> let - actintty = if isByteString then "Int64" else "Int" + actintty = if strty == Lazy then "Int64" else "Int" userStateTy | userState = "AlexUserState" | otherwise = "()" in diff --git a/tests/Makefile b/tests/Makefile index dd51b7d2..2618dff1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -84,7 +84,8 @@ TEXT_DEP = -package text TEXT_TESTS = \ strict_text_typeclass.x \ - posn_typeclass_strict_text.x + posn_typeclass_strict_text.x \ + tokens_monadUserState_strict_text.x else TEXT_DEP = diff --git a/tests/tokens_monadUserState_strict_text.x b/tests/tokens_monadUserState_strict_text.x new file mode 100644 index 00000000..f8726331 --- /dev/null +++ b/tests/tokens_monadUserState_strict_text.x @@ -0,0 +1,64 @@ +{ +{-# LANGUAGE OverloadedStrings #-} +module Main (main) where +import System.Exit +import qualified Data.Text +} + +%wrapper "monadUserState-strict-text" +%encoding "iso-8859-1" + +$digit = 0-9 -- digits +$alpha = [a-zA-Z] -- alphabetic characters + +tokens :- + + $white+ ; + "--".* ; + let { tok (\p _ -> Let p) } + in { tok (\p _ -> In p) } + $digit+ { tok (\p s -> Int p (read (Data.Text.unpack s))) } + [\=\+\-\*\/\(\)] { tok (\p s -> Sym p (head (Data.Text.unpack s))) } + $alpha [$alpha $digit \_ \']* { tok (\p s -> Var p (Data.Text.unpack s)) } + +{ +-- Each right-hand side has type :: AlexPosn -> Data.Text.Text -> Token + +-- Some action helpers: +tok f (p,_,_,input) len = return (f p (Data.Text.take (fromIntegral len) input)) + +-- The token type: +data Token = + Let AlexPosn | + In AlexPosn | + Sym AlexPosn Char | + Var AlexPosn String | + Int AlexPosn Int | + Err AlexPosn | + EOF + deriving (Eq,Show) + +alexEOF = return EOF + +main = if test1 /= result1 then do print test1; exitFailure + else exitWith ExitSuccess + +type AlexUserState = () +alexInitUserState = () + +scanner str = runAlex str $ do + let loop = do tk <- alexMonadScan + if tk == EOF + then return [tk] + else do toks <- loop + return (tk:toks) + loop + +test1 = case scanner " let in 012334\n=+*foo bar__'" of + Left err -> error err + Right toks -> toks + +result1 = [Let (AlexPn 2 1 3),In (AlexPn 6 1 7),Int (AlexPn 9 1 10) 12334,Sym (AlexPn 16 2 1) '=',Sym (AlexPn 17 2 2) '+',Sym (AlexPn 18 2 3) '*',Var (AlexPn 19 2 4) "foo",Var (AlexPn 23 2 8) "bar__'", EOF] + + +} From ee9cff3aa38d6ff4812a0b6b106f7c2a4adb691b Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Sat, 17 Jun 2023 15:28:41 +0200 Subject: [PATCH 08/10] Update documentation with new text wrappers --- doc/api.rst | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index a47bc275..c099783e 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -577,6 +577,143 @@ The ``monadUserState-bytestring`` wrapper is the same as the ``monadUserState`` All of the actions in your lexical specification have the same type as in the ``monadUserState`` wrapper. It is only the types of the function to run the monad and the type of the ``token`` function that change. +The text wrappers +~~~~~~~~~~~~~~~~~~~~~~~ + +The ``strict-text``, ``posn-strict-text`` and ``monad-strict-text`` wrappers are variations on the ``basic``, ``posn`` and ``monad`` wrappers that use strict ``Text``\ s as the input and token types instead of an ordinary ``String``. + +The point of using these wrappers is that ``Text``\ s provide a more memory efficient representation of an input stream. +They can also be somewhat faster to process. +Note that using these wrappers adds a dependency on the ``Text`` modules, which live in the ``text`` package. + +Note also that by default tokens share space with the input ``Text`` which has the advantage that it does not need to make a copy but it also prevents the input from being garbage collected. +It may make sense in some applications to use ``Text``'s ``copy`` function to unshare tokens that will be kept for a long time, to allow the original input to be collected. + +The "strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``strict-text`` wrapper is the same as the ``basic`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + type AlexInput = + ( Char -- previous char + , [Byte] -- rest of the bytes for the current char + , Text -- rest of the input Text + ) + + alexGetByte :: AlexInput -> Maybe (Char, AlexInput) + + alexInputPrevChar :: AlexInput -> Char + + -- alexScanTokens :: ByteString -> [token] + +All of the actions in your lexical specification should have type: + +.. code-block:: haskell + + { ... } :: Text -> token + +for some type ``token``. + +The "posn-strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``posn-strict-text`` wrapper is the same as the ``posn`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + type AlexInput = + ( AlexPosn -- current position, + , Char -- previous char + , [Byte] -- rest of the bytes for the current char + , Text -- current input Text + ) + + -- alexScanTokens :: Text -> [token] + +All of the actions in your lexical specification should have type: + +.. code-block:: haskell + + { ... } :: AlexPosn -> Text -> token + +for some type ``token``. + +The "monad-strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``monad-strict-text`` wrapper is the same as the ``monad`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + data AlexState = AlexState + { alex_pos :: !AlexPosn -- position at current input location + , alex_inp :: Text -- the current input + , alex_chr :: !Char -- the character before the input + , alex_bytes :: [Byte] -- rest of the bytes for the current char + , alex_scd :: !Int -- the current startcode + } + + newtype Alex a = Alex { unAlex :: AlexState + -> Either String (AlexState, a) } + + instance Functor Alex where ... + instance Applicative Alex where ... + instance Monad Alex where ... + + runAlex :: Text -> Alex a -> Either String a + + type AlexInput = + ( AlexPosn -- current position, + , Char -- previous char + , [Byte] -- rest of the bytes for the current char + , Text -- current input string + ) + + -- token :: (AlexInput -> Int -> token) -> AlexAction token + +All of the actions in your lexical specification have the same type as in the ``monad`` wrapper. +It is only the types of the function to run the monad and the type of the ``token`` function that change. + +The "monadUserState-strict-text" wrapper +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``monadUserState-strict-text`` wrapper is the same as the ``monadUserState`` wrapper but with strict ``Text`` instead of ``String``: + +.. code-block:: haskell + + import Data.Text (Text) + import qualified Data.Text as Text + + data AlexState = AlexState + { alex_pos :: !AlexPosn -- position at current input location + , alex_inp :: Text -- the current input + , alex_chr :: !Char -- the character before the input + , alex_bytes :: [Byte] -- rest of the bytes for the current char + , alex_scd :: !Int -- the current startcode + , alex_ust :: AlexUserState -- AlexUserState will be defined in the user program + } + + newtype Alex a = Alex { unAlex :: AlexState + -> Either String (AlexState, a) } + + runAlex :: Text -> Alex a -> Either String a + + -- token :: (AlexInput -> Int -> token) -> AlexAction token + +All of the actions in your lexical specification have the same type as in the ``monadUserState`` wrapper. +It is only the types of the function to run the monad and the type of the ``token`` function that change. + .. _types: Type Signatures and Typeclasses From b3a19e762a482d050a746fa005c36d85930553c8 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Sun, 18 Jun 2023 23:46:13 +0200 Subject: [PATCH 09/10] Remove trailing whitespace --- data/AlexWrappers.hs | 14 +++++++------- src/Main.hs | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/data/AlexWrappers.hs b/data/AlexWrappers.hs index 3a3b9b0c..0aa90bde 100644 --- a/data/AlexWrappers.hs +++ b/data/AlexWrappers.hs @@ -122,13 +122,13 @@ alexInputPrevChar (_p,c,_bs,_s) = c alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) -alexGetByte (p,_,[],s) = case Data.Text.uncons s of - Just (c, cs) -> +alexGetByte (p,_,[],s) = case Data.Text.uncons s of + Just (c, cs) -> let p' = alexMove p c in case utf8Encode' c of (b, bs) -> p' `seq` Just (b, (p', c, bs, cs)) - Nothing -> - Nothing + Nothing -> + Nothing #endif #if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING) @@ -242,7 +242,7 @@ data AlexState = AlexState { -- Compile with -funbox-strict-fields for best results! -#ifdef ALEX_MONAD +#ifdef ALEX_MONAD runAlex :: String -> Alex a -> Either String a runAlex input__ (Alex f) = case f (AlexState {alex_bytes = [], @@ -306,7 +306,7 @@ instance Monad Alex where return = App.pure -#ifdef ALEX_MONAD +#ifdef ALEX_MONAD alexGetInput :: Alex AlexInput alexGetInput = Alex $ \s@AlexState{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} -> @@ -327,7 +327,7 @@ alexGetInput Right (s, (pos,c,bs,inp__)) #endif -#ifdef ALEX_MONAD +#ifdef ALEX_MONAD alexSetInput :: AlexInput -> Alex () alexSetInput (pos,c,bs,inp__) = Alex $ \s -> case s{alex_pos=pos,alex_chr=c,alex_bytes=bs,alex_inp=inp__} of diff --git a/src/Main.hs b/src/Main.hs index 852e008c..1de73e65 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -314,7 +314,7 @@ getScheme directives = dieAlex "%typeclass directive without %token directive" | single == "posn" || single == "posn-bytestring" || single == "posn-strict-text" -> let - strty = case single of + strty = case single of "posn" -> Str "posn-bytestring" -> Strict "posn-strict-text" -> StrictText @@ -338,7 +338,7 @@ getScheme directives = single == "monadUserState-bytestring" || single == "monadUserState-strict-text" -> let - isText = single == "monad-strict-text" || + isText = single == "monad-strict-text" || single == "monadUserState-strict-text" isByteString = single == "monad-bytestring" || single == "monadUserState-bytestring" From 0b82df01efbe67f4759f40390d21de004294a7f5 Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Mon, 19 Jun 2023 14:31:39 +0200 Subject: [PATCH 10/10] Incorporate Andreas' feeback --- doc/api.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index c099783e..0827c30b 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -578,19 +578,19 @@ All of the actions in your lexical specification have the same type as in the `` It is only the types of the function to run the monad and the type of the ``token`` function that change. The text wrappers -~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~ The ``strict-text``, ``posn-strict-text`` and ``monad-strict-text`` wrappers are variations on the ``basic``, ``posn`` and ``monad`` wrappers that use strict ``Text``\ s as the input and token types instead of an ordinary ``String``. The point of using these wrappers is that ``Text``\ s provide a more memory efficient representation of an input stream. They can also be somewhat faster to process. -Note that using these wrappers adds a dependency on the ``Text`` modules, which live in the ``text`` package. +Note that using these wrappers adds a dependency on the ``Data.Text`` modules, which live in the ``text`` package. Note also that by default tokens share space with the input ``Text`` which has the advantage that it does not need to make a copy but it also prevents the input from being garbage collected. It may make sense in some applications to use ``Text``'s ``copy`` function to unshare tokens that will be kept for a long time, to allow the original input to be collected. The "strict-text" wrapper -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^ The ``strict-text`` wrapper is the same as the ``basic`` wrapper but with strict ``Text`` instead of ``String``: @@ -609,7 +609,7 @@ The ``strict-text`` wrapper is the same as the ``basic`` wrapper but with strict alexInputPrevChar :: AlexInput -> Char - -- alexScanTokens :: ByteString -> [token] + -- alexScanTokens :: Text -> [token] All of the actions in your lexical specification should have type: @@ -620,7 +620,7 @@ All of the actions in your lexical specification should have type: for some type ``token``. The "posn-strict-text" wrapper -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``posn-strict-text`` wrapper is the same as the ``posn`` wrapper but with strict ``Text`` instead of ``String``: @@ -647,7 +647,7 @@ All of the actions in your lexical specification should have type: for some type ``token``. The "monad-strict-text" wrapper -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``monad-strict-text`` wrapper is the same as the ``monad`` wrapper but with strict ``Text`` instead of ``String``: @@ -671,7 +671,7 @@ The ``monad-strict-text`` wrapper is the same as the ``monad`` wrapper but with instance Applicative Alex where ... instance Monad Alex where ... - runAlex :: Text -> Alex a -> Either String a + runAlex :: Text -> Alex a -> Either String a type AlexInput = ( AlexPosn -- current position, @@ -686,7 +686,7 @@ All of the actions in your lexical specification have the same type as in the `` It is only the types of the function to run the monad and the type of the ``token`` function that change. The "monadUserState-strict-text" wrapper -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``monadUserState-strict-text`` wrapper is the same as the ``monadUserState`` wrapper but with strict ``Text`` instead of ``String``: