diff --git a/changelog.md b/changelog.md index 0e50c687..3ecaa8c4 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,7 @@ +### 2.1.2 + +* [Add `LazyTextBuilder` as a type synonym for the lazy `Builder`. Deprecate `StrictBuilder` in favor of `StrictTextBuilder`](https://github.com/haskell/text/pull/581) + ### 2.1.1 * Add pure Haskell implementations as an alternative to C-based ones, diff --git a/src/Data/Text/Encoding.hs b/src/Data/Text/Encoding.hs index 751618f6..16ec1986 100644 --- a/src/Data/Text/Encoding.hs +++ b/src/Data/Text/Encoding.hs @@ -52,6 +52,7 @@ module Data.Text.Encoding , Utf8State , startUtf8State , StrictBuilder + , StrictTextBuilder , strictBuilderToText , textToStrictBuilder diff --git a/src/Data/Text/Internal/Builder.hs b/src/Data/Text/Internal/Builder.hs index 98c77876..0627829f 100644 --- a/src/Data/Text/Internal/Builder.hs +++ b/src/Data/Text/Internal/Builder.hs @@ -39,6 +39,7 @@ module Data.Text.Internal.Builder ( -- * Public API -- ** The Builder type Builder + , LazyTextBuilder , toLazyText , toLazyTextWith @@ -96,6 +97,8 @@ newtype Builder = Builder { -> ST s [S.Text] } +type LazyTextBuilder = Builder + instance Semigroup Builder where (<>) = append {-# INLINE (<>) #-} diff --git a/src/Data/Text/Internal/Encoding.hs b/src/Data/Text/Internal/Encoding.hs index cb6bd7fd..1a3179e5 100644 --- a/src/Data/Text/Internal/Encoding.hs +++ b/src/Data/Text/Internal/Encoding.hs @@ -26,7 +26,8 @@ module Data.Text.Internal.Encoding , decodeUtf8With2 , Utf8State , startUtf8State - , StrictBuilder() + , StrictTextBuilder() + , StrictBuilder , strictBuilderToText , textToStrictBuilder @@ -50,7 +51,7 @@ import Data.Text.Encoding.Error (OnDecodeError) import Data.Text.Internal (Text(..)) import Data.Text.Internal.Encoding.Utf8 (DecoderState, utf8AcceptState, utf8RejectState, updateDecoderState) -import Data.Text.Internal.StrictBuilder (StrictBuilder) +import Data.Text.Internal.StrictBuilder (StrictBuilder, StrictTextBuilder) import qualified Data.ByteString as B import qualified 
Data.ByteString.Internal as BI import qualified Data.ByteString.Short.Internal as SBS @@ -72,13 +73,13 @@ import Foreign.Ptr (Ptr) -- | Use 'StrictBuilder' to build 'Text'. -- -- @since 2.0.2 -strictBuilderToText :: StrictBuilder -> Text +strictBuilderToText :: StrictTextBuilder -> Text strictBuilderToText = SB.toText -- | Copy 'Text' in a 'StrictBuilder' -- -- @since 2.0.2 -textToStrictBuilder :: Text -> StrictBuilder +textToStrictBuilder :: Text -> StrictTextBuilder textToStrictBuilder = SB.fromText -- | State of decoding a 'ByteString' in UTF-8. @@ -352,11 +353,11 @@ validateUtf8MoreCont st@(Utf8State s0 part) bs k | otherwise = k (- partUtf8Len part) (Just (Utf8State s (partUtf8UnsafeAppend part bs))) -- Eta-expanded to inline partUtf8Foldr -partUtf8ToStrictBuilder :: PartialUtf8CodePoint -> StrictBuilder +partUtf8ToStrictBuilder :: PartialUtf8CodePoint -> StrictTextBuilder partUtf8ToStrictBuilder c = partUtf8Foldr ((<>) . SB.unsafeFromWord8) mempty c -utf8StateToStrictBuilder :: Utf8State -> StrictBuilder +utf8StateToStrictBuilder :: Utf8State -> StrictTextBuilder utf8StateToStrictBuilder = partUtf8ToStrictBuilder . partialUtf8CodePoint -- | Decode another chunk in an ongoing UTF-8 stream. @@ -413,7 +414,7 @@ utf8StateToStrictBuilder = partUtf8ToStrictBuilder . 
partialUtf8CodePoint -- s2b (pre1 '<>' pre2) = s2b pre3 -- ms2 = ms3 -- @ -decodeUtf8More :: Utf8State -> ByteString -> (StrictBuilder, ByteString, Maybe Utf8State) +decodeUtf8More :: Utf8State -> ByteString -> (StrictTextBuilder, ByteString, Maybe Utf8State) decodeUtf8More s bs = validateUtf8MoreCont s bs $ \len ms -> let builder | len <= 0 = mempty @@ -444,7 +445,7 @@ decodeUtf8More s bs = -- @ -- 'Data.Text.Encoding.encodeUtf8' ('Data.Text.Encoding.strictBuilderToText' builder) '<>' rest = chunk -- @ -decodeUtf8Chunk :: ByteString -> (StrictBuilder, ByteString, Maybe Utf8State) +decodeUtf8Chunk :: ByteString -> (StrictTextBuilder, ByteString, Maybe Utf8State) decodeUtf8Chunk = decodeUtf8More startUtf8State -- | Call the error handler on each byte of the partial code point stored in @@ -454,14 +455,14 @@ decodeUtf8Chunk = decodeUtf8More startUtf8State -- -- @since 2.0.2 {-# INLINE skipIncomplete #-} -skipIncomplete :: OnDecodeError -> String -> Utf8State -> StrictBuilder +skipIncomplete :: OnDecodeError -> String -> Utf8State -> StrictTextBuilder skipIncomplete onErr msg s = partUtf8Foldr ((<>) . 
handleUtf8Error onErr msg) mempty (partialUtf8CodePoint s) {-# INLINE handleUtf8Error #-} -handleUtf8Error :: OnDecodeError -> String -> Word8 -> StrictBuilder +handleUtf8Error :: OnDecodeError -> String -> Word8 -> StrictTextBuilder handleUtf8Error onErr msg w = case onErr msg (Just w) of Just c -> SB.fromChar c Nothing -> mempty @@ -505,7 +506,7 @@ decodeUtf8With2 :: #if defined(ASSERTS) HasCallStack => #endif - OnDecodeError -> String -> Utf8State -> ByteString -> (StrictBuilder, ByteString, Utf8State) + OnDecodeError -> String -> Utf8State -> ByteString -> (StrictTextBuilder, ByteString, Utf8State) decodeUtf8With2 onErr msg s0 bs = loop s0 0 mempty where loop s i !builder = diff --git a/src/Data/Text/Internal/StrictBuilder.hs b/src/Data/Text/Internal/StrictBuilder.hs index 84a57264..71583fc9 100644 --- a/src/Data/Text/Internal/StrictBuilder.hs +++ b/src/Data/Text/Internal/StrictBuilder.hs @@ -1,6 +1,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeSynonymInstances #-} -- | -- Module : Data.Text.Internal.Builder @@ -14,7 +15,8 @@ -- @since 2.0.2 module Data.Text.Internal.StrictBuilder - ( StrictBuilder(..) + ( StrictTextBuilder(..) + , StrictBuilder , toText , fromChar , fromText @@ -43,18 +45,24 @@ import qualified Data.Text.Internal.Unsafe.Char as Char -- | A delayed representation of strict 'Text'. -- --- @since 2.0.2 -data StrictBuilder = StrictBuilder +-- @since 2.1.2 +data StrictTextBuilder = StrictTextBuilder { sbLength :: {-# UNPACK #-} !Int , sbWrite :: forall s. A.MArray s -> Int -> ST s () } +-- | A delayed representation of strict 'Text'. +-- +-- @since 2.0.2 +{-# DEPRECATED StrictBuilder "Use StrictTextBuilder instead" #-} +type StrictBuilder = StrictTextBuilder + -- | Use 'StrictBuilder' to build 'Text'. 
-- -- @since 2.0.2 -toText :: StrictBuilder -> Text -toText (StrictBuilder 0 _) = empty -toText (StrictBuilder n write) = runST (do +toText :: StrictTextBuilder -> Text +toText (StrictTextBuilder 0 _) = empty +toText (StrictTextBuilder n write) = runST (do dst <- A.new n write dst 0 arr <- A.unsafeFreeze dst @@ -63,21 +71,21 @@ toText (StrictBuilder n write) = runST (do -- | Concatenation of 'StrictBuilder' is right-biased: -- the right builder will be run first. This allows a builder to -- run tail-recursively when it was accumulated left-to-right. -instance Semigroup StrictBuilder where +instance Semigroup StrictTextBuilder where (<>) = appendRStrictBuilder -instance Monoid StrictBuilder where +instance Monoid StrictTextBuilder where mempty = emptyStrictBuilder mappend = (<>) -emptyStrictBuilder :: StrictBuilder -emptyStrictBuilder = StrictBuilder 0 (\_ _ -> pure ()) +emptyStrictBuilder :: StrictTextBuilder +emptyStrictBuilder = StrictTextBuilder 0 (\_ _ -> pure ()) -appendRStrictBuilder :: StrictBuilder -> StrictBuilder -> StrictBuilder -appendRStrictBuilder (StrictBuilder 0 _) b2 = b2 -appendRStrictBuilder b1 (StrictBuilder 0 _) = b1 -appendRStrictBuilder (StrictBuilder n1 write1) (StrictBuilder n2 write2) = - StrictBuilder (n1 + n2) (\dst ofs -> do +appendRStrictBuilder :: StrictTextBuilder -> StrictTextBuilder -> StrictTextBuilder +appendRStrictBuilder (StrictTextBuilder 0 _) b2 = b2 +appendRStrictBuilder b1 (StrictTextBuilder 0 _) = b1 +appendRStrictBuilder (StrictTextBuilder n1 write1) (StrictTextBuilder n2 write2) = + StrictTextBuilder (n1 + n2) (\dst ofs -> do write2 dst (ofs + n1) write1 dst ofs) @@ -91,16 +99,16 @@ copyFromByteString dst ofs src = withBS src $ \ srcFPtr len -> -- Unsafe: This may not be valid UTF-8 text. 
-- -- @since 2.0.2 -unsafeFromByteString :: ByteString -> StrictBuilder +unsafeFromByteString :: ByteString -> StrictTextBuilder unsafeFromByteString bs = - StrictBuilder (B.length bs) (\dst ofs -> copyFromByteString dst ofs bs) + StrictTextBuilder (B.length bs) (\dst ofs -> copyFromByteString dst ofs bs) -- | -- @since 2.0.2 {-# INLINE fromChar #-} -fromChar :: Char -> StrictBuilder +fromChar :: Char -> StrictTextBuilder fromChar c = - StrictBuilder (utf8Length c) (\dst ofs -> void (Char.unsafeWrite dst ofs (safe c))) + StrictTextBuilder (utf8Length c) (\dst ofs -> void (Char.unsafeWrite dst ofs (safe c))) -- $unsafe -- For internal purposes, we abuse 'StrictBuilder' as a delayed 'Array' rather @@ -109,13 +117,13 @@ fromChar c = -- | Unsafe: This may not be valid UTF-8 text. -- -- @since 2.0.2 -unsafeFromWord8 :: Word8 -> StrictBuilder +unsafeFromWord8 :: Word8 -> StrictTextBuilder unsafeFromWord8 !w = - StrictBuilder 1 (\dst ofs -> A.unsafeWrite dst ofs w) + StrictTextBuilder 1 (\dst ofs -> A.unsafeWrite dst ofs w) -- | Copy 'Text' in a 'StrictBuilder' -- -- @since 2.0.2 -fromText :: Text -> StrictBuilder -fromText (Text src srcOfs n) = StrictBuilder n (\dst dstOfs -> +fromText :: Text -> StrictTextBuilder +fromText (Text src srcOfs n) = StrictTextBuilder n (\dst dstOfs -> A.copyI n dst dstOfs src srcOfs) diff --git a/src/Data/Text/Lazy/Builder.hs b/src/Data/Text/Lazy/Builder.hs index e5c2f8a3..92b36b27 100644 --- a/src/Data/Text/Lazy/Builder.hs +++ b/src/Data/Text/Lazy/Builder.hs @@ -39,6 +39,7 @@ module Data.Text.Lazy.Builder ( -- * The Builder type Builder + , LazyTextBuilder , toLazyText , toLazyTextWith