Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added TextBuilder alias #581

Merged
merged 3 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
### 2.1.2

* [Add `LazyTextBuilder` type synonym for the lazy `Builder`. Deprecate `StrictBuilder` in favor of `StrictTextBuilder`](https://github.com/haskell/text/pull/581)

### 2.1.1

* Add pure Haskell implementations as an alternative to C-based ones,
Expand Down
1 change: 1 addition & 0 deletions src/Data/Text/Encoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ module Data.Text.Encoding
, Utf8State
, startUtf8State
, StrictBuilder
, StrictTextBuilder
, strictBuilderToText
, textToStrictBuilder

Expand Down
3 changes: 3 additions & 0 deletions src/Data/Text/Internal/Builder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ module Data.Text.Internal.Builder
( -- * Public API
-- ** The Builder type
Builder
, LazyTextBuilder
, toLazyText
, toLazyTextWith

Expand Down Expand Up @@ -96,6 +97,8 @@ newtype Builder = Builder {
-> ST s [S.Text]
}

type LazyTextBuilder = Builder

instance Semigroup Builder where
(<>) = append
{-# INLINE (<>) #-}
Expand Down
23 changes: 12 additions & 11 deletions src/Data/Text/Internal/Encoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ module Data.Text.Internal.Encoding
, decodeUtf8With2
, Utf8State
, startUtf8State
, StrictBuilder()
, StrictTextBuilder()
, StrictBuilder
, strictBuilderToText
, textToStrictBuilder

Expand All @@ -50,7 +51,7 @@ import Data.Text.Encoding.Error (OnDecodeError)
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Encoding.Utf8
(DecoderState, utf8AcceptState, utf8RejectState, updateDecoderState)
import Data.Text.Internal.StrictBuilder (StrictBuilder)
import Data.Text.Internal.StrictBuilder (StrictBuilder, StrictTextBuilder)
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Short.Internal as SBS
Expand All @@ -72,13 +73,13 @@ import Foreign.Ptr (Ptr)
-- | Use 'StrictTextBuilder' to build 'Text'.
--
-- @since 2.0.2
strictBuilderToText :: StrictBuilder -> Text
strictBuilderToText :: StrictTextBuilder -> Text
strictBuilderToText = SB.toText

-- | Copy 'Text' into a 'StrictTextBuilder'.
--
-- @since 2.0.2
textToStrictBuilder :: Text -> StrictBuilder
textToStrictBuilder :: Text -> StrictTextBuilder
textToStrictBuilder = SB.fromText

-- | State of decoding a 'ByteString' in UTF-8.
Expand Down Expand Up @@ -352,11 +353,11 @@ validateUtf8MoreCont st@(Utf8State s0 part) bs k
| otherwise = k (- partUtf8Len part) (Just (Utf8State s (partUtf8UnsafeAppend part bs)))

-- Eta-expanded to inline partUtf8Foldr
partUtf8ToStrictBuilder :: PartialUtf8CodePoint -> StrictBuilder
partUtf8ToStrictBuilder :: PartialUtf8CodePoint -> StrictTextBuilder
partUtf8ToStrictBuilder c =
partUtf8Foldr ((<>) . SB.unsafeFromWord8) mempty c

utf8StateToStrictBuilder :: Utf8State -> StrictBuilder
utf8StateToStrictBuilder :: Utf8State -> StrictTextBuilder
utf8StateToStrictBuilder = partUtf8ToStrictBuilder . partialUtf8CodePoint

-- | Decode another chunk in an ongoing UTF-8 stream.
Expand Down Expand Up @@ -413,7 +414,7 @@ utf8StateToStrictBuilder = partUtf8ToStrictBuilder . partialUtf8CodePoint
-- s2b (pre1 '<>' pre2) = s2b pre3
-- ms2 = ms3
-- @
decodeUtf8More :: Utf8State -> ByteString -> (StrictBuilder, ByteString, Maybe Utf8State)
decodeUtf8More :: Utf8State -> ByteString -> (StrictTextBuilder, ByteString, Maybe Utf8State)
decodeUtf8More s bs =
validateUtf8MoreCont s bs $ \len ms ->
let builder | len <= 0 = mempty
Expand Down Expand Up @@ -444,7 +445,7 @@ decodeUtf8More s bs =
-- @
-- 'Data.Text.Encoding.encodeUtf8' ('Data.Text.Encoding.strictBuilderToText' builder) '<>' rest = chunk
-- @
decodeUtf8Chunk :: ByteString -> (StrictBuilder, ByteString, Maybe Utf8State)
decodeUtf8Chunk :: ByteString -> (StrictTextBuilder, ByteString, Maybe Utf8State)
decodeUtf8Chunk = decodeUtf8More startUtf8State

-- | Call the error handler on each byte of the partial code point stored in
Expand All @@ -454,14 +455,14 @@ decodeUtf8Chunk = decodeUtf8More startUtf8State
--
-- @since 2.0.2
{-# INLINE skipIncomplete #-}
skipIncomplete :: OnDecodeError -> String -> Utf8State -> StrictBuilder
skipIncomplete :: OnDecodeError -> String -> Utf8State -> StrictTextBuilder
skipIncomplete onErr msg s =
partUtf8Foldr
((<>) . handleUtf8Error onErr msg)
mempty (partialUtf8CodePoint s)

{-# INLINE handleUtf8Error #-}
handleUtf8Error :: OnDecodeError -> String -> Word8 -> StrictBuilder
handleUtf8Error :: OnDecodeError -> String -> Word8 -> StrictTextBuilder
handleUtf8Error onErr msg w = case onErr msg (Just w) of
Just c -> SB.fromChar c
Nothing -> mempty
Expand Down Expand Up @@ -505,7 +506,7 @@ decodeUtf8With2 ::
#if defined(ASSERTS)
HasCallStack =>
#endif
OnDecodeError -> String -> Utf8State -> ByteString -> (StrictBuilder, ByteString, Utf8State)
OnDecodeError -> String -> Utf8State -> ByteString -> (StrictTextBuilder, ByteString, Utf8State)
decodeUtf8With2 onErr msg s0 bs = loop s0 0 mempty
where
loop s i !builder =
Expand Down
54 changes: 31 additions & 23 deletions src/Data/Text/Internal/StrictBuilder.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeSynonymInstances #-}

-- |
-- Module : Data.Text.Internal.StrictBuilder
Expand All @@ -14,7 +15,8 @@
-- @since 2.0.2

module Data.Text.Internal.StrictBuilder
( StrictBuilder(..)
( StrictTextBuilder(..)
, StrictBuilder
, toText
, fromChar
, fromText
Expand Down Expand Up @@ -43,18 +45,24 @@ import qualified Data.Text.Internal.Unsafe.Char as Char

-- | A delayed representation of strict 'Text'.
--
-- @since 2.0.2
data StrictBuilder = StrictBuilder
-- @since 2.1.2
data StrictTextBuilder = StrictTextBuilder
{ sbLength :: {-# UNPACK #-} !Int
, sbWrite :: forall s. A.MArray s -> Int -> ST s ()
}

-- | Deprecated synonym for 'StrictTextBuilder', a delayed representation of strict 'Text'.
--
-- @since 2.0.2
{-# DEPRECATED StrictBuilder "Use StrictTextBuilder instead" #-}
type StrictBuilder = StrictTextBuilder

-- | Use 'StrictTextBuilder' to build 'Text'.
--
-- @since 2.0.2
toText :: StrictBuilder -> Text
toText (StrictBuilder 0 _) = empty
toText (StrictBuilder n write) = runST (do
toText :: StrictTextBuilder -> Text
toText (StrictTextBuilder 0 _) = empty
toText (StrictTextBuilder n write) = runST (do
dst <- A.new n
write dst 0
arr <- A.unsafeFreeze dst
Expand All @@ -63,21 +71,21 @@ toText (StrictBuilder n write) = runST (do
-- | Concatenation of 'StrictTextBuilder' is right-biased:
-- the right builder will be run first. This allows a builder to
-- run tail-recursively when it was accumulated left-to-right.
instance Semigroup StrictBuilder where
instance Semigroup StrictTextBuilder where
(<>) = appendRStrictBuilder

instance Monoid StrictBuilder where
instance Monoid StrictTextBuilder where
mempty = emptyStrictBuilder
mappend = (<>)

emptyStrictBuilder :: StrictBuilder
emptyStrictBuilder = StrictBuilder 0 (\_ _ -> pure ())
emptyStrictBuilder :: StrictTextBuilder
emptyStrictBuilder = StrictTextBuilder 0 (\_ _ -> pure ())

appendRStrictBuilder :: StrictBuilder -> StrictBuilder -> StrictBuilder
appendRStrictBuilder (StrictBuilder 0 _) b2 = b2
appendRStrictBuilder b1 (StrictBuilder 0 _) = b1
appendRStrictBuilder (StrictBuilder n1 write1) (StrictBuilder n2 write2) =
StrictBuilder (n1 + n2) (\dst ofs -> do
appendRStrictBuilder :: StrictTextBuilder -> StrictTextBuilder -> StrictTextBuilder
appendRStrictBuilder (StrictTextBuilder 0 _) b2 = b2
appendRStrictBuilder b1 (StrictTextBuilder 0 _) = b1
appendRStrictBuilder (StrictTextBuilder n1 write1) (StrictTextBuilder n2 write2) =
StrictTextBuilder (n1 + n2) (\dst ofs -> do
write2 dst (ofs + n1)
write1 dst ofs)

Expand All @@ -91,16 +99,16 @@ copyFromByteString dst ofs src = withBS src $ \ srcFPtr len ->
-- Unsafe: This may not be valid UTF-8 text.
--
-- @since 2.0.2
unsafeFromByteString :: ByteString -> StrictBuilder
unsafeFromByteString :: ByteString -> StrictTextBuilder
unsafeFromByteString bs =
StrictBuilder (B.length bs) (\dst ofs -> copyFromByteString dst ofs bs)
StrictTextBuilder (B.length bs) (\dst ofs -> copyFromByteString dst ofs bs)

-- |
-- @since 2.0.2
{-# INLINE fromChar #-}
fromChar :: Char -> StrictBuilder
fromChar :: Char -> StrictTextBuilder
fromChar c =
StrictBuilder (utf8Length c) (\dst ofs -> void (Char.unsafeWrite dst ofs (safe c)))
StrictTextBuilder (utf8Length c) (\dst ofs -> void (Char.unsafeWrite dst ofs (safe c)))

-- $unsafe
-- For internal purposes, we abuse 'StrictBuilder' as a delayed 'Array' rather
Expand All @@ -109,13 +117,13 @@ fromChar c =
-- | Unsafe: This may not be valid UTF-8 text.
--
-- @since 2.0.2
unsafeFromWord8 :: Word8 -> StrictBuilder
unsafeFromWord8 :: Word8 -> StrictTextBuilder
unsafeFromWord8 !w =
StrictBuilder 1 (\dst ofs -> A.unsafeWrite dst ofs w)
StrictTextBuilder 1 (\dst ofs -> A.unsafeWrite dst ofs w)

-- | Copy 'Text' into a 'StrictTextBuilder'.
--
-- @since 2.0.2
fromText :: Text -> StrictBuilder
fromText (Text src srcOfs n) = StrictBuilder n (\dst dstOfs ->
fromText :: Text -> StrictTextBuilder
fromText (Text src srcOfs n) = StrictTextBuilder n (\dst dstOfs ->
A.copyI n dst dstOfs src srcOfs)
1 change: 1 addition & 0 deletions src/Data/Text/Lazy/Builder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
module Data.Text.Lazy.Builder
( -- * The Builder type
Builder
, LazyTextBuilder
, toLazyText
, toLazyTextWith

Expand Down
Loading