Skip to content

Commit

Permalink
Move Word.fromEscapedText to Utf8 default implementation
Browse files Browse the repository at this point in the history
This improves the performance of the default implementation.  It also
makes more sense because there really is UTF-8 encoding going on in
these functions.

Reverts tests and benchmarks that no longer apply.
  • Loading branch information
dylex committed Oct 15, 2016
1 parent 0a21933 commit 583d9c4
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 45 deletions.
11 changes: 11 additions & 0 deletions Blaze/ByteString/Builder/Html/Utf8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,17 @@ module Blaze.ByteString.Builder.Html.Utf8
import Data.ByteString.Char8 () -- for the 'IsString' instance of bytestrings

import qualified Data.Text as TS
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE

import Blaze.ByteString.Builder.Compat.Write ( Write, writePrimBounded )
import qualified Data.ByteString.Builder as B
import Data.ByteString.Builder.Prim ((>*<), (>$<), condB)
import qualified Data.ByteString.Builder.Prim as P

import Blaze.ByteString.Builder.Char.Utf8
import Blaze.ByteString.Builder.Html.Word

-- | Write an HTML escaped and UTF-8 encoded Unicode character to a buffer.
--
Expand Down Expand Up @@ -101,9 +104,17 @@ fromHtmlEscapedShow = fromHtmlEscapedString . show
-- UTF-8 encoding.
--
fromHtmlEscapedText :: TS.Text -> B.Builder
#if MIN_VERSION_text(1,1,2) && MIN_VERSION_bytestring(0,10,4)
-- Preferred path: the UTF-8 encoder from text applies the byte-level
-- escaping primitive 'wordHtmlEscaped' while it encodes.
fromHtmlEscapedText strictText =
    TE.encodeUtf8BuilderEscaped wordHtmlEscaped strictText
#else
-- Fallback when the version check above fails: unpack to a 'String' and
-- reuse the 'String' based escaper.
fromHtmlEscapedText strictText =
    fromHtmlEscapedString (TS.unpack strictText)
#endif

-- | /O(n)/. Build the UTF-8 encoding of a lazy 'TL.Text' with HTML escaping
-- applied.
--
fromHtmlEscapedLazyText :: TL.Text -> B.Builder
#if MIN_VERSION_text(1,1,2) && MIN_VERSION_bytestring(0,10,4)
-- Preferred path: the lazy UTF-8 encoder from text applies the byte-level
-- escaping primitive 'wordHtmlEscaped' while it encodes.
fromHtmlEscapedLazyText lazyText =
    TLE.encodeUtf8BuilderEscaped wordHtmlEscaped lazyText
#else
-- Fallback when the version check above fails: unpack to a 'String' and
-- reuse the 'String' based escaper.
fromHtmlEscapedLazyText lazyText =
    fromHtmlEscapedString (TL.unpack lazyText)
#endif
26 changes: 2 additions & 24 deletions Blaze/ByteString/Builder/Html/Word.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,14 @@
------------------------------------------------------------------------------

module Blaze.ByteString.Builder.Html.Word
(
( wordHtmlEscaped
-- * Writing HTML escaped bytes to a buffer
writeHtmlEscapedWord
, writeHtmlEscapedWord
-- * Creating Builders from HTML escaped bytes
, fromHtmlEscapedWord
, fromHtmlEscapedWordList
, fromHtmlEscapedByteString
, fromHtmlEscapedLazyByteString
#if MIN_VERSION_text(1,1,2) && MIN_VERSION_bytestring(0,10,4)
-- * Creating Builders from HTML escaped and UTF-8 encoded text
-- | /Note/ that these functions are only available if built against @text >= 1.1.2.0@ and @bytestring >= 0.10.4.0@.
, fromHtmlEscapedText
, fromHtmlEscapedLazyText
#endif
) where

import qualified Blaze.ByteString.Builder.Compat.Write as W
Expand All @@ -41,10 +35,6 @@ import qualified Data.ByteString.Builder.Prim as P
import Data.ByteString.Internal (c2w)
import qualified Data.ByteString.Lazy as BSL
import Data.Word (Word8)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE

{-# INLINE wordHtmlEscaped #-}
wordHtmlEscaped :: P.BoundedPrim Word8
Expand Down Expand Up @@ -87,15 +77,3 @@ fromHtmlEscapedByteString = P.primMapByteStringBounded wordHtmlEscaped
-- | /O(n)/. Build the HTML escaped form of a lazy 'BSL.ByteString' by
-- mapping 'wordHtmlEscaped' over its bytes.
fromHtmlEscapedLazyByteString :: BSL.ByteString -> B.Builder
fromHtmlEscapedLazyByteString lazyBytes =
    P.primMapLazyByteStringBounded wordHtmlEscaped lazyBytes

#if MIN_VERSION_text(1,1,2) && MIN_VERSION_bytestring(0,10,4)
-- | /O(n)/. Build the HTML escaped UTF-8 encoding of a strict 'T.Text'.
-- Gives the same result as 'Blaze.ByteString.Builder.Html.Utf8.fromHtmlEscapedText' but is more than twice as fast.
fromHtmlEscapedText :: T.Text -> B.Builder
fromHtmlEscapedText strictText =
    TE.encodeUtf8BuilderEscaped wordHtmlEscaped strictText

-- | /O(n)/. Build the HTML escaped UTF-8 encoding of a lazy 'TL.Text'.
-- Gives the same result as 'Blaze.ByteString.Builder.Html.Utf8.fromHtmlEscapedLazyText' but is more than three times as fast.
fromHtmlEscapedLazyText :: TL.Text -> B.Builder
fromHtmlEscapedLazyText lazyText =
    TLE.encodeUtf8BuilderEscaped wordHtmlEscaped lazyText
#endif
15 changes: 4 additions & 11 deletions benchmarks/StringAndText.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ import qualified Data.Text.Lazy.Encoding as TL
import qualified Blaze.ByteString.Builder as Blaze
import qualified Data.ByteString.Builder.Internal as Blaze
import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze
import qualified Blaze.ByteString.Builder.Html.Utf8 as BlazeUtf8
import qualified Blaze.ByteString.Builder.Html.Word as BlazeWord
import qualified Blaze.ByteString.Builder.Html.Utf8 as Blaze

main :: IO ()
main = defaultMain
Expand Down Expand Up @@ -60,19 +59,13 @@ main = defaultMain
(L.length . TL.encodeUtf8) benchLazyText

, bench "fromHtmlEscapedString :: String --[Html esc. & Utf8 encoding]--> L.ByteString" $ whnf
(L.length . Blaze.toLazyByteString . BlazeUtf8.fromHtmlEscapedString) benchString
(L.length . Blaze.toLazyByteString . Blaze.fromHtmlEscapedString) benchString

, bench "fromHtmlEscapedStrictTextUnpacked :: StrictText --[HTML esc. & Utf8 encoding]--> L.ByteString" $ whnf
(L.length . Blaze.toLazyByteString . BlazeUtf8.fromHtmlEscapedText) benchStrictText

, bench "fromHtmlEscapedStrictTextUnpacked :: StrictText --[Utf8 encoding --> HTML esc.]--> L.ByteString" $ whnf
(L.length . Blaze.toLazyByteString . BlazeWord.fromHtmlEscapedText) benchStrictText
(L.length . Blaze.toLazyByteString . Blaze.fromHtmlEscapedText) benchStrictText

, bench "fromHtmlEscapedLazyTextUnpacked :: LazyText --[HTML esc. & Utf8 encoding]--> L.ByteString" $ whnf
(L.length . Blaze.toLazyByteString . BlazeUtf8.fromHtmlEscapedLazyText) benchLazyText

, bench "fromHtmlEscapedLazyTextUnpacked :: LazyText --[Utf8 encoding --> HTML esc.]--> L.ByteString" $ whnf
(L.length . Blaze.toLazyByteString . BlazeWord.fromHtmlEscapedLazyText) benchLazyText
(L.length . Blaze.toLazyByteString . Blaze.fromHtmlEscapedLazyText) benchLazyText

]

Expand Down
10 changes: 0 additions & 10 deletions tests/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import Codec.Binary.UTF8.String (decode)
import Blaze.ByteString.Builder
import Blaze.ByteString.Builder.Char.Utf8
import Blaze.ByteString.Builder.Html.Utf8
import qualified Blaze.ByteString.Builder.Html.Word as Word

-- Entry point of the test suite: runs the single top-level "Tests" group.
main :: IO ()
main =
    -- 'return' wraps the group before it is handed to 'defaultMain'.
    defaultMain (return (testGroup "Tests" tests))
Expand All @@ -40,9 +39,6 @@ tests =
, testCase "escaping case 1" escaping1
, testCase "escaping case 2" escaping2
, testCase "escaping case 3" escaping3
#if MIN_VERSION_text(1,1,2) && MIN_VERSION_bytestring(0,10,4)
, testProperty "fromHtmlEscapedText" wordUtf8
#endif
]

monoidLeftIdentity :: Builder -> Bool
Expand Down Expand Up @@ -86,12 +82,6 @@ escaping2 = fromString "f &amp;&amp;&amp; g" @?= fromHtmlEscapedString "f &&& g"
-- Double and single quotes must come out as their HTML entities.
escaping3 :: Assertion
escaping3 = entities @?= escaped
  where
    entities = fromString "&quot;&#39;"
    escaped  = fromHtmlEscapedString "\"'"

#if MIN_VERSION_text(1,1,2) && MIN_VERSION_bytestring(0,10,4)
-- Property: the Utf8 and Word variants of fromHtmlEscapedText must build
-- the same output for any input string.
wordUtf8 :: String -> Property
wordUtf8 str =
    let packed = T.pack str
    in  fromHtmlEscapedText packed === Word.fromHtmlEscapedText packed
#endif

-- Render a 'Builder' by showing the lazy bytestring it produces.
instance Show Builder where
    show builder = show (toLazyByteString builder)

Expand Down

0 comments on commit 583d9c4

Please sign in to comment.