Skip to content

Commit

Permalink
benchmarked UTF-8 writing to a file
Browse files Browse the repository at this point in the history
  • Loading branch information
meiersi committed Mar 10, 2011
1 parent 5745a28 commit 7aed2fe
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ benchmarks/FastPut
benchmarks/BuilderBufferRange
benchmarks/BoundedWrite
benchmarks/UnboxedAppend
benchmarks/Utf8IO

Criterion/ScalingBenchmark

Expand Down
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#########

GHC6 = ghc-6.12.3
GHC7 = ghc-7.0.1
GHC7 = ghc-7.0.2

GHC = $(GHC7)

Expand Down Expand Up @@ -35,6 +35,16 @@ clean-bench-all:
## Individual benchmarks
########################

# utf8 writing to a file
utf8-io:
$(GHC) --make -O2 -fforce-recomp -main-is Utf8IO benchmarks/Utf8IO.hs
time ./benchmarks/Utf8IO via-text 100000000 /dev/null
time ./benchmarks/Utf8IO text 100000000 /dev/null
time ./benchmarks/Utf8IO blaze 100000000 /dev/null
time ./benchmarks/Utf8IO base 100000000 /dev/null
time ./benchmarks/Utf8IO utf8-light 100000000 /dev/null
time ./benchmarks/Utf8IO utf8-string 100000000 /dev/null

# 'blaze-builder' vs. 'binary' comparision
bench-blaze-vs-binary:
$(GHC) --make -O2 -fforce-recomp -main-is BlazeVsBinary benchmarks/BlazeVsBinary.hs
Expand Down
102 changes: 102 additions & 0 deletions benchmarks/Utf8IO.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{-# LANGUAGE OverloadedStrings #-}
-- |
-- Copyright : (c) 2011 Simon Meier
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Simon Meier <[email protected]>
-- Stability : experimental
-- Portability : tested on GHC only
--
-- Benchmarking IO output speed of writing a string in Utf8 encoding to a file.
module Utf8IO (main) where

import Control.Monad
import Control.Exception (evaluate)

import qualified Codec.Binary.UTF8.Light as Utf8Light

import Data.Char (chr)
import Data.Time.Clock
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.UTF8 as Utf8String
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

import System.IO
import System.Environment

import Blaze.ByteString.Builder
import Blaze.ByteString.Builder.Internal (defaultBufferSize)
import qualified Blaze.ByteString.Builder.Char.Utf8 as Blaze


-- | Write using the standard text utf8 encoding function built into 'base'.
writeUtf8_base :: String -> FilePath -> IO ()
writeUtf8_base cs file =
withFile file WriteMode $ \h -> do
hSetEncoding h utf8
hPutStr h cs

-- | Write using utf8 encoding as provided by the 'blaze-builder' library.
writeUtf8_blaze :: String -> FilePath -> IO ()
writeUtf8_blaze cs file = L.writeFile file $ toLazyByteString $ Blaze.fromString cs

-- | Write using utf8 encoding as provided by the 'text' library.
writeUtf8_text :: TL.Text -> FilePath -> IO ()
writeUtf8_text tx file = L.writeFile file $ TL.encodeUtf8 tx

-- | Write using utf8 encoding as provided by the 'utf8-string' library.
writeUtf8_string :: String -> FilePath -> IO ()
writeUtf8_string cs file = L.writeFile file $ Utf8String.fromString cs

-- | Write using utf8 encoding as provided by the 'utf8-light' library. Note
-- that this library only allows encoding the whole string as a strict
-- bytestring. That might make it unusable in some circumstances.
{-# NOINLINE writeUtf8_light #-}
writeUtf8_light :: String -> FilePath -> IO ()
writeUtf8_light cs file = Utf8Light.writeUTF8File file cs


main :: IO ()
main = do
[how, len, file] <- getArgs
let blocksize = 32000
block = map chr [0..blocksize]
n = read len
cs = take n $ cycle $ block
tx = TL.pack cs
writer <- case how of
"base" -> return $ writeUtf8_base cs
"blaze" -> return $ writeUtf8_blaze cs
"utf8-string" -> return $ writeUtf8_string cs

-- utf8-light is missing support for lazy bytestrings => test 100 times
-- writing a 100 times smaller string to avoid out-of-memory errors.
"utf8-light" -> return $ \f -> sequence_ $ replicate 100 $
writeUtf8_light (take (n `div` 100) cs) f

"via-text" -> do return $ writeUtf8_text tx

-- Here, we ensure that the text tx is already packed before timing.
"text" -> do _ <- evaluate (TL.length tx)
return $ writeUtf8_text tx
_ -> error $ "unknown writer '" ++ how ++ "'"
t <- timed_ $ writer file
putStrLn $ how ++ ": " ++ show t

------------------------------------------------------------------------------
-- Timing
------------------------------------------------------------------------------

-- | Execute an IO action and return its result plus the time it took to execute it.
timed :: IO a -> IO (a, NominalDiffTime)
timed io = do
t0 <- getCurrentTime
x <- io
t1 <- getCurrentTime
return (x, diffUTCTime t1 t0)

-- | Execute an IO action and return the time it took to execute it.
timed_ :: IO a -> IO NominalDiffTime
timed_ = (snd `liftM`) . timed

0 comments on commit 7aed2fe

Please sign in to comment.