Skip to content

Commit

Permalink
Add simple benchmark suite
Browse files Browse the repository at this point in the history
  • Loading branch information
hvr committed Jan 28, 2018
1 parent e7efc18 commit 1f452d9
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 0 deletions.
90 changes: 90 additions & 0 deletions src-bench/Bench.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
module Main where

import Criterion.Main

import Control.Exception
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Short.Internal as IUT

-- | Naive, doubly-recursive Fibonacci number.
--
-- Calls 'error' with @"negative!"@ for a negative argument. Note that no
-- benchmark visible in this file references this function.
fib m =
  if m < 0
    then error "negative!"
    else naive m
  where
    -- Textbook recursion: two base cases, otherwise the sum of the two
    -- preceding values.
    naive k = case k of
      0 -> 0
      1 -> 1
      _ -> naive (k - 1) + naive (k - 2)

-- | UTF-8 encoding of the code points U+0000 through U+007F, in ascending
-- order.
{-# NOINLINE allcharsBS1 #-}
allcharsBS1 = T.encodeUtf8 . T.pack $ enumFromTo '\x0' '\x7f'

-- | 'allcharsBS1' converted to a 'IUT.ShortText' via 'IUT.fromByteString'.
--
-- 'IUT.fromByteString' returns a 'Maybe'; should it ever return 'Nothing'
-- here, forcing this value aborts with a message naming the offending
-- binding rather than with an anonymous pattern-match failure.
{-# NOINLINE allcharsST1 #-}
allcharsST1 :: IUT.ShortText
allcharsST1 = case IUT.fromByteString allcharsBS1 of
  Just st -> st
  Nothing -> error "Bench.allcharsST1: fromByteString rejected allcharsBS1"

-- | 'allcharsST1' round-tripped through 'IUT.toByteString' and
-- 'IUT.fromByteString'; used as the argument of the @"== 1b"@ benchmark.
--
-- NOTE(review): presumably the round trip exists to obtain a value that is
-- equal to, but a separate copy of, 'allcharsST1' -- confirm.
--
-- A 'Nothing' from 'IUT.fromByteString' aborts with a descriptive message
-- instead of an anonymous pattern-match failure.
{-# NOINLINE allcharsST1' #-}
allcharsST1' :: IUT.ShortText
allcharsST1' = case IUT.fromByteString (IUT.toByteString allcharsST1) of
  Just st -> st
  Nothing -> error "Bench.allcharsST1': fromByteString rejected round-tripped allcharsST1"

-- | UTF-8 encoding of every code point from U+0000 to U+10FFFF except the
-- range U+D800..U+DFFF, in ascending order.
{-# NOINLINE allcharsBS2 #-}
allcharsBS2 = T.encodeUtf8 . T.pack $ lowerRange ++ upperRange
  where
    lowerRange = ['\x0' .. '\xd7ff']
    upperRange = ['\xe000' .. '\x10ffff']

-- | 'allcharsBS2' converted to a 'IUT.ShortText' via 'IUT.fromByteString'.
--
-- 'IUT.fromByteString' returns a 'Maybe'; should it ever return 'Nothing'
-- here, forcing this value aborts with a message naming the offending
-- binding rather than with an anonymous pattern-match failure.
{-# NOINLINE allcharsST2 #-}
allcharsST2 :: IUT.ShortText
allcharsST2 = case IUT.fromByteString allcharsBS2 of
  Just st -> st
  Nothing -> error "Bench.allcharsST2: fromByteString rejected allcharsBS2"

-- | 'allcharsST2' round-tripped through 'IUT.toByteString' and
-- 'IUT.fromByteString'; used as the argument of the @"== 2b"@ benchmark.
--
-- NOTE(review): presumably the round trip exists to obtain a value that is
-- equal to, but a separate copy of, 'allcharsST2' -- confirm.
--
-- A 'Nothing' from 'IUT.fromByteString' aborts with a descriptive message
-- instead of an anonymous pattern-match failure.
{-# NOINLINE allcharsST2' #-}
allcharsST2' :: IUT.ShortText
allcharsST2' = case IUT.fromByteString (IUT.toByteString allcharsST2) of
  Just st -> st
  Nothing -> error "Bench.allcharsST2': fromByteString rejected round-tripped allcharsST2"

-- | 1024 copies of 'allcharsST1' concatenated with 'mconcat'.
{-# NOINLINE allAscii128K #-}
allAscii128K :: IUT.ShortText
allAscii128K = mconcat $ replicate 1024 allcharsST1

-- | Benchmark entry point.
--
-- Evaluates each shared 'IUT.ShortText' input once with 'evaluate', then
-- hands the benchmark groups to Criterion's 'defaultMain'.
main :: IO ()
main = do
  -- Presumably done so that constructing the inputs is not attributed to
  -- whichever benchmark happens to touch them first.
  mapM_ evaluate
    [ allcharsST1
    , allcharsST1'
    , allcharsST2
    , allcharsST2'
    , allAscii128K
    ]

  defaultMain
    [ bgroup "singleton" singletonBenches
    , bgroup "toString" toStringBenches
    , bgroup "length" lengthBenches
    , bgroup "==" equalityBenches
    , bgroup "isAscii" isAsciiBenches
    , bgroup "isValidUtf8" validationBenches
    ]
  where
    -- Building a one-character string, ShortText versus Text.
    singletonBenches =
      [ bench "'a' :: ShortText" $ whnf IUT.singleton 'a'
      , bench "'a' :: Text" $ whnf T.singleton 'a'
      , bench "U+10FFFF :: ShortText" $ whnf IUT.singleton '\x10ffff'
      , bench "U+10FFFF :: Text" $ whnf T.singleton '\x10ffff'
      ]

    -- Conversion to String, forced to normal form; the "(Text)" variants
    -- unpack the result of 'IUT.toText' on the same inputs.
    toStringBenches =
      [ bench "t1" $ nf IUT.toString allcharsST1
      , bench "t2" $ nf IUT.toString allcharsST2
      , bench "t3" $ nf IUT.toString allAscii128K
      , bench "t1 (Text)" $ nf T.unpack (IUT.toText allcharsST1)
      , bench "t2 (Text)" $ nf T.unpack (IUT.toText allcharsST2)
      , bench "t3 (Text)" $ nf T.unpack (IUT.toText allAscii128K)
      ]

    lengthBenches =
      [ bench "1" $ whnf IUT.length allcharsST1
      , bench "2" $ whnf IUT.length allcharsST2
      , bench "3" $ whnf IUT.length allAscii128K
      ]

    -- "a" cases compare a value with itself, "b" cases with its
    -- round-tripped counterpart.
    equalityBenches =
      [ bench "== 1a" $ whnf (== allcharsST1) allcharsST1
      , bench "== 1b" $ whnf (== allcharsST1) allcharsST1'
      , bench "== 2a" $ whnf (== allcharsST2) allcharsST2
      , bench "== 2b" $ whnf (== allcharsST2) allcharsST2'
      ]

    isAsciiBenches =
      [ bench "isAscii 1" $ whnf IUT.isAscii allcharsST1
      , bench "isAscii 2" $ whnf IUT.isAscii allcharsST2
      , bench "isAscii 3" $ whnf IUT.isAscii allAscii128K
      ]

    validationBenches =
      [ bench "isValidUtf8 1" $ whnf IUT.isValidUtf8 allcharsST1
      , bench "fromByteString 1" $ whnf IUT.fromByteString allcharsBS1
      , bench "isValidUtf8 2" $ whnf IUT.isValidUtf8 allcharsST2
      , bench "fromByteString 2" $ whnf IUT.fromByteString allcharsBS2
      ]

2 changes: 2 additions & 0 deletions src-bench/cabal.project
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Project consisting of the single package located in this directory.
packages: .
-- The 'benchmarks' setting below is left disabled (commented out).
-- benchmarks: True
86 changes: 86 additions & 0 deletions src-bench/text-short-bench.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
cabal-version: 2.0

name: text-short-bench
version: 0.1.2
synopsis: Memory-efficient representation of Unicode text strings
license: BSD3
license-file: LICENSE
author: Herbert Valerio Riedel
maintainer: [email protected]
bug-reports: https://github.com/hvr/text-short/issues
category: Data
build-type: Simple
description: This package provides the 'ShortText' type which is suitable for keeping many short strings in memory. This is similar to how 'ShortByteString' relates to 'ByteString'.
.
The main difference between 'Text' and 'ShortText' is that 'ShortText' uses UTF-8 instead of UTF-16 internally and also doesn't support slicing (thereby saving 2 words). Consequently, the memory footprint of a (boxed) 'ShortText' value is 4 words (2 words when unboxed) plus the length of the UTF-8 encoded payload.

tested-with: GHC==8.2.2, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4
extra-source-files: ChangeLog.md

Source-Repository head
Type: git
Location: https://github.com/hvr/text-short.git

-- Manual flag, off by default. The library stanza consults it: when set,
-- GHC is given -fno-ignore-asserts; otherwise the C sources are compiled
-- with -DNDEBUG=1.
flag asserts
description: Enable runtime-checks via @assert@
default: False
manual: True

-- Named library that builds the sources found in the parent directory
-- (../src, ../cbits and a GHC-version-specific source dir). It exposes
-- Data.Text.Short.Internal, the module Bench.hs imports, and is the
-- 'text-short-iut' dependency of the executable stanza.
library text-short-iut
exposed-modules: Data.Text.Short
Data.Text.Short.Partial
Data.Text.Short.Unsafe

exposed-modules: Data.Text.Short.Internal

build-depends: base >= 4.7 && < 4.11
, bytestring >= 0.10.4 && < 0.11
, hashable >= 1.2.6 && < 1.3
, deepseq >= 1.3 && < 1.5
, text >= 1.0 && < 1.3
, binary >= 0.7.1 && < 0.9
, ghc-prim >= 0.3.1 && < 0.6

-- semigroups is only added as a dependency for GHC older than 8.0.
if !impl(ghc >= 8.0)
build-depends: semigroups >= 0.18.2 && < 0.19

-- GHC version specific PrimOps
-- GHC >= 8.4 takes sources from ../src-ghc804; older compilers take them
-- from ../src-ghc708 and additionally compile ../cbits/memcmp.c.
if impl(ghc >= 8.4)
hs-source-dirs: ../src-ghc804
else
c-sources: ../cbits/memcmp.c
hs-source-dirs: ../src-ghc708
other-modules: PrimOps

hs-source-dirs: ../src

default-language: Haskell2010
other-extensions: CPP
, GeneralizedNewtypeDeriving
, MagicHash
, UnliftedFFITypes
, Trustworthy
, Unsafe

c-sources: ../cbits/cbits.c

-- With the 'asserts' flag set, GHC is told not to ignore asserts;
-- without it, the C sources are compiled with NDEBUG defined.
if flag(asserts)
ghc-options: -fno-ignore-asserts
else
cc-options: -DNDEBUG=1

ghc-options: -Wall
cc-options: -O3 -Wall

-- Benchmark driver: compiles Bench.hs from this directory against the
-- 'text-short-iut' library defined in this same file.
-- NOTE(review): the commented-out 'type:' line looks like a leftover from a
-- 'benchmark' stanza -- confirm.
executable text-short-bench
default-language: Haskell2010
-- type: exitcode-stdio-1.0
hs-source-dirs: .
main-is: Bench.hs

build-depends: base
, binary
, text
, text-short-iut
-- deps which don't inherit constraints from library stanza:
, criterion >= 1.3.0.0 && < 1.4

0 comments on commit 1f452d9

Please sign in to comment.