Add simple benchmark suite

IowaFP · Jan 28, 2018 · 1f452d9 · 1f452d9
1 parent e7efc18
commit 1f452d9
Show file tree

Hide file tree

Showing 3 changed files with 178 additions and 0 deletions.
diff --git a/src-bench/Bench.hs b/src-bench/Bench.hs
@@ -0,0 +1,90 @@
+module Main where
+
+import           Criterion.Main
+
+import           Control.Exception
+import qualified Data.Text                as T
+import qualified Data.Text.Encoding       as T
+import qualified Data.Text.Short.Internal as IUT
+
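+-- Naive recursive Fibonacci. Not referenced by any benchmark in 'main' below (apparently left over from the criterion tutorial example).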
+fib m | m < 0     = error "negative!"  -- negative arguments abort with a runtime error
+      | otherwise = go m
+  where
+    go 0 = 0
+    go 1 = 1
+    go n = go (n-1) + go (n-2)  -- two recursive calls per step, no memoisation
+
+{-# NOINLINE allcharsBS1 #-}
+allcharsBS1 = T.encodeUtf8 (T.pack ['\x0'..'\x7f'])  -- UTF-8 bytes of the 128 code points U+0000..U+007F
+
+{-# NOINLINE allcharsST1 #-}
+Just allcharsST1 = IUT.fromByteString allcharsBS1  -- pattern binding: forcing this value fails if fromByteString yields Nothing
+
+{-# NOINLINE allcharsST1' #-}
+Just allcharsST1' = IUT.fromByteString (IUT.toByteString allcharsST1)  -- allcharsST1 round-tripped through toByteString/fromByteString; compared against it in "== 1b"
+
+{-# NOINLINE allcharsBS2 #-}
+allcharsBS2 = T.encodeUtf8 (T.pack $ ['\x0'..'\xd7ff'] ++ ['\xe000'..'\x10ffff'])  -- UTF-8 bytes of U+0000..U+10FFFF, skipping the surrogate range U+D800..U+DFFF
+
+{-# NOINLINE allcharsST2 #-}
+Just allcharsST2 = IUT.fromByteString allcharsBS2  -- same pattern binding as allcharsST1, over the larger input
+
+{-# NOINLINE allcharsST2' #-}
+Just allcharsST2' = IUT.fromByteString (IUT.toByteString allcharsST2)  -- round-tripped allcharsST2; compared against it in "== 2b"
+
+{-# NOINLINE allAscii128K #-}
+allAscii128K = mconcat (replicate 1024 allcharsST1)  -- 1024 concatenated copies of the 128-character allcharsST1
+
+-- Our benchmark harness.
+main = do
+  evaluate allcharsST1  -- force each shared fixture to WHNF up front (presumably so its construction is not timed inside a benchmark; confirm)
+  evaluate allcharsST1'
+  evaluate allcharsST2
+  evaluate allcharsST2'
+  evaluate allAscii128K
+
+  defaultMain
+    [ bgroup "singleton"  -- ShortText vs. Text singleton construction, for 'a' and for U+10FFFF
+      [ bench "'a' :: ShortText" $ whnf IUT.singleton 'a'
+      , bench "'a' :: Text" $ whnf (T.singleton) 'a'
+      , bench "U+10FFFF :: ShortText" $ whnf (IUT.singleton) '\x10ffff'
+      , bench "U+10FFFF :: Text" $ whnf (T.singleton) '\x10ffff'
+      ]
+    , bgroup "toString"  -- this group uses nf, so the whole result is forced rather than only its outermost constructor
+      [ bench "t1" $ nf IUT.toString allcharsST1
+      , bench "t2" $ nf IUT.toString allcharsST2
+      , bench "t3" $ nf IUT.toString allAscii128K
+      , bench "t1 (Text)" $ nf T.unpack (IUT.toText allcharsST1)  -- same fixtures converted with toText and unpacked by Data.Text, for comparison
+      , bench "t2 (Text)" $ nf T.unpack (IUT.toText allcharsST2)
+      , bench "t3 (Text)" $ nf T.unpack (IUT.toText allAscii128K)
+      ]
+
+    , bgroup "length"  -- IUT.length over the three ShortText fixtures
+      [ bench "1" $ whnf IUT.length allcharsST1
+      , bench "2" $ whnf IUT.length allcharsST2
+      , bench "3" $ whnf IUT.length allAscii128K
+      ]
+
+    , bgroup "=="  -- "a" cases compare a fixture with itself; "b" cases compare it with its round-tripped counterpart
+      [ bench "== 1a"  $ whnf (== allcharsST1) allcharsST1
+      , bench "== 1b"  $ whnf (== allcharsST1) allcharsST1'
+      , bench "== 2a"  $ whnf (== allcharsST2) allcharsST2
+      , bench "== 2b"  $ whnf (== allcharsST2) allcharsST2'
+      ]
+
+    , bgroup "isAscii"  -- IUT.isAscii over the three ShortText fixtures
+      [ bench "isAscii 1" $ whnf IUT.isAscii allcharsST1
+      , bench "isAscii 2" $ whnf IUT.isAscii allcharsST2
+      , bench "isAscii 3" $ whnf IUT.isAscii allAscii128K
+      ]
+
+    , bgroup "isValidUtf8"
+      [ bench "isValidUtf8 1" $ whnf IUT.isValidUtf8 allcharsST1
+      , bench "fromByteString 1" $ whnf IUT.fromByteString allcharsBS1  -- grouped with isValidUtf8; presumably fromByteString performs the same validation (confirm)
+
+      , bench "isValidUtf8 2" $ whnf IUT.isValidUtf8 allcharsST2
+      , bench "fromByteString 2" $ whnf IUT.fromByteString allcharsBS2
+      ]
+    ]
+
diff --git a/src-bench/cabal.project b/src-bench/cabal.project
@@ -0,0 +1,2 @@
+packages: .
+-- benchmarks: True
diff --git a/src-bench/text-short-bench.cabal b/src-bench/text-short-bench.cabal
@@ -0,0 +1,86 @@
+cabal-version:       2.0
+
+name:                text-short-bench
+version:             0.1.2
+synopsis:            Memory-efficient representation of Unicode text strings
+license:             BSD3
+license-file:        LICENSE
+author:              Herbert Valerio Riedel
+maintainer:          [email protected]
+bug-reports:         https://github.com/hvr/text-short/issues
+category:            Data
+build-type:          Simple
+description:         This package provides the 'ShortText' type which is suitable for keeping many short strings in memory. This is similar to how 'ShortByteString' relates to 'ByteString'.
+                     .
+                     The main difference between 'Text' and 'ShortText' is that 'ShortText' uses UTF-8 instead of UTF-16 internally and also doesn't support slicing (thereby saving 2 words). Consequently, the memory footprint of a (boxed) 'ShortText' value is 4 words (2 words when unboxed) plus the length of the UTF-8 encoded payload.
+
+tested-with:         GHC==8.2.2, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4
+extra-source-files:  ChangeLog.md
+
+Source-Repository head
+    Type:              git
+    Location:          https://github.com/hvr/text-short.git
+
+flag asserts
+  description: Enable runtime-checks via @assert@
+  default: False
+  manual: True
+
+library text-short-iut
+  exposed-modules:     Data.Text.Short
+                       Data.Text.Short.Partial
+                       Data.Text.Short.Unsafe
+
+  exposed-modules:     Data.Text.Short.Internal
+
+  build-depends:       base        >= 4.7    && < 4.11
+                     , bytestring  >= 0.10.4 && < 0.11
+                     , hashable    >= 1.2.6  && < 1.3
+                     , deepseq     >= 1.3    && < 1.5
+                     , text        >= 1.0    && < 1.3
+                     , binary      >= 0.7.1  && < 0.9
+                     , ghc-prim    >= 0.3.1  && < 0.6
+
+  if !impl(ghc >= 8.0)
+     build-depends: semigroups >= 0.18.2 && < 0.19
+
+  -- GHC version specific PrimOps
+  if impl(ghc >= 8.4)
+     hs-source-dirs:   ../src-ghc804
+  else
+     c-sources:        ../cbits/memcmp.c
+     hs-source-dirs:   ../src-ghc708
+  other-modules:       PrimOps
+
+  hs-source-dirs:      ../src
+
+  default-language:    Haskell2010
+  other-extensions:    CPP
+                     , GeneralizedNewtypeDeriving
+                     , MagicHash
+                     , UnliftedFFITypes
+                     , Trustworthy
+                     , Unsafe
+
+  c-sources: ../cbits/cbits.c
+
+  if flag(asserts)
+     ghc-options: -fno-ignore-asserts
+  else
+     cc-options: -DNDEBUG=1
+
+  ghc-options: -Wall
+  cc-options: -O3 -Wall
+
+executable text-short-bench
+  default-language:    Haskell2010
+--  type:                exitcode-stdio-1.0
+  hs-source-dirs:      .
+  main-is:             Bench.hs
+
+  build-depends: base
+               , binary
+               , text
+               , text-short-iut
+                 -- deps which don't inherit constraints from library stanza:
+               , criterion >= 1.3.0.0 && < 1.4