forked from haskell-hvr/text-short
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
178 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
module Main where | ||
|
||
import Criterion.Main | ||
|
||
import Control.Exception | ||
import qualified Data.Text as T | ||
import qualified Data.Text.Encoding as T | ||
import qualified Data.Text.Short.Internal as IUT | ||
|
||
-- Naive, doubly-recursive Fibonacci: fib 0 = 0, fib 1 = 1 and
-- fib n = fib (n-1) + fib (n-2) for larger n.  Calls 'error' with the
-- message "negative!" when given a negative argument.
--
-- NOTE(review): no benchmark registered in 'main' references this function;
-- it looks like a leftover from a benchmark template -- confirm before
-- relying on it.
fib :: (Ord a, Num a, Num b) => a -> b
fib m
  | m < 0     = error "negative!"
  | otherwise = go m
  where
    -- Plain recursion without memoisation; 'm' is already known to be
    -- non-negative when we get here.
    go k = case k of
      0 -> 0
      1 -> 1
      _ -> go (k - 1) + go (k - 2)
|
||
-- UTF-8 encoding of the 128 code points U+0000 .. U+007F, in ascending order.
-- Kept NOINLINE so the definition is not inlined at its use sites
-- (presumably so every benchmark shares one fixture value).
{-# NOINLINE allcharsBS1 #-}
allcharsBS1 = T.encodeUtf8 . T.pack $ asciiChars
  where
    asciiChars = ['\x00' .. '\x7f']
|
||
-- 'allcharsBS1' converted to a ShortText via 'IUT.fromByteString'.
--
-- 'IUT.fromByteString' returns a 'Maybe'; the 'Nothing' case is handled
-- explicitly (rather than through a partial @Just x = ...@ pattern binding)
-- so that an unexpected failure reports which fixture could not be built.
{-# NOINLINE allcharsST1 #-}
allcharsST1 =
  case IUT.fromByteString allcharsBS1 of
    Just st -> st
    Nothing -> error "allcharsST1: IUT.fromByteString allcharsBS1 returned Nothing"
|
||
-- 'allcharsST1' round-tripped through 'IUT.toByteString' and
-- 'IUT.fromByteString'.  The "==" benchmarks compare it against
-- 'allcharsST1' (presumably so that the two operands are equal in content
-- but are distinct values -- confirm).
--
-- The 'Nothing' case of 'IUT.fromByteString' is handled explicitly instead
-- of relying on a partial @Just x = ...@ pattern binding.
{-# NOINLINE allcharsST1' #-}
allcharsST1' =
  case IUT.fromByteString (IUT.toByteString allcharsST1) of
    Just st -> st
    Nothing -> error "allcharsST1': IUT.fromByteString (IUT.toByteString allcharsST1) returned Nothing"
|
||
-- UTF-8 encoding of the code points U+0000 .. U+D7FF followed by
-- U+E000 .. U+10FFFF, i.e. the whole 'Char' range except the surrogate
-- block U+D800 .. U+DFFF, in ascending order.
{-# NOINLINE allcharsBS2 #-}
allcharsBS2 = T.encodeUtf8 (T.pack (belowSurrogates ++ aboveSurrogates))
  where
    belowSurrogates = ['\x0' .. '\xd7ff']
    aboveSurrogates = ['\xe000' .. '\x10ffff']
|
||
-- 'allcharsBS2' converted to a ShortText via 'IUT.fromByteString'.
--
-- 'IUT.fromByteString' returns a 'Maybe'; the 'Nothing' case is handled
-- explicitly (rather than through a partial @Just x = ...@ pattern binding)
-- so that an unexpected failure reports which fixture could not be built.
{-# NOINLINE allcharsST2 #-}
allcharsST2 =
  case IUT.fromByteString allcharsBS2 of
    Just st -> st
    Nothing -> error "allcharsST2: IUT.fromByteString allcharsBS2 returned Nothing"
|
||
-- 'allcharsST2' round-tripped through 'IUT.toByteString' and
-- 'IUT.fromByteString'.  The "==" benchmarks compare it against
-- 'allcharsST2' (presumably so that the two operands are equal in content
-- but are distinct values -- confirm).
--
-- The 'Nothing' case of 'IUT.fromByteString' is handled explicitly instead
-- of relying on a partial @Just x = ...@ pattern binding.
{-# NOINLINE allcharsST2' #-}
allcharsST2' =
  case IUT.fromByteString (IUT.toByteString allcharsST2) of
    Just st -> st
    Nothing -> error "allcharsST2': IUT.fromByteString (IUT.toByteString allcharsST2) returned Nothing"
|
||
-- 1024 copies of 'allcharsST1' concatenated with 'mconcat'.  'allcharsST1'
-- is built from the 128 code points U+0000 .. U+007F, so this is
-- 1024 * 128 = 131072 characters of ASCII-range text.
{-# NOINLINE allAscii128K #-}
allAscii128K = mconcat $ replicate copies allcharsST1
  where
    copies = 1024
|
||
-- Benchmark entry point.
--
-- First forces each ShortText fixture to weak head normal form with
-- 'evaluate' (presumably so that constructing a fixture is not charged to
-- the first benchmark that touches it), then hands the benchmark groups to
-- criterion's 'defaultMain'.
main :: IO ()
main = do
  mapM_ evaluate
    [ allcharsST1
    , allcharsST1'
    , allcharsST2
    , allcharsST2'
    , allAscii128K
    ]

  defaultMain
    [ singletonBenchmarks
    , toStringBenchmarks
    , lengthBenchmarks
    , equalityBenchmarks
    , isAsciiBenchmarks
    , validationBenchmarks
    ]
  where
    -- Building a one-character value, ShortText vs. Text, for a 1-byte and
    -- a 4-byte (in UTF-8) code point.
    singletonBenchmarks = bgroup "singleton"
      [ bench "'a' :: ShortText"      (whnf IUT.singleton 'a')
      , bench "'a' :: Text"           (whnf T.singleton 'a')
      , bench "U+10FFFF :: ShortText" (whnf IUT.singleton '\x10ffff')
      , bench "U+10FFFF :: Text"      (whnf T.singleton '\x10ffff')
      ]

    -- Conversion to String, with the Text equivalent ('T.unpack') measured
    -- on the same fixtures converted through 'IUT.toText'.
    toStringBenchmarks = bgroup "toString"
      [ bench "t1"        (nf IUT.toString allcharsST1)
      , bench "t2"        (nf IUT.toString allcharsST2)
      , bench "t3"        (nf IUT.toString allAscii128K)
      , bench "t1 (Text)" (nf T.unpack (IUT.toText allcharsST1))
      , bench "t2 (Text)" (nf T.unpack (IUT.toText allcharsST2))
      , bench "t3 (Text)" (nf T.unpack (IUT.toText allAscii128K))
      ]

    lengthBenchmarks = bgroup "length"
      [ bench "1" (whnf IUT.length allcharsST1)
      , bench "2" (whnf IUT.length allcharsST2)
      , bench "3" (whnf IUT.length allAscii128K)
      ]

    -- "a" variants compare a fixture with itself; "b" variants compare it
    -- with its round-tripped counterpart.
    equalityBenchmarks = bgroup "=="
      [ bench "== 1a" (whnf (== allcharsST1) allcharsST1)
      , bench "== 1b" (whnf (== allcharsST1) allcharsST1')
      , bench "== 2a" (whnf (== allcharsST2) allcharsST2)
      , bench "== 2b" (whnf (== allcharsST2) allcharsST2')
      ]

    isAsciiBenchmarks = bgroup "isAscii"
      [ bench "isAscii 1" (whnf IUT.isAscii allcharsST1)
      , bench "isAscii 2" (whnf IUT.isAscii allcharsST2)
      , bench "isAscii 3" (whnf IUT.isAscii allAscii128K)
      ]

    -- 'IUT.isValidUtf8' on an existing ShortText next to
    -- 'IUT.fromByteString' on the corresponding raw bytes.
    validationBenchmarks = bgroup "isValidUtf8"
      [ bench "isValidUtf8 1"    (whnf IUT.isValidUtf8 allcharsST1)
      , bench "fromByteString 1" (whnf IUT.fromByteString allcharsBS1)
      , bench "isValidUtf8 2"    (whnf IUT.isValidUtf8 allcharsST2)
      , bench "fromByteString 2" (whnf IUT.fromByteString allcharsBS2)
      ]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
packages: . | ||
-- benchmarks: True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
cabal-version: 2.0 | ||
|
||
name: text-short-bench | ||
version: 0.1.2 | ||
synopsis: Memory-efficient representation of Unicode text strings | ||
license: BSD3 | ||
license-file: LICENSE | ||
author: Herbert Valerio Riedel | ||
maintainer: [email protected] | ||
bug-reports: https://github.com/hvr/text-short/issues | ||
category: Data | ||
build-type: Simple | ||
description: This package provides the 'ShortText' type which is suitable for keeping many short strings in memory. This is similar to how 'ShortByteString' relates to 'ByteString'. | ||
. | ||
The main difference between 'Text' and 'ShortText' is that 'ShortText' uses UTF-8 instead of UTF-16 internally and also doesn't support slicing (thereby saving 2 words). Consequently, the memory footprint of a (boxed) 'ShortText' value is 4 words (2 words when unboxed) plus the length of the UTF-8 encoded payload. | ||
|
||
tested-with: GHC==8.2.2, GHC==8.0.2, GHC==7.10.3, GHC==7.8.4 | ||
extra-source-files: ChangeLog.md | ||
|
||
Source-Repository head | ||
Type: git | ||
Location: https://github.com/hvr/text-short.git | ||
|
||
flag asserts | ||
description: Enable runtime-checks via @assert@ | ||
default: False | ||
manual: True | ||
|
||
library text-short-iut | ||
exposed-modules: Data.Text.Short | ||
Data.Text.Short.Partial | ||
Data.Text.Short.Unsafe | ||
|
||
exposed-modules: Data.Text.Short.Internal | ||
|
||
build-depends: base >= 4.7 && < 4.11 | ||
, bytestring >= 0.10.4 && < 0.11 | ||
, hashable >= 1.2.6 && < 1.3 | ||
, deepseq >= 1.3 && < 1.5 | ||
, text >= 1.0 && < 1.3 | ||
, binary >= 0.7.1 && < 0.9 | ||
, ghc-prim >= 0.3.1 && < 0.6 | ||
|
||
if !impl(ghc >= 8.0) | ||
build-depends: semigroups >= 0.18.2 && < 0.19 | ||
|
||
-- GHC version specific PrimOps | ||
if impl(ghc >= 8.4) | ||
hs-source-dirs: ../src-ghc804 | ||
else | ||
c-sources: ../cbits/memcmp.c | ||
hs-source-dirs: ../src-ghc708 | ||
other-modules: PrimOps | ||
|
||
hs-source-dirs: ../src | ||
|
||
default-language: Haskell2010 | ||
other-extensions: CPP | ||
, GeneralizedNewtypeDeriving | ||
, MagicHash | ||
, UnliftedFFITypes | ||
, Trustworthy | ||
, Unsafe | ||
|
||
c-sources: ../cbits/cbits.c | ||
|
||
if flag(asserts) | ||
ghc-options: -fno-ignore-asserts | ||
else | ||
cc-options: -DNDEBUG=1 | ||
|
||
ghc-options: -Wall | ||
cc-options: -O3 -Wall | ||
|
||
executable text-short-bench | ||
default-language: Haskell2010 | ||
-- type: exitcode-stdio-1.0 | ||
hs-source-dirs: . | ||
main-is: Bench.hs | ||
|
||
build-depends: base | ||
, binary | ||
, text | ||
, text-short-iut | ||
-- deps which don't inherit constraints from library stanza: | ||
, criterion >= 1.3.0.0 && < 1.4 |