Skip to content

Commit

Permalink
Use internal Eq/Ord instances
Browse files Browse the repository at this point in the history
  • Loading branch information
hvr committed Jan 18, 2018
1 parent 230437b commit d228266
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 3 deletions.
12 changes: 12 additions & 0 deletions cbits/memcmp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include <string.h>

int
hs_text_short_memcmp(const void *s1, const size_t s1ofs, const void *s2, const size_t s2ofs, const size_t n)
{
if (!n) return 0;

const void *s1_ = s1+s1ofs;
const void *s2_ = s2+s2ofs;

return (s1_ == s2_) ? 0 : memcmp(s1_, s2_, n);
}
23 changes: 23 additions & 0 deletions src-ghc708/PrimOps.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}

module PrimOps ( compareByteArrays# ) where

import Foreign.C.Types (CInt (..), CSize (..))
import GHC.Exts (Int (I#))
import GHC.Exts (ByteArray#, Int#)
import System.IO.Unsafe (unsafeDupablePerformIO)

-- | Emulate GHC 8.4's 'GHC.Prim.compareByteArrays#'
compareByteArrays# :: ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
compareByteArrays# ba1# ofs1# ba2# ofs2# n#
= unI (fromIntegral (unsafeDupablePerformIO (c_memcmp ba1# ofs1 ba2# ofs2 n)))
where
unI (I# i#) = i#
ofs1 = fromIntegral (I# ofs1#)
ofs2 = fromIntegral (I# ofs2#)
n = fromIntegral (I# n#)

foreign import ccall unsafe "hs_text_short_memcmp"
c_memcmp :: ByteArray# -> CSize -> ByteArray# -> CSize -> CSize -> IO CInt
2 changes: 2 additions & 0 deletions src-test/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ qcProps :: TestTree
qcProps = testGroup "Properties"
[ QC.testProperty "length/fromText" $ \t -> IUT.length (IUT.fromText t) == T.length t
, QC.testProperty "length/fromString" $ \s -> IUT.length (IUT.fromString s) == length s
, QC.testProperty "compare" $ \t1 t2 -> IUT.fromText t1 `compare` IUT.fromText t2 == t1 `compare` t2
, QC.testProperty "(==)" $ \t1 t2 -> (IUT.fromText t1 == IUT.fromText t2) == (t1 == t2)
, QC.testProperty "(!?)" $ \t -> let t' = IUT.fromText t
in mapMaybe (t' IUT.!?) [-5 .. 5+T.length t ] == T.unpack t
, QC.testProperty "toText.fromText" $ \t -> (IUT.toText . IUT.fromText) t == t
Expand Down
43 changes: 40 additions & 3 deletions src/Data/Text/Short/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
Expand Down Expand Up @@ -49,7 +50,6 @@ module Data.Text.Short.Internal
) where

import Control.DeepSeq (NFData)
-- import Control.Exception as E
import Data.Binary
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BB
Expand All @@ -63,15 +63,18 @@ import qualified Data.String as S
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Foreign.C
import GHC.Exts (ByteArray#, Int (I#),
import GHC.Exts (ByteArray#, Int (I#), Int#,
MutableByteArray#,
copyByteArray#, newByteArray#,
sizeofByteArray#,
unsafeFreezeByteArray#)
import qualified GHC.Foreign as GHC
import GHC.IO.Encoding
import GHC.ST
import System.IO.Unsafe

import qualified PrimOps

-- | A compact representation of Unicode strings.
--
-- This type relates to 'T.Text' as 'ShortByteString' relates to 'BS.ByteString' by providing a more compact type. Please consult the documentation of "Data.ByteString.Short" for more information.
Expand All @@ -82,7 +85,35 @@ import System.IO.Unsafe
-- It can be shown that for realistic data <http://utf8everywhere.org/#asian UTF-16 has a space overhead of 50% over UTF-8>.
--
newtype ShortText = ShortText ShortByteString
deriving (Eq,Ord,Monoid,Semigroup,Hashable,NFData)
deriving (Monoid,Semigroup,Hashable,NFData)

instance Eq ShortText where
{-# INLINE (==) #-}
(==) x y
| lx /= ly = False
| lx == 0 = True
| otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) 0# n# of
0# -> True
_ -> False
where
!lx@(I# n#) = toLength x
!ly = toLength y

instance Ord ShortText where
compare t1 t2
| n == 0 = compare n1 n2
| otherwise = case PrimOps.compareByteArrays# ba1# 0# ba2# 0# n# of
r# | I# r# < 0 -> LT
| I# r# > 0 -> GT
| n1 < n2 -> LT
| n1 > n2 -> GT
| otherwise -> EQ
where
ba1# = toByteArray# t1
ba2# = toByteArray# t2
!n1 = toLength t1
!n2 = toLength t2
!n@(I# n#) = n1 `min` n2

instance Show ShortText where
showsPrec p (ShortText b) = showsPrec p (decodeStringShort' utf8 b)
Expand Down Expand Up @@ -138,6 +169,12 @@ foreign import ccall unsafe "hs_text_short_is_ascii" c_text_short_is_ascii :: By
toCSize :: ShortText -> CSize
toCSize = fromIntegral . BSS.length . toShortByteString

toLength :: ShortText -> Int
toLength st = I# (toLength# st)

toLength# :: ShortText -> Int#
toLength# st = sizeofByteArray# (toByteArray# st)

toByteArray# :: ShortText -> ByteArray#
toByteArray# (ShortText (BSSI.SBS ba#)) = ba#

Expand Down
4 changes: 4 additions & 0 deletions text-short.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ library
if !impl(ghc >= 8.0)
build-depends: semigroups >= 0.18.2 && < 0.19

c-sources: cbits/memcmp.c
hs-source-dirs: src-ghc708
other-modules: PrimOps

hs-source-dirs: src

default-language: Haskell2010
Expand Down

0 comments on commit d228266

Please sign in to comment.