From d22826607417a434b9b006519ec91d776690d5e6 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Thu, 18 Jan 2018 22:39:29 +0100 Subject: [PATCH] Use internal Eq/Ord instances --- cbits/memcmp.c | 12 +++++++++ src-ghc708/PrimOps.hs | 23 ++++++++++++++++++ src-test/Tests.hs | 2 ++ src/Data/Text/Short/Internal.hs | 43 ++++++++++++++++++++++++++++++--- text-short.cabal | 4 +++ 5 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 cbits/memcmp.c create mode 100644 src-ghc708/PrimOps.hs diff --git a/cbits/memcmp.c b/cbits/memcmp.c new file mode 100644 index 0000000..4e5042f --- /dev/null +++ b/cbits/memcmp.c @@ -0,0 +1,12 @@ +#include + +int +hs_text_short_memcmp(const void *s1, const size_t s1ofs, const void *s2, const size_t s2ofs, const size_t n) +{ + if (!n) return 0; + + const void *s1_ = s1+s1ofs; + const void *s2_ = s2+s2ofs; + + return (s1_ == s2_) ? 0 : memcmp(s1_, s2_, n); +} diff --git a/src-ghc708/PrimOps.hs b/src-ghc708/PrimOps.hs new file mode 100644 index 0000000..601bc07 --- /dev/null +++ b/src-ghc708/PrimOps.hs @@ -0,0 +1,23 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE Unsafe #-} + +module PrimOps ( compareByteArrays# ) where + +import Foreign.C.Types (CInt (..), CSize (..)) +import GHC.Exts (Int (I#)) +import GHC.Exts (ByteArray#, Int#) +import System.IO.Unsafe (unsafeDupablePerformIO) + +-- | Emulate GHC 8.4's 'GHC.Prim.compareByteArrays#' +compareByteArrays# :: ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int# +compareByteArrays# ba1# ofs1# ba2# ofs2# n# + = unI (fromIntegral (unsafeDupablePerformIO (c_memcmp ba1# ofs1 ba2# ofs2 n))) + where + unI (I# i#) = i# + ofs1 = fromIntegral (I# ofs1#) + ofs2 = fromIntegral (I# ofs2#) + n = fromIntegral (I# n#) + +foreign import ccall unsafe "hs_text_short_memcmp" + c_memcmp :: ByteArray# -> CSize -> ByteArray# -> CSize -> CSize -> IO CInt diff --git a/src-test/Tests.hs b/src-test/Tests.hs index e43745a..f0ff22d 100644 --- a/src-test/Tests.hs +++ b/src-test/Tests.hs @@ -27,6 +27,8 @@ qcProps :: TestTree qcProps = testGroup "Properties" [ QC.testProperty "length/fromText" $ \t -> IUT.length (IUT.fromText t) == T.length t , QC.testProperty "length/fromString" $ \s -> IUT.length (IUT.fromString s) == length s + , QC.testProperty "compare" $ \t1 t2 -> IUT.fromText t1 `compare` IUT.fromText t2 == t1 `compare` t2 + , QC.testProperty "(==)" $ \t1 t2 -> (IUT.fromText t1 == IUT.fromText t2) == (t1 == t2) , QC.testProperty "(!?)" $ \t -> let t' = IUT.fromText t in mapMaybe (t' IUT.!?) [-5 .. 5+T.length t ] == T.unpack t , QC.testProperty "toText.fromText" $ \t -> (IUT.toText . IUT.fromText) t == t diff --git a/src/Data/Text/Short/Internal.hs b/src/Data/Text/Short/Internal.hs index 9d48cca..2c97aea 100644 --- a/src/Data/Text/Short/Internal.hs +++ b/src/Data/Text/Short/Internal.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MagicHash #-} @@ -49,7 +50,6 @@ module Data.Text.Short.Internal ) where import Control.DeepSeq (NFData) --- import Control.Exception as E import Data.Binary import qualified Data.ByteString as BS import qualified Data.ByteString.Builder as BB @@ -63,15 +63,18 @@ import qualified Data.String as S import qualified Data.Text as T import qualified Data.Text.Encoding as T import Foreign.C -import GHC.Exts (ByteArray#, Int (I#), +import GHC.Exts (ByteArray#, Int (I#), Int#, MutableByteArray#, copyByteArray#, newByteArray#, + sizeofByteArray#, unsafeFreezeByteArray#) import qualified GHC.Foreign as GHC import GHC.IO.Encoding import GHC.ST import System.IO.Unsafe +import qualified PrimOps + -- | A compact representation of Unicode strings. -- -- This type relates to 'T.Text' as 'ShortByteString' relates to 'BS.ByteString' by providing a more compact type. Please consult the documentation of "Data.ByteString.Short" for more information. @@ -82,7 +85,35 @@ import System.IO.Unsafe -- It can be shown that for realistic data . -- newtype ShortText = ShortText ShortByteString - deriving (Eq,Ord,Monoid,Semigroup,Hashable,NFData) + deriving (Monoid,Semigroup,Hashable,NFData) + +instance Eq ShortText where + {-# INLINE (==) #-} + (==) x y + | lx /= ly = False + | lx == 0 = True + | otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) 0# n# of + 0# -> True + _ -> False + where + !lx@(I# n#) = toLength x + !ly = toLength y + +instance Ord ShortText where + compare t1 t2 + | n == 0 = compare n1 n2 + | otherwise = case PrimOps.compareByteArrays# ba1# 0# ba2# 0# n# of + r# | I# r# < 0 -> LT + | I# r# > 0 -> GT + | n1 < n2 -> LT + | n1 > n2 -> GT + | otherwise -> EQ + where + ba1# = toByteArray# t1 + ba2# = toByteArray# t2 + !n1 = toLength t1 + !n2 = toLength t2 + !n@(I# n#) = n1 `min` n2 instance Show ShortText where showsPrec p (ShortText b) = showsPrec p (decodeStringShort' utf8 b) @@ -138,6 +169,12 @@ foreign import ccall unsafe "hs_text_short_is_ascii" c_text_short_is_ascii :: By toCSize :: ShortText -> CSize toCSize = fromIntegral . BSS.length . toShortByteString +toLength :: ShortText -> Int +toLength st = I# (toLength# st) + +toLength# :: ShortText -> Int# +toLength# st = sizeofByteArray# (toByteArray# st) + toByteArray# :: ShortText -> ByteArray# toByteArray# (ShortText (BSSI.SBS ba#)) = ba# diff --git a/text-short.cabal b/text-short.cabal index 9f7778e..0ff5c71 100644 --- a/text-short.cabal +++ b/text-short.cabal @@ -37,6 +37,10 @@ library if !impl(ghc >= 8.0) build-depends: semigroups >= 0.18.2 && < 0.19 + c-sources: cbits/memcmp.c + hs-source-dirs: src-ghc708 + other-modules: PrimOps + hs-source-dirs: src default-language: Haskell2010