diff --git a/System/OsPath/Data/ByteString/Short/Internal.hs b/System/OsPath/Data/ByteString/Short/Internal.hs index 493b447b..ec1fc829 100644 --- a/System/OsPath/Data/ByteString/Short/Internal.hs +++ b/System/OsPath/Data/ByteString/Short/Internal.hs @@ -4,6 +4,7 @@ {-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE UnliftedFFITypes #-} -- | -- Module : System.OsPath.Data.ByteString.Short.Internal @@ -25,6 +26,13 @@ import Data.ByteString.Short.Internal (ShortByteString(..), length) #if !MIN_VERSION_base(4,11,0) import Data.Semigroup ( Semigroup((<>)) ) +import Foreign.C.Types + ( CSize(..) + , CInt(..) + ) +import Data.ByteString.Internal + ( accursedUnutterablePerformIO + ) #endif #if !MIN_VERSION_bytestring(0,10,9) import Foreign.Marshal.Alloc (allocaBytes) @@ -441,3 +449,29 @@ errorEmptySBS fun = moduleError fun "empty ShortByteString" moduleError :: HasCallStack => String -> String -> a moduleError fun msg = error (moduleErrorMsg fun msg) {-# NOINLINE moduleError #-} + +compareByteArraysOff :: BA -- ^ array 1 + -> Int -- ^ offset for array 1 + -> BA -- ^ array 2 + -> Int -- ^ offset for array 2 + -> Int -- ^ length to compare + -> Int -- ^ like memcmp +#if MIN_VERSION_base(4,11,0) +compareByteArraysOff (BA# ba1#) (I# ba1off#) (BA# ba2#) (I# ba2off#) (I# len#) = + I# (compareByteArrays# ba1# ba1off# ba2# ba2off# len#) +#else +compareByteArraysOff (BA# ba1#) ba1off (BA# ba2#) ba2off len = + assert (ba1off + len <= (I# (sizeofByteArray# ba1#))) + $ assert (ba2off + len <= (I# (sizeofByteArray# ba2#))) + $ fromIntegral $ accursedUnutterablePerformIO $ + c_memcmp_ByteArray ba1# + ba1off + ba2# + ba2off + (fromIntegral len) + + +foreign import ccall unsafe "static sbs_memcmp_off" + c_memcmp_ByteArray :: ByteArray# -> Int -> ByteArray# -> Int -> CSize -> IO CInt +#endif + diff --git a/System/OsPath/Data/ByteString/Short/Word16.hs b/System/OsPath/Data/ByteString/Short/Word16.hs index e611695b..6ad8134b 100644 --- a/System/OsPath/Data/ByteString/Short/Word16.hs +++ b/System/OsPath/Data/ByteString/Short/Word16.hs @@ -2,8 +2,10 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE MagicHash #-} {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} @@ -143,10 +145,11 @@ module System.OsPath.Data.ByteString.Short.Word16 ( useAsCWStringLen ) where -import System.OsPath.Data.ByteString.Short ( append, intercalate, concat, stripSuffix, stripPrefix, isInfixOf, isPrefixOf, isSuffixOf, breakSubstring, length, empty, null, ShortByteString(..), fromShort, toShort ) +import System.OsPath.Data.ByteString.Short ( append, intercalate, concat, stripSuffix, stripPrefix, isPrefixOf, isSuffixOf, length, empty, null, ShortByteString(..), fromShort, toShort ) import System.OsPath.Data.ByteString.Short.Internal import Data.Bits - ( shiftR ) + ( shiftR + ) import Data.Word import Prelude hiding ( Foldable(..) @@ -172,6 +175,7 @@ import Prelude hiding import qualified Data.Foldable as Foldable import GHC.ST ( ST ) import GHC.Stack ( HasCallStack ) +import GHC.Exts ( inline ) import qualified Data.ByteString.Short.Internal as BS import qualified Data.List as List @@ -647,6 +651,28 @@ splitWith p = \(assertEven -> sbs) -> if | otherwise -> a : go (tail b) +-- | Check whether one string is a substring of another. +isInfixOf :: ShortByteString -> ShortByteString -> Bool +isInfixOf sbs = \s -> null sbs || not (null $ snd $ GHC.Exts.inline breakSubstring sbs s) + + +-- algorithm: https://github.com/haskell/filepath/issues/195#issuecomment-1605633713 +breakSubstring :: ShortByteString -- ^ String to search for + -> ShortByteString -- ^ String to search in + -> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring +breakSubstring bPat@(asBA -> pat) bInp@(asBA -> inp) = go 0 + where + lpat = BS.length bPat + linp = BS.length bInp + go ix + | let ix' = ix * 2 + , linp >= ix' + lpat = + if | compareByteArraysOff pat 0 inp ix' lpat == 0 -> splitAt ix bInp + | otherwise -> go (ix + 1) + | otherwise + = (bInp, mempty) + + -- --------------------------------------------------------------------- -- Reducing 'ByteString's diff --git a/tests/bytestring-tests/Properties/Common.hs b/tests/bytestring-tests/Properties/Common.hs index 321cbe6c..c5ef566a 100644 --- a/tests/bytestring-tests/Properties/Common.hs +++ b/tests/bytestring-tests/Properties/Common.hs @@ -21,6 +21,7 @@ module Properties.ShortByteString.Word16 (tests) where import System.OsPath.Data.ByteString.Short.Internal (_nul, isSpace) import qualified System.OsPath.Data.ByteString.Short.Word16 as B +import qualified System.OsPath.Data.ByteString.Short as BS #else module Properties.ShortByteString (tests) where import qualified System.OsPath.Data.ByteString.Short as B @@ -148,6 +149,28 @@ tests = , ("mempty []", once $ B.unpack mempty === []) +#ifdef WORD16 + , ("isInfixOf works correctly under UTF16", + once $ + let foo = BS.pack [0xbb, 0x03] + foo' = BS.pack [0xd2, 0xbb] + bar = BS.pack [0xd2, 0xbb, 0x03, 0xad] + bar' = BS.pack [0xd2, 0xbb, 0x03, 0xad, 0xd2, 0xbb, 0x03, 0xad, 0xbb, 0x03, 0x00, 0x00] + in [B.isInfixOf foo bar, B.isInfixOf foo' bar, B.isInfixOf foo bar'] === [False, True, True] + ) +#endif + , ("break breakSubstring", + property $ \(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x + ) + , ("breakSubstring", + property $ \x y -> not (B.null x) ==> B.null (snd (B.breakSubstring x y)) === not (B.isInfixOf x y) + ) + , ("breakSubstring empty", + property $ \x -> B.breakSubstring B.empty x === (B.empty, x) + ) + , ("isInfixOf", + property $ \x y -> B.isInfixOf x y === L.isInfixOf (B.unpack x) (B.unpack y)) + , ("mconcat" , property $ \xs -> B.unpack (mconcat xs) === mconcat (map B.unpack xs)) , ("mconcat [x,x]" ,