diff --git a/Data/Attoparsec/Text/Buffer.hs b/Data/Attoparsec/Text/Buffer.hs index 47e72c37..9eabbaa1 100644 --- a/Data/Attoparsec/Text/Buffer.hs +++ b/Data/Attoparsec/Text/Buffer.hs @@ -32,7 +32,7 @@ module Data.Attoparsec.Text.Buffer , length , pappend , iter - , iter_ + , Data.Attoparsec.Text.Buffer.iter_ , substring , dropWord16 ) where @@ -44,8 +44,12 @@ import Data.Monoid as Mon (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Text () import Data.Text.Internal (Text(..)) +#if MIN_VERSION_text(2,0,0) +import Data.Text.Unsafe (iterArray, iter_) +#else import Data.Text.Internal.Encoding.Utf16 (chr2) import Data.Text.Internal.Unsafe.Char (unsafeChr) +#endif import Data.Text.Unsafe (Iter(..)) import Foreign.Storable (sizeOf) import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#) @@ -108,7 +112,11 @@ append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do let newgen = gen + 1 marr <- unsafeThaw arr0 writeGen marr newgen +#if MIN_VERSION_text(2,0,0) + A.copyI len1 marr (off0+len0) arr1 off1 +#else A.copyI marr (off0+len0) arr1 off1 (off0+newlen) +#endif arr2 <- A.unsafeFreeze marr return (Buf arr2 off0 newlen cap0 newgen) else do @@ -116,8 +124,13 @@ append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do newgen = 1 marr <- A.new (newcap + woff) writeGen marr newgen +#if MIN_VERSION_text(2,0,0) + A.copyI len0 marr woff arr0 off0 + A.copyI len1 marr (woff+len0) arr1 off1 +#else A.copyI marr woff arr0 off0 (woff+len0) A.copyI marr (woff+len0) arr1 off1 (woff+newlen) +#endif arr2 <- A.unsafeFreeze marr return (Buf arr2 woff newlen newcap newgen) @@ -142,6 +155,9 @@ dropWord16 s (Buf arr off len _ _) = -- array, returning the current character and the delta to add to give -- the next offset to iterate at. iter :: Buffer -> Int -> Iter +#if MIN_VERSION_text(2,0,0) +iter (Buf arr _ _ _ _) i = iterArray arr i +#else iter (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1 | otherwise = Iter (chr2 m n) 2 @@ -149,24 +165,44 @@ iter (Buf arr off _ _ _) i n = A.unsafeIndex arr k j = off + i k = j + 1 +#endif {-# INLINE iter #-} -- | /O(1)/ Iterate one step through a UTF-16 array, returning the -- delta to add to give the next offset to iterate at. iter_ :: Buffer -> Int -> Int +#if MIN_VERSION_text(2,0,0) +iter_ (Buf arr off len _ _) i = Data.Text.Unsafe.iter_ (Text arr off len) i +#else iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1 | otherwise = 2 where m = A.unsafeIndex arr (off+i) +#endif {-# INLINE iter_ #-} unsafeThaw :: A.Array -> ST s (A.MArray s) +#if MIN_VERSION_text(2,0,0) +unsafeThaw (A.ByteArray arr) = ST $ \s# -> + (# s#, A.MutableByteArray (unsafeCoerce# arr) #) +#else unsafeThaw A.Array{..} = ST $ \s# -> (# s#, A.MArray (unsafeCoerce# aBA) #) +#endif readGen :: A.Array -> Int +#if MIN_VERSION_text(2,0,0) +readGen (A.ByteArray a) = case indexIntArray# a 0# of r# -> I# r# +#else readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r# +#endif writeGen :: A.MArray s -> Int -> ST s () +#if MIN_VERSION_text(2,0,0) +writeGen (A.MutableByteArray a) (I# gen#) = ST $ \s0# -> + case writeIntArray# a 0# gen# s0# of + s1# -> (# s1#, () #) +#else writeGen a (I# gen#) = ST $ \s0# -> case writeIntArray# (A.maBA a) 0# gen# s0# of s1# -> (# s1#, () #) +#endif diff --git a/Data/Attoparsec/Text/Internal.hs b/Data/Attoparsec/Text/Internal.hs index 710d23ef..d2dd13e6 100644 --- a/Data/Attoparsec/Text/Internal.hs +++ b/Data/Attoparsec/Text/Internal.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, FlexibleInstances, GADTs, OverloadedStrings, +{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, GADTs, OverloadedStrings, Rank2Types, RecordWildCards, TypeFamilies, TypeSynonymInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -- | @@ -176,8 +176,14 @@ string_ suspended f s0 = T.Parser $ \t pos more lose succ -> | T.null ft -> suspended s s t pos more lose succ | otherwise -> lose t pos more [] "string" Just (pfx,ssfx,tsfx) - | T.null ssfx -> let l = Pos (T.lengthWord16 pfx) + | T.null ssfx -> let l = +#if MIN_VERSION_text(2,0,0) + Pos (T.lengthWord8 pfx) +#else + Pos (T.lengthWord16 pfx) +#endif in succ t (pos + l) more (substring pos l t) + | not (T.null tsfx) -> lose t pos more [] "string" | otherwise -> suspended s ssfx t pos more lose succ {-# INLINE string_ #-} @@ -195,8 +201,13 @@ stringSuspended f s000 s0 t0 pos0 more0 lose0 succ0 = in case T.commonPrefixes s0 s of Nothing -> lose t pos more [] "string" Just (_pfx,ssfx,tsfx) - | T.null ssfx -> let l = Pos (T.lengthWord16 s000) - in succ t (pos + l) more (substring pos l t) + | T.null ssfx -> let +#if MIN_VERSION_text(2,0,0) + l = Pos (T.lengthWord8 s000) +#else + l = Pos (T.lengthWord16 s000) +#endif + in succ t (pos + l) more (substring pos l t) | T.null tsfx -> stringSuspended f s000 ssfx t pos more lose succ | otherwise -> lose t pos more [] "string" diff --git a/tests/QC/Buffer.hs b/tests/QC/Buffer.hs index 7227b723..a7406fb9 100644 --- a/tests/QC/Buffer.hs +++ b/tests/QC/Buffer.hs @@ -24,6 +24,14 @@ data BP t b = BP [t] !t !b type BPB = BP B.ByteString BB.Buffer type BPT = BP T.Text BT.Buffer +lengthN :: T.Text -> Int +#if MIN_VERSION_text(2,0,0) +lengthN = T.lengthWord8 +#else +lengthN = T.lengthWord16 +#endif + + instance Arbitrary BPB where arbitrary = do bss <- arbitrary @@ -51,7 +59,7 @@ b_length :: BPB -> Property b_length (BP _ts t buf) = B.length t === BB.length buf t_length :: BPT -> Property -t_length (BP _ts t buf) = T.lengthWord16 t === BT.length buf +t_length (BP _ts t buf) = lengthN t === BT.length buf b_unsafeIndex :: BPB -> Gen Property b_unsafeIndex (BP _ts t buf) = do @@ -61,14 +69,14 @@ b_unsafeIndex (BP _ts t buf) = do t_iter :: BPT -> Gen Property t_iter (BP _ts t buf) = do - let l = T.lengthWord16 t + let l = lengthN t i <- choose (0,l-1) let it (T.Iter c q) = (c,q) return $ l === 0 .||. it (T.iter t i) === it (BT.iter buf i) t_iter_ :: BPT -> Gen Property t_iter_ (BP _ts t buf) = do - let l = T.lengthWord16 t + let l = lengthN t i <- choose (0,l-1) return $ l === 0 .||. T.iter_ t i === BT.iter_ buf i @@ -79,8 +87,12 @@ b_unsafeDrop (BP _ts t buf) = do t_dropWord16 :: BPT -> Gen Property t_dropWord16 (BP _ts t buf) = do - i <- choose (0, T.lengthWord16 t) + i <- choose (0, lengthN t) +#if MIN_VERSION_text(2,0,0) + return $ T.dropWord8 i t === BT.dropWord16 i buf +#else return $ T.dropWord16 i t === BT.dropWord16 i buf +#endif tests :: [TestTree] tests = [