Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatibility for text-2 #7

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion Data/Attoparsec/Text/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ module Data.Attoparsec.Text.Buffer
, length
, pappend
, iter
, iter_
, Data.Attoparsec.Text.Buffer.iter_
, substring
, dropWord16
) where
Expand All @@ -44,8 +44,12 @@ import Data.Monoid as Mon (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Text ()
import Data.Text.Internal (Text(..))
#if MIN_VERSION_text(2,0,0)
import Data.Text.Unsafe (iterArray, iter_)
#else
import Data.Text.Internal.Encoding.Utf16 (chr2)
import Data.Text.Internal.Unsafe.Char (unsafeChr)
#endif
import Data.Text.Unsafe (Iter(..))
import Foreign.Storable (sizeOf)
import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
Expand Down Expand Up @@ -108,16 +112,25 @@ append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
let newgen = gen + 1
marr <- unsafeThaw arr0
writeGen marr newgen
#if MIN_VERSION_text(2,0,0)
A.copyI len1 marr (off0+len0) arr1 off1
#else
A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
#endif
arr2 <- A.unsafeFreeze marr
return (Buf arr2 off0 newlen cap0 newgen)
else do
let newcap = newlen * 2
newgen = 1
marr <- A.new (newcap + woff)
writeGen marr newgen
#if MIN_VERSION_text(2,0,0)
A.copyI len0 marr woff arr0 off0
A.copyI len1 marr (woff+len0) arr1 off1
#else
A.copyI marr woff arr0 off0 (woff+len0)
A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
#endif
arr2 <- A.unsafeFreeze marr
return (Buf arr2 woff newlen newcap newgen)

Expand All @@ -142,31 +155,54 @@ dropWord16 s (Buf arr off len _ _) =
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
#if MIN_VERSION_text(2,0,0)
iter (Buf arr _ _ _ _) i = iterArray arr i
#else
iter (Buf arr off _ _ _) i
| m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
| otherwise = Iter (chr2 m n) 2
where m = A.unsafeIndex arr j
n = A.unsafeIndex arr k
j = off + i
k = j + 1
#endif
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
#if MIN_VERSION_text(2,0,0)
iter_ (Buf arr off len _ _) i = Data.Text.Unsafe.iter_ (Text arr off len) i
#else
iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
| otherwise = 2
where m = A.unsafeIndex arr (off+i)
#endif
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
#if MIN_VERSION_text(2,0,0)
unsafeThaw (A.ByteArray arr) = ST $ \s# ->
(# s#, A.MutableByteArray (unsafeCoerce# arr) #)
#else
unsafeThaw A.Array{..} = ST $ \s# ->
(# s#, A.MArray (unsafeCoerce# aBA) #)
#endif

readGen :: A.Array -> Int
#if MIN_VERSION_text(2,0,0)
readGen (A.ByteArray a) = case indexIntArray# a 0# of r# -> I# r#
#else
readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#
#endif

writeGen :: A.MArray s -> Int -> ST s ()
#if MIN_VERSION_text(2,0,0)
writeGen (A.MutableByteArray a) (I# gen#) = ST $ \s0# ->
case writeIntArray# a 0# gen# s0# of
s1# -> (# s1#, () #)
#else
writeGen a (I# gen#) = ST $ \s0# ->
case writeIntArray# (A.maBA a) 0# gen# s0# of
s1# -> (# s1#, () #)
#endif
19 changes: 15 additions & 4 deletions Data/Attoparsec/Text/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{-# LANGUAGE BangPatterns, FlexibleInstances, GADTs, OverloadedStrings,
{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, GADTs, OverloadedStrings,
Rank2Types, RecordWildCards, TypeFamilies, TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- |
Expand Down Expand Up @@ -176,8 +176,14 @@ string_ suspended f s0 = T.Parser $ \t pos more lose succ ->
| T.null ft -> suspended s s t pos more lose succ
| otherwise -> lose t pos more [] "string"
Just (pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 pfx)
| T.null ssfx -> let l =
#if MIN_VERSION_text(2,0,0)
Pos (T.lengthWord8 pfx)
#else
Pos (T.lengthWord16 pfx)
#endif
in succ t (pos + l) more (substring pos l t)

| not (T.null tsfx) -> lose t pos more [] "string"
| otherwise -> suspended s ssfx t pos more lose succ
{-# INLINE string_ #-}
Expand All @@ -195,8 +201,13 @@ stringSuspended f s000 s0 t0 pos0 more0 lose0 succ0 =
in case T.commonPrefixes s0 s of
Nothing -> lose t pos more [] "string"
Just (_pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 s000)
in succ t (pos + l) more (substring pos l t)
| T.null ssfx -> let
#if MIN_VERSION_text(2,0,0)
l = Pos (T.lengthWord8 s000)
#else
l = Pos (T.lengthWord16 s000)
#endif
in succ t (pos + l) more (substring pos l t)
| T.null tsfx -> stringSuspended f s000 ssfx t pos more lose succ
| otherwise -> lose t pos more [] "string"

Expand Down
20 changes: 16 additions & 4 deletions tests/QC/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ data BP t b = BP [t] !t !b
type BPB = BP B.ByteString BB.Buffer
type BPT = BP T.Text BT.Buffer

lengthN :: T.Text -> Int
#if MIN_VERSION_text(2,0,0)
lengthN = T.lengthWord8
#else
lengthN = T.lengthWord16
#endif


instance Arbitrary BPB where
arbitrary = do
bss <- arbitrary
Expand Down Expand Up @@ -51,7 +59,7 @@ b_length :: BPB -> Property
b_length (BP _ts t buf) = B.length t === BB.length buf

t_length :: BPT -> Property
t_length (BP _ts t buf) = T.lengthWord16 t === BT.length buf
t_length (BP _ts t buf) = lengthN t === BT.length buf

b_unsafeIndex :: BPB -> Gen Property
b_unsafeIndex (BP _ts t buf) = do
Expand All @@ -61,14 +69,14 @@ b_unsafeIndex (BP _ts t buf) = do

t_iter :: BPT -> Gen Property
t_iter (BP _ts t buf) = do
let l = T.lengthWord16 t
let l = lengthN t
i <- choose (0,l-1)
let it (T.Iter c q) = (c,q)
return $ l === 0 .||. it (T.iter t i) === it (BT.iter buf i)

t_iter_ :: BPT -> Gen Property
t_iter_ (BP _ts t buf) = do
let l = T.lengthWord16 t
let l = lengthN t
i <- choose (0,l-1)
return $ l === 0 .||. T.iter_ t i === BT.iter_ buf i

Expand All @@ -79,8 +87,12 @@ b_unsafeDrop (BP _ts t buf) = do

t_dropWord16 :: BPT -> Gen Property
t_dropWord16 (BP _ts t buf) = do
i <- choose (0, T.lengthWord16 t)
i <- choose (0, lengthN t)
#if MIN_VERSION_text(2,0,0)
return $ T.dropWord8 i t === BT.dropWord16 i buf
#else
return $ T.dropWord16 i t === BT.dropWord16 i buf
#endif

tests :: [TestTree]
tests = [
Expand Down