From 9de32eb873c25254232c326ac30fc8bc1768da14 Mon Sep 17 00:00:00 2001 From: TimLuq Date: Tue, 25 Jun 2024 10:58:27 +0200 Subject: [PATCH] fmt + `StringQueue::chars` --- src/bytedata.rs | 13 ++++++ src/queue/byte_iter.rs | 7 +++- src/queue/byte_queue.rs | 4 +- src/queue/char_iter.rs | 38 ++++++++++++++++++ src/queue/chunk_iter.rs | 12 ++++-- src/queue/linked_iter.rs | 11 ++++- src/queue/linked_node_leaf.rs | 1 - src/queue/linked_root.rs | 7 ++-- src/queue/mod.rs | 6 ++- src/queue/string_queue.rs | 76 +++++++++++++++++++++++++++++++++-- 10 files changed, 156 insertions(+), 19 deletions(-) create mode 100644 src/queue/char_iter.rs diff --git a/src/bytedata.rs b/src/bytedata.rs index f81d644..f8d2d6b 100644 --- a/src/bytedata.rs +++ b/src/bytedata.rs @@ -476,3 +476,16 @@ impl core::fmt::UpperHex for ByteData<'_> { Ok(()) } } + +impl<'a> Iterator for ByteData<'a> { + type Item = u8; + + fn next(&mut self) -> Option { + if self.is_empty() { + return None; + } + let r = self[0]; + self.make_sliced(1..); + Some(r) + } +} diff --git a/src/queue/byte_iter.rs b/src/queue/byte_iter.rs index 67e6703..d145f21 100644 --- a/src/queue/byte_iter.rs +++ b/src/queue/byte_iter.rs @@ -13,7 +13,12 @@ pub struct ByteIter<'a, 'b> { impl<'a, 'b> ByteIter<'a, 'b> { #[inline] pub(super) fn new(queue: &'b ByteQueue<'a>) -> Self { - Self { inner: queue.chunks(), chunk: None, offset: 0, len: queue.len() } + Self { + inner: queue.chunks(), + chunk: None, + offset: 0, + len: queue.len(), + } } /// Skip the next `n` bytes. diff --git a/src/queue/byte_queue.rs b/src/queue/byte_queue.rs index eeb614e..1f97690 100644 --- a/src/queue/byte_queue.rs +++ b/src/queue/byte_queue.rs @@ -2,9 +2,9 @@ use core::{ops::RangeBounds, panic}; use crate::ByteData; -use crate::queue::ChunkIter; -use super::linked_root::LinkedRoot; use super::byte_iter::ByteIter; +use super::linked_root::LinkedRoot; +use crate::queue::ChunkIter; /// A queue of byte chunks. #[cfg_attr(docsrs, doc(cfg(feature = "queue")))] diff --git a/src/queue/char_iter.rs b/src/queue/char_iter.rs new file mode 100644 index 0000000..bb306a7 --- /dev/null +++ b/src/queue/char_iter.rs @@ -0,0 +1,38 @@ +use super::StringQueue; + +/// An iterator over the characters of a [`StringQueue`]. +pub struct CharIter<'a, 'b> { + bytes: super::ByteIter<'a, 'b>, +} + +impl<'a, 'b> CharIter<'a, 'b> { + #[inline] + pub(super) fn new(queue: &'b StringQueue<'a>) -> Self { + Self { + bytes: super::ByteIter::new(queue.as_inner()), + } + } +} + +impl<'a, 'b> Iterator for CharIter<'a, 'b> { + type Item = char; + + fn next(&mut self) -> Option { + let b0 = self.bytes.next()?; + let (mut ch, expects) = match b0 { + b0 if b0 & 0b1000_0000 == 0 => (b0 as u32, 0), + b0 if b0 & 0b1110_0000 == 0b1100_0000 => (b0 as u32 & 0b0001_1111, 1), + b0 if b0 & 0b1111_0000 == 0b1110_0000 => (b0 as u32 & 0b0000_1111, 2), + b0 if b0 & 0b1111_1000 == 0b1111_0000 => (b0 as u32 & 0b0000_0111, 3), + _ => return None, + }; + for _ in 0..expects { + let b = self.bytes.next()?; + if b & 0b1100_0000 != 0b1000_0000 { + panic!("CharIter: Invalid UTF-8 continuation byte"); + } + ch = (ch << 6) | (b as u32 & 0b0011_1111); + } + Some(unsafe { core::char::from_u32_unchecked(ch) }) + } +} diff --git a/src/queue/chunk_iter.rs b/src/queue/chunk_iter.rs index 9811172..cb1c891 100644 --- a/src/queue/chunk_iter.rs +++ b/src/queue/chunk_iter.rs @@ -3,7 +3,7 @@ use crate::ByteData; use super::linked_root::LinkedRoot; /// An iterator over the chunks of a [`ByteQueue`]. -/// +/// /// [`ByteQueue`]: crate::ByteQueue #[cfg_attr(docsrs, doc(cfg(feature = "queue")))] pub struct ChunkIter<'a>(LinkedRoot<'a>); @@ -51,7 +51,7 @@ impl<'a> ExactSizeIterator for ChunkIter<'a> { impl<'a> core::iter::FusedIterator for ChunkIter<'a> {} /// An iterator over the chunks of a [`ByteQueue`]. -/// +/// /// [`ByteQueue`]: crate::ByteQueue #[cfg_attr(docsrs, doc(cfg(feature = "queue")))] pub struct StrChunkIter<'a>(LinkedRoot<'a>); @@ -67,7 +67,9 @@ impl<'a> Iterator for StrChunkIter<'a> { type Item = crate::StringData<'a>; fn next(&mut self) -> Option { - self.0.pop_front().map(|x| unsafe { crate::StringData::from_bytedata_unchecked(x) }) + self.0 + .pop_front() + .map(|x| unsafe { crate::StringData::from_bytedata_unchecked(x) }) } #[inline] @@ -84,7 +86,9 @@ impl<'a> Iterator for StrChunkIter<'a> { impl<'a> DoubleEndedIterator for StrChunkIter<'a> { #[inline] fn next_back(&mut self) -> Option { - self.0.pop_back().map(|x| unsafe { crate::StringData::from_bytedata_unchecked(x) }) + self.0 + .pop_back() + .map(|x| unsafe { crate::StringData::from_bytedata_unchecked(x) }) } } diff --git a/src/queue/linked_iter.rs b/src/queue/linked_iter.rs index c88cf65..5dffc06 100644 --- a/src/queue/linked_iter.rs +++ b/src/queue/linked_iter.rs @@ -14,8 +14,15 @@ pub struct LinkedIter<'a: 'b, 'b> { #[cfg(feature = "alloc")] impl<'a: 'b, 'b> LinkedIter<'a, 'b> { #[inline] - pub(super) const fn new(chamber: Option<&'b crate::ByteData<'a>>, node: Option<&'b super::linked_node_leaf::LinkedNodeLeaf<'a>>) -> Self { - Self { chamber, node, offset: 0 } + pub(super) const fn new( + chamber: Option<&'b crate::ByteData<'a>>, + node: Option<&'b super::linked_node_leaf::LinkedNodeLeaf<'a>>, + ) -> Self { + Self { + chamber, + node, + offset: 0, + } } fn item_len(&self) -> usize { diff --git a/src/queue/linked_node_leaf.rs b/src/queue/linked_node_leaf.rs index a464cff..13a70f6 100644 --- a/src/queue/linked_node_leaf.rs +++ b/src/queue/linked_node_leaf.rs @@ -2,7 +2,6 @@ use crate::ByteData; use super::linked_node_data::LinkedNodeData; - pub(super) struct LinkedNodeLeaf<'a> { pub(super) prev: *mut LinkedNodeLeaf<'a>, pub(super) data: LinkedNodeData<'a>, diff --git a/src/queue/linked_root.rs b/src/queue/linked_root.rs index 18de0ba..0dad56b 100644 --- a/src/queue/linked_root.rs +++ b/src/queue/linked_root.rs @@ -41,12 +41,12 @@ impl<'a> LinkedRoot<'a> { pub(super) const fn len(&self) -> usize { self.data.len as usize } - + #[inline] fn first_mut(&mut self) -> Option<&mut super::linked_node_data::LinkedNodeData<'a>> { Some(&mut self.data) } - + #[inline] fn last_mut(&mut self) -> Option<&mut super::linked_node_data::LinkedNodeData<'a>> { Some(&mut self.data) @@ -171,7 +171,7 @@ impl<'a> LinkedRoot<'a> { } Some(r) } - + pub(super) fn pop_front(&mut self) -> Option> { if self.count == 0 { return None; @@ -226,7 +226,6 @@ impl<'a> LinkedRoot<'a> { } impl<'a> LinkedRoot<'a> { - pub(super) fn push_back(&mut self, mut data: ByteData<'a>) { if data.is_empty() { return; diff --git a/src/queue/mod.rs b/src/queue/mod.rs index c40bb16..da919c5 100644 --- a/src/queue/mod.rs +++ b/src/queue/mod.rs @@ -1,13 +1,14 @@ //! # Queue -//! +//! //! This module contains the queue data structure and its iterators. -//! +//! //! The queue is a list of byte slices, which allows for efficient appending and consuming of byte data. mod byte_queue; mod string_queue; mod byte_iter; +mod char_iter; mod chunk_iter; mod linked_iter; @@ -18,6 +19,7 @@ mod linked_root; pub use byte_iter::ByteIter; pub use byte_queue::ByteQueue; +pub use char_iter::CharIter; pub use chunk_iter::{ChunkIter, StrChunkIter}; pub use linked_iter::LinkedIter; pub use string_queue::StringQueue; diff --git a/src/queue/string_queue.rs b/src/queue/string_queue.rs index 99567c9..24520f1 100644 --- a/src/queue/string_queue.rs +++ b/src/queue/string_queue.rs @@ -1,5 +1,5 @@ -use crate::StringData; use super::ByteQueue; +use crate::StringData; /// A queue of strings. pub struct StringQueue<'a> { @@ -10,13 +10,42 @@ impl<'a> StringQueue<'a> { /// Create a new empty `StringQueue`. #[inline] pub const fn new() -> Self { - Self { queue: ByteQueue::new() } + Self { + queue: ByteQueue::new(), + } } /// Create a new `StringQueue` with a single item. #[inline] pub const fn with_item(data: StringData<'a>) -> Self { - Self { queue: ByteQueue::with_item(data.into_bytedata()) } + Self { + queue: ByteQueue::with_item(data.into_bytedata()), + } + } + + #[inline] + pub(super) const fn as_inner(&self) -> &ByteQueue<'a> { + &self.queue + } + + /// Checks if the queue is full. When the feature `alloc` is enabled, this will always return `false`. + #[inline] + pub const fn is_full(&self) -> bool { + self.queue.is_full() + } + + /// Append string to the queue. + #[inline] + pub fn push_back(&mut self, data: impl Into>) { + let data: StringData = data.into(); + self.queue.push_back(data.into_bytedata()); + } + + /// Prepend string into the queue. + #[inline] + pub fn push_front(&mut self, data: impl Into>) { + let data = data.into(); + self.queue.push_front(data.into_bytedata()); } /// Pop the first item from the queue. @@ -72,6 +101,47 @@ impl<'a> StringQueue<'a> { pub fn into_iter(self) -> super::StrChunkIter<'a> { super::StrChunkIter::new(self.queue.queue) } + + /// Slices the queue and returns a new queue that represents the given range. + /// Panics if the range boundary is invalid UTF-8. + pub fn slice(&self, range: impl core::ops::RangeBounds) -> Self { + let slic = self.queue.slice(range); + if slic.is_empty() { + return Self::new(); + } + let f = slic.front().unwrap(); + if f[0] & 0b1100_0000 == 0b1000_0000 { + panic!("StringQueue: Invalid UTF-8 start in range"); + } + let b = slic.back().unwrap(); + let end_byte = b[b.len() - 1]; + if end_byte & 0b1100_0000 == 0b1100_0000 { + panic!("StringQueue: Invalid UTF-8 end in range"); + } + if end_byte & 0b1100_0000 == 0b1000_0000 { + // compute backwards to find the start of the char to see if the number of bytes is correct + let mut i = b.len() - 2; + while b[i] & 0b1100_0000 == 0b1000_0000 { + i -= 1; + } + let char_len = b.len() - i; + if char_len == 2 && end_byte & 0b1110_0000 != 0b1100_0000 { + panic!("StringQueue: Invalid UTF-8 end in range"); + } + if char_len == 3 && end_byte & 0b1111_0000 != 0b1110_0000 { + panic!("StringQueue: Invalid UTF-8 end in range"); + } + if char_len == 4 && end_byte & 0b1111_1000 != 0b1111_0000 { + panic!("StringQueue: Invalid UTF-8 end in range"); + } + } + Self { queue: slic } + } + + /// Iterates over each character in the queue. + pub fn chars(&self) -> super::CharIter<'a, '_> { + super::char_iter::CharIter::new(self) + } } impl<'a> From> for StringQueue<'a> {