Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds binary eexp subfield span accessors #850

Merged
merged 2 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/lazy/any_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,10 @@ pub enum LazyRawAnyEExpressionKind<'top> {
}

impl<'top> LazyRawAnyEExpression<'top> {
pub fn kind(&self) -> LazyRawAnyEExpressionKind<'top> {
self.encoding
}

pub fn encoding(&self) -> IonEncoding {
use LazyRawAnyEExpressionKind::*;
match self.encoding {
Expand Down
79 changes: 48 additions & 31 deletions src/lazy/binary/raw/v1_1/e_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,6 @@ use crate::lazy::text::raw::v1_1::arg_group::{EExpArg, EExpArgExpr};
use crate::lazy::text::raw::v1_1::reader::MacroIdRef;
use crate::{try_or_some_err, v1_1, Environment, HasRange, HasSpan, IonResult, Span};

#[derive(Copy, Clone)]
pub struct BinaryEExpHeader {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This type was created to offer methods like those added in this PR, but it was never completed or used.

// The number of bytes that were used to encode the e-expression's opcode and address.
opcode_and_address_length: u8,
// The number of bytes that were used to encode the e-expression's arg grouping bitmap, if any.
bitmap_length: u8,
}

impl BinaryEExpHeader {
    /// Creates a header description from the byte counts of its two subfields.
    ///
    /// `opcode_length` is stored as the combined opcode-and-address length;
    /// `bitmap_length` is stored as the length of the arg grouping bitmap.
    pub fn new(opcode_length: u8, bitmap_length: u8) -> Self {
        let opcode_and_address_length = opcode_length;
        Self {
            opcode_and_address_length,
            bitmap_length,
        }
    }

    /// Number of bytes used to encode the opcode and address.
    pub fn address_and_opcode_length(&self) -> usize {
        usize::from(self.opcode_and_address_length)
    }

    /// Number of bytes used to encode the arg grouping bitmap.
    pub fn bitmap_length(&self) -> usize {
        usize::from(self.bitmap_length)
    }

    /// Total header size: the opcode-and-address bytes plus the bitmap bytes.
    pub fn header_length(&self) -> usize {
        let opcode_and_address = usize::from(self.opcode_and_address_length);
        let bitmap = usize::from(self.bitmap_length);
        opcode_and_address + bitmap
    }
}

/// An e-expression which has been parsed from a binary Ion 1.1 stream.
#[derive(Copy, Clone)]
pub struct BinaryEExpression_1_1<'top> {
Expand All @@ -64,11 +38,14 @@ pub struct BinaryEExpression_1_1<'top> {
cache: Option<&'top [ValueExpr<'top, BinaryEncoding_1_1>]>,
macro_ref: MacroRef<'top>,
bitmap_bits: u64,
// The index of `input` at which the bitmap can be found. If there is no bitmap, this index
// will be the beginning of the encoded arguments.
// This index is the first position after the opcode and address.
// If the e-expression has a length prefix, it will begin at this position in `input`.
length_offset: u8,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This type had 6 bytes of padding, so adding another u8 field did not increase its size.

// This index is the first position after the opcode, address, and length prefix.
// If the e-expression has a bitmap, it will begin at this position in `input`.
bitmap_offset: u8,
// The index at which the arguments to the e-expression begin within `input`. This index is
// the first position after the opcode, address, length, and bitmap.
// This index is the first position after the opcode, address, length, and bitmap.
// If the e-expression has arguments, they will begin at this position in `input`.
args_offset: u8,

pub(crate) input: BinaryBuffer<'top>,
Expand All @@ -79,26 +56,66 @@ impl<'top> BinaryEExpression_1_1<'top> {
macro_ref: MacroRef<'top>,
bitmap_bits: u64,
input: BinaryBuffer<'top>,
length_offset: u8,
bitmap_offset: u8,
args_offset: u8,
) -> Self {
Self {
bitmap_bits,
input,
macro_ref,
length_offset,
bitmap_offset,
args_offset,
cache: None,
}
}

/// Consumes `self`, sets its `cache` field to `Some(cache)`, and returns the modified value.
pub fn with_arg_expr_cache(
pub(crate) fn with_arg_expr_cache(
mut self,
cache: &'top [ValueExpr<'top, BinaryEncoding_1_1>],
) -> Self {
// Overwrites whatever `self.cache` previously held.
self.cache = Some(cache);
self
}

/// Returns a span of bytes representing the opcode and macro address.
/// Depending on the encoding, these may be distinct (for example, the span: `0xF4 0x01`,
/// where the `0xF4` is the opcode and the `0x01` is the `FlexUInt` address) or combined
/// (for example: `0x00` is both an opcode and a macro address).
pub fn opcode_and_address_span(&self) -> Span<'top> {
self.input.slice(0, self.length_offset as usize).into()
}

/// Reports whether a length prefix is present in this binary e-expression.
pub fn has_length_prefix(&self) -> bool {
    let prefix_start = self.length_offset;
    let prefix_end = self.bitmap_offset;
    // Identical offsets mean zero bytes were spent on a length prefix.
    prefix_start != prefix_end
}

/// Returns a span of bytes representing the length prefix. If there is no length prefix,
/// the returned span will be empty.
pub fn length_prefix_span(&self) -> Span<'top> {
let num_bytes = (self.bitmap_offset - self.length_offset) as usize;
self.input
.slice(self.length_offset as usize, num_bytes)
.into()
}

/// Reports whether an argument encoding bitmap is present in this binary e-expression.
pub fn has_bitmap(&self) -> bool {
    let bitmap_start = self.bitmap_offset;
    let bitmap_end = self.args_offset;
    // Identical offsets mean zero bytes were spent on a bitmap.
    bitmap_start != bitmap_end
}

/// Returns a span of bytes representing the e-expression's argument encoding bitmap.
/// If there is no argument encoding bitmap, the returned span will be empty.
pub fn bitmap_span(&self) -> Span<'top> {
let num_bytes = (self.args_offset - self.bitmap_offset) as usize;
self.input
.slice(self.bitmap_offset as usize, num_bytes)
.into()
}
}

impl<'top> HasSpan<'top> for &'top BinaryEExpression_1_1<'top> {
Expand Down
5 changes: 5 additions & 0 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,9 @@ impl<'a> BinaryBuffer<'a> {
MacroRef::new(macro_address, macro_ref),
bitmap_bits,
matched_eexp_bytes,
// There is no length prefix, so we re-use the bitmap_offset as the first position
// beyond the opcode and address subfields.
bitmap_offset as u8,
Comment on lines +966 to +968
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This is in the parser for e-expressions with combined opcode+address bytes. There is never a length prefix.

bitmap_offset as u8,
args_offset as u8,
)
Expand Down Expand Up @@ -996,6 +999,7 @@ impl<'a> BinaryBuffer<'a> {
})?
.reference();
// Offset from `self`, not offset from the beginning of the stream.
let length_offset = (input_after_address.offset() - self.offset()) as u8;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This is in the parser for 0xF5, e-expression w/length prefix.

let bitmap_offset = (input_after_length.offset() - self.offset()) as u8;
let (bitmap_bits, _input_after_bitmap) =
input_after_length.read_eexp_bitmap(macro_ref.signature().bitmap_size_in_bytes())?;
Expand All @@ -1006,6 +1010,7 @@ impl<'a> BinaryBuffer<'a> {
MacroRef::new(macro_address, macro_ref),
bitmap_bits,
matched_bytes,
length_offset,
bitmap_offset,
args_offset,
),
Expand Down
20 changes: 20 additions & 0 deletions src/lazy/span.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::lazy::binary::raw::v1_1::immutable_buffer::BinaryBuffer;
use crate::lazy::text::buffer::TextBuffer;
use crate::result::IonFailure;
use crate::{IonError, IonResult};
use std::ops::Range;
Expand Down Expand Up @@ -62,3 +64,21 @@ impl<'a> Span<'a> {
self.bytes.is_empty()
}
}

impl<'a> From<BinaryBuffer<'a>> for Span<'a> {
fn from(value: BinaryBuffer<'a>) -> Self {
Span {
bytes: value.bytes(),
offset: value.offset(),
}
}
}

impl<'a> From<TextBuffer<'a>> for Span<'a> {
fn from(value: TextBuffer<'a>) -> Self {
Span {
bytes: value.bytes(),
offset: value.offset(),
}
}
}
Loading