Skip to content

Commit

Permalink
NBT Tweaks (#522)
Browse files Browse the repository at this point in the history
# Objective

`valence_nbt` could use some adjustments.

# Solution

- Move top-level conversion functions to `conv` module.
- Remove public `Display` impl from `Tag`.
- Remove implicit "null" byte check when decoding compounds. This check
can be done in advance if desired.
- Remove potentially dangerous memory preallocation when decoding
binary.
- Make `Compound::to_binary` and `Compound::from_binary` free functions
for discoverability and familiarity with serde_json.
  • Loading branch information
rj00a authored Sep 6, 2023
1 parent a89deeb commit 1ddde71
Show file tree
Hide file tree
Showing 12 changed files with 222 additions and 201 deletions.
10 changes: 6 additions & 4 deletions crates/valence_anvil/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ impl Region {
None => return Err(RegionError::InvalidCompressionScheme(compression)),
};

let (data, _) = Compound::from_binary(&mut nbt_slice)?;
let (data, _) = valence_nbt::from_binary(&mut nbt_slice)?;

if !nbt_slice.is_empty() {
return Err(RegionError::TrailingNbtData);
Expand Down Expand Up @@ -553,15 +553,17 @@ impl Region {
compress_buf.clear();
let mut compress_cursor = Cursor::new(compress_buf);
match options.compression {
Compression::Gzip => chunk.to_binary(
Compression::Gzip => valence_nbt::to_binary(
chunk,
GzEncoder::new(&mut compress_cursor, flate2::Compression::default()),
"",
)?,
Compression::Zlib => chunk.to_binary(
Compression::Zlib => valence_nbt::to_binary(
chunk,
ZlibEncoder::new(&mut compress_cursor, flate2::Compression::default()),
"",
)?,
Compression::None => chunk.to_binary(&mut compress_cursor, "")?,
Compression::None => valence_nbt::to_binary(chunk, &mut compress_cursor, "")?,
}
let compress_buf = compress_cursor.into_inner();

Expand Down
33 changes: 29 additions & 4 deletions crates/valence_nbt/src/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! # Examples
//!
//! ```
//! use valence_nbt::{compound, Compound, List};
//! use valence_nbt::{compound, to_binary, Compound, List};
//!
//! let c = compound! {
//! "byte" => 5_i8,
Expand All @@ -18,21 +18,21 @@
//!
//! let mut buf = vec![];
//!
//! c.to_binary(&mut buf, "").unwrap();
//! to_binary(&c, &mut buf, "").unwrap();
//! ```
//!
//! Decode NBT data from its binary form.
//!
//! ```
//! use valence_nbt::{compound, Compound};
//! use valence_nbt::{compound, from_binary, Compound};
//!
//! let some_bytes = [10, 0, 0, 3, 0, 3, 105, 110, 116, 0, 0, 222, 173, 0];
//!
//! let expected_value = compound! {
//! "int" => 0xdead
//! };
//!
//! let (nbt, root_name) = Compound::from_binary(&mut some_bytes.as_slice()).unwrap();
//! let (nbt, root_name) = from_binary(&mut some_bytes.as_slice()).unwrap();
//!
//! assert_eq!(nbt, expected_value);
//! assert_eq!(root_name, "");
Expand All @@ -45,4 +45,29 @@ mod modified_utf8;
#[cfg(test)]
mod tests;

pub use decode::from_binary;
pub use encode::{to_binary, written_size};
pub use error::*;

use crate::Tag;

impl Tag {
/// Returns the name of this tag for error reporting purposes.
const fn name(self) -> &'static str {
match self {
Tag::End => "end",
Tag::Byte => "byte",
Tag::Short => "short",
Tag::Int => "int",
Tag::Long => "long",
Tag::Float => "float",
Tag::Double => "double",
Tag::ByteArray => "byte array",
Tag::String => "string",
Tag::List => "list",
Tag::Compound => "compound",
Tag::IntArray => "int array",
Tag::LongArray => "long array",
}
}
}
70 changes: 33 additions & 37 deletions crates/valence_nbt/src/binary/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,28 @@ use super::{Error, Result};
use crate::tag::Tag;
use crate::{Compound, List, Value};

impl Compound {
/// Decodes uncompressed NBT binary data from the provided slice.
///
/// The string returned in the tuple is the name of the root compound
/// (typically the empty string).
pub fn from_binary(slice: &mut &[u8]) -> Result<(Self, String)> {
let mut state = DecodeState { slice, depth: 0 };

let root_tag = state.read_tag()?;

// For cases such as Block Entity Data in the chunk packet.
// https://wiki.vg/Protocol#Chunk_Data_and_Update_Light
if root_tag == Tag::End {
return Ok((Compound::new(), String::new()));
}

if root_tag != Tag::Compound {
return Err(Error::new_owned(format!(
"expected root tag for compound (got {root_tag})",
)));
}
/// Decodes uncompressed NBT binary data from the provided slice.
///
/// The string returned in the tuple is the name of the root compound
/// (typically the empty string).
pub fn from_binary(slice: &mut &[u8]) -> Result<(Compound, String)> {
let mut state = DecodeState { slice, depth: 0 };

let root_tag = state.read_tag()?;

if root_tag != Tag::Compound {
return Err(Error::new_owned(format!(
"expected root tag for compound (got {})",
root_tag.name(),
)));
}

let root_name = state.read_string()?;
let root = state.read_compound()?;
let root_name = state.read_string()?;
let root = state.read_compound()?;

debug_assert_eq!(state.depth, 0);
debug_assert_eq!(state.depth, 0);

Ok((root, root_name))
}
Ok((root, root_name))
}

/// Maximum recursion depth to prevent overflowing the call stack.
Expand Down Expand Up @@ -183,23 +176,23 @@ impl DecodeState<'_, '_> {
.read_list(Tag::Double, 8, |st| st.read_double())?
.into()),
Tag::ByteArray => Ok(self
.read_list(Tag::ByteArray, 4, |st| st.read_byte_array())?
.read_list(Tag::ByteArray, 0, |st| st.read_byte_array())?
.into()),
Tag::String => Ok(self
.read_list(Tag::String, 2, |st| st.read_string())?
.read_list(Tag::String, 0, |st| st.read_string())?
.into()),
Tag::List => self
.check_depth(|st| Ok(st.read_list(Tag::List, 5, |st| st.read_any_list())?.into())),
.check_depth(|st| Ok(st.read_list(Tag::List, 0, |st| st.read_any_list())?.into())),
Tag::Compound => self.check_depth(|st| {
Ok(st
.read_list(Tag::Compound, 1, |st| st.read_compound())?
.read_list(Tag::Compound, 0, |st| st.read_compound())?
.into())
}),
Tag::IntArray => Ok(self
.read_list(Tag::IntArray, 4, |st| st.read_int_array())?
.read_list(Tag::IntArray, 0, |st| st.read_int_array())?
.into()),
Tag::LongArray => Ok(self
.read_list(Tag::LongArray, 4, |st| st.read_long_array())?
.read_list(Tag::LongArray, 0, |st| st.read_long_array())?
.into()),
}
}
Expand All @@ -211,7 +204,7 @@ impl DecodeState<'_, '_> {
fn read_list<T, F>(
&mut self,
elem_type: Tag,
min_elem_size: usize,
elem_size: usize,
mut read_elem: F,
) -> Result<Vec<T>>
where
Expand All @@ -221,19 +214,22 @@ impl DecodeState<'_, '_> {

if len.is_negative() {
return Err(Error::new_owned(format!(
"negative {elem_type} list length of {len}",
"negative {} list length of {len}",
elem_type.name()
)));
}

// Ensure we don't reserve more than the maximum amount of memory required given
// the size of the remaining input.
if len as u64 * min_elem_size as u64 > self.slice.len() as u64 {
if len as u64 * elem_size as u64 > self.slice.len() as u64 {
return Err(Error::new_owned(format!(
"{elem_type} list of length {len} exceeds remainder of input"
"{} list of length {len} exceeds remainder of input",
elem_type.name()
)));
}

let mut list = Vec::with_capacity(len as usize);
let mut list = Vec::with_capacity(if elem_size == 0 { 0 } else { len as usize });

for _ in 0..len {
list.push(read_elem(self)?);
}
Expand Down
146 changes: 73 additions & 73 deletions crates/valence_nbt/src/binary/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,85 +3,84 @@ use std::io::Write;
use byteorder::{BigEndian, WriteBytesExt};

use super::{modified_utf8, Error, Result};
use crate::conv::i8_slice_as_u8_slice;
use crate::tag::Tag;
use crate::{i8_slice_as_u8_slice, Compound, List, Value};

impl Compound {
/// Encodes uncompressed NBT binary data to the provided writer.
///
/// Only compounds are permitted at the top level. This is why the function
/// accepts a [`Compound`] reference rather than a [`Value`].
///
/// Additionally, the root compound can be given a name. Typically the empty
/// string `""` is used.
pub fn to_binary<W: Write>(&self, writer: W, root_name: &str) -> Result<()> {
let mut state = EncodeState { writer };

state.write_tag(Tag::Compound)?;
state.write_string(root_name)?;
state.write_compound(self)?;

Ok(())
}

/// Returns the number of bytes that will be written when
/// [`Compound::to_binary`] is called with this compound and root name.
///
/// If `to_binary` results in `Ok`, the exact number of bytes
/// reported by this function will have been written. If the result is
/// `Err`, then the reported count will be greater than or equal to the
/// number of bytes that have actually been written.
pub fn written_size(&self, root_name: &str) -> usize {
fn value_size(val: &Value) -> usize {
match val {
Value::Byte(_) => 1,
Value::Short(_) => 2,
Value::Int(_) => 4,
Value::Long(_) => 8,
Value::Float(_) => 4,
Value::Double(_) => 8,
Value::ByteArray(v) => 4 + v.len(),
Value::String(v) => string_size(v),
Value::List(v) => list_size(v),
Value::Compound(v) => compound_size(v),
Value::IntArray(v) => 4 + v.len() * 4,
Value::LongArray(v) => 4 + v.len() * 8,
}
}
use crate::{Compound, List, Value};

/// Encodes uncompressed NBT binary data to the provided writer.
///
/// Only compounds are permitted at the top level. This is why the function
/// accepts a [`Compound`] reference rather than a [`Value`].
///
/// Additionally, the root compound can be given a name. Typically the empty
/// string `""` is used.
pub fn to_binary<W: Write>(comp: &Compound, writer: W, root_name: &str) -> Result<()> {
let mut state = EncodeState { writer };

state.write_tag(Tag::Compound)?;
state.write_string(root_name)?;
state.write_compound(comp)?;

Ok(())
}

fn list_size(l: &List) -> usize {
let elems_size = match l {
List::End => 0,
List::Byte(v) => v.len(),
List::Short(v) => v.len() * 2,
List::Int(v) => v.len() * 4,
List::Long(v) => v.len() * 8,
List::Float(v) => v.len() * 4,
List::Double(v) => v.len() * 8,
List::ByteArray(v) => v.iter().map(|b| 4 + b.len()).sum(),
List::String(v) => v.iter().map(|s| string_size(s)).sum(),
List::List(v) => v.iter().map(list_size).sum(),
List::Compound(v) => v.iter().map(compound_size).sum(),
List::IntArray(v) => v.iter().map(|i| 4 + i.len() * 4).sum(),
List::LongArray(v) => v.iter().map(|l| 4 + l.len() * 8).sum(),
};

1 + 4 + elems_size
/// Returns the number of bytes that will be written when
/// [`to_binary`] is called with this compound and root name.
///
/// If `to_binary` results in `Ok`, the exact number of bytes
/// reported by this function will have been written. If the result is
/// `Err`, then the reported count will be greater than or equal to the
/// number of bytes that have actually been written.
pub fn written_size(comp: &Compound, root_name: &str) -> usize {
fn value_size(val: &Value) -> usize {
match val {
Value::Byte(_) => 1,
Value::Short(_) => 2,
Value::Int(_) => 4,
Value::Long(_) => 8,
Value::Float(_) => 4,
Value::Double(_) => 8,
Value::ByteArray(v) => 4 + v.len(),
Value::String(v) => string_size(v),
Value::List(v) => list_size(v),
Value::Compound(v) => compound_size(v),
Value::IntArray(v) => 4 + v.len() * 4,
Value::LongArray(v) => 4 + v.len() * 8,
}
}

fn string_size(s: &str) -> usize {
2 + modified_utf8::encoded_len(s)
}
fn list_size(l: &List) -> usize {
let elems_size = match l {
List::End => 0,
List::Byte(v) => v.len(),
List::Short(v) => v.len() * 2,
List::Int(v) => v.len() * 4,
List::Long(v) => v.len() * 8,
List::Float(v) => v.len() * 4,
List::Double(v) => v.len() * 8,
List::ByteArray(v) => v.iter().map(|b| 4 + b.len()).sum(),
List::String(v) => v.iter().map(|s| string_size(s)).sum(),
List::List(v) => v.iter().map(list_size).sum(),
List::Compound(v) => v.iter().map(compound_size).sum(),
List::IntArray(v) => v.iter().map(|i| 4 + i.len() * 4).sum(),
List::LongArray(v) => v.iter().map(|l| 4 + l.len() * 8).sum(),
};

1 + 4 + elems_size
}

fn compound_size(c: &Compound) -> usize {
c.iter()
.map(|(k, v)| 1 + string_size(k) + value_size(v))
.sum::<usize>()
+ 1
}
fn string_size(s: &str) -> usize {
2 + modified_utf8::encoded_len(s)
}

1 + string_size(root_name) + compound_size(self)
fn compound_size(c: &Compound) -> usize {
c.iter()
.map(|(k, v)| 1 + string_size(k) + value_size(v))
.sum::<usize>()
+ 1
}

1 + string_size(root_name) + compound_size(comp)
}

struct EncodeState<W> {
Expand Down Expand Up @@ -223,8 +222,9 @@ impl<W: Write> EncodeState<W> {
Ok(len) => self.writer.write_i32::<BigEndian>(len)?,
Err(_) => {
return Err(Error::new_owned(format!(
"{elem_type} list of length {} exceeds maximum of i32::MAX",
"{} list of length {} exceeds maximum of i32::MAX",
list.len(),
elem_type.name()
)))
}
}
Expand Down
Loading

0 comments on commit 1ddde71

Please sign in to comment.