Skip to content

Commit

Permalink
feat: optimize faststr usage (#151)
Browse files Browse the repository at this point in the history
  • Loading branch information
PureWhiteWu committed May 5, 2023
1 parent 0fbc05d commit 6b98d37
Show file tree
Hide file tree
Showing 8 changed files with 461 additions and 129 deletions.
364 changes: 248 additions & 116 deletions Cargo.lock

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
[workspace]
members = ["pilota", "pilota-build", "pilota-thrift-parser"]
resolver = "2"

[profile.bench]
opt-level = 3
debug = true
debug-assertions = false
overflow-checks = false
lto = true
incremental = false
codegen-units = 1
rpath = false
6 changes: 4 additions & 2 deletions pilota-build/src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ use tempfile::tempdir;
use crate::plugin::SerdePlugin;

fn diff_file(old: impl AsRef<Path>, new: impl AsRef<Path>) {
let old_content = unsafe { String::from_utf8_unchecked(std::fs::read(old).unwrap()) };
let old_content =
unsafe { String::from_utf8_unchecked(std::fs::read(old).unwrap()) }.replace("\r", "");

let new_content = unsafe { String::from_utf8_unchecked(std::fs::read(new).unwrap()) };
let new_content =
unsafe { String::from_utf8_unchecked(std::fs::read(new).unwrap()) }.replace("\r", "");

let patch = diffy::create_patch(&old_content, &new_content);
if !patch.hunks().is_empty() {
Expand Down
15 changes: 13 additions & 2 deletions pilota/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pilota"
version = "0.6.2"
version = "0.6.3"
edition = "2021"
description = "Pilota is a thrift and protobuf implementation in pure rust with high performance and extensibility."
documentation = "https://docs.rs/pilota"
Expand Down Expand Up @@ -29,7 +29,18 @@ derivative = "2"
anyhow = "1"
thiserror = "1"
faststr = { version = "0.2", features = ["serde"] }
integer-encoding = { version = "3", features = ["async-trait", "tokio", "tokio_async"] }
integer-encoding = { version = "3", features = [
"async-trait",
"tokio",
"tokio_async",
] }
proptest = "1"
serde = { version = "1", features = ["derive"] }

[dev-dependencies]
criterion = "0.4"
rand = "0.8"

[[bench]]
name = "faststr"
harness = false
170 changes: 170 additions & 0 deletions pilota/benches/faststr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#![allow(clippy::redundant_clone)]
use std::{iter::repeat_with, sync::Arc};

use bytes::BytesMut;
use criterion::{black_box, criterion_group, criterion_main};
use faststr::FastStr;
use rand::Rng;

const INLINE_CAP: usize = 22;
const KB: usize = 1024;

fn faststr_bench(c: &mut criterion::Criterion) {
let mut group = c.benchmark_group("Bench FastStr Inline");

let s = gen_string(INLINE_CAP);
group.bench_function("faststr inline no clone", |b| {
b.iter(|| {
black_box(FastStr::new_inline(&s));
})
});
group.bench_function("Arc<str> new no clone", |b| {
b.iter(|| {
let _: Arc<str> = black_box(s.as_str().into());
})
});

group.bench_function("faststr inline 1 clone", |b| {
b.iter(|| {
let s = FastStr::new_inline(&s);
let s1 = black_box(s.clone());
black_box(s1);
})
});
group.bench_function("Arc<str> new 1 clone", |b| {
b.iter(|| {
let s: Arc<str> = s.as_str().into();
let s1 = black_box(s.clone());
black_box(s1);
})
});

group.bench_function("faststr inline 3 clones", |b| {
b.iter(|| {
let s = FastStr::new_inline(&s);
let s1 = black_box(s.clone());
let s2 = black_box(s.clone());
let s3 = black_box(s.clone());
black_box(s1);
black_box(s2);
black_box(s3);
})
});
group.bench_function("Arc<str> new 3 clone", |b| {
b.iter(|| {
let s: Arc<str> = s.as_str().into();
let s1 = black_box(s.clone());
let s2 = black_box(s.clone());
let s3 = black_box(s.clone());
black_box(s1);
black_box(s2);
black_box(s3);
})
});

group.finish();

let mut group = c.benchmark_group("Bench FastStr BytesMut");

let lens = [KB, 2 * KB, 4 * KB, 8 * KB, 16 * KB, 32 * KB];
for len in lens {
let s = gen_string(len);

#[inline]
fn gen_bytes_mut(s: &str) -> BytesMut {
let mut s2 = String::with_capacity(s.len() * 3);
s2.push_str(s);
s2.push_str(s);
s2.push_str(s);
BytesMut::from(s2.as_str())
}

#[inline]
fn read_faststr_from_bytes_mut(buf: &mut BytesMut) -> FastStr {
let l = buf.len();
let b = buf.split_to(l / 3).freeze();
unsafe { FastStr::from_bytes_unchecked(b) }
}

group.bench_function(format!("from bytes no clone, length: {}", len), |b| {
b.iter_batched_ref(
|| gen_bytes_mut(s.as_str()),
|buf| {
black_box(read_faststr_from_bytes_mut(buf));
},
criterion::BatchSize::PerIteration,
);
});
group.bench_function(format!("faststr new no clone, length: {}", len), |b| {
b.iter(|| {
black_box(FastStr::new(&s));
})
});

group.bench_function(format!("from bytes 1 clone, length: {}", len), |b| {
b.iter_batched_ref(
|| gen_bytes_mut(s.as_str()),
|buf| {
let s = read_faststr_from_bytes_mut(buf);
let s1 = black_box(s.clone());
black_box(s1);
},
criterion::BatchSize::PerIteration,
);
});
group.bench_function(format!("faststr new 1 clone, length: {}", len), |b| {
b.iter(|| {
let s = FastStr::new(&s);
let s1 = black_box(s.clone());
black_box(s1);
})
});

group.bench_function(format!("from bytes 3 clones, length: {}", len), |b| {
b.iter_batched_ref(
|| gen_bytes_mut(s.as_str()),
|buf| {
let s = read_faststr_from_bytes_mut(buf);
let s1 = black_box(s.clone());
let s2 = black_box(s.clone());
let s3 = black_box(s.clone());
black_box(s1);
black_box(s2);
black_box(s3);
},
criterion::BatchSize::PerIteration,
);
});
group.bench_function(format!("faststr new 3 clones, length: {}", len), |b| {
b.iter(|| {
let s = FastStr::new(&s);
let s1 = black_box(s.clone());
let s2 = black_box(s.clone());
let s3 = black_box(s.clone());
black_box(s1);
black_box(s2);
black_box(s3);
})
});
}

group.finish();
}

fn gen_string(size: usize) -> String {
const CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
abcdefghijklmnopqrstuvwxyz\
0123456789";
let mut rng = rand::thread_rng();
let b: Vec<u8> = repeat_with(|| {
let i = rng.gen_range(0..CHARSET.len());
CHARSET[i]
})
.take(size)
.collect();
let chars: Vec<char> = b.into_iter().map(|b| b as char).collect();
chars.into_iter().collect()
}

criterion_group!(benches, faststr_bench);
criterion_main!(benches);
6 changes: 3 additions & 3 deletions pilota/src/thrift/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use super::{
DecodeError, DecodeErrorKind, EncodeError, ProtocolError, TAsyncInputProtocol,
TFieldIdentifier, TInputProtocol, TLengthProtocol, TListIdentifier, TMapIdentifier,
TMessageIdentifier, TMessageType, TOutputProtocol, TSetIdentifier, TStructIdentifier, TType,
INLINE_CAP, ZERO_COPY_THRESHOLD,
ZERO_COPY_THRESHOLD,
};

static VERSION_1: u32 = 0x80010000;
Expand Down Expand Up @@ -904,10 +904,10 @@ impl TInputProtocol for TBinaryProtocol<&mut BytesMut> {
fn read_faststr(&mut self) -> Result<FastStr, DecodeError> {
let len = self.trans.read_i32()? as usize;
let bytes = self.trans.split_to(len).freeze();
if len > INLINE_CAP {
if len >= ZERO_COPY_THRESHOLD {
unsafe { return Ok(FastStr::from_bytes_unchecked(bytes)) };
}
unsafe { Ok(FastStr::new_inline(str::from_utf8_unchecked(bytes.deref()))) }
unsafe { Ok(FastStr::new(str::from_utf8_unchecked(bytes.deref()))) }
}

#[inline]
Expand Down
6 changes: 3 additions & 3 deletions pilota/src/thrift/compact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use super::{
DecodeError, DecodeErrorKind, EncodeError, ProtocolError, TAsyncInputProtocol,
TFieldIdentifier, TInputProtocol, TLengthProtocol, TListIdentifier, TMapIdentifier,
TMessageIdentifier, TMessageType, TOutputProtocol, TSetIdentifier, TStructIdentifier, TType,
INLINE_CAP, ZERO_COPY_THRESHOLD,
ZERO_COPY_THRESHOLD,
};

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
Expand Down Expand Up @@ -1417,10 +1417,10 @@ impl TInputProtocol for TCompactInputProtocol<&mut BytesMut> {
fn read_faststr(&mut self) -> Result<FastStr, DecodeError> {
let size = self.read_varint::<u32>()? as usize;
let bytes = self.trans.split_to(size);
if size > INLINE_CAP {
if size >= ZERO_COPY_THRESHOLD {
unsafe { return Ok(FastStr::from_bytes_mut_unchecked(bytes)) }
}
unsafe { Ok(FastStr::new_inline(str::from_utf8_unchecked(bytes.deref()))) }
unsafe { Ok(FastStr::new(str::from_utf8_unchecked(bytes.deref()))) }
}

#[inline]
Expand Down
13 changes: 10 additions & 3 deletions pilota/src/thrift/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,15 @@ use faststr::FastStr;
pub use self::{binary::TAsyncBinaryProtocol, compact::TAsyncCompactProtocol};

const MAXIMUM_SKIP_DEPTH: i8 = 64;
const ZERO_COPY_THRESHOLD: usize = 4 * 1024; // 4KB
const INLINE_CAP: usize = 22;

#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
// According to the benchmark, 1KB is the suitable threshold for zero-copy on
// Apple Silicon.
const ZERO_COPY_THRESHOLD: usize = 1024;

#[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
// While 4KB is better for other platforms (mainly amd64 linux).
const ZERO_COPY_THRESHOLD: usize = 4 * 1024;

lazy_static::lazy_static! {
pub static ref VOID_IDENT: TStructIdentifier = TStructIdentifier { name: "void" };
Expand Down Expand Up @@ -639,7 +646,7 @@ pub trait TAsyncInputProtocol: Send {
/// Read the end of a Thrift message.
async fn read_message_end(&mut self) -> Result<(), DecodeError>;

/// Read the beginning of a Thrift struct.
/// Read the beginning of a Thrift struct.
async fn read_struct_begin(&mut self) -> Result<Option<TStructIdentifier>, DecodeError>;

/// Read the end of a Thrift struct.
Expand Down

0 comments on commit 6b98d37

Please sign in to comment.