Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip: replace utf-8 encoder (add unicode-normalization, grapheme clustering, ...) #646

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 0 additions & 5 deletions copa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,7 @@ edition = "2021"

[dependencies]
rio-proc-macros = { workspace = true }
arrayvec = { version = "0.7.6", default-features = false, optional = true }

[dev-dependencies]
unicode-normalization = "0.1.22"

[features]
default = ["no_std"]
no_std = ["arrayvec"]
nightly = []
161 changes: 42 additions & 119 deletions copa/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,9 @@
//! [`Perform`]: trait.Perform.html
//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)]
#![cfg_attr(feature = "no_std", no_std)]

use core::mem::MaybeUninit;

#[cfg(feature = "no_std")]
use arrayvec::ArrayVec;

mod definitions;
mod params;
mod table;
Expand Down Expand Up @@ -68,21 +64,15 @@ impl<'a, P: Perform> utf8::Receiver for VtUtf8Receiver<'a, P> {
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// [`Perform`]: trait.Perform.html
///
/// Generic over the value for the size of the raw Operating System Command
/// buffer. Only used when the `no_std` feature is enabled.
#[derive(Default)]
pub struct Parser<const OSC_RAW_BUF_SIZE: usize = MAX_OSC_RAW> {
state: State,
intermediates: [u8; MAX_INTERMEDIATES],
intermediate_idx: usize,
params: Params,
param: u16,
#[cfg(feature = "no_std")]
osc_raw: ArrayVec<u8, OSC_RAW_BUF_SIZE>,
#[cfg(not(feature = "no_std"))]
osc_raw: Vec<u8>,
osc_params: [(usize, usize); MAX_OSC_PARAMS],
osc_raw: Vec<u8>,
osc_num_params: usize,
ignoring: bool,
utf8_parser: utf8::Parser,
Expand All @@ -103,10 +93,6 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
/// ```rust
/// let mut p = copa::Parser::<64>::new_with_size();
/// ```
#[cfg(feature = "no_std")]
pub fn new_with_size() -> Parser<OSC_RAW_BUF_SIZE> {
Default::default()
}

#[inline]
fn params(&self) -> &Params {
Expand All @@ -118,6 +104,31 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
&self.intermediates[..self.intermediate_idx]
}

/// Advance the parser state over every byte in `bytes`, in order.
///
/// Requires a [`Perform`] in case any byte triggers an action.
///
/// [`Perform`]: trait.Perform.html
#[inline]
pub fn advance_bytes<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) {
    for &byte in bytes {
        // Utf8 characters are handled out-of-band. The state is re-checked for
        // every byte because it can change while the slice is being consumed;
        // checking only once up front would either push UTF-8 continuation
        // bytes through the state table or treat a whole slice as UTF-8.
        if let State::Utf8 = self.state {
            self.process_utf8(performer, byte);
            continue;
        }

        // Handle state changes in the anywhere state before evaluating changes
        // for current state.
        let mut change = table::STATE_CHANGES[State::Anywhere as usize][byte as usize];

        if change == 0 {
            change = table::STATE_CHANGES[self.state as usize][byte as usize];
        }

        // Unpack into a state and action
        let (state, action) = unpack(change);

        self.perform_state_change(performer, state, action, byte);
    }
}

/// Advance the parser state
///
/// Requires a [`Perform`] in case `byte` triggers an action
Expand Down Expand Up @@ -145,11 +156,26 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
self.perform_state_change(performer, state, action, byte);
}

/// Feed a run of bytes to the parser, one byte at a time.
///
/// The bytes come straight from the PTY and may be invalid or truncated UTF-8,
/// so they must never be reinterpreted as a `str` without validation. Each
/// byte is handed to [`Parser::advance`] instead of being decoded in bulk.
// NOTE(review): assumes `advance` routes a byte to the UTF-8 decoder while the
// parser is in `State::Utf8` and to the state table otherwise — confirm.
#[inline]
fn process_utf8_bytes<P>(&mut self, performer: &mut P, bytes: &[u8])
where
    P: Perform,
{
    for &byte in bytes {
        self.advance(performer, byte);
    }
}

#[inline]
fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
where
P: Perform,
{
// encoder_rs.encode_from_utf8(byte)
let mut receiver = VtUtf8Receiver(performer, &mut self.state);
let utf8_parser = &mut self.utf8_parser;
utf8_parser.advance(&mut receiver, byte);
Expand Down Expand Up @@ -264,13 +290,6 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
self.osc_num_params = 0;
}
Action::OscPut => {
#[cfg(feature = "no_std")]
{
if self.osc_raw.is_full() {
return;
}
}

let idx = self.osc_raw.len();

// Param separator
Expand Down Expand Up @@ -450,7 +469,7 @@ pub trait Perform {
fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
}

#[cfg(all(test, feature = "no_std"))]
#[cfg(all(test))]
#[macro_use]
extern crate std;

Expand Down Expand Up @@ -717,11 +736,7 @@ mod tests {
assert_eq!(params.len(), 2);
assert_eq!(params[0], b"52");

#[cfg(not(feature = "no_std"))]
assert_eq!(params[1].len(), NUM_BYTES + INPUT_END.len());

#[cfg(feature = "no_std")]
assert_eq!(params[1].len(), MAX_OSC_RAW - params[0].len());
}
_ => panic!("expected osc sequence"),
}
Expand Down Expand Up @@ -998,98 +1013,6 @@ mod tests {
_ => panic!("expected csi sequence"),
}
}

// A parser built with an explicit OSC buffer size still dispatches CSI
// sequences: the first CSI in the input is interrupted by ESC before a final
// byte arrives, so only the second one (`?1049h`) is expected to be dispatched.
#[cfg(feature = "no_std")]
#[test]
fn build_with_fixed_size() {
    static INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
    let mut dispatcher = Dispatcher::default();
    let mut parser: Parser<30> = Parser::new_with_size();

    for &byte in INPUT {
        parser.advance(&mut dispatcher, byte);
    }

    assert_eq!(dispatcher.dispatched.len(), 1);
    if let Sequence::Csi(params, intermediates, ignore, _) = &dispatcher.dispatched[0] {
        assert_eq!(intermediates, &[b'?']);
        assert_eq!(params, &[[1049]]);
        assert!(!ignore);
    } else {
        panic!("expected csi sequence");
    }
}

// Sends an OSC 52 sequence whose payload is longer than the fixed-size raw
// buffer and checks that the dispatched payload is cut down to what fits
// (buffer size minus the length of the first parameter) and holds only `a`s.
#[cfg(feature = "no_std")]
#[test]
fn exceed_fixed_osc_buffer_size() {
    const OSC_BUFFER_SIZE: usize = 32;
    static NUM_BYTES: usize = OSC_BUFFER_SIZE + 100;
    static INPUT_START: &[u8] = b"\x1b]52;";
    static INPUT_END: &[u8] = b"\x07";

    let mut dispatcher = Dispatcher::default();
    let mut parser: Parser<OSC_BUFFER_SIZE> = Parser::new_with_size();

    // Open a valid OSC escape.
    for &byte in INPUT_START {
        parser.advance(&mut dispatcher, byte);
    }

    // Push more payload bytes than the buffer can hold.
    for _ in 0..NUM_BYTES {
        parser.advance(&mut dispatcher, b'a');
    }

    // Terminate the escape so that it gets dispatched.
    for &byte in INPUT_END {
        parser.advance(&mut dispatcher, byte);
    }

    assert_eq!(dispatcher.dispatched.len(), 1);
    if let Sequence::Osc(params, _) = &dispatcher.dispatched[0] {
        assert_eq!(params.len(), 2);
        assert_eq!(params[0], b"52");
        assert_eq!(params[1].len(), OSC_BUFFER_SIZE - params[0].len());
        for &item in params[1].iter() {
            assert_eq!(item, b'a');
        }
    } else {
        panic!("expected osc sequence");
    }
}

// Sends an OSC 2 sequence with a multi-byte payload, terminated by `ESC \`,
// through a parser with a 5-byte OSC buffer. Two dispatches are expected; the
// first must be the OSC carrying the whole payload with its flag set to false.
#[cfg(feature = "no_std")]
#[test]
fn fixed_size_osc_containing_string_terminator() {
    static INPUT_START: &[u8] = b"\x1b]2;";
    static INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab";
    static INPUT_END: &[u8] = b"\x1b\\";

    let mut dispatcher = Dispatcher::default();
    let mut parser: Parser<5> = Parser::new_with_size();

    // Feed the three segments back to back, in their original order.
    let input = INPUT_START.iter().chain(INPUT_MIDDLE).chain(INPUT_END);
    for &byte in input {
        parser.advance(&mut dispatcher, byte);
    }

    assert_eq!(dispatcher.dispatched.len(), 2);
    if let Sequence::Osc(params, false) = &dispatcher.dispatched[0] {
        assert_eq!(params[0], b"2");
        assert_eq!(params[1], INPUT_MIDDLE);
    } else {
        panic!("expected osc sequence");
    }
}
}

// #[cfg(all(feature = "nightly", test))]
Expand Down
30 changes: 25 additions & 5 deletions rio-backend/src/performer/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,17 +438,37 @@ impl ParserProcessor {
}
}

/// Process a slice of new bytes from the PTY.
///
/// Each byte is either parsed immediately or, while a sync timeout is set,
/// passed to `advance_sync`.
#[inline]
pub fn advance_bytes<H>(&mut self, handler: &mut H, bytes: &[u8])
where
    H: Handler,
{
    for &byte in bytes {
        // The sync timeout is re-checked for every byte: parsing one byte may
        // begin or end a synchronized update, and a single up-front check
        // would apply the wrong mode to the remainder of the slice.
        if self.state.sync_state.timeout.is_none() {
            let mut performer = Performer::new(&mut self.state, handler);
            self.parser.advance(&mut performer, byte);
        } else {
            self.advance_sync(handler, byte);
        }
    }
}

/// End a synchronized update.
pub fn stop_sync<H>(&mut self, handler: &mut H)
where
H: Handler,
{
// Process all synchronized bytes.
for i in 0..self.state.sync_state.buffer.len() {
let byte = self.state.sync_state.buffer[i];
let mut performer = Performer::new(&mut self.state, handler);
self.parser.advance(&mut performer, byte);
}
let bytes = self.state.sync_state.buffer;
let mut performer = Performer::new(&mut self.state, handler);
self.parser.advance_bytes(&mut performer, bytes);

// for i in 0..self.state.sync_state.buffer.len() {
// let byte = self.state.sync_state.buffer[i];
// let mut performer = Performer::new(&mut self.state, handler);
// self.parser.advance(&mut performer, byte);
// }

// Resetting state after processing makes sure we don't interpret buffered sync escapes.
self.state.sync_state.buffer.clear();
Expand Down
9 changes: 6 additions & 3 deletions rio-backend/src/performer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,12 @@ where
};

// Parse the incoming bytes.
for byte in &buf[..unprocessed] {
state.parser.advance(&mut **terminal, *byte);
}
// for byte in &buf[..unprocessed] {
// state.parser.advance(&mut **terminal, *byte);
// }

// Parse the incoming bytes.
state.parser.advance_bytes(&mut **terminal, &buf[..unprocessed]);

processed += unprocessed;
unprocessed = 0;
Expand Down
Loading