diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ansi.rs | 460 | ||||
-rw-r--r-- | src/definitions.rs | 104 | ||||
-rw-r--r-- | src/lib.rs | 910 | ||||
-rw-r--r-- | src/params.rs | 5 | ||||
-rw-r--r-- | src/table.rs | 135 |
5 files changed, 978 insertions, 636 deletions
diff --git a/src/ansi.rs b/src/ansi.rs index 8cac26d..fa5b1ed 100644 --- a/src/ansi.rs +++ b/src/ansi.rs @@ -11,21 +11,20 @@ extern crate alloc; use alloc::borrow::ToOwned; use alloc::string::{String, ToString}; use alloc::vec::Vec; -use bitflags::bitflags; - use core::convert::TryFrom; use core::fmt::{self, Display, Formatter, Write}; +#[cfg(not(feature = "no_std"))] +use core::ops::Mul; use core::ops::{Add, Sub}; use core::str::FromStr; use core::time::Duration; -use core::{iter, str}; - -#[cfg(not(feature = "no_std"))] -use core::ops::Mul; - +use core::{iter, mem, str}; #[cfg(not(feature = "no_std"))] use std::time::Instant; +use bitflags::bitflags; +#[doc(inline)] +pub use cursor_icon; use cursor_icon::CursorIcon; use log::debug; #[cfg(feature = "serde")] @@ -33,9 +32,6 @@ use serde::{Deserialize, Serialize}; use crate::{Params, ParamsIter}; -#[doc(inline)] -pub use cursor_icon; - /// Maximum time before a synchronized update is aborted. const SYNC_UPDATE_TIMEOUT: Duration = Duration::from_millis(150); @@ -168,9 +164,9 @@ impl FromStr for Rgb { match u32::from_str_radix(chars, 16) { Ok(mut color) => { - let b = (color & 0xff) as u8; + let b = (color & 0xFF) as u8; color >>= 8; - let g = (color & 0xff) as u8; + let g = (color & 0xFF) as u8; color >>= 8; let r = color as u8; Ok(Rgb { r, g, b }) @@ -237,14 +233,8 @@ fn parse_number(input: &[u8]) -> Option<u8> { let mut num: u8 = 0; for c in input { let c = *c as char; - if let Some(digit) = c.to_digit(10) { - num = match num.checked_mul(10).and_then(|v| v.checked_add(digit as u8)) { - Some(v) => v, - None => return None, - } - } else { - return None; - } + let digit = c.to_digit(10)?; + num = num.checked_mul(10).and_then(|v| v.checked_add(digit as u8))?; } Some(num) } @@ -270,11 +260,12 @@ struct SyncState<T: Timeout> { impl<T: Timeout> Default for SyncState<T> { fn default() -> Self { - Self { buffer: Vec::with_capacity(SYNC_BUFFER_SIZE), timeout: T::default() } + Self { buffer: 
Vec::with_capacity(SYNC_BUFFER_SIZE), timeout: Default::default() } } } -/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler. +/// The processor wraps a `crate::Parser` to ultimately call methods on a +/// Handler. #[cfg(not(feature = "no_std"))] #[derive(Default)] pub struct Processor<T: Timeout = StdSyncHandler> { @@ -282,7 +273,8 @@ pub struct Processor<T: Timeout = StdSyncHandler> { parser: crate::Parser, } -/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler. +/// The processor wraps a `crate::Parser` to ultimately call methods on a +/// Handler. #[cfg(feature = "no_std")] #[derive(Default)] pub struct Processor<T: Timeout> { @@ -303,15 +295,19 @@ impl<T: Timeout> Processor<T> { /// Process a new byte from the PTY. #[inline] - pub fn advance<H>(&mut self, handler: &mut H, byte: u8) + pub fn advance<H>(&mut self, handler: &mut H, bytes: &[u8]) where H: Handler, { - if self.state.sync_state.timeout.pending_timeout() { - self.advance_sync(handler, byte); - } else { - let mut performer = Performer::new(&mut self.state, handler); - self.parser.advance(&mut performer, byte); + let mut processed = 0; + while processed != bytes.len() { + if self.state.sync_state.timeout.pending_timeout() { + processed += self.advance_sync(handler, &bytes[processed..]); + } else { + let mut performer = Performer::new(&mut self.state, handler); + processed += + self.parser.advance_until_terminated(&mut performer, &bytes[processed..]); + } } } @@ -320,18 +316,45 @@ impl<T: Timeout> Processor<T> { where H: Handler, { + self.stop_sync_internal(handler, None); + } + + /// End a synchronized update. + /// + /// The `bsu_offset` parameter should be passed if the sync buffer contains + /// a new BSU escape that is not part of the current synchronized + /// update. + fn stop_sync_internal<H>(&mut self, handler: &mut H, bsu_offset: Option<usize>) + where + H: Handler, + { // Process all synchronized bytes. 
- for i in 0..self.state.sync_state.buffer.len() { - let byte = self.state.sync_state.buffer[i]; - let mut performer = Performer::new(&mut self.state, handler); - self.parser.advance(&mut performer, byte); + // + // NOTE: We do not use `advance_until_terminated` here since BSU sequences are + // processed automatically during the synchronized update. + let buffer = mem::take(&mut self.state.sync_state.buffer); + let offset = bsu_offset.unwrap_or(buffer.len()); + let mut performer = Performer::new(&mut self.state, handler); + self.parser.advance(&mut performer, &buffer[..offset]); + self.state.sync_state.buffer = buffer; + + match bsu_offset { + // Just clear processed bytes if there is a new BSU. + // + // NOTE: We do not need to re-process for a new ESU since the `advance_sync` + // function checks for BSUs in reverse. + Some(bsu_offset) => { + let new_len = self.state.sync_state.buffer.len() - bsu_offset; + self.state.sync_state.buffer.copy_within(bsu_offset.., 0); + self.state.sync_state.buffer.truncate(new_len); + }, + // Report mode and clear state if no new BSU is present. + None => { + handler.unset_private_mode(NamedPrivateMode::SyncUpdate.into()); + self.state.sync_state.timeout.clear_timeout(); + self.state.sync_state.buffer.clear(); + }, } - - // Report that update ended, since we could end due to timeout. - handler.unset_private_mode(NamedPrivateMode::SyncUpdate.into()); - // Resetting state after processing makes sure we don't interpret buffered sync escapes. - self.state.sync_state.buffer.clear(); - self.state.sync_state.timeout.clear_timeout(); } /// Number of bytes in the synchronization buffer. @@ -341,36 +364,56 @@ impl<T: Timeout> Processor<T> { } /// Process a new byte during a synchronized update. + /// + /// Returns the number of bytes processed. 
#[cold] - fn advance_sync<H>(&mut self, handler: &mut H, byte: u8) + fn advance_sync<H>(&mut self, handler: &mut H, bytes: &[u8]) -> usize where H: Handler, { - self.state.sync_state.buffer.push(byte); + // Advance sync parser or stop sync if we'd exceed the maximum buffer size. + if self.state.sync_state.buffer.len() + bytes.len() >= SYNC_BUFFER_SIZE - 1 { + // Terminate the synchronized update. + self.stop_sync_internal(handler, None); - // Handle sync CSI escape sequences. - self.advance_sync_csi(handler); + // Just parse the bytes normally. + let mut performer = Performer::new(&mut self.state, handler); + self.parser.advance_until_terminated(&mut performer, bytes) + } else { + self.state.sync_state.buffer.extend(bytes); + self.advance_sync_csi(handler, bytes.len()); + bytes.len() + } } /// Handle BSU/ESU CSI sequences during synchronized update. - fn advance_sync_csi<H>(&mut self, handler: &mut H) + fn advance_sync_csi<H>(&mut self, handler: &mut H, new_bytes: usize) where H: Handler, { - // Get the last few bytes for comparison. - let len = self.state.sync_state.buffer.len(); - let offset = len.saturating_sub(SYNC_ESCAPE_LEN); - let end = &self.state.sync_state.buffer[offset..]; + // Get constraints within which a new escape character might be relevant. + let buffer_len = self.state.sync_state.buffer.len(); + let start_offset = (buffer_len - new_bytes).saturating_sub(SYNC_ESCAPE_LEN - 1); + let end_offset = buffer_len.saturating_sub(SYNC_ESCAPE_LEN - 1); + let search_buffer = &self.state.sync_state.buffer[start_offset..end_offset]; + // Search for termination/extension escapes in the added bytes. + // // NOTE: It is technically legal to specify multiple private modes in the same // escape, but we only allow EXACTLY `\e[?2026h`/`\e[?2026l` to keep the parser - // reasonable. - // - // Check for extension/termination of the synchronized update. 
- if end == BSU_CSI { - self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT); - } else if end == ESU_CSI || len >= SYNC_BUFFER_SIZE - 1 { - self.stop_sync(handler); + // more simple. + let mut bsu_offset = None; + for index in memchr::memchr_iter(0x1B, search_buffer).rev() { + let offset = start_offset + index; + let escape = &self.state.sync_state.buffer[offset..offset + SYNC_ESCAPE_LEN]; + + if escape == BSU_CSI { + self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT); + bsu_offset = Some(offset); + } else if escape == ESU_CSI { + self.stop_sync_internal(handler, bsu_offset); + break; + } } } } @@ -382,13 +425,16 @@ impl<T: Timeout> Processor<T> { struct Performer<'a, H: Handler, T: Timeout> { state: &'a mut ProcessorState<T>, handler: &'a mut H, + + /// Whether the parser should be prematurely terminated. + terminated: bool, } impl<'a, H: Handler + 'a, T: Timeout> Performer<'a, H, T> { /// Create a performer. #[inline] pub fn new<'b>(state: &'b mut ProcessorState<T>, handler: &'b mut H) -> Performer<'b, H, T> { - Performer { state, handler } + Performer { state, handler, terminated: Default::default() } } } @@ -710,13 +756,14 @@ bitflags! { /// /// This only applies to keys corresponding to ascii characters. /// -/// For the details on how to implement the mode handling correctly, consult [`XTerm's -/// implementation`] and the [`output`] of XTerm's provided [`perl script`]. Some libraries and -/// implementations also use the [`fixterms`] definition of the `CSI u`. +/// For the details on how to implement the mode handling correctly, consult +/// [`XTerm's implementation`] and the [`output`] of XTerm's provided [`perl +/// script`]. Some libraries and implementations also use the [`fixterms`] +/// definition of the `CSI u`. /// -/// The end escape sequence has a `CSI char; modifiers u` form while the original -/// `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI u`, since it has -/// more adoption. 
+/// The end escape sequence has a `CSI char; modifiers u` form while the +/// original `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI +/// u`, since it has more adoption. /// /// [`XTerm's implementation`]: https://invisible-island.net/xterm/modified-keys.html /// [`perl script`]: https://github.com/ThomasDickey/xterm-snapshots/blob/master/vttests/modify-keys.pl @@ -727,12 +774,14 @@ bitflags! { pub enum ModifyOtherKeys { /// Reset the state. Reset, - /// Enables this feature except for keys with well-known behavior, e.g., Tab, Backspace and - /// some special control character cases which are built into the X11 library (e.g., - /// Control-Space to make a NUL, or Control-3 to make an Escape character). + /// Enables this feature except for keys with well-known behavior, e.g., + /// Tab, Backspace and some special control character cases which are + /// built into the X11 library (e.g., Control-Space to make a NUL, or + /// Control-3 to make an Escape character). /// /// Escape sequences shouldn't be emitted under the following circumstances: - /// - When the key is in range of `[64;127]` and the modifier is either Control or Shift + /// - When the key is in range of `[64;127]` and the modifier is either + /// Control or Shift /// - When the key combination is a known control combination alias /// /// For more details, consult the [`example`] for the suggested translation. @@ -740,9 +789,10 @@ pub enum ModifyOtherKeys { /// [`example`]: https://github.com/alacritty/vte/blob/master/doc/modifyOtherKeys-example.txt EnableExceptWellDefined, /// Enables this feature for all keys including the exceptions of - /// [`Self::EnableExceptWellDefined`]. XTerm still ignores the special cases built into the - /// X11 library. Any shifted (modified) ordinary key send an escape sequence. The Alt- and - /// Meta- modifiers cause XTerm to send escape sequences. + /// [`Self::EnableExceptWellDefined`]. 
XTerm still ignores the special + /// cases built into the X11 library. Any shifted (modified) ordinary + /// key send an escape sequence. The Alt- and Meta- modifiers cause + /// XTerm to send escape sequences. /// /// For more details, consult the [`example`] for the suggested translation. /// @@ -1203,16 +1253,20 @@ impl StandardCharset { pub enum ScpCharPath { /// SCP's first parameter value of 0. Behavior is implementation defined. Default, - /// SCP's first parameter value of 1 which sets character path to LEFT-TO-RIGHT. + /// SCP's first parameter value of 1 which sets character path to + /// LEFT-TO-RIGHT. LTR, - /// SCP's first parameter value of 2 which sets character path to RIGHT-TO-LEFT. + /// SCP's first parameter value of 2 which sets character path to + /// RIGHT-TO-LEFT. RTL, } -/// SCP control's second parameter which determines update mode/direction between components. +/// SCP control's second parameter which determines update mode/direction +/// between components. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum ScpUpdateMode { - /// SCP's second parameter value of 0 (the default). Implementation dependant update. + /// SCP's second parameter value of 0 (the default). Implementation + /// dependant update. ImplementationDependant, /// SCP's second parameter value of 1. /// @@ -1351,8 +1405,8 @@ where return; } - // Link parameters are in format of `key1=value1:key2=value2`. Currently only key - // `id` is defined. + // Link parameters are in format of `key1=value1:key2=value2`. Currently only + // key `id` is defined. let id = link_params .split(|&b| b == b':') .find_map(|kv| kv.strip_prefix(b"id=")) @@ -1547,6 +1601,7 @@ where // Handle sync updates opaquely. 
if param == NamedPrivateMode::SyncUpdate as u16 { self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT); + self.terminated = true; } handler.set_private_mode(PrivateMode::new(param)) @@ -1761,6 +1816,11 @@ where _ => unhandled!(), } } + + #[inline] + fn terminated(&self) -> bool { + self.terminated + } } #[inline] @@ -1943,7 +2003,7 @@ pub mod C0 { /// Unit Separator. pub const US: u8 = 0x1F; /// Delete, should be ignored by terminal. - pub const DEL: u8 = 0x7f; + pub const DEL: u8 = 0x7F; } // Tests for parsing escape sequences. @@ -1954,22 +2014,24 @@ mod tests { use super::*; #[derive(Default)] - pub struct TestSyncHandler; + pub struct TestSyncHandler { + is_sync: usize, + } impl Timeout for TestSyncHandler { #[inline] fn set_timeout(&mut self, _: Duration) { - unreachable!() + self.is_sync += 1; } #[inline] fn clear_timeout(&mut self) { - unreachable!() + self.is_sync = 0; } #[inline] fn pending_timeout(&self) -> bool { - false + self.is_sync != 0 } } @@ -2028,72 +2090,60 @@ mod tests { #[test] fn parse_control_attribute() { - static BYTES: &[u8] = &[0x1b, b'[', b'1', b'm']; + static BYTES: &[u8] = &[0x1B, b'[', b'1', b'm']; let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); assert_eq!(handler.attr, Some(Attr::Bold)); } #[test] fn parse_terminal_identity_csi() { - let bytes: &[u8] = &[0x1b, b'[', b'1', b'c']; + let bytes: &[u8] = &[0x1B, b'[', b'1', b'c']; let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(!handler.identity_reported); handler.reset_state(); - let bytes: &[u8] = &[0x1b, b'[', b'c']; + let bytes: &[u8] = &[0x1B, b'[', b'c']; - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); 
assert!(handler.identity_reported); handler.reset_state(); - let bytes: &[u8] = &[0x1b, b'[', b'0', b'c']; + let bytes: &[u8] = &[0x1B, b'[', b'0', b'c']; - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(handler.identity_reported); } #[test] fn parse_terminal_identity_esc() { - let bytes: &[u8] = &[0x1b, b'Z']; + let bytes: &[u8] = &[0x1B, b'Z']; let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(handler.identity_reported); handler.reset_state(); - let bytes: &[u8] = &[0x1b, b'#', b'Z']; + let bytes: &[u8] = &[0x1B, b'#', b'Z']; let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert!(!handler.identity_reported); handler.reset_state(); @@ -2102,16 +2152,14 @@ mod tests { #[test] fn parse_truecolor_attr() { static BYTES: &[u8] = &[ - 0x1b, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';', + 0x1B, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';', b'2', b'5', b'5', b'm', ]; let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); let spec = Rgb { r: 128, g: 66, b: 255 }; @@ -2122,38 +2170,34 @@ mod tests { #[test] fn parse_zsh_startup() { static BYTES: &[u8] = &[ - 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'7', b'm', b'%', 0x1b, b'[', b'2', b'7', b'm', - 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ', + 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'7', b'm', b'%', 0x1B, b'[', b'2', b'7', b'm', + 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' 
', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', - b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1b, b'[', b'0', b'm', 0x1b, b'[', b'2', - b'7', b'm', 0x1b, b'[', b'2', b'4', b'm', 0x1b, b'[', b'J', b'j', b'w', b'i', b'l', - b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1b, - b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xe2, 0x9e, 0x9c, b' ', 0x1b, b'[', b'0', - b'1', b';', b'3', b'2', b'm', b' ', 0x1b, b'[', b'3', b'6', b'm', b'~', b'/', b'c', + b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1B, b'[', b'0', b'm', 0x1B, b'[', b'2', + b'7', b'm', 0x1B, b'[', b'2', b'4', b'm', 0x1B, b'[', b'J', b'j', b'w', b'i', b'l', + b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1B, + b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xE2, 0x9E, 0x9C, b' ', 0x1B, b'[', b'0', + b'1', b';', b'3', b'2', b'm', b' ', 0x1B, b'[', b'3', b'6', b'm', b'~', b'/', b'c', b'o', b'd', b'e', ]; let mut handler = MockHandler::default(); let mut parser = Processor::<TestSyncHandler>::new(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); } #[test] fn parse_designate_g0_as_line_drawing() { - static BYTES: &[u8] = &[0x1b, b'(', b'0']; + static BYTES: &[u8] = &[0x1B, b'(', b'0']; let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in BYTES { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, BYTES); assert_eq!(handler.index, CharsetIndex::G0); assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing); @@ -2161,37 +2205,35 @@ mod tests { 
#[test] fn parse_designate_g1_as_line_drawing_and_invoke() { - static BYTES: &[u8] = &[0x1b, b')', b'0', 0x0e]; + static BYTES: &[u8] = &[0x1B, b')', b'0', 0x0E]; let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in &BYTES[..3] { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, &BYTES[..3]); assert_eq!(handler.index, CharsetIndex::G1); assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing); let mut handler = MockHandler::default(); - parser.advance(&mut handler, BYTES[3]); + parser.advance(&mut handler, &[BYTES[3]]); assert_eq!(handler.index, CharsetIndex::G1); } #[test] fn parse_valid_rgb_colors() { - assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xff, g: 0xee, b: 0xdd })); - assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); - assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xff, g: 0xec, b: 0xca })); - assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xff, g: 0x0, b: 0x0 })); + assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xFF, g: 0xEE, b: 0xDD })); + assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF })); + assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xFF, g: 0xEC, b: 0xCA })); + assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xFF, g: 0x0, b: 0x0 })); } #[test] fn parse_valid_legacy_rgb_colors() { - assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xa0, b: 0xf0 })); - assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); - assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); - assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff })); + assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xA0, b: 0xF0 })); + assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF })); + assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 
0xAA, b: 0xFF })); + assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF })); } #[test] @@ -2228,11 +2270,9 @@ mod tests { let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); - assert_eq!(handler.color, Some(Rgb { r: 0xf0, g: 0xf0, b: 0xf0 })); + assert_eq!(handler.color, Some(Rgb { r: 0xF0, g: 0xF0, b: 0xF0 })); } #[test] @@ -2242,9 +2282,7 @@ mod tests { let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); assert_eq!(handler.reset_colors, vec![1]); } @@ -2256,9 +2294,7 @@ mod tests { let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); let expected: Vec<usize> = (0..256).collect(); assert_eq!(handler.reset_colors, expected); @@ -2271,30 +2307,148 @@ mod tests { let mut parser = Processor::<TestSyncHandler>::new(); let mut handler = MockHandler::default(); - for byte in bytes { - parser.advance(&mut handler, *byte); - } + parser.advance(&mut handler, bytes); let expected: Vec<usize> = (0..256).collect(); assert_eq!(handler.reset_colors, expected); } #[test] + fn partial_sync_updates() { + let mut parser = Processor::<TestSyncHandler>::new(); + let mut handler = MockHandler::default(); + + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_none()); + + // Start synchronized update. 
+ + parser.advance(&mut handler, b"\x1b[?20"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_none()); + + parser.advance(&mut handler, b"26h"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Dispatch some data. + + parser.advance(&mut handler, b"random \x1b[31m stuff"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Extend synchronized update. + + parser.advance(&mut handler, b"\x1b[?20"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + parser.advance(&mut handler, b"26h"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 2); + assert!(handler.attr.is_none()); + + // Terminate synchronized update. + + parser.advance(&mut handler, b"\x1b[?20"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 2); + assert!(handler.attr.is_none()); + + parser.advance(&mut handler, b"26l"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_some()); + } + + #[test] + fn sync_bursts_buffer() { + let mut parser = Processor::<TestSyncHandler>::new(); + let mut handler = MockHandler::default(); + + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_none()); + + // Repeat test twice to ensure internal state is reset properly. + for _ in 0..2 { + // Start synchronized update. + parser.advance(&mut handler, b"\x1b[?2026h"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Ensure sync works. + parser.advance(&mut handler, b"\x1b[31m"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Exceed sync buffer dimensions. 
+ parser.advance(&mut handler, "a".repeat(SYNC_BUFFER_SIZE).as_bytes()); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.take().is_some()); + + // Ensure new events are dispatched directly. + parser.advance(&mut handler, b"\x1b[31m"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.take().is_some()); + } + } + + #[test] + fn mixed_sync_escape() { + let mut parser = Processor::<TestSyncHandler>::new(); + let mut handler = MockHandler::default(); + + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_none()); + + // Start synchronized update with immediate SGR. + parser.advance(&mut handler, b"\x1b[?2026h\x1b[31m"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Terminate synchronized update and check for SGR. + parser.advance(&mut handler, b"\x1b[?2026l"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_some()); + } + + #[test] + fn sync_bsu_with_esu() { + let mut parser = Processor::<TestSyncHandler>::new(); + let mut handler = MockHandler::default(); + + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert!(handler.attr.is_none()); + + // Start synchronized update with immediate SGR. + parser.advance(&mut handler, b"\x1b[?2026h\x1b[1m"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 1); + assert!(handler.attr.is_none()); + + // Terminate synchronized update, but immediately start a new one. + parser.advance(&mut handler, b"\x1b[?2026l\x1b[?2026h\x1b[4m"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 2); + assert_eq!(handler.attr.take(), Some(Attr::Bold)); + + // Terminate again, expecting one buffered SGR. 
+ parser.advance(&mut handler, b"\x1b[?2026l"); + assert_eq!(parser.state.sync_state.timeout.is_sync, 0); + assert_eq!(handler.attr.take(), Some(Attr::Underline)); + } + + #[test] #[cfg(not(feature = "no_std"))] fn contrast() { - let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff }; + let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF }; let rgb2 = Rgb { r: 0x00, g: 0x00, b: 0x00 }; assert!((rgb1.contrast(rgb2) - 21.).abs() < f64::EPSILON); - let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff }; + let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF }; assert!((rgb1.contrast(rgb1) - 1.).abs() < f64::EPSILON); - let rgb1 = Rgb { r: 0xff, g: 0x00, b: 0xff }; - let rgb2 = Rgb { r: 0x00, g: 0xff, b: 0x00 }; + let rgb1 = Rgb { r: 0xFF, g: 0x00, b: 0xFF }; + let rgb2 = Rgb { r: 0x00, g: 0xFF, b: 0x00 }; assert!((rgb1.contrast(rgb2) - 2.285_543_608_124_253_3).abs() < f64::EPSILON); let rgb1 = Rgb { r: 0x12, g: 0x34, b: 0x56 }; - let rgb2 = Rgb { r: 0xfe, g: 0xdc, b: 0xba }; + let rgb2 = Rgb { r: 0xFE, g: 0xDC, b: 0xBA }; assert!((rgb1.contrast(rgb2) - 9.786_558_997_257_74).abs() < f64::EPSILON); } } diff --git a/src/definitions.rs b/src/definitions.rs index 568a8a8..694c783 100644 --- a/src/definitions.rs +++ b/src/definitions.rs @@ -2,54 +2,53 @@ use core::mem; #[allow(dead_code)] #[repr(u8)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(PartialEq, Eq, Debug, Default, Copy, Clone)] pub enum State { - Anywhere = 0, - CsiEntry = 1, - CsiIgnore = 2, - CsiIntermediate = 3, - CsiParam = 4, - DcsEntry = 5, - DcsIgnore = 6, - DcsIntermediate = 7, - DcsParam = 8, - DcsPassthrough = 9, - Escape = 10, - EscapeIntermediate = 11, + CsiEntry, + CsiIgnore, + CsiIntermediate, + CsiParam, + DcsEntry, + DcsIgnore, + DcsIntermediate, + DcsParam, + DcsPassthrough, + Escape, + EscapeIntermediate, + OscString, + SosPmApcString, + Anywhere, #[default] - Ground = 12, - OscString = 13, - SosPmApcString = 14, - Utf8 = 15, + Ground, } +// NOTE: Removing the unused actions prefixed with `_` will reduce performance. 
#[allow(dead_code)] #[repr(u8)] -#[derive(Debug, Clone, Copy)] +#[derive(PartialEq, Eq, Debug, Clone, Copy)] pub enum Action { - None = 0, - Clear = 1, - Collect = 2, - CsiDispatch = 3, - EscDispatch = 4, - Execute = 5, - Hook = 6, - Ignore = 7, - OscEnd = 8, - OscPut = 9, - OscStart = 10, - Param = 11, - Print = 12, - Put = 13, - Unhook = 14, - BeginUtf8 = 15, + None, + _Clear, + Collect, + CsiDispatch, + EscDispatch, + Execute, + _Hook, + _Ignore, + _OscEnd, + OscPut, + _OscStart, + Param, + _Print, + Put, + _Unhook, } /// Unpack a u8 into a State and Action /// -/// The implementation of this assumes that there are *precisely* 16 variants for both Action and -/// State. Furthermore, it assumes that the enums are tag-only; that is, there is no data in any -/// variant. +/// The implementation of this assumes that there are *precisely* 16 variants +/// for both Action and State. Furthermore, it assumes that the enums are +/// tag-only; that is, there is no data in any variant. /// /// Bad things will happen if those invariants are violated. 
#[inline(always)] @@ -57,7 +56,7 @@ pub fn unpack(delta: u8) -> (State, Action) { unsafe { ( // State is stored in bottom 4 bits - mem::transmute::<u8, State>(delta & 0x0f), + mem::transmute::<u8, State>(delta & 0x0F), // Action is stored in top 4 bits mem::transmute::<u8, Action>(delta >> 4), ) @@ -75,37 +74,26 @@ mod tests { #[test] fn unpack_state_action() { - match unpack(0xee) { - (State::SosPmApcString, Action::Unhook) => (), + match unpack(0xEE) { + (State::Ground, Action::_Unhook) => (), _ => panic!("unpack failed"), } - match unpack(0x0f) { - (State::Utf8, Action::None) => (), + match unpack(0x0E) { + (State::Ground, Action::None) => (), _ => panic!("unpack failed"), } - match unpack(0xff) { - (State::Utf8, Action::BeginUtf8) => (), + match unpack(0xE0) { + (State::CsiEntry, Action::_Unhook) => (), _ => panic!("unpack failed"), } } #[test] fn pack_state_action() { - match unpack(0xee) { - (State::SosPmApcString, Action::Unhook) => (), - _ => panic!("unpack failed"), - } - - match unpack(0x0f) { - (State::Utf8, Action::None) => (), - _ => panic!("unpack failed"), - } - - match unpack(0xff) { - (State::Utf8, Action::BeginUtf8) => (), - _ => panic!("unpack failed"), - } + assert_eq!(pack(State::Ground, Action::_Unhook), 0xEE); + assert_eq!(pack(State::Ground, Action::None), 0x0E); + assert_eq!(pack(State::CsiEntry, Action::_Unhook), 0xE0); } } @@ -1,44 +1,39 @@ //! Parser for implementing virtual terminal emulators //! -//! [`Parser`] is implemented according to [Paul Williams' ANSI parser -//! state machine]. The state machine doesn't assign meaning to the parsed data -//! and is thus not itself sufficient for writing a terminal emulator. Instead, -//! it is expected that an implementation of [`Perform`] is provided which does +//! [`Parser`] is implemented according to [Paul Williams' ANSI parser state +//! machine]. The state machine doesn't assign meaning to the parsed data and is +//! thus not itself sufficient for writing a terminal emulator. 
Instead, it is +//! expected that an implementation of [`Perform`] is provided which does //! something useful with the parsed data. The [`Parser`] handles the book //! keeping, and the [`Perform`] gets to simply handle actions. //! //! # Examples //! -//! For an example of using the [`Parser`] please see the examples folder. The example included -//! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to -//! pipe `vim` into it +//! For an example of using the [`Parser`] please see the examples folder. The +//! example included there simply logs all the actions [`Perform`] does. One +//! quick way to see it in action is to pipe `printf` into it //! //! ```sh -//! cargo build --release --example parselog -//! vim | target/release/examples/parselog +//! printf '\x1b[31mExample' | cargo run --example parselog //! ``` //! -//! Just type `:q` to exit. -//! //! # Differences from original state machine description //! //! * UTF-8 Support for Input //! * OSC Strings can be terminated by 0x07 -//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in -//! all states. +//! * Only supports 7-bit codes //! //! [`Parser`]: struct.Parser.html //! [`Perform`]: trait.Perform.html //! 
[Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser #![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)] -#![cfg_attr(all(feature = "nightly", test), feature(test))] #![cfg_attr(feature = "no_std", no_std)] use core::mem::MaybeUninit; +use core::str; #[cfg(feature = "no_std")] use arrayvec::ArrayVec; -use utf8parse as utf8; mod definitions; mod params; @@ -46,28 +41,13 @@ mod table; #[cfg(feature = "ansi")] pub mod ansi; -pub use params::{Params, ParamsIter}; - use definitions::{unpack, Action, State}; +pub use params::{Params, ParamsIter}; const MAX_INTERMEDIATES: usize = 2; const MAX_OSC_PARAMS: usize = 16; const MAX_OSC_RAW: usize = 1024; -struct VtUtf8Receiver<'a, P: Perform>(&'a mut P, &'a mut State); - -impl<P: Perform> utf8::Receiver for VtUtf8Receiver<'_, P> { - fn codepoint(&mut self, c: char) { - self.0.print(c); - *self.1 = State::Ground; - } - - fn invalid_sequence(&mut self) { - self.0.print('�'); - *self.1 = State::Ground; - } -} - /// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] /// /// [`Perform`]: trait.Perform.html @@ -88,7 +68,8 @@ pub struct Parser<const OSC_RAW_BUF_SIZE: usize = MAX_OSC_RAW> { osc_params: [(usize, usize); MAX_OSC_PARAMS], osc_num_params: usize, ignoring: bool, - utf8_parser: utf8::Parser, + partial_utf8: [u8; 4], + partial_utf8_len: usize, } impl Parser { @@ -99,7 +80,8 @@ impl Parser { } impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> { - /// Create a new Parser with a custom size for the Operating System Command buffer. + /// Create a new Parser with a custom size for the Operating System Command + /// buffer. /// /// Call with a const-generic param on `Parser`, like: /// @@ -121,41 +103,74 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> { &self.intermediates[..self.intermediate_idx] } - /// Advance the parser state + /// Advance the parser state. 
/// - /// Requires a [`Perform`] in case `byte` triggers an action + /// Requires a [`Perform`] implementation to handle the triggered actions. /// /// [`Perform`]: trait.Perform.html #[inline] - pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) { - // Utf8 characters are handled out-of-band. - if let State::Utf8 = self.state { - self.process_utf8(performer, byte); - return; - } - - // Handle state changes in the anywhere state before evaluating changes - // for current state. - let mut change = table::STATE_CHANGES[State::Anywhere as usize][byte as usize]; + pub fn advance<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) { + let mut i = 0; - if change == 0 { - change = table::STATE_CHANGES[self.state as usize][byte as usize]; + // Handle partial codepoints from previous calls to `advance`. + if self.partial_utf8_len > 0 { + i += self.advance_partial_utf8(performer, bytes); } - // Unpack into a state and action - let (state, action) = unpack(change); + while i != bytes.len() { + match self.state { + State::Ground => i += self.advance_ground(performer, &bytes[i..]), + _ => { + let byte = bytes[i]; + let change = table::STATE_CHANGES[self.state as usize][byte as usize]; + let (state, action) = unpack(change); - self.perform_state_change(performer, state, action, byte); + self.perform_state_change(performer, state, action, byte); + + i += 1; + }, + } + } } + /// Partially advance the parser state. + /// + /// This is equivalent to [`Self::advance`], but stops when + /// [`Perform::terminated`] is true after reading a byte. + /// + /// Returns the number of bytes read before termination. + /// + /// See [`Perform::terminated`] for more details. 
#[inline] - fn process_utf8<P>(&mut self, performer: &mut P, byte: u8) - where - P: Perform, - { - let mut receiver = VtUtf8Receiver(performer, &mut self.state); - let utf8_parser = &mut self.utf8_parser; - utf8_parser.advance(&mut receiver, byte); + #[must_use = "Returned value should be used to process the remaining bytes"] + pub fn advance_until_terminated<P: Perform>( + &mut self, + performer: &mut P, + bytes: &[u8], + ) -> usize { + let mut i = 0; + + // Handle partial codepoints from previous calls to `advance`. + if self.partial_utf8_len != 0 { + i += self.advance_partial_utf8(performer, bytes); + } + + while i != bytes.len() && !performer.terminated() { + match self.state { + State::Ground => i += self.advance_ground(performer, &bytes[i..]), + _ => { + let byte = bytes[i]; + let change = table::STATE_CHANGES[self.state as usize][byte as usize]; + let (state, action) = unpack(change); + + self.perform_state_change(performer, state, action, byte); + + i += 1; + }, + } + } + + i } #[inline] @@ -163,93 +178,75 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> { where P: Perform, { - macro_rules! 
maybe_action { - ($action:expr, $arg:expr) => { - match $action { - Action::None => (), - action => { - self.perform_action(performer, action, $arg); - }, - } - }; + if state == State::Anywhere { + self.perform_action(performer, action, byte); + return; } - match state { - State::Anywhere => { - // Just run the action - self.perform_action(performer, action, byte); - }, - state => { - match self.state { - State::DcsPassthrough => { - self.perform_action(performer, Action::Unhook, byte); - }, - State::OscString => { - self.perform_action(performer, Action::OscEnd, byte); - }, - _ => (), - } + match self.state { + State::DcsPassthrough => performer.unhook(), + State::OscString => { + let param_idx = self.osc_num_params; + let idx = self.osc_raw.len(); - maybe_action!(action, byte); + match param_idx { + // Finish last parameter if not already maxed + MAX_OSC_PARAMS => (), - match state { - State::CsiEntry | State::DcsEntry | State::Escape => { - self.perform_action(performer, Action::Clear, byte); - }, - State::DcsPassthrough => { - self.perform_action(performer, Action::Hook, byte); + // First param is special - 0 to current byte index + 0 => { + self.osc_params[param_idx] = (0, idx); + self.osc_num_params += 1; }, - State::OscString => { - self.perform_action(performer, Action::OscStart, byte); + + // All other params depend on previous indexing + _ => { + let prev = self.osc_params[param_idx - 1]; + let begin = prev.1; + self.osc_params[param_idx] = (begin, idx); + self.osc_num_params += 1; }, - _ => (), } - - // Assume the new state - self.state = state; + self.osc_dispatch(performer, byte); }, + _ => (), } - } - /// Separate method for osc_dispatch that borrows self as read-only - /// - /// The aliasing is needed here for multiple slices into self.osc_raw - #[inline] - fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { - let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = - unsafe { MaybeUninit::uninit().assume_init() }; + if action == 
Action::None { + match state { + State::CsiEntry | State::DcsEntry | State::Escape => self.reset_params(), + State::DcsPassthrough => { + if self.params.is_full() { + self.ignoring = true; + } else { + self.params.push(self.param); + } - for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { - let indices = self.osc_params[i]; - *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); + performer.hook( + self.params(), + self.intermediates(), + self.ignoring, + byte as char, + ); + }, + State::OscString => { + self.osc_raw.clear(); + self.osc_num_params = 0; + }, + _ => (), + } + } else { + self.perform_action(performer, action, byte); } - unsafe { - let num_params = self.osc_num_params; - let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; - performer.osc_dispatch(&*params, byte == 0x07); - } + self.state = state; } #[inline] fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) { match action { - Action::Print => performer.print(byte as char), Action::Execute => performer.execute(byte), - Action::Hook => { - if self.params.is_full() { - self.ignoring = true; - } else { - self.params.push(self.param); - } - - performer.hook(self.params(), self.intermediates(), self.ignoring, byte as char); - }, Action::Put => performer.put(byte), - Action::OscStart => { - self.osc_raw.clear(); - self.osc_num_params = 0; - }, Action::OscPut => { #[cfg(feature = "no_std")] { @@ -285,31 +282,6 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> { self.osc_raw.push(byte); } }, - Action::OscEnd => { - let param_idx = self.osc_num_params; - let idx = self.osc_raw.len(); - - match param_idx { - // Finish last parameter if not already maxed - MAX_OSC_PARAMS => (), - - // First param is special - 0 to current byte index - 0 => { - self.osc_params[param_idx] = (0, idx); - self.osc_num_params += 1; - }, - - // All other params depend on previous indexing - _ => { - let prev = 
self.osc_params[param_idx - 1]; - let begin = prev.1; - self.osc_params[param_idx] = (begin, idx); - self.osc_num_params += 1; - }, - } - self.osc_dispatch(performer, byte); - }, - Action::Unhook => performer.unhook(), Action::CsiDispatch => { if self.params.is_full() { self.ignoring = true; @@ -341,37 +313,203 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> { return; } - if byte == b';' { - self.params.push(self.param); - self.param = 0; - } else if byte == b':' { - self.params.extend(self.param); - self.param = 0; - } else { - // Continue collecting bytes into param - self.param = self.param.saturating_mul(10); - self.param = self.param.saturating_add((byte - b'0') as u16); + match byte { + b';' => { + self.params.push(self.param); + self.param = 0; + }, + b':' => { + self.params.extend(self.param); + self.param = 0; + }, + _ => { + // Continue collecting bytes into param + self.param = self.param.saturating_mul(10); + self.param = self.param.saturating_add((byte - b'0') as u16); + }, } }, - Action::Clear => { - // Reset everything on ESC/CSI/DCS entry - self.intermediate_idx = 0; - self.ignoring = false; - self.param = 0; + _ => (), + } + } + + /// Reset escape sequence parameters and intermediates. 
+ #[inline] + fn reset_params(&mut self) { + self.intermediate_idx = 0; + self.ignoring = false; + self.param = 0; + + self.params.clear(); + } + + /// Separate method for osc_dispatch that borrows self as read-only + /// + /// The aliasing is needed here for multiple slices into self.osc_raw + #[inline] + fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { + let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = + unsafe { MaybeUninit::uninit().assume_init() }; + + for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { + let indices = self.osc_params[i]; + *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); + } + + unsafe { + let num_params = self.osc_num_params; + let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; + performer.osc_dispatch(&*params, byte == 0x07); + } + } - self.params.clear(); + /// Advance the parser state from ground. + /// + /// The ground state is handled separately since it can only be left using + /// the escape character (`\x1b`). This allows more efficient parsing by + /// using SIMD search with [`memchr`]. + #[inline] + fn advance_ground<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) -> usize { + // Find the next escape character. + let num_bytes = bytes.len(); + let plain_chars = memchr::memchr(0x1B, bytes).unwrap_or(num_bytes); + + // If the next character is ESC, just process it and short-circuit. + if plain_chars == 0 { + self.state = State::Escape; + self.reset_params(); + return 1; + } + + match str::from_utf8(&bytes[..plain_chars]) { + Ok(parsed) => { + Self::ground_dispatch(performer, parsed); + let mut processed = plain_chars; + + // If there's another character, it must be escape so process it directly. + if processed < num_bytes { + self.state = State::Escape; + self.reset_params(); + processed += 1; + } + + processed + }, + // Handle invalid and partial utf8. + Err(err) => { + // Dispatch all the valid bytes. 
+ let valid_bytes = err.valid_up_to(); + let parsed = unsafe { str::from_utf8_unchecked(&bytes[..valid_bytes]) }; + Self::ground_dispatch(performer, parsed); + + match err.error_len() { + Some(len) => { + // Execute C1 escapes or emit replacement character. + if len == 1 && bytes[valid_bytes] <= 0x9F { + performer.execute(bytes[valid_bytes]); + } else { + performer.print('�'); + } + + // Restart processing after the invalid bytes. + // + // While we could theoretically try to just re-parse + // `bytes[valid_bytes + len..plain_chars]`, it's easier + // to just skip it and invalid utf8 is pretty rare anyway. + valid_bytes + len + }, + None => { + if plain_chars < num_bytes { + // Process bytes cut off by escape. + performer.print('�'); + self.state = State::Escape; + self.reset_params(); + plain_chars + 1 + } else { + // Process bytes cut off by the buffer end. + let extra_bytes = num_bytes - valid_bytes; + let partial_len = self.partial_utf8_len + extra_bytes; + self.partial_utf8[self.partial_utf8_len..partial_len] + .copy_from_slice(&bytes[valid_bytes..valid_bytes + extra_bytes]); + self.partial_utf8_len = partial_len; + num_bytes + } + }, + } + }, + } + } + + /// Advance the parser while processing a partial utf8 codepoint. + #[inline] + fn advance_partial_utf8<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) -> usize { + // Try to copy up to 3 more characters, to ensure the codepoint is complete. + let old_bytes = self.partial_utf8_len; + let to_copy = bytes.len().min(self.partial_utf8.len() - old_bytes); + self.partial_utf8[old_bytes..old_bytes + to_copy].copy_from_slice(&bytes[..to_copy]); + self.partial_utf8_len += to_copy; + + // Parse the unicode character. + match str::from_utf8(&self.partial_utf8[..self.partial_utf8_len]) { + // If the entire buffer is valid, use the first character and continue parsing. 
+ Ok(parsed) => { + let c = unsafe { parsed.chars().next().unwrap_unchecked() }; + performer.print(c); + + self.partial_utf8_len = 0; + c.len_utf8() - old_bytes + }, + Err(err) => { + match err.error_len() { + // If the partial character was also invalid, emit the replacement + // character. + Some(invalid_len) => { + performer.print('�'); + + self.partial_utf8_len = 0; + invalid_len - old_bytes + }, + None => { + // If we have any valid bytes, that means we partially copied another + // utf8 character into `partial_utf8`. Since we only care about the + // first character, we just ignore the rest. + let valid_bytes = err.valid_up_to(); + if valid_bytes > 0 { + let c = unsafe { + let parsed = + str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]); + parsed.chars().next().unwrap_unchecked() + }; + performer.print(c); + + self.partial_utf8_len = 0; + valid_bytes - old_bytes + } else { + // If the character still isn't complete, wait for more data. + bytes.len() + } + }, + } }, - Action::BeginUtf8 => self.process_utf8(performer, byte), - Action::Ignore => (), - Action::None => (), + } + } + + /// Handle ground dispatch of print/execute for all characters in a string. + #[inline] + fn ground_dispatch<P: Perform>(performer: &mut P, text: &str) { + for c in text.chars() { + match c { + '\x00'..='\x1f' | '\u{80}'..='\u{9f}' => performer.execute(c as u8), + _ => performer.print(c), + } } } } /// Performs actions requested by the Parser /// -/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor -/// movement, or simply printing characters to the screen. +/// Actions in this case mean, for example, handling a CSI escape sequence +/// describing cursor movement, or simply printing characters to the screen. /// /// The methods on this type correspond to actions described in /// <http://vt100.net/emu/dec_ansi_parser>. 
I've done my best to describe them in @@ -385,19 +523,21 @@ pub trait Perform { /// Execute a C0 or C1 control function. fn execute(&mut self, _byte: u8) {} - /// Invoked when a final character arrives in first part of device control string. + /// Invoked when a final character arrives in first part of device control + /// string. /// - /// The control function should be determined from the private marker, final character, and - /// execute with a parameter list. A handler should be selected for remaining characters in the - /// string; the handler function should subsequently be called by `put` for every character in + /// The control function should be determined from the private marker, final + /// character, and execute with a parameter list. A handler should be + /// selected for remaining characters in the string; the handler + /// function should subsequently be called by `put` for every character in /// the control string. /// /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: char) {} - /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls - /// will also be passed to the handler. + /// Pass bytes as part of a device control string to the handle chosen in + /// `hook`. C0 controls will also be passed to the handler. fn put(&mut self, _byte: u8) {} /// Called when a device control string is terminated. @@ -411,9 +551,9 @@ pub trait Perform { /// A final character has arrived for a CSI sequence /// - /// The `ignore` flag indicates that either more than two intermediates arrived - /// or the number of parameters exceeded the maximum supported length, - /// and subsequent characters were ignored. 
+ /// The `ignore` flag indicates that either more than two intermediates + /// arrived or the number of parameters exceeded the maximum supported + /// length, and subsequent characters were ignored. fn csi_dispatch( &mut self, _params: &Params, @@ -428,6 +568,19 @@ pub trait Perform { /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {} + + /// Whether the parser should terminate prematurely. + /// + /// This can be used in conjunction with + /// [`Parser::advance_until_terminated`] to terminate the parser after + /// receiving certain escape sequences like synchronized updates. + /// + /// This is checked after every parsed byte, so no expensive computation + /// should take place in this function. + #[inline(always)] + fn terminated(&self) -> bool { + false + } } #[cfg(all(test, feature = "no_std"))] @@ -436,12 +589,12 @@ extern crate std; #[cfg(test)] mod tests { - use super::*; - use std::vec::Vec; - static OSC_BYTES: &[u8] = &[ - 0x1b, 0x5d, // Begin OSC + use super::*; + + const OSC_BYTES: &[u8] = &[ + 0x1B, 0x5D, // Begin OSC b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o', b'd', b'e', b'/', b'a', b'l', b'a', b'c', b'r', b'i', b't', b't', b'y', 0x07, // End OSC @@ -459,6 +612,8 @@ mod tests { Esc(Vec<u8>, bool, u8), DcsHook(Vec<Vec<u16>>, Vec<u8>, bool, char), DcsPut(u8), + Print(char), + Execute(u8), DcsUnhook, } @@ -492,6 +647,14 @@ mod tests { fn unhook(&mut self) { self.dispatched.push(Sequence::DcsUnhook); } + + fn print(&mut self, c: char) { + self.dispatched.push(Sequence::Print(c)); + } + + fn execute(&mut self, byte: u8) { + self.dispatched.push(Sequence::Execute(byte)); + } } #[test] @@ -499,9 +662,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in OSC_BYTES 
{ - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, OSC_BYTES); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -519,9 +680,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in &[0x1b, 0x5d, 0x07] { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, &[0x1B, 0x5D, 0x07]); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -537,9 +696,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -553,13 +710,11 @@ mod tests { #[test] fn osc_bell_terminated() { - static INPUT: &[u8] = b"\x1b]11;ff/00/ff\x07"; + const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x07"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -570,13 +725,11 @@ mod tests { #[test] fn osc_c0_st_terminated() { - static INPUT: &[u8] = b"\x1b]11;ff/00/ff\x1b\\"; + const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x1b\\"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 2); match &dispatcher.dispatched[0] { @@ -587,37 +740,29 @@ mod tests { #[test] fn parse_osc_with_utf8_arguments() { - static INPUT: &[u8] = &[ - 0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27, 0xc2, 0xaf, 0x5c, - 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f, 0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26, - 0x20, 0x73, 0x6c, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07, + const 
INPUT: &[u8] = &[ + 0x0D, 0x1B, 0x5D, 0x32, 0x3B, 0x65, 0x63, 0x68, 0x6F, 0x20, 0x27, 0xC2, 0xAF, 0x5C, + 0x5F, 0x28, 0xE3, 0x83, 0x84, 0x29, 0x5F, 0x2F, 0xC2, 0xAF, 0x27, 0x20, 0x26, 0x26, + 0x20, 0x73, 0x6C, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07, ]; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); - assert_eq!(dispatcher.dispatched.len(), 1); - match &dispatcher.dispatched[0] { - Sequence::Osc(params, _) => { - assert_eq!(params[0], &[b'2']); - assert_eq!(params[1], &INPUT[5..(INPUT.len() - 1)]); - }, - _ => panic!("expected osc sequence"), - } + assert_eq!(dispatcher.dispatched[0], Sequence::Execute(b'\r')); + let osc_data = INPUT[5..(INPUT.len() - 1)].into(); + assert_eq!(dispatcher.dispatched[1], Sequence::Osc(vec![vec![b'2'], osc_data], true)); + assert_eq!(dispatcher.dispatched.len(), 2); } #[test] fn osc_containing_string_terminator() { - static INPUT: &[u8] = b"\x1b]2;\xe6\x9c\xab\x1b\\"; + const INPUT: &[u8] = b"\x1b]2;\xe6\x9c\xab\x1b\\"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 2); match &dispatcher.dispatched[0] { @@ -630,27 +775,21 @@ mod tests { #[test] fn exceed_max_buffer_size() { - static NUM_BYTES: usize = MAX_OSC_RAW + 100; - static INPUT_START: &[u8] = &[0x1b, b']', b'5', b'2', b';', b's']; - static INPUT_END: &[u8] = &[b'\x07']; + const NUM_BYTES: usize = MAX_OSC_RAW + 100; + const INPUT_START: &[u8] = b"\x1b]52;s"; + const INPUT_END: &[u8] = b"\x07"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); // Create valid OSC escape - for byte in INPUT_START { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_START); // Exceed max buffer size - for _ in 0..NUM_BYTES { - 
parser.advance(&mut dispatcher, b'a'); - } + parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]); // Terminate escape for dispatch - for byte in INPUT_END { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_END); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -679,9 +818,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -704,9 +841,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -723,9 +858,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in b"\x1b[4;m" { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, b"\x1b[4;m"); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -740,9 +873,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in b"\x1b[;4m" { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, b"\x1b[;4m"); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -754,35 +885,31 @@ mod tests { #[test] fn parse_long_csi_param() { // The important part is the parameter, which is (i64::MAX + 1) - static INPUT: &[u8] = b"\x1b[9223372036854775808m"; + const INPUT: &[u8] = b"\x1b[9223372036854775808m"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); 
assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { - Sequence::Csi(params, ..) => assert_eq!(params, &[[std::u16::MAX as u16]]), + Sequence::Csi(params, ..) => assert_eq!(params, &[[u16::MAX]]), _ => panic!("expected csi sequence"), } } #[test] fn csi_reset() { - static INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h"; + const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { Sequence::Csi(params, intermediates, ignore, _) => { - assert_eq!(intermediates, &[b'?']); + assert_eq!(intermediates, b"?"); assert_eq!(params, &[[1049]]); assert!(!ignore); }, @@ -792,13 +919,11 @@ mod tests { #[test] fn csi_subparameters() { - static INPUT: &[u8] = b"\x1b[38:2:255:0:255;1m"; + const INPUT: &[u8] = b"\x1b[38:2:255:0:255;1m"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -818,9 +943,7 @@ mod tests { let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in input { - parser.advance(&mut dispatcher, byte); - } + parser.advance(&mut dispatcher, &input); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -835,18 +958,16 @@ mod tests { #[test] fn dcs_reset() { - static INPUT: &[u8] = b"\x1b[3;1\x1bP1$tx\x9c"; + const INPUT: &[u8] = b"\x1b[3;1\x1bP1$tx\x9c"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 3); match &dispatcher.dispatched[0] { 
Sequence::DcsHook(params, intermediates, ignore, _) => { - assert_eq!(intermediates, &[b'$']); + assert_eq!(intermediates, b"$"); assert_eq!(params, &[[1]]); assert!(!ignore); }, @@ -858,13 +979,11 @@ mod tests { #[test] fn parse_dcs() { - static INPUT: &[u8] = b"\x1bP0;1|17/ab\x9c"; + const INPUT: &[u8] = b"\x1bP0;1|17/ab\x9c"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 7); match &dispatcher.dispatched[0] { @@ -882,35 +1001,31 @@ mod tests { #[test] fn intermediate_reset_on_dcs_exit() { - static INPUT: &[u8] = b"\x1bP=1sZZZ\x1b+\x5c"; + const INPUT: &[u8] = b"\x1bP=1sZZZ\x1b+\x5c"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 6); match &dispatcher.dispatched[5] { - Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, &[b'+']), + Sequence::Esc(intermediates, ..) 
=> assert_eq!(intermediates, b"+"), _ => panic!("expected esc sequence"), } } #[test] fn esc_reset() { - static INPUT: &[u8] = b"\x1b[3;1\x1b(A"; + const INPUT: &[u8] = b"\x1b[3;1\x1b(A"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { Sequence::Esc(intermediates, ignore, byte) => { - assert_eq!(intermediates, &[b'(']); + assert_eq!(intermediates, b"("); assert_eq!(*byte, b'A'); assert!(!ignore); }, @@ -919,14 +1034,25 @@ mod tests { } #[test] + fn esc_reset_intermediates() { + const INPUT: &[u8] = b"\x1b[?2004l\x1b#8"; + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, INPUT); + + assert_eq!(dispatcher.dispatched.len(), 2); + assert_eq!(dispatcher.dispatched[0], Sequence::Csi(vec![vec![2004]], vec![63], false, 'l')); + assert_eq!(dispatcher.dispatched[1], Sequence::Esc(vec![35], false, 56)); + } + + #[test] fn params_buffer_filled_with_subparam() { - static INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b"; + const INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b"; let mut dispatcher = Dispatcher::default(); let mut parser = Parser::new(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -943,18 +1069,16 @@ mod tests { #[cfg(feature = "no_std")] #[test] fn build_with_fixed_size() { - static INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h"; + const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h"; let mut dispatcher = Dispatcher::default(); let mut parser: Parser<30> = Parser::new_with_size(); - for byte in INPUT { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT); assert_eq!(dispatcher.dispatched.len(), 1); match 
&dispatcher.dispatched[0] { Sequence::Csi(params, intermediates, ignore, _) => { - assert_eq!(intermediates, &[b'?']); + assert_eq!(intermediates, b"?"); assert_eq!(params, &[[1049]]); assert!(!ignore); }, @@ -966,27 +1090,21 @@ mod tests { #[test] fn exceed_fixed_osc_buffer_size() { const OSC_BUFFER_SIZE: usize = 32; - static NUM_BYTES: usize = OSC_BUFFER_SIZE + 100; - static INPUT_START: &[u8] = b"\x1b]52;"; - static INPUT_END: &[u8] = b"\x07"; + const NUM_BYTES: usize = OSC_BUFFER_SIZE + 100; + const INPUT_START: &[u8] = b"\x1b]52;"; + const INPUT_END: &[u8] = b"\x07"; let mut dispatcher = Dispatcher::default(); let mut parser: Parser<OSC_BUFFER_SIZE> = Parser::new_with_size(); // Create valid OSC escape - for byte in INPUT_START { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_START); // Exceed max buffer size - for _ in 0..NUM_BYTES { - parser.advance(&mut dispatcher, b'a'); - } + parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]); // Terminate escape for dispatch - for byte in INPUT_END { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_END); assert_eq!(dispatcher.dispatched.len(), 1); match &dispatcher.dispatched[0] { @@ -1005,22 +1123,16 @@ mod tests { #[cfg(feature = "no_std")] #[test] fn fixed_size_osc_containing_string_terminator() { - static INPUT_START: &[u8] = b"\x1b]2;"; - static INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab"; - static INPUT_END: &[u8] = b"\x1b\\"; + const INPUT_START: &[u8] = b"\x1b]2;"; + const INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab"; + const INPUT_END: &[u8] = b"\x1b\\"; let mut dispatcher = Dispatcher::default(); let mut parser: Parser<5> = Parser::new_with_size(); - for byte in INPUT_START { - parser.advance(&mut dispatcher, *byte); - } - for byte in INPUT_MIDDLE { - parser.advance(&mut dispatcher, *byte); - } - for byte in INPUT_END { - parser.advance(&mut dispatcher, *byte); - } + parser.advance(&mut dispatcher, INPUT_START); + parser.advance(&mut dispatcher, 
INPUT_MIDDLE); + parser.advance(&mut dispatcher, INPUT_END); assert_eq!(dispatcher.dispatched.len(), 2); match &dispatcher.dispatched[0] { @@ -1031,74 +1143,144 @@ mod tests { _ => panic!("expected osc sequence"), } } -} -#[cfg(all(feature = "nightly", test))] -mod bench { - extern crate std; - extern crate test; + #[test] + fn unicode() { + const INPUT: &[u8] = b"\xF0\x9F\x8E\x89_\xF0\x9F\xA6\x80\xF0\x9F\xA6\x80_\xF0\x9F\x8E\x89"; - use super::*; + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); - use test::{black_box, Bencher}; + parser.advance(&mut dispatcher, INPUT); - static VTE_DEMO: &[u8] = include_bytes!("../tests/demo.vte"); + assert_eq!(dispatcher.dispatched.len(), 6); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('🎉')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('_')); + assert_eq!(dispatcher.dispatched[2], Sequence::Print('🦀')); + assert_eq!(dispatcher.dispatched[3], Sequence::Print('🦀')); + assert_eq!(dispatcher.dispatched[4], Sequence::Print('_')); + assert_eq!(dispatcher.dispatched[5], Sequence::Print('🎉')); + } - struct BenchDispatcher; - impl Perform for BenchDispatcher { - fn print(&mut self, c: char) { - black_box(c); - } + #[test] + fn invalid_utf8() { + const INPUT: &[u8] = b"a\xEF\xBCb"; - fn execute(&mut self, byte: u8) { - black_box(byte); - } + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); - fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) { - black_box((params, intermediates, ignore, c)); - } + parser.advance(&mut dispatcher, INPUT); - fn put(&mut self, byte: u8) { - black_box(byte); - } + assert_eq!(dispatcher.dispatched.len(), 3); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('a')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('�')); + assert_eq!(dispatcher.dispatched[2], Sequence::Print('b')); + } - fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) { - black_box((params, 
bell_terminated)); - } + #[test] + fn partial_utf8() { + const INPUT: &[u8] = b"\xF0\x9F\x9A\x80"; - fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) { - black_box((params, intermediates, ignore, c)); - } + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); - fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8) { - black_box((intermediates, ignore, byte)); - } + parser.advance(&mut dispatcher, &INPUT[..1]); + parser.advance(&mut dispatcher, &INPUT[1..2]); + parser.advance(&mut dispatcher, &INPUT[2..3]); + parser.advance(&mut dispatcher, &INPUT[3..]); + + assert_eq!(dispatcher.dispatched.len(), 1); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('🚀')); } - #[bench] - fn testfile(b: &mut Bencher) { - b.iter(|| { - let mut dispatcher = BenchDispatcher; - let mut parser = Parser::new(); + #[test] + fn partial_utf8_separating_utf8() { + // This is different from the `partial_utf8` test since it has a multi-byte UTF8 + // character after the partial UTF8 state, causing a partial byte to be present + // in the `partial_utf8` buffer after the 2-byte codepoint. 
- for byte in VTE_DEMO { - parser.advance(&mut dispatcher, *byte); - } - }); + // "ĸ🎉" + const INPUT: &[u8] = b"\xC4\xB8\xF0\x9F\x8E\x89"; + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, &INPUT[..1]); + parser.advance(&mut dispatcher, &INPUT[1..]); + + assert_eq!(dispatcher.dispatched.len(), 2); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('ĸ')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('🎉')); } - #[bench] - fn state_changes(b: &mut Bencher) { - let input = b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\"; - b.iter(|| { - let mut dispatcher = BenchDispatcher; - let mut parser = Parser::new(); + #[test] + fn partial_invalid_utf8() { + const INPUT: &[u8] = b"a\xEF\xBCb"; - for _ in 0..1_000 { - for byte in input { - parser.advance(&mut dispatcher, *byte); - } - } - }); + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, &INPUT[..1]); + parser.advance(&mut dispatcher, &INPUT[1..2]); + parser.advance(&mut dispatcher, &INPUT[2..3]); + parser.advance(&mut dispatcher, &INPUT[3..]); + + assert_eq!(dispatcher.dispatched.len(), 3); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('a')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('�')); + assert_eq!(dispatcher.dispatched[2], Sequence::Print('b')); + } + + #[test] + fn partial_utf8_into_esc() { + const INPUT: &[u8] = b"\xD8\x1b012"; + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, INPUT); + + assert_eq!(dispatcher.dispatched.len(), 4); + assert_eq!(dispatcher.dispatched[0], Sequence::Print('�')); + assert_eq!(dispatcher.dispatched[1], Sequence::Esc(Vec::new(), false, b'0')); + assert_eq!(dispatcher.dispatched[2], Sequence::Print('1')); + assert_eq!(dispatcher.dispatched[3], Sequence::Print('2')); + } + + #[test] + fn c1s() { + const INPUT: &[u8] = b"\x00\x1f\x80\x90\x98\x9b\x9c\x9d\x9e\x9fa"; + 
+ let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, INPUT); + + assert_eq!(dispatcher.dispatched.len(), 11); + assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0)); + assert_eq!(dispatcher.dispatched[1], Sequence::Execute(31)); + assert_eq!(dispatcher.dispatched[2], Sequence::Execute(128)); + assert_eq!(dispatcher.dispatched[3], Sequence::Execute(144)); + assert_eq!(dispatcher.dispatched[4], Sequence::Execute(152)); + assert_eq!(dispatcher.dispatched[5], Sequence::Execute(155)); + assert_eq!(dispatcher.dispatched[6], Sequence::Execute(156)); + assert_eq!(dispatcher.dispatched[7], Sequence::Execute(157)); + assert_eq!(dispatcher.dispatched[8], Sequence::Execute(158)); + assert_eq!(dispatcher.dispatched[9], Sequence::Execute(159)); + assert_eq!(dispatcher.dispatched[10], Sequence::Print('a')); + } + + #[test] + fn execute_anywhere() { + const INPUT: &[u8] = b"\x18\x1a"; + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, INPUT); + + assert_eq!(dispatcher.dispatched.len(), 2); + assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0x18)); + assert_eq!(dispatcher.dispatched[1], Sequence::Execute(0x1A)); } } diff --git a/src/params.rs b/src/params.rs index 608c040..967befb 100644 --- a/src/params.rs +++ b/src/params.rs @@ -8,8 +8,9 @@ pub(crate) const MAX_PARAMS: usize = 32; pub struct Params { /// Number of subparameters for each parameter. /// - /// For each entry in the `params` slice, this stores the length of the param as number of - /// subparams at the same index as the param in the `params` slice. + /// For each entry in the `params` slice, this stores the length of the + /// param as number of subparams at the same index as the param in the + /// `params` slice. /// /// At the subparam positions the length will always be `0`. 
subparams: [u8; MAX_PARAMS], diff --git a/src/table.rs b/src/table.rs index f2c0105..ac288e7 100644 --- a/src/table.rs +++ b/src/table.rs @@ -1,39 +1,20 @@ -/// This is the state change table. It's indexed first by current state and then by the next -/// character in the pty stream. -use crate::definitions::{pack, Action, State}; - use vte_generate_state_changes::generate_state_changes; +/// This is the state change table. It's indexed first by current state and then +/// by the next character in the pty stream. +use crate::definitions::{pack, Action, State}; + // Generate state changes at compile-time -pub static STATE_CHANGES: [[u8; 256]; 16] = state_changes(); +pub const STATE_CHANGES: [[u8; 256]; 13] = state_changes(); generate_state_changes!(state_changes, { - Anywhere { - 0x18 => (Ground, Execute), - 0x1a => (Ground, Execute), - 0x1b => (Escape, None), - }, - - Ground { - 0x00..=0x17 => (Anywhere, Execute), - 0x19 => (Anywhere, Execute), - 0x1c..=0x1f => (Anywhere, Execute), - 0x20..=0x7f => (Anywhere, Print), - 0x80..=0x8f => (Anywhere, Execute), - 0x91..=0x9a => (Anywhere, Execute), - 0x9c => (Anywhere, Execute), - // Beginning of UTF-8 2 byte sequence - 0xc2..=0xdf => (Utf8, BeginUtf8), - // Beginning of UTF-8 3 byte sequence - 0xe0..=0xef => (Utf8, BeginUtf8), - // Beginning of UTF-8 4 byte sequence - 0xf0..=0xf4 => (Utf8, BeginUtf8), - }, - Escape { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x20..=0x2f => (EscapeIntermediate, Collect), 0x30..=0x4f => (Ground, EscDispatch), 0x51..=0x57 => (Ground, EscDispatch), @@ -51,18 +32,24 @@ generate_state_changes!(state_changes, { EscapeIntermediate { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => 
(Anywhere, Execute), 0x20..=0x2f => (Anywhere, Collect), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x30..=0x7e => (Ground, EscDispatch), }, CsiEntry { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x20..=0x2f => (CsiIntermediate, Collect), 0x30..=0x39 => (CsiParam, Param), 0x3a..=0x3b => (CsiParam, Param), @@ -72,20 +59,26 @@ generate_state_changes!(state_changes, { CsiIgnore { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), - 0x20..=0x3f => (Anywhere, Ignore), - 0x7f => (Anywhere, Ignore), + 0x20..=0x3f => (Anywhere, None), + 0x7f => (Anywhere, None), 0x40..=0x7e => (Ground, None), }, CsiParam { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), 0x30..=0x39 => (Anywhere, Param), 0x3a..=0x3b => (Anywhere, Param), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x3c..=0x3f => (CsiIgnore, None), 0x20..=0x2f => (CsiIntermediate, Collect), 0x40..=0x7e => (Ground, CsiDispatch), @@ -93,19 +86,25 @@ generate_state_changes!(state_changes, { CsiIntermediate { 0x00..=0x17 => (Anywhere, Execute), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Execute), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => (Anywhere, Execute), 0x20..=0x2f => (Anywhere, Collect), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x30..=0x3f => (CsiIgnore, None), 0x40..=0x7e => (Ground, CsiDispatch), }, DcsEntry { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), - 0x7f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 
=> (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), + 0x7f => (Anywhere, None), 0x20..=0x2f => (DcsIntermediate, Collect), 0x30..=0x39 => (DcsParam, Param), 0x3a..=0x3b => (DcsParam, Param), @@ -114,30 +113,39 @@ generate_state_changes!(state_changes, { }, DcsIntermediate { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), 0x20..=0x2f => (Anywhere, Collect), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x30..=0x3f => (DcsIgnore, None), 0x40..=0x7e => (DcsPassthrough, None), }, DcsIgnore { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), - 0x20..=0x7f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), + 0x20..=0x7f => (Anywhere, None), 0x9c => (Ground, None), }, DcsParam { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), 0x30..=0x39 => (Anywhere, Param), 0x3a..=0x3b => (Anywhere, Param), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x3c..=0x3f => (DcsIgnore, None), 0x20..=0x2f => (DcsIntermediate, Collect), 0x40..=0x7e => (DcsPassthrough, None), @@ -145,27 +153,36 @@ generate_state_changes!(state_changes, { DcsPassthrough { 0x00..=0x17 => (Anywhere, Put), + 0x18 => (Ground, Execute), 0x19 => (Anywhere, Put), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), 0x1c..=0x1f => 
(Anywhere, Put), 0x20..=0x7e => (Anywhere, Put), - 0x7f => (Anywhere, Ignore), + 0x7f => (Anywhere, None), 0x9c => (Ground, None), }, SosPmApcString { - 0x00..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), - 0x20..=0x7f => (Anywhere, Ignore), + 0x00..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), + 0x20..=0x7f => (Anywhere, None), 0x9c => (Ground, None), }, OscString { - 0x00..=0x06 => (Anywhere, Ignore), + 0x00..=0x06 => (Anywhere, None), 0x07 => (Ground, None), - 0x08..=0x17 => (Anywhere, Ignore), - 0x19 => (Anywhere, Ignore), - 0x1c..=0x1f => (Anywhere, Ignore), + 0x08..=0x17 => (Anywhere, None), + 0x18 => (Ground, Execute), + 0x19 => (Anywhere, None), + 0x1a => (Ground, Execute), + 0x1b => (Escape, None), + 0x1c..=0x1f => (Anywhere, None), 0x20..=0xff => (Anywhere, OscPut), } }); |