//! Parser for implementing virtual terminal emulators //! //! [`Parser`] is implemented according to [Paul Williams' ANSI parser state //! machine]. The state machine doesn't assign meaning to the parsed data and is //! thus not itself sufficient for writing a terminal emulator. Instead, it is //! expected that an implementation of [`Perform`] is provided which does //! something useful with the parsed data. The [`Parser`] handles the book //! keeping, and the [`Perform`] gets to simply handle actions. //! //! # Examples //! //! For an example of using the [`Parser`] please see the examples folder. The //! example included there simply logs all the actions [`Perform`] does. One //! quick way to see it in action is to pipe `printf` into it //! //! ```sh //! printf '\x1b[31mExample' | cargo run --example parselog //! ``` //! //! # Differences from original state machine description //! //! * UTF-8 Support for Input //! * OSC Strings can be terminated by 0x07 //! * Only supports 7-bit codes //! //! [`Parser`]: struct.Parser.html //! [`Perform`]: trait.Perform.html //! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser #![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)] #![cfg_attr(feature = "no_std", no_std)] use core::mem::MaybeUninit; use core::str; #[cfg(feature = "no_std")] use arrayvec::ArrayVec; mod definitions; mod params; mod table; #[cfg(feature = "ansi")] pub mod ansi; use definitions::{unpack, Action, State}; pub use params::{Params, ParamsIter}; const MAX_INTERMEDIATES: usize = 2; const MAX_OSC_PARAMS: usize = 16; const MAX_OSC_RAW: usize = 1024; /// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] /// /// [`Perform`]: trait.Perform.html /// /// Generic over the value for the size of the raw Operating System Command /// buffer. Only used when the `no_std` feature is enabled. 
#[derive(Default)]
pub struct Parser<const OSC_RAW_BUF_SIZE: usize = MAX_OSC_RAW> {
    /// Current state of the escape sequence state machine.
    state: State,
    /// Intermediate bytes collected for the sequence currently being parsed.
    intermediates: [u8; MAX_INTERMEDIATES],
    /// Number of valid entries in `intermediates`.
    intermediate_idx: usize,
    /// Parameters which have already been completed for the current sequence.
    params: Params,
    /// Parameter which is currently being accumulated digit by digit.
    param: u16,
    /// Raw bytes of the current OSC string; `;` separators are not stored.
    #[cfg(feature = "no_std")]
    osc_raw: ArrayVec<u8, OSC_RAW_BUF_SIZE>,
    /// Raw bytes of the current OSC string; `;` separators are not stored.
    #[cfg(not(feature = "no_std"))]
    osc_raw: Vec<u8>,
    /// `(start, end)` byte ranges into `osc_raw`, one per OSC parameter.
    osc_params: [(usize, usize); MAX_OSC_PARAMS],
    /// Number of valid entries in `osc_params`.
    osc_num_params: usize,
    /// Set once more intermediates or parameters arrived than can be stored.
    ignoring: bool,
    /// Bytes of an incomplete UTF-8 codepoint carried over between `advance` calls.
    partial_utf8: [u8; 4],
    /// Number of valid bytes in `partial_utf8`.
    partial_utf8_len: usize,
}

impl Parser {
    /// Create a new Parser
    pub fn new() -> Parser {
        Default::default()
    }
}

impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
    /// Create a new Parser with a custom size for the Operating System Command
    /// buffer.
    ///
    /// Call with a const-generic param on `Parser`, like:
    ///
    /// ```rust
    /// let mut p = vte::Parser::<64>::new_with_size();
    /// ```
    #[cfg(feature = "no_std")]
    pub fn new_with_size() -> Parser<OSC_RAW_BUF_SIZE> {
        Default::default()
    }

    /// Parameters collected so far for the current sequence.
    #[inline]
    fn params(&self) -> &Params {
        &self.params
    }

    /// Intermediate bytes collected so far for the current sequence.
    #[inline]
    fn intermediates(&self) -> &[u8] {
        &self.intermediates[..self.intermediate_idx]
    }

    /// Advance the parser state.
    ///
    /// Requires a [`Perform`] implementation to handle the triggered actions.
    ///
    /// [`Perform`]: trait.Perform.html
    #[inline]
    pub fn advance<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) {
        let mut i = 0;

        // Handle partial codepoints from previous calls to `advance`.
        if self.partial_utf8_len > 0 {
            i += self.advance_partial_utf8(performer, bytes);
        }

        while i != bytes.len() {
            match self.state {
                // Ground is handled in bulk, everything else byte by byte
                // through the state transition table.
                State::Ground => i += self.advance_ground(performer, &bytes[i..]),
                _ => {
                    let byte = bytes[i];
                    let change = table::STATE_CHANGES[self.state as usize][byte as usize];
                    let (state, action) = unpack(change);

                    self.perform_state_change(performer, state, action, byte);

                    i += 1;
                },
            }
        }
    }

    /// Partially advance the parser state.
    ///
    /// This is equivalent to [`Self::advance`], but stops when
    /// [`Perform::terminated`] is true after reading a byte.
    ///
    /// Returns the number of bytes read before termination.
    ///
    /// See [`Self::advance`] for more details.
#[inline] #[must_use = "Returned value should be used to processs the remaining bytes"] pub fn advance_until_terminated( &mut self, performer: &mut P, bytes: &[u8], ) -> usize { let mut i = 0; // Handle partial codepoints from previous calls to `advance`. if self.partial_utf8_len != 0 { i += self.advance_partial_utf8(performer, bytes); } while i != bytes.len() && !performer.terminated() { match self.state { State::Ground => i += self.advance_ground(performer, &bytes[i..]), _ => { let byte = bytes[i]; let change = table::STATE_CHANGES[self.state as usize][byte as usize]; let (state, action) = unpack(change); self.perform_state_change(performer, state, action, byte); i += 1; }, } } i } #[inline] fn perform_state_change

(&mut self, performer: &mut P, state: State, action: Action, byte: u8) where P: Perform, { if state == State::Anywhere { self.perform_action(performer, action, byte); return; } match self.state { State::DcsPassthrough => performer.unhook(), State::OscString => { let param_idx = self.osc_num_params; let idx = self.osc_raw.len(); match param_idx { // Finish last parameter if not already maxed MAX_OSC_PARAMS => (), // First param is special - 0 to current byte index 0 => { self.osc_params[param_idx] = (0, idx); self.osc_num_params += 1; }, // All other params depend on previous indexing _ => { let prev = self.osc_params[param_idx - 1]; let begin = prev.1; self.osc_params[param_idx] = (begin, idx); self.osc_num_params += 1; }, } self.osc_dispatch(performer, byte); }, _ => (), } if action == Action::None { match state { State::CsiEntry | State::DcsEntry | State::Escape => self.reset_params(), State::DcsPassthrough => { if self.params.is_full() { self.ignoring = true; } else { self.params.push(self.param); } performer.hook( self.params(), self.intermediates(), self.ignoring, byte as char, ); }, State::OscString => { self.osc_raw.clear(); self.osc_num_params = 0; }, _ => (), } } else { self.perform_action(performer, action, byte); } self.state = state; } #[inline] fn perform_action(&mut self, performer: &mut P, action: Action, byte: u8) { match action { Action::Execute => performer.execute(byte), Action::Put => performer.put(byte), Action::OscPut => { #[cfg(feature = "no_std")] { if self.osc_raw.is_full() { return; } } let idx = self.osc_raw.len(); // Param separator if byte == b';' { let param_idx = self.osc_num_params; match param_idx { // Only process up to MAX_OSC_PARAMS MAX_OSC_PARAMS => return, // First param is special - 0 to current byte index 0 => { self.osc_params[param_idx] = (0, idx); }, // All other params depend on previous indexing _ => { let prev = self.osc_params[param_idx - 1]; let begin = prev.1; self.osc_params[param_idx] = (begin, idx); }, } 
self.osc_num_params += 1; } else { self.osc_raw.push(byte); } }, Action::CsiDispatch => { if self.params.is_full() { self.ignoring = true; } else { self.params.push(self.param); } performer.csi_dispatch( self.params(), self.intermediates(), self.ignoring, byte as char, ); }, Action::EscDispatch => { performer.esc_dispatch(self.intermediates(), self.ignoring, byte); }, Action::Collect => { if self.intermediate_idx == MAX_INTERMEDIATES { self.ignoring = true; } else { self.intermediates[self.intermediate_idx] = byte; self.intermediate_idx += 1; } }, Action::Param => { if self.params.is_full() { self.ignoring = true; return; } match byte { b';' => { self.params.push(self.param); self.param = 0; }, b':' => { self.params.extend(self.param); self.param = 0; }, _ => { // Continue collecting bytes into param self.param = self.param.saturating_mul(10); self.param = self.param.saturating_add((byte - b'0') as u16); }, } }, _ => (), } } /// Reset escape sequence parameters and intermediates. #[inline] fn reset_params(&mut self) { self.intermediate_idx = 0; self.ignoring = false; self.param = 0; self.params.clear(); } /// Separate method for osc_dispatch that borrows self as read-only /// /// The aliasing is needed here for multiple slices into self.osc_raw #[inline] fn osc_dispatch(&self, performer: &mut P, byte: u8) { let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = unsafe { MaybeUninit::uninit().assume_init() }; for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { let indices = self.osc_params[i]; *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); } unsafe { let num_params = self.osc_num_params; let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; performer.osc_dispatch(&*params, byte == 0x07); } } /// Advance the parser state from ground. /// /// The ground state is handled separately since it can only be left using /// the escape character (`\x1b`). 
This allows more efficient parsing by /// using SIMD search with [`memchr`]. #[inline] fn advance_ground(&mut self, performer: &mut P, bytes: &[u8]) -> usize { // Find the next escape character. let num_bytes = bytes.len(); let plain_chars = memchr::memchr(0x1B, bytes).unwrap_or(num_bytes); // If the next character is ESC, just process it and short-circuit. if plain_chars == 0 { self.state = State::Escape; self.reset_params(); return 1; } match str::from_utf8(&bytes[..plain_chars]) { Ok(parsed) => { Self::ground_dispatch(performer, parsed); let mut processed = plain_chars; // If there's another character, it must be escape so process it directly. if processed < num_bytes { self.state = State::Escape; self.reset_params(); processed += 1; } processed }, // Handle invalid and partial utf8. Err(err) => { // Dispatch all the valid bytes. let valid_bytes = err.valid_up_to(); let parsed = unsafe { str::from_utf8_unchecked(&bytes[..valid_bytes]) }; Self::ground_dispatch(performer, parsed); match err.error_len() { Some(len) => { // Execute C1 escapes or emit replacement character. if len == 1 && bytes[valid_bytes] <= 0x9F { performer.execute(bytes[valid_bytes]); } else { performer.print('�'); } // Restart processing after the invalid bytes. // // While we could theoretically try to just re-parse // `bytes[valid_bytes + len..plain_chars]`, it's easier // to just skip it and invalid utf8 is pretty rare anyway. valid_bytes + len }, None => { if plain_chars < num_bytes { // Process bytes cut off by escape. performer.print('�'); self.state = State::Escape; self.reset_params(); plain_chars + 1 } else { // Process bytes cut off by the buffer end. 
let extra_bytes = num_bytes - valid_bytes; let partial_len = self.partial_utf8_len + extra_bytes; self.partial_utf8[self.partial_utf8_len..partial_len] .copy_from_slice(&bytes[valid_bytes..valid_bytes + extra_bytes]); self.partial_utf8_len = partial_len; num_bytes } }, } }, } } /// Advance the parser while processing a partial utf8 codepoint. #[inline] fn advance_partial_utf8(&mut self, performer: &mut P, bytes: &[u8]) -> usize { // Try to copy up to 3 more characters, to ensure the codepoint is complete. let old_bytes = self.partial_utf8_len; let to_copy = bytes.len().min(self.partial_utf8.len() - old_bytes); self.partial_utf8[old_bytes..old_bytes + to_copy].copy_from_slice(&bytes[..to_copy]); self.partial_utf8_len += to_copy; // Parse the unicode character. match str::from_utf8(&self.partial_utf8[..self.partial_utf8_len]) { // If the entire buffer is valid, use the first character and continue parsing. Ok(parsed) => { let c = unsafe { parsed.chars().next().unwrap_unchecked() }; performer.print(c); self.partial_utf8_len = 0; c.len_utf8() - old_bytes }, Err(err) => { match err.error_len() { // If the partial character was also invalid, emit the replacement // character. Some(invalid_len) => { performer.print('�'); self.partial_utf8_len = 0; invalid_len - old_bytes }, None => { // If we have any valid bytes, that means we partially copied another // utf8 character into `partial_utf8`. Since we only care about the // first character, we just ignore the rest. let valid_bytes = err.valid_up_to(); if valid_bytes > 0 { let c = unsafe { let parsed = str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]); parsed.chars().next().unwrap_unchecked() }; performer.print(c); self.partial_utf8_len = 0; valid_bytes - old_bytes } else { // If the character still isn't complete, wait for more data. bytes.len() } }, } }, } } /// Handle ground dispatch of print/execute for all characters in a string. 
#[inline]
fn ground_dispatch<P: Perform>(performer: &mut P, text: &str) {
    for c in text.chars() {
        match c {
            // C0 and C1 controls are executed, everything else is printed.
            '\x00'..='\x1f' | '\u{80}'..='\u{9f}' => performer.execute(c as u8),
            _ => performer.print(c),
        }
    }
}
}

/// Performs actions requested by the Parser
///
/// Actions in this case mean, for example, handling a CSI escape sequence
/// describing cursor movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <https://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
/// a useful way in my own words for completeness, but the site should be
/// referenced if something isn't clear. If the site disappears at some point in
/// the future, consider checking archive.org.
pub trait Perform {
    /// Draw a character to the screen and update states.
    fn print(&mut self, _c: char) {}

    /// Execute a C0 or C1 control function.
    fn execute(&mut self, _byte: u8) {}

    /// Invoked when a final character arrives in first part of device control
    /// string.
    ///
    /// The control function should be determined from the private marker, final
    /// character, and execute with a parameter list. A handler should be
    /// selected for remaining characters in the string; the handler
    /// function should subsequently be called by `put` for every character in
    /// the control string.
    ///
    /// The `ignore` flag indicates that more than two intermediates arrived and
    /// subsequent characters were ignored.
    fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: char) {}

    /// Pass bytes as part of a device control string to the handle chosen in
    /// `hook`. C0 controls will also be passed to the handler.
    fn put(&mut self, _byte: u8) {}

    /// Called when a device control string is terminated.
    ///
    /// The previously selected handler should be notified that the DCS has
    /// terminated.
    fn unhook(&mut self) {}

    /// Dispatch an operating system command.
///
/// Each entry of `_params` is one `;`-separated parameter of the OSC string
/// as raw bytes. `_bell_terminated` is `true` when the string was ended by
/// the byte `0x07` rather than by another terminator.
fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {}

/// A final character has arrived for a CSI sequence
///
/// The `ignore` flag indicates that either more than two intermediates
/// arrived or the number of parameters exceeded the maximum supported
/// length, and subsequent characters were ignored.
fn csi_dispatch(
    &mut self,
    _params: &Params,
    _intermediates: &[u8],
    _ignore: bool,
    _action: char,
) {
}

/// The final character of an escape sequence has arrived.
///
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}

/// Whether the parser should terminate prematurely.
///
/// This can be used in conjunction with
/// [`Parser::advance_until_terminated`] to terminate the parser after
/// receiving certain escape sequences like synchronized updates.
///
/// This is checked after every parsed byte, so no expensive computation
/// should take place in this function.
#[inline(always)]
fn terminated(&self) -> bool {
    false
}
}

#[cfg(all(test, feature = "no_std"))]
#[macro_use]
extern crate std;

#[cfg(test)]
mod tests {
    use std::vec::Vec;

    use super::*;

    /// OSC sequence setting the title `jwilm@jwilm-desk: ~/code/alacritty`.
    const OSC_BYTES: &[u8] = &[
        0x1B, 0x5D, // Begin OSC
        b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd',
        b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o', b'd', b'e', b'/', b'a', b'l', b'a',
        b'c', b'r', b'i', b't', b't', b'y', 0x07, // End OSC
    ];

    /// Performer which records every action it receives, in order.
    #[derive(Default)]
    struct Dispatcher {
        dispatched: Vec<Sequence>,
    }

    /// Owned copy of a single action reported to the [`Dispatcher`].
    #[derive(Debug, PartialEq, Eq)]
    enum Sequence {
        Osc(Vec<Vec<u8>>, bool),
        Csi(Vec<Vec<u16>>, Vec<u8>, bool, char),
        Esc(Vec<u8>, bool, u8),
        DcsHook(Vec<Vec<u16>>, Vec<u8>, bool, char),
        DcsPut(u8),
        Print(char),
        Execute(u8),
        DcsUnhook,
    }

    impl Perform for Dispatcher {
        fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) {
            let params = params.iter().map(|p| p.to_vec()).collect();
            self.dispatched.push(Sequence::Osc(params, bell_terminated));
        }

        fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
            let params = params.iter().map(|subparam| subparam.to_vec()).collect();
            let intermediates = intermediates.to_vec();
            self.dispatched.push(Sequence::Csi(params, intermediates, ignore, c));
        }

        fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8) {
            let intermediates = intermediates.to_vec();
            self.dispatched.push(Sequence::Esc(intermediates, ignore, byte));
        }

        fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
            let params = params.iter().map(|subparam| subparam.to_vec()).collect();
            let intermediates = intermediates.to_vec();
            self.dispatched.push(Sequence::DcsHook(params, intermediates, ignore, c));
        }

        fn put(&mut self, byte: u8) {
            self.dispatched.push(Sequence::DcsPut(byte));
        }

        fn unhook(&mut self) {
            self.dispatched.push(Sequence::DcsUnhook);
        }

        fn print(&mut self, c: char) {
            self.dispatched.push(Sequence::Print(c));
        }

        fn execute(&mut self, byte: u8) {
            self.dispatched.push(Sequence::Execute(byte));
        }
    }
#[test]
fn parse_osc() {
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, OSC_BYTES);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(params, _) => {
            assert_eq!(params.len(), 2);
            assert_eq!(params[0], &OSC_BYTES[2..3]);
            assert_eq!(params[1], &OSC_BYTES[4..(OSC_BYTES.len() - 1)]);
        },
        _ => panic!("expected osc sequence"),
    }
}

#[test]
fn parse_empty_osc() {
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &[0x1B, 0x5D, 0x07]);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(..) => (),
        _ => panic!("expected osc sequence"),
    }
}

#[test]
fn parse_osc_max_params() {
    let params = ";".repeat(params::MAX_PARAMS + 1);
    let input = format!("\x1b]{}\x1b", &params[..]).into_bytes();
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &input);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(params, _) => {
            assert_eq!(params.len(), MAX_OSC_PARAMS);
            assert!(params.iter().all(Vec::is_empty));
        },
        _ => panic!("expected osc sequence"),
    }
}

#[test]
fn osc_bell_terminated() {
    const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x07";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(_, true) => (),
        _ => panic!("expected osc with bell terminator"),
    }
}

#[test]
fn osc_c0_st_terminated() {
    const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x1b\\";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 2);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(_, false) => (),
        _ => panic!("expected osc with ST terminator"),
    }
}

#[test]
fn parse_osc_with_utf8_arguments() {
    const INPUT: &[u8] = &[
        0x0D, 0x1B, 0x5D, 0x32, 0x3B, 0x65, 0x63, 0x68, 0x6F, 0x20, 0x27, 0xC2, 0xAF, 0x5C,
        0x5F, 0x28, 0xE3, 0x83, 0x84, 0x29, 0x5F, 0x2F, 0xC2, 0xAF, 0x27, 0x20, 0x26, 0x26,
        0x20, 0x73, 0x6C, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07,
    ];
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched[0], Sequence::Execute(b'\r'));
    let osc_data = INPUT[5..(INPUT.len() - 1)].into();
    assert_eq!(dispatcher.dispatched[1], Sequence::Osc(vec![vec![b'2'], osc_data], true));
    assert_eq!(dispatcher.dispatched.len(), 2);
}

#[test]
fn osc_containing_string_terminator() {
    const INPUT: &[u8] = b"\x1b]2;\xe6\x9c\xab\x1b\\";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 2);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(params, _) => {
            assert_eq!(params[1], &INPUT[4..(INPUT.len() - 2)]);
        },
        _ => panic!("expected osc sequence"),
    }
}

#[test]
fn exceed_max_buffer_size() {
    const NUM_BYTES: usize = MAX_OSC_RAW + 100;
    const INPUT_START: &[u8] = b"\x1b]52;s";
    const INPUT_END: &[u8] = b"\x07";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    // Create valid OSC escape
    parser.advance(&mut dispatcher, INPUT_START);

    // Exceed max buffer size
    parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]);

    // Terminate escape for dispatch
    parser.advance(&mut dispatcher, INPUT_END);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(params, _) => {
            assert_eq!(params.len(), 2);
            assert_eq!(params[0], b"52");

            #[cfg(not(feature = "no_std"))]
            assert_eq!(params[1].len(), NUM_BYTES + INPUT_END.len());

            #[cfg(feature = "no_std")]
            assert_eq!(params[1].len(), MAX_OSC_RAW - params[0].len());
        },
        _ => panic!("expected osc sequence"),
    }
}

#[test]
fn parse_csi_max_params() {
    // This will build a list of repeating '1;'s
    // The length is MAX_PARAMS - 1 because the last semicolon is interpreted
    // as an implicit zero, making the total number of parameters MAX_PARAMS
    let params = "1;".repeat(params::MAX_PARAMS - 1);
    let input = format!("\x1b[{}p", &params[..]).into_bytes();

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &input);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, _, ignore, _) => {
            assert_eq!(params.len(), params::MAX_PARAMS);
            assert!(!ignore);
        },
        _ => panic!("expected csi sequence"),
    }
}

#[test]
fn parse_csi_params_ignore_long_params() {
    // This will build a list of repeating '1;'s
    // The length is MAX_PARAMS because the last semicolon is interpreted
    // as an implicit zero, making the total number of parameters MAX_PARAMS + 1
    let params = "1;".repeat(params::MAX_PARAMS);
    let input = format!("\x1b[{}p", &params[..]).into_bytes();

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &input);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, _, ignore, _) => {
            assert_eq!(params.len(), params::MAX_PARAMS);
            assert!(ignore);
        },
        _ => panic!("expected csi sequence"),
    }
}

#[test]
fn parse_csi_params_trailing_semicolon() {
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, b"\x1b[4;m");

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, ..) => assert_eq!(params, &[[4], [0]]),
        _ => panic!("expected csi sequence"),
    }
}

#[test]
fn parse_csi_params_leading_semicolon() {
    // Create dispatcher and check state
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, b"\x1b[;4m");

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, ..) => assert_eq!(params, &[[0], [4]]),
        _ => panic!("expected csi sequence"),
    }
}

#[test]
fn parse_long_csi_param() {
    // The important part is the parameter, which is (i64::MAX + 1)
    const INPUT: &[u8] = b"\x1b[9223372036854775808m";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, ..) => assert_eq!(params, &[[u16::MAX]]),
        _ => panic!("expected csi sequence"),
    }
}

#[test]
fn csi_reset() {
    const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, intermediates, ignore, _) => {
            assert_eq!(intermediates, b"?");
            assert_eq!(params, &[[1049]]);
            assert!(!ignore);
        },
        _ => panic!("expected csi sequence"),
    }
}

#[test]
fn csi_subparameters() {
    const INPUT: &[u8] = b"\x1b[38:2:255:0:255;1m";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, intermediates, ignore, _) => {
            assert_eq!(params, &[vec![38, 2, 255, 0, 255], vec![1]]);
            assert_eq!(intermediates, &[]);
            assert!(!ignore);
        },
        _ => panic!("expected csi sequence"),
    }
}

#[test]
fn parse_dcs_max_params() {
    let params = "1;".repeat(params::MAX_PARAMS + 1);
    let input = format!("\x1bP{}p", &params[..]).into_bytes();
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &input);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::DcsHook(params, _, ignore, _) => {
            assert_eq!(params.len(), params::MAX_PARAMS);
            assert!(params.iter().all(|param| param == &[1]));
            assert!(ignore);
        },
        _ => panic!("expected dcs sequence"),
    }
}
#[test]
fn dcs_reset() {
    const INPUT: &[u8] = b"\x1b[3;1\x1bP1$tx\x9c";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 3);
    match &dispatcher.dispatched[0] {
        Sequence::DcsHook(params, intermediates, ignore, _) => {
            assert_eq!(intermediates, b"$");
            assert_eq!(params, &[[1]]);
            assert!(!ignore);
        },
        _ => panic!("expected dcs sequence"),
    }
    assert_eq!(dispatcher.dispatched[1], Sequence::DcsPut(b'x'));
    assert_eq!(dispatcher.dispatched[2], Sequence::DcsUnhook);
}

#[test]
fn parse_dcs() {
    const INPUT: &[u8] = b"\x1bP0;1|17/ab\x9c";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 7);
    match &dispatcher.dispatched[0] {
        Sequence::DcsHook(params, _, _, c) => {
            assert_eq!(params, &[[0], [1]]);
            assert_eq!(c, &'|');
        },
        _ => panic!("expected dcs sequence"),
    }
    for (i, byte) in b"17/ab".iter().enumerate() {
        assert_eq!(dispatcher.dispatched[1 + i], Sequence::DcsPut(*byte));
    }
    assert_eq!(dispatcher.dispatched[6], Sequence::DcsUnhook);
}

#[test]
fn intermediate_reset_on_dcs_exit() {
    const INPUT: &[u8] = b"\x1bP=1sZZZ\x1b+\x5c";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 6);
    match &dispatcher.dispatched[5] {
        Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, b"+"),
        _ => panic!("expected esc sequence"),
    }
}

#[test]
fn esc_reset() {
    const INPUT: &[u8] = b"\x1b[3;1\x1b(A";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Esc(intermediates, ignore, byte) => {
            assert_eq!(intermediates, b"(");
            assert_eq!(*byte, b'A');
            assert!(!ignore);
        },
        _ => panic!("expected esc sequence"),
    }
}

#[test]
fn esc_reset_intermediates() {
    const INPUT: &[u8] = b"\x1b[?2004l\x1b#8";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 2);
    assert_eq!(dispatcher.dispatched[0], Sequence::Csi(vec![vec![2004]], vec![63], false, 'l'));
    assert_eq!(dispatcher.dispatched[1], Sequence::Esc(vec![35], false, 56));
}

#[test]
fn params_buffer_filled_with_subparam() {
    const INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b";
    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, intermediates, ignore, c) => {
            assert_eq!(intermediates, &[]);
            assert_eq!(params, &[[0; 32]]);
            assert_eq!(c, &'x');
            assert!(ignore);
        },
        _ => panic!("expected csi sequence"),
    }
}

#[cfg(feature = "no_std")]
#[test]
fn build_with_fixed_size() {
    const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
    let mut dispatcher = Dispatcher::default();
    let mut parser: Parser<30> = Parser::new_with_size();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Csi(params, intermediates, ignore, _) => {
            assert_eq!(intermediates, b"?");
            assert_eq!(params, &[[1049]]);
            assert!(!ignore);
        },
        _ => panic!("expected csi sequence"),
    }
}

#[cfg(feature = "no_std")]
#[test]
fn exceed_fixed_osc_buffer_size() {
    const OSC_BUFFER_SIZE: usize = 32;
    const NUM_BYTES: usize = OSC_BUFFER_SIZE + 100;
    const INPUT_START: &[u8] = b"\x1b]52;";
    const INPUT_END: &[u8] = b"\x07";

    let mut dispatcher = Dispatcher::default();
    // The buffer size must be given explicitly, otherwise the parser falls
    // back to the default size and the length assertion below cannot hold.
    let mut parser: Parser<OSC_BUFFER_SIZE> = Parser::new_with_size();

    // Create valid OSC escape
    parser.advance(&mut dispatcher, INPUT_START);

    // Exceed max buffer size
    parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]);

    // Terminate escape for dispatch
    parser.advance(&mut dispatcher, INPUT_END);

    assert_eq!(dispatcher.dispatched.len(), 1);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(params, _) => {
            assert_eq!(params.len(), 2);
            assert_eq!(params[0], b"52");
            assert_eq!(params[1].len(), OSC_BUFFER_SIZE - params[0].len());
            for item in params[1].iter() {
                assert_eq!(*item, b'a');
            }
        },
        _ => panic!("expected osc sequence"),
    }
}

#[cfg(feature = "no_std")]
#[test]
fn fixed_size_osc_containing_string_terminator() {
    const INPUT_START: &[u8] = b"\x1b]2;";
    const INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab";
    const INPUT_END: &[u8] = b"\x1b\\";

    let mut dispatcher = Dispatcher::default();
    let mut parser: Parser<5> = Parser::new_with_size();

    parser.advance(&mut dispatcher, INPUT_START);
    parser.advance(&mut dispatcher, INPUT_MIDDLE);
    parser.advance(&mut dispatcher, INPUT_END);

    assert_eq!(dispatcher.dispatched.len(), 2);
    match &dispatcher.dispatched[0] {
        Sequence::Osc(params, false) => {
            assert_eq!(params[0], b"2");
            assert_eq!(params[1], INPUT_MIDDLE);
        },
        _ => panic!("expected osc sequence"),
    }
}

#[test]
fn unicode() {
    const INPUT: &[u8] = b"\xF0\x9F\x8E\x89_\xF0\x9F\xA6\x80\xF0\x9F\xA6\x80_\xF0\x9F\x8E\x89";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 6);
    assert_eq!(dispatcher.dispatched[0], Sequence::Print('🎉'));
    assert_eq!(dispatcher.dispatched[1], Sequence::Print('_'));
    assert_eq!(dispatcher.dispatched[2], Sequence::Print('🦀'));
    assert_eq!(dispatcher.dispatched[3], Sequence::Print('🦀'));
    assert_eq!(dispatcher.dispatched[4], Sequence::Print('_'));
    assert_eq!(dispatcher.dispatched[5], Sequence::Print('🎉'));
}

#[test]
fn invalid_utf8() {
    const INPUT: &[u8] = b"a\xEF\xBCb";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 3);
    assert_eq!(dispatcher.dispatched[0], Sequence::Print('a'));
    assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
    assert_eq!(dispatcher.dispatched[2], Sequence::Print('b'));
}

#[test]
fn partial_utf8() {
    const INPUT: &[u8] = b"\xF0\x9F\x9A\x80";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &INPUT[..1]);
    parser.advance(&mut dispatcher, &INPUT[1..2]);
    parser.advance(&mut dispatcher, &INPUT[2..3]);
    parser.advance(&mut dispatcher, &INPUT[3..]);

    assert_eq!(dispatcher.dispatched.len(), 1);
    assert_eq!(dispatcher.dispatched[0], Sequence::Print('🚀'));
}

#[test]
fn partial_utf8_separating_utf8() {
    // This is different from the `partial_utf8` test since it has a multi-byte UTF8
    // character after the partial UTF8 state, causing a partial byte to be present
    // in the `partial_utf8` buffer after the 2-byte codepoint.

    // "ĸ🎉"
    const INPUT: &[u8] = b"\xC4\xB8\xF0\x9F\x8E\x89";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &INPUT[..1]);
    parser.advance(&mut dispatcher, &INPUT[1..]);

    assert_eq!(dispatcher.dispatched.len(), 2);
    assert_eq!(dispatcher.dispatched[0], Sequence::Print('ĸ'));
    assert_eq!(dispatcher.dispatched[1], Sequence::Print('🎉'));
}

#[test]
fn partial_invalid_utf8() {
    const INPUT: &[u8] = b"a\xEF\xBCb";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, &INPUT[..1]);
    parser.advance(&mut dispatcher, &INPUT[1..2]);
    parser.advance(&mut dispatcher, &INPUT[2..3]);
    parser.advance(&mut dispatcher, &INPUT[3..]);

    assert_eq!(dispatcher.dispatched.len(), 3);
    assert_eq!(dispatcher.dispatched[0], Sequence::Print('a'));
    assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
    assert_eq!(dispatcher.dispatched[2], Sequence::Print('b'));
}

#[test]
fn partial_utf8_into_esc() {
    const INPUT: &[u8] = b"\xD8\x1b012";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 4);
    assert_eq!(dispatcher.dispatched[0], Sequence::Print('�'));
    assert_eq!(dispatcher.dispatched[1], Sequence::Esc(Vec::new(), false, b'0'));
    assert_eq!(dispatcher.dispatched[2], Sequence::Print('1'));
    assert_eq!(dispatcher.dispatched[3], Sequence::Print('2'));
}

#[test]
fn c1s() {
    const INPUT: &[u8] = b"\x00\x1f\x80\x90\x98\x9b\x9c\x9d\x9e\x9fa";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 11);
    assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0));
    assert_eq!(dispatcher.dispatched[1], Sequence::Execute(31));
    assert_eq!(dispatcher.dispatched[2], Sequence::Execute(128));
    assert_eq!(dispatcher.dispatched[3], Sequence::Execute(144));
    assert_eq!(dispatcher.dispatched[4], Sequence::Execute(152));
    assert_eq!(dispatcher.dispatched[5], Sequence::Execute(155));
    assert_eq!(dispatcher.dispatched[6], Sequence::Execute(156));
    assert_eq!(dispatcher.dispatched[7], Sequence::Execute(157));
    assert_eq!(dispatcher.dispatched[8], Sequence::Execute(158));
    assert_eq!(dispatcher.dispatched[9], Sequence::Execute(159));
    assert_eq!(dispatcher.dispatched[10], Sequence::Print('a'));
}

#[test]
fn execute_anywhere() {
    const INPUT: &[u8] = b"\x18\x1a";

    let mut dispatcher = Dispatcher::default();
    let mut parser = Parser::new();

    parser.advance(&mut dispatcher, INPUT);

    assert_eq!(dispatcher.dispatched.len(), 2);
    assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0x18));
    assert_eq!(dispatcher.dispatched[1], Sequence::Execute(0x1A));
}
}