//! Parser for implementing virtual terminal emulators //! //! [`Parser`] is implemented according to [Paul Williams' ANSI parser //! state machine]. The state machine doesn't assign meaning to the parsed data //! and is thus not itself sufficient for writing a terminal emulator. Instead, //! it is expected that an implementation of [`Perform`] is provided which does //! something useful with the parsed data. The [`Parser`] handles the book //! keeping, and the [`Perform`] gets to simply handle actions. //! //! # Examples //! //! For an example of using the [`Parser`] please see the examples folder. The example included //! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to //! pipe `vim` into it //! //! ```sh //! cargo build --release --example parselog //! vim | target/release/examples/parselog //! ``` //! //! Just type `:q` to exit. //! //! # Differences from original state machine description //! //! * UTF-8 Support for Input //! * OSC Strings can be terminated by 0x07 //! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they //! no longer work in all states. //! //! [`Parser`]: struct.Parser.html //! [`Perform`]: trait.Perform.html //! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser #![no_std] extern crate utf8parse as utf8; use core::mem; mod table; mod definitions; use definitions::{Action, State, unpack}; use table::{EXIT_ACTIONS, ENTRY_ACTIONS, STATE_CHANGE}; impl State { /// Get exit action for this state #[inline(always)] pub fn exit_action(&self) -> Action { unsafe { *EXIT_ACTIONS.get_unchecked(*self as usize) } } /// Get entry action for this state #[inline(always)] pub fn entry_action(&self) -> Action { unsafe { *ENTRY_ACTIONS.get_unchecked(*self as usize) } } } const MAX_INTERMEDIATES: usize = 2; const MAX_OSC_RAW: usize = 1024; const MAX_PARAMS: usize = 16; struct VtUtf8Receiver<'a, P: Perform + 'a>(&'a mut P, &'a mut State); impl<'a, P: Perform> utf8::Receiver for VtUtf8Receiver<'a, P> { fn codepoint(&mut self, c: char) { self.0.print(c); *self.1 = State::Ground; } fn invalid_sequence(&mut self) { self.0.print('�'); *self.1 = State::Ground; } } /// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] /// /// [`Perform`]: trait.Perform.html pub struct Parser { state: State, intermediates: [u8; MAX_INTERMEDIATES], intermediate_idx: usize, params: [i64; MAX_PARAMS], param: i64, num_params: usize, osc_raw: [u8; MAX_OSC_RAW], osc_params: [(usize, usize); MAX_PARAMS], osc_idx: usize, osc_num_params: usize, ignoring: bool, utf8_parser: utf8::Parser, } impl Parser { /// Create a new Parser pub fn new() -> Parser { Parser { state: State::Ground, intermediates: [0u8; MAX_INTERMEDIATES], intermediate_idx: 0, params: [0i64; MAX_PARAMS], param: 0, num_params: 0, osc_raw: [0; MAX_OSC_RAW], osc_params: [(0, 0); MAX_PARAMS], osc_idx: 0, osc_num_params: 0, ignoring: false, utf8_parser: utf8::Parser::new(), } } #[inline] fn params(&self) -> &[i64] { &self.params[..self.num_params] } #[inline] fn intermediates(&self) -> &[u8] { &self.intermediates[..self.intermediate_idx] } /// Advance the parser state /// /// Requires a [`Perform`] in case `byte` triggers an action /// /// [`Perform`]: trait.Perform.html #[inline] pub fn advance(&mut self, performer: &mut P, byte: u8) { // Utf8 characters are handled out-of-band. if let State::Utf8 = self.state { self.process_utf8(performer, byte); return; } // Handle state changes in the anywhere state before evaluating changes // for current state. let mut change = STATE_CHANGE[State::Anywhere as usize][byte as usize]; if change == 0 { change = STATE_CHANGE[self.state as usize][byte as usize]; } // Unpack into a state and action let (state, action) = unpack(change); self.perform_state_change(performer, state, action, byte); } #[inline] fn process_utf8

(&mut self, performer: &mut P, byte: u8) where P: Perform { let mut receiver = VtUtf8Receiver(performer, &mut self.state); let utf8_parser = &mut self.utf8_parser; utf8_parser.advance(&mut receiver, byte); } #[inline] fn perform_state_change

(&mut self, performer: &mut P, state: State, action: Action, byte: u8) where P: Perform { macro_rules! maybe_action { ($action:expr, $arg:expr) => { match $action { Action::None => (), action => { self.perform_action(performer, action, $arg); }, } } } match state { State::Anywhere => { // Just run the action self.perform_action(performer, action, byte); }, state => { // Exit action for previous state let exit_action = self.state.exit_action(); maybe_action!(exit_action, 0); // Transition action maybe_action!(action, byte); // Entry action for new state maybe_action!(state.entry_action(), 0); // Assume the new state self.state = state; } } } /// Separate method for osc_dispatch that borrows self as read-only /// /// The aliasing is needed here for multiple slices into self.osc_raw #[inline] fn osc_dispatch(&self, performer: &mut P) { let mut slices: [&[u8]; MAX_PARAMS] = unsafe { mem::uninitialized() }; for i in 0..self.osc_num_params { let indices = self.osc_params[i]; slices[i] = &self.osc_raw[indices.0..indices.1]; } performer.osc_dispatch( &slices[..self.osc_num_params], ); } #[inline] fn perform_action(&mut self, performer: &mut P, action: Action, byte: u8) { match action { Action::Print => performer.print(byte as char), Action::Execute => performer.execute(byte), Action::Hook => { performer.hook( self.params(), self.intermediates(), self.ignoring, ); }, Action::Put => performer.put(byte), Action::OscStart => { self.osc_idx = 0; self.osc_num_params = 0; }, Action::OscPut => { let idx = self.osc_idx; if idx == self.osc_raw.len() { return; } // Param separator if byte == b';' { let param_idx = self.osc_num_params; match param_idx { // Only process up to MAX_PARAMS MAX_PARAMS => return, // First param is special - 0 to current byte index 0 => { self.osc_params[param_idx] = (0, idx); }, // All other params depend on previous indexing _ => { let prev = self.osc_params[param_idx - 1]; let begin = prev.1; self.osc_params[param_idx] = (begin, idx); } } self.osc_num_params += 1; } else { self.osc_raw[idx] = byte; self.osc_idx += 1; } }, Action::OscEnd => { let param_idx = self.osc_num_params; let idx = self.osc_idx; match param_idx { // Finish last parameter if not already maxed MAX_PARAMS => (), // First param is special - 0 to current byte index 0 => { self.osc_params[param_idx] = (0, idx); self.osc_num_params += 1; }, // All other params depend on previous indexing _ => { let prev = self.osc_params[param_idx - 1]; let begin = prev.1; self.osc_params[param_idx] = (begin, idx); self.osc_num_params += 1; } } self.osc_dispatch(performer); }, Action::Unhook => performer.unhook(), Action::CsiDispatch => { self.params[self.num_params] = self.param; self.num_params += 1; performer.csi_dispatch( self.params(), self.intermediates(), self.ignoring, byte as char ); self.num_params = 0; self.param = 0; } Action::EscDispatch => { performer.esc_dispatch( self.params(), self.intermediates(), self.ignoring, byte ); }, Action::Ignore | Action::None => (), Action::Collect => { if self.intermediate_idx == MAX_INTERMEDIATES { self.ignoring = true; } else { self.intermediates[self.intermediate_idx] = byte; self.intermediate_idx += 1; } }, Action::Param => { if byte == b';' { // Completed a param let idx = self.num_params; if idx == MAX_PARAMS - 1 { return; } self.params[idx] = self.param; self.param = 0; self.num_params += 1; } else { // Continue collecting bytes into param self.param = self.param.saturating_mul(10); self.param = self.param.saturating_add((byte - b'0') as i64); } }, Action::Clear => { self.intermediate_idx = 0; self.num_params = 0; self.ignoring = false; }, Action::BeginUtf8 => { self.process_utf8(performer, byte); }, } } } /// Performs actions requested by the Parser /// /// Actions in this case mean, for example, handling a CSI escape sequence describing cursor /// movement, or simply printing characters to the screen. /// /// The methods on this type correspond to actions described in /// http://vt100.net/emu/dec_ansi_parser. I've done my best to describe them in /// a useful way in my own words for completeness, but the site should be /// referenced if something isn't clear. If the site disappears at some point in /// the future, consider checking archive.org. pub trait Perform { /// Draw a character to the screen and update states fn print(&mut self, char); /// Execute a C0 or C1 control function fn execute(&mut self, byte: u8); /// Invoked when a final character arrives in first part of device control string /// /// The control function should be determined from the private marker, final character, and /// execute with a parameter list. A handler should be selected for remaining characters in the /// string; the handler function should subsequently be called by `put` for every character in /// the control string. /// /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn hook(&mut self, params: &[i64], intermediates: &[u8], ignore: bool); /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls /// will also be passed to the handler. fn put(&mut self, byte: u8); /// Called when a device control string is terminated /// /// The previously selected handler should be notified that the DCS has /// terminated. fn unhook(&mut self); /// Dispatch an operating system command fn osc_dispatch(&mut self, params: &[&[u8]]); /// A final character has arrived for a CSI sequence /// /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn csi_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, char); /// The final character of an escape sequence has arrived. /// /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn esc_dispatch(&mut self, params: &[i64], intermediates: &[u8], ignore: bool, byte: u8); } #[cfg(test)] #[macro_use] extern crate std; #[cfg(test)] mod tests { use std::vec::Vec; use super::{Parser, Perform}; use core::i64; static OSC_BYTES: &'static [u8] = &[0x1b, 0x5d, // Begin OSC b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o', b'd', b'e', b'/', b'a', b'l', b'a', b'c', b'r', b'i', b't', b't', b'y', 0x07 // End OSC ]; #[derive(Default)] struct OscDispatcher { dispatched_osc: bool, params: Vec>, } // All empty bodies except osc_dispatch impl Perform for OscDispatcher { fn print(&mut self, _: char) {} fn execute(&mut self, _byte: u8) {} fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {} fn put(&mut self, _byte: u8) {} fn unhook(&mut self) {} fn osc_dispatch(&mut self, params: &[&[u8]]) { // Set a flag so we know these assertions all run self.dispatched_osc = true; self.params = params.iter().map(|p| p.to_vec()).collect(); } fn csi_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) {} fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {} } #[derive(Default)] struct CsiDispatcher { dispatched_csi: bool, params: Vec>, } impl Perform for CsiDispatcher { fn print(&mut self, _: char) {} fn execute(&mut self, _byte: u8) {} fn hook(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool) {} fn put(&mut self, _byte: u8) {} fn unhook(&mut self) {} fn osc_dispatch(&mut self, _params: &[&[u8]]) { } fn csi_dispatch(&mut self, params: &[i64], _intermediates: &[u8], _ignore: bool, _c: char) { self.dispatched_csi = true; self.params.push(params.to_vec()); } fn esc_dispatch(&mut self, _params: &[i64], _intermediates: &[u8], _ignore: bool, _byte: u8) {} } #[test] fn parse_osc() { // Create dispatcher and check state let mut dispatcher = OscDispatcher::default(); assert_eq!(dispatcher.dispatched_osc, false); // Run parser using OSC_BYTES let mut parser = Parser::new(); for byte in OSC_BYTES { parser.advance(&mut dispatcher, *byte); } // Check that flag is set and thus osc_dispatch assertions ran. assert!(dispatcher.dispatched_osc); assert_eq!(dispatcher.params.len(), 2); assert_eq!(dispatcher.params[0], &OSC_BYTES[2..3]); assert_eq!(dispatcher.params[1], &OSC_BYTES[4..(OSC_BYTES.len() - 1)]); } #[test] fn parse_empty_osc() { // Create dispatcher and check state let mut dispatcher = OscDispatcher::default(); assert_eq!(dispatcher.dispatched_osc, false); // Run parser using OSC_BYTES let mut parser = Parser::new(); for byte in &[0x1b, 0x5d, 0x07] { parser.advance(&mut dispatcher, *byte); } // Check that flag is set and thus osc_dispatch assertions ran. assert!(dispatcher.dispatched_osc); } #[test] fn parse_osc_max_params() { use MAX_PARAMS; static INPUT: &'static [u8] = b"\x1b];;;;;;;;;;;;;;;;;\x1b"; // Create dispatcher and check state let mut dispatcher = OscDispatcher::default(); assert_eq!(dispatcher.dispatched_osc, false); // Run parser using OSC_BYTES let mut parser = Parser::new(); for byte in INPUT { parser.advance(&mut dispatcher, *byte); } // Check that flag is set and thus osc_dispatch assertions ran. assert!(dispatcher.dispatched_osc); assert_eq!(dispatcher.params.len(), MAX_PARAMS); for param in dispatcher.params.iter() { assert_eq!(param.len(), 0); } } #[test] fn parse_csi_max_params() { use MAX_PARAMS; static INPUT: &'static [u8] = b"\x1b[1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;p"; // Create dispatcher and check state let mut dispatcher = CsiDispatcher::default(); assert!(!dispatcher.dispatched_csi); // Run parser using OSC_BYTES let mut parser = Parser::new(); for byte in INPUT { parser.advance(&mut dispatcher, *byte); } // Check that flag is set and thus csi_dispatch assertions ran. assert!(dispatcher.dispatched_csi); assert_eq!(dispatcher.params.len(), 1); assert_eq!(dispatcher.params[0].len(), MAX_PARAMS); } #[test] fn parse_csi_params_trailing_semicolon() { let mut dispatcher = CsiDispatcher::default(); let mut parser = Parser::new(); for byte in b"\x1b[4;m" { parser.advance(&mut dispatcher, *byte); } assert_eq!(dispatcher.params.len(), 1); assert_eq!(dispatcher.params[0], &[4, 0]); } #[test] fn parse_semi_set_underline() { // Create dispatcher and check state let mut dispatcher = CsiDispatcher::default(); // Run parser using OSC_BYTES let mut parser = Parser::new(); for byte in b"\x1b[;4m" { parser.advance(&mut dispatcher, *byte); } // Check that flag is set and thus osc_dispatch assertions ran. assert_eq!(dispatcher.params[0], &[0, 4]); } #[test] fn parse_long_csi_param() { // The important part is the parameter, which is (i64::MAX + 1) static INPUT: &'static [u8] = b"\x1b[9223372036854775808m"; let mut dispatcher = CsiDispatcher::default(); let mut parser = Parser::new(); for byte in INPUT { parser.advance(&mut dispatcher, *byte); } assert_eq!(dispatcher.params[0], &[i64::MAX as i64]); } #[test] fn parse_osc_with_utf8_arguments() { static INPUT: &'static [u8] = &[ 0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27, 0xc2, 0xaf, 0x5c, 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f, 0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26, 0x20, 0x73, 0x6c, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07 ]; // Create dispatcher and check state let mut dispatcher = OscDispatcher { params: vec![], dispatched_osc: false }; // Run parser using OSC_BYTES let mut parser = Parser::new(); for byte in INPUT { parser.advance(&mut dispatcher, *byte); } // Check that flag is set and thus osc_dispatch assertions ran. assert_eq!(dispatcher.params[0], &[b'2']); assert_eq!(dispatcher.params[1], &INPUT[5..(INPUT.len() - 1)]); } }