aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.toml21
-rw-r--r--examples/parselog.rs6
-rw-r--r--rustfmt.toml6
-rw-r--r--src/ansi.rs460
-rw-r--r--src/definitions.rs104
-rw-r--r--src/lib.rs910
-rw-r--r--src/params.rs5
-rw-r--r--src/table.rs135
-rw-r--r--utf8parse/Cargo.toml15
l---------utf8parse/LICENSE-APACHE1
l---------utf8parse/LICENSE-MIT1
-rw-r--r--utf8parse/src/lib.rs132
-rw-r--r--utf8parse/src/types.rs100
-rw-r--r--utf8parse/tests/UTF-8-demo.txt212
-rw-r--r--utf8parse/tests/utf-8-demo.rs31
-rw-r--r--vte_generate_state_changes/src/lib.rs12
16 files changed, 1000 insertions, 1151 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 040aa28..aee7453 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,21 +13,20 @@ name = "vte"
edition = "2021"
rust-version = "1.62.1"
-[dependencies]
-arrayvec = { version = "0.7.2", default-features = false, optional = true }
-bitflags = { version = "2.3.3", default-features = false, optional = true }
-cursor-icon = { version = "1.0.0", default-features = false, optional = true }
-log = { version = "0.4.17", optional = true }
-serde = { version = "1.0.160", features = ["derive"], optional = true }
-utf8parse = { version = "0.2.0", path = "utf8parse" }
-vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" }
+[workspace]
+members = ["vte_generate_state_changes"]
[features]
ansi = ["log", "cursor-icon", "bitflags"]
default = ["no_std"]
-nightly = ["utf8parse/nightly"]
no_std = ["arrayvec"]
serde = ["dep:serde"]
-[workspace]
-members = ["utf8parse", "vte_generate_state_changes"]
+[dependencies]
+arrayvec = { version = "0.7.2", default-features = false, optional = true }
+bitflags = { version = "2.3.3", default-features = false, optional = true }
+cursor-icon = { version = "1.0.0", default-features = false, optional = true }
+log = { version = "0.4.17", optional = true }
+memchr = "2.7.4"
+serde = { version = "1.0.160", features = ["derive"], optional = true }
+vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" }
diff --git a/examples/parselog.rs b/examples/parselog.rs
index dfd0aee..c41c150 100644
--- a/examples/parselog.rs
+++ b/examples/parselog.rs
@@ -61,11 +61,7 @@ fn main() {
loop {
match handle.read(&mut buf) {
Ok(0) => break,
- Ok(n) => {
- for byte in &buf[..n] {
- statemachine.advance(&mut performer, *byte);
- }
- },
+ Ok(n) => statemachine.advance(&mut performer, &buf[..n]),
Err(err) => {
println!("err: {}", err);
break;
diff --git a/rustfmt.toml b/rustfmt.toml
index 9308ba9..f82517e 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -1,13 +1,17 @@
format_code_in_doc_comments = true
+group_imports = "StdExternalCrate"
match_block_trailing_comma = true
condense_wildcard_suffixes = true
use_field_init_shorthand = true
+normalize_doc_attributes = true
overflow_delimited_expr = true
+imports_granularity = "Module"
+format_macro_matchers = true
use_small_heuristics = "Max"
+hex_literal_case = "Upper"
normalize_comments = true
reorder_impl_items = true
use_try_shorthand = true
newline_style = "Unix"
format_strings = true
wrap_comments = true
-comment_width = 100
diff --git a/src/ansi.rs b/src/ansi.rs
index 8cac26d..fa5b1ed 100644
--- a/src/ansi.rs
+++ b/src/ansi.rs
@@ -11,21 +11,20 @@ extern crate alloc;
use alloc::borrow::ToOwned;
use alloc::string::{String, ToString};
use alloc::vec::Vec;
-use bitflags::bitflags;
-
use core::convert::TryFrom;
use core::fmt::{self, Display, Formatter, Write};
+#[cfg(not(feature = "no_std"))]
+use core::ops::Mul;
use core::ops::{Add, Sub};
use core::str::FromStr;
use core::time::Duration;
-use core::{iter, str};
-
-#[cfg(not(feature = "no_std"))]
-use core::ops::Mul;
-
+use core::{iter, mem, str};
#[cfg(not(feature = "no_std"))]
use std::time::Instant;
+use bitflags::bitflags;
+#[doc(inline)]
+pub use cursor_icon;
use cursor_icon::CursorIcon;
use log::debug;
#[cfg(feature = "serde")]
@@ -33,9 +32,6 @@ use serde::{Deserialize, Serialize};
use crate::{Params, ParamsIter};
-#[doc(inline)]
-pub use cursor_icon;
-
/// Maximum time before a synchronized update is aborted.
const SYNC_UPDATE_TIMEOUT: Duration = Duration::from_millis(150);
@@ -168,9 +164,9 @@ impl FromStr for Rgb {
match u32::from_str_radix(chars, 16) {
Ok(mut color) => {
- let b = (color & 0xff) as u8;
+ let b = (color & 0xFF) as u8;
color >>= 8;
- let g = (color & 0xff) as u8;
+ let g = (color & 0xFF) as u8;
color >>= 8;
let r = color as u8;
Ok(Rgb { r, g, b })
@@ -237,14 +233,8 @@ fn parse_number(input: &[u8]) -> Option<u8> {
let mut num: u8 = 0;
for c in input {
let c = *c as char;
- if let Some(digit) = c.to_digit(10) {
- num = match num.checked_mul(10).and_then(|v| v.checked_add(digit as u8)) {
- Some(v) => v,
- None => return None,
- }
- } else {
- return None;
- }
+ let digit = c.to_digit(10)?;
+ num = num.checked_mul(10).and_then(|v| v.checked_add(digit as u8))?;
}
Some(num)
}
@@ -270,11 +260,12 @@ struct SyncState<T: Timeout> {
impl<T: Timeout> Default for SyncState<T> {
fn default() -> Self {
- Self { buffer: Vec::with_capacity(SYNC_BUFFER_SIZE), timeout: T::default() }
+ Self { buffer: Vec::with_capacity(SYNC_BUFFER_SIZE), timeout: Default::default() }
}
}
-/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler.
+/// The processor wraps a `crate::Parser` to ultimately call methods on a
+/// Handler.
#[cfg(not(feature = "no_std"))]
#[derive(Default)]
pub struct Processor<T: Timeout = StdSyncHandler> {
@@ -282,7 +273,8 @@ pub struct Processor<T: Timeout = StdSyncHandler> {
parser: crate::Parser,
}
-/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler.
+/// The processor wraps a `crate::Parser` to ultimately call methods on a
+/// Handler.
#[cfg(feature = "no_std")]
#[derive(Default)]
pub struct Processor<T: Timeout> {
@@ -303,15 +295,19 @@ impl<T: Timeout> Processor<T> {
/// Process a new byte from the PTY.
#[inline]
- pub fn advance<H>(&mut self, handler: &mut H, byte: u8)
+ pub fn advance<H>(&mut self, handler: &mut H, bytes: &[u8])
where
H: Handler,
{
- if self.state.sync_state.timeout.pending_timeout() {
- self.advance_sync(handler, byte);
- } else {
- let mut performer = Performer::new(&mut self.state, handler);
- self.parser.advance(&mut performer, byte);
+ let mut processed = 0;
+ while processed != bytes.len() {
+ if self.state.sync_state.timeout.pending_timeout() {
+ processed += self.advance_sync(handler, &bytes[processed..]);
+ } else {
+ let mut performer = Performer::new(&mut self.state, handler);
+ processed +=
+ self.parser.advance_until_terminated(&mut performer, &bytes[processed..]);
+ }
}
}
@@ -320,18 +316,45 @@ impl<T: Timeout> Processor<T> {
where
H: Handler,
{
+ self.stop_sync_internal(handler, None);
+ }
+
+ /// End a synchronized update.
+ ///
+ /// The `bsu_offset` parameter should be passed if the sync buffer contains
+ /// a new BSU escape that is not part of the current synchronized
+ /// update.
+ fn stop_sync_internal<H>(&mut self, handler: &mut H, bsu_offset: Option<usize>)
+ where
+ H: Handler,
+ {
// Process all synchronized bytes.
- for i in 0..self.state.sync_state.buffer.len() {
- let byte = self.state.sync_state.buffer[i];
- let mut performer = Performer::new(&mut self.state, handler);
- self.parser.advance(&mut performer, byte);
+ //
+ // NOTE: We do not use `advance_until_terminated` here since BSU sequences are
+ // processed automatically during the synchronized update.
+ let buffer = mem::take(&mut self.state.sync_state.buffer);
+ let offset = bsu_offset.unwrap_or(buffer.len());
+ let mut performer = Performer::new(&mut self.state, handler);
+ self.parser.advance(&mut performer, &buffer[..offset]);
+ self.state.sync_state.buffer = buffer;
+
+ match bsu_offset {
+ // Just clear processed bytes if there is a new BSU.
+ //
+ // NOTE: We do not need to re-process for a new ESU since the `advance_sync`
+ // function checks for BSUs in reverse.
+ Some(bsu_offset) => {
+ let new_len = self.state.sync_state.buffer.len() - bsu_offset;
+ self.state.sync_state.buffer.copy_within(bsu_offset.., 0);
+ self.state.sync_state.buffer.truncate(new_len);
+ },
+ // Report mode and clear state if no new BSU is present.
+ None => {
+ handler.unset_private_mode(NamedPrivateMode::SyncUpdate.into());
+ self.state.sync_state.timeout.clear_timeout();
+ self.state.sync_state.buffer.clear();
+ },
}
-
- // Report that update ended, since we could end due to timeout.
- handler.unset_private_mode(NamedPrivateMode::SyncUpdate.into());
- // Resetting state after processing makes sure we don't interpret buffered sync escapes.
- self.state.sync_state.buffer.clear();
- self.state.sync_state.timeout.clear_timeout();
}
/// Number of bytes in the synchronization buffer.
@@ -341,36 +364,56 @@ impl<T: Timeout> Processor<T> {
}
/// Process a new byte during a synchronized update.
+ ///
+ /// Returns the number of bytes processed.
#[cold]
- fn advance_sync<H>(&mut self, handler: &mut H, byte: u8)
+ fn advance_sync<H>(&mut self, handler: &mut H, bytes: &[u8]) -> usize
where
H: Handler,
{
- self.state.sync_state.buffer.push(byte);
+ // Advance sync parser or stop sync if we'd exceed the maximum buffer size.
+ if self.state.sync_state.buffer.len() + bytes.len() >= SYNC_BUFFER_SIZE - 1 {
+ // Terminate the synchronized update.
+ self.stop_sync_internal(handler, None);
- // Handle sync CSI escape sequences.
- self.advance_sync_csi(handler);
+ // Just parse the bytes normally.
+ let mut performer = Performer::new(&mut self.state, handler);
+ self.parser.advance_until_terminated(&mut performer, bytes)
+ } else {
+ self.state.sync_state.buffer.extend(bytes);
+ self.advance_sync_csi(handler, bytes.len());
+ bytes.len()
+ }
}
/// Handle BSU/ESU CSI sequences during synchronized update.
- fn advance_sync_csi<H>(&mut self, handler: &mut H)
+ fn advance_sync_csi<H>(&mut self, handler: &mut H, new_bytes: usize)
where
H: Handler,
{
- // Get the last few bytes for comparison.
- let len = self.state.sync_state.buffer.len();
- let offset = len.saturating_sub(SYNC_ESCAPE_LEN);
- let end = &self.state.sync_state.buffer[offset..];
+ // Get constraints within which a new escape character might be relevant.
+ let buffer_len = self.state.sync_state.buffer.len();
+ let start_offset = (buffer_len - new_bytes).saturating_sub(SYNC_ESCAPE_LEN - 1);
+ let end_offset = buffer_len.saturating_sub(SYNC_ESCAPE_LEN - 1);
+ let search_buffer = &self.state.sync_state.buffer[start_offset..end_offset];
+ // Search for termination/extension escapes in the added bytes.
+ //
// NOTE: It is technically legal to specify multiple private modes in the same
// escape, but we only allow EXACTLY `\e[?2026h`/`\e[?2026l` to keep the parser
- // reasonable.
- //
- // Check for extension/termination of the synchronized update.
- if end == BSU_CSI {
- self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT);
- } else if end == ESU_CSI || len >= SYNC_BUFFER_SIZE - 1 {
- self.stop_sync(handler);
+ // more simple.
+ let mut bsu_offset = None;
+ for index in memchr::memchr_iter(0x1B, search_buffer).rev() {
+ let offset = start_offset + index;
+ let escape = &self.state.sync_state.buffer[offset..offset + SYNC_ESCAPE_LEN];
+
+ if escape == BSU_CSI {
+ self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT);
+ bsu_offset = Some(offset);
+ } else if escape == ESU_CSI {
+ self.stop_sync_internal(handler, bsu_offset);
+ break;
+ }
}
}
}
@@ -382,13 +425,16 @@ impl<T: Timeout> Processor<T> {
struct Performer<'a, H: Handler, T: Timeout> {
state: &'a mut ProcessorState<T>,
handler: &'a mut H,
+
+ /// Whether the parser should be prematurely terminated.
+ terminated: bool,
}
impl<'a, H: Handler + 'a, T: Timeout> Performer<'a, H, T> {
/// Create a performer.
#[inline]
pub fn new<'b>(state: &'b mut ProcessorState<T>, handler: &'b mut H) -> Performer<'b, H, T> {
- Performer { state, handler }
+ Performer { state, handler, terminated: Default::default() }
}
}
@@ -710,13 +756,14 @@ bitflags! {
///
/// This only applies to keys corresponding to ascii characters.
///
-/// For the details on how to implement the mode handling correctly, consult [`XTerm's
-/// implementation`] and the [`output`] of XTerm's provided [`perl script`]. Some libraries and
-/// implementations also use the [`fixterms`] definition of the `CSI u`.
+/// For the details on how to implement the mode handling correctly, consult
+/// [`XTerm's implementation`] and the [`output`] of XTerm's provided [`perl
+/// script`]. Some libraries and implementations also use the [`fixterms`]
+/// definition of the `CSI u`.
///
-/// The end escape sequence has a `CSI char; modifiers u` form while the original
-/// `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI u`, since it has
-/// more adoption.
+/// The end escape sequence has a `CSI char; modifiers u` form while the
+/// original `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI
+/// u`, since it has more adoption.
///
/// [`XTerm's implementation`]: https://invisible-island.net/xterm/modified-keys.html
/// [`perl script`]: https://github.com/ThomasDickey/xterm-snapshots/blob/master/vttests/modify-keys.pl
@@ -727,12 +774,14 @@ bitflags! {
pub enum ModifyOtherKeys {
/// Reset the state.
Reset,
- /// Enables this feature except for keys with well-known behavior, e.g., Tab, Backspace and
- /// some special control character cases which are built into the X11 library (e.g.,
- /// Control-Space to make a NUL, or Control-3 to make an Escape character).
+ /// Enables this feature except for keys with well-known behavior, e.g.,
+ /// Tab, Backspace and some special control character cases which are
+ /// built into the X11 library (e.g., Control-Space to make a NUL, or
+ /// Control-3 to make an Escape character).
///
/// Escape sequences shouldn't be emitted under the following circumstances:
- /// - When the key is in range of `[64;127]` and the modifier is either Control or Shift
+ /// - When the key is in range of `[64;127]` and the modifier is either
+ /// Control or Shift
/// - When the key combination is a known control combination alias
///
/// For more details, consult the [`example`] for the suggested translation.
@@ -740,9 +789,10 @@ pub enum ModifyOtherKeys {
/// [`example`]: https://github.com/alacritty/vte/blob/master/doc/modifyOtherKeys-example.txt
EnableExceptWellDefined,
/// Enables this feature for all keys including the exceptions of
- /// [`Self::EnableExceptWellDefined`]. XTerm still ignores the special cases built into the
- /// X11 library. Any shifted (modified) ordinary key send an escape sequence. The Alt- and
- /// Meta- modifiers cause XTerm to send escape sequences.
+ /// [`Self::EnableExceptWellDefined`]. XTerm still ignores the special
+ /// cases built into the X11 library. Any shifted (modified) ordinary
+ /// key send an escape sequence. The Alt- and Meta- modifiers cause
+ /// XTerm to send escape sequences.
///
/// For more details, consult the [`example`] for the suggested translation.
///
@@ -1203,16 +1253,20 @@ impl StandardCharset {
pub enum ScpCharPath {
/// SCP's first parameter value of 0. Behavior is implementation defined.
Default,
- /// SCP's first parameter value of 1 which sets character path to LEFT-TO-RIGHT.
+ /// SCP's first parameter value of 1 which sets character path to
+ /// LEFT-TO-RIGHT.
LTR,
- /// SCP's first parameter value of 2 which sets character path to RIGHT-TO-LEFT.
+ /// SCP's first parameter value of 2 which sets character path to
+ /// RIGHT-TO-LEFT.
RTL,
}
-/// SCP control's second parameter which determines update mode/direction between components.
+/// SCP control's second parameter which determines update mode/direction
+/// between components.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScpUpdateMode {
- /// SCP's second parameter value of 0 (the default). Implementation dependant update.
+ /// SCP's second parameter value of 0 (the default). Implementation
+ /// dependant update.
ImplementationDependant,
/// SCP's second parameter value of 1.
///
@@ -1351,8 +1405,8 @@ where
return;
}
- // Link parameters are in format of `key1=value1:key2=value2`. Currently only key
- // `id` is defined.
+ // Link parameters are in format of `key1=value1:key2=value2`. Currently only
+ // key `id` is defined.
let id = link_params
.split(|&b| b == b':')
.find_map(|kv| kv.strip_prefix(b"id="))
@@ -1547,6 +1601,7 @@ where
// Handle sync updates opaquely.
if param == NamedPrivateMode::SyncUpdate as u16 {
self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT);
+ self.terminated = true;
}
handler.set_private_mode(PrivateMode::new(param))
@@ -1761,6 +1816,11 @@ where
_ => unhandled!(),
}
}
+
+ #[inline]
+ fn terminated(&self) -> bool {
+ self.terminated
+ }
}
#[inline]
@@ -1943,7 +2003,7 @@ pub mod C0 {
/// Unit Separator.
pub const US: u8 = 0x1F;
/// Delete, should be ignored by terminal.
- pub const DEL: u8 = 0x7f;
+ pub const DEL: u8 = 0x7F;
}
// Tests for parsing escape sequences.
@@ -1954,22 +2014,24 @@ mod tests {
use super::*;
#[derive(Default)]
- pub struct TestSyncHandler;
+ pub struct TestSyncHandler {
+ is_sync: usize,
+ }
impl Timeout for TestSyncHandler {
#[inline]
fn set_timeout(&mut self, _: Duration) {
- unreachable!()
+ self.is_sync += 1;
}
#[inline]
fn clear_timeout(&mut self) {
- unreachable!()
+ self.is_sync = 0;
}
#[inline]
fn pending_timeout(&self) -> bool {
- false
+ self.is_sync != 0
}
}
@@ -2028,72 +2090,60 @@ mod tests {
#[test]
fn parse_control_attribute() {
- static BYTES: &[u8] = &[0x1b, b'[', b'1', b'm'];
+ static BYTES: &[u8] = &[0x1B, b'[', b'1', b'm'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
assert_eq!(handler.attr, Some(Attr::Bold));
}
#[test]
fn parse_terminal_identity_csi() {
- let bytes: &[u8] = &[0x1b, b'[', b'1', b'c'];
+ let bytes: &[u8] = &[0x1B, b'[', b'1', b'c'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(!handler.identity_reported);
handler.reset_state();
- let bytes: &[u8] = &[0x1b, b'[', b'c'];
+ let bytes: &[u8] = &[0x1B, b'[', b'c'];
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(handler.identity_reported);
handler.reset_state();
- let bytes: &[u8] = &[0x1b, b'[', b'0', b'c'];
+ let bytes: &[u8] = &[0x1B, b'[', b'0', b'c'];
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(handler.identity_reported);
}
#[test]
fn parse_terminal_identity_esc() {
- let bytes: &[u8] = &[0x1b, b'Z'];
+ let bytes: &[u8] = &[0x1B, b'Z'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(handler.identity_reported);
handler.reset_state();
- let bytes: &[u8] = &[0x1b, b'#', b'Z'];
+ let bytes: &[u8] = &[0x1B, b'#', b'Z'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(!handler.identity_reported);
handler.reset_state();
@@ -2102,16 +2152,14 @@ mod tests {
#[test]
fn parse_truecolor_attr() {
static BYTES: &[u8] = &[
- 0x1b, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';',
+ 0x1B, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';',
b'2', b'5', b'5', b'm',
];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
let spec = Rgb { r: 128, g: 66, b: 255 };
@@ -2122,38 +2170,34 @@ mod tests {
#[test]
fn parse_zsh_startup() {
static BYTES: &[u8] = &[
- 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'7', b'm', b'%', 0x1b, b'[', b'2', b'7', b'm',
- 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ',
+ 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'7', b'm', b'%', 0x1B, b'[', b'2', b'7', b'm',
+ 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
- b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1b, b'[', b'0', b'm', 0x1b, b'[', b'2',
- b'7', b'm', 0x1b, b'[', b'2', b'4', b'm', 0x1b, b'[', b'J', b'j', b'w', b'i', b'l',
- b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1b,
- b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xe2, 0x9e, 0x9c, b' ', 0x1b, b'[', b'0',
- b'1', b';', b'3', b'2', b'm', b' ', 0x1b, b'[', b'3', b'6', b'm', b'~', b'/', b'c',
+ b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1B, b'[', b'0', b'm', 0x1B, b'[', b'2',
+ b'7', b'm', 0x1B, b'[', b'2', b'4', b'm', 0x1B, b'[', b'J', b'j', b'w', b'i', b'l',
+ b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1B,
+ b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xE2, 0x9E, 0x9C, b' ', 0x1B, b'[', b'0',
+ b'1', b';', b'3', b'2', b'm', b' ', 0x1B, b'[', b'3', b'6', b'm', b'~', b'/', b'c',
b'o', b'd', b'e',
];
let mut handler = MockHandler::default();
let mut parser = Processor::<TestSyncHandler>::new();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
}
#[test]
fn parse_designate_g0_as_line_drawing() {
- static BYTES: &[u8] = &[0x1b, b'(', b'0'];
+ static BYTES: &[u8] = &[0x1B, b'(', b'0'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
assert_eq!(handler.index, CharsetIndex::G0);
assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing);
@@ -2161,37 +2205,35 @@ mod tests {
#[test]
fn parse_designate_g1_as_line_drawing_and_invoke() {
- static BYTES: &[u8] = &[0x1b, b')', b'0', 0x0e];
+ static BYTES: &[u8] = &[0x1B, b')', b'0', 0x0E];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in &BYTES[..3] {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, &BYTES[..3]);
assert_eq!(handler.index, CharsetIndex::G1);
assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing);
let mut handler = MockHandler::default();
- parser.advance(&mut handler, BYTES[3]);
+ parser.advance(&mut handler, &[BYTES[3]]);
assert_eq!(handler.index, CharsetIndex::G1);
}
#[test]
fn parse_valid_rgb_colors() {
- assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xff, g: 0xee, b: 0xdd }));
- assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
- assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xff, g: 0xec, b: 0xca }));
- assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xff, g: 0x0, b: 0x0 }));
+ assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xFF, g: 0xEE, b: 0xDD }));
+ assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
+ assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xFF, g: 0xEC, b: 0xCA }));
+ assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xFF, g: 0x0, b: 0x0 }));
}
#[test]
fn parse_valid_legacy_rgb_colors() {
- assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xa0, b: 0xf0 }));
- assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
- assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
- assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
+ assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xA0, b: 0xF0 }));
+ assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
+ assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
+ assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
}
#[test]
@@ -2228,11 +2270,9 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
- assert_eq!(handler.color, Some(Rgb { r: 0xf0, g: 0xf0, b: 0xf0 }));
+ assert_eq!(handler.color, Some(Rgb { r: 0xF0, g: 0xF0, b: 0xF0 }));
}
#[test]
@@ -2242,9 +2282,7 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert_eq!(handler.reset_colors, vec![1]);
}
@@ -2256,9 +2294,7 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
let expected: Vec<usize> = (0..256).collect();
assert_eq!(handler.reset_colors, expected);
@@ -2271,30 +2307,148 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
let expected: Vec<usize> = (0..256).collect();
assert_eq!(handler.reset_colors, expected);
}
#[test]
+ fn partial_sync_updates() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Start synchronized update.
+
+ parser.advance(&mut handler, b"\x1b[?20");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ parser.advance(&mut handler, b"26h");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Dispatch some data.
+
+ parser.advance(&mut handler, b"random \x1b[31m stuff");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Extend synchronized update.
+
+ parser.advance(&mut handler, b"\x1b[?20");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ parser.advance(&mut handler, b"26h");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 2);
+ assert!(handler.attr.is_none());
+
+ // Terminate synchronized update.
+
+ parser.advance(&mut handler, b"\x1b[?20");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 2);
+ assert!(handler.attr.is_none());
+
+ parser.advance(&mut handler, b"26l");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_some());
+ }
+
+ #[test]
+ fn sync_bursts_buffer() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Repeat test twice to ensure internal state is reset properly.
+ for _ in 0..2 {
+ // Start synchronized update.
+ parser.advance(&mut handler, b"\x1b[?2026h");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Ensure sync works.
+ parser.advance(&mut handler, b"\x1b[31m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Exceed sync buffer dimensions.
+ parser.advance(&mut handler, "a".repeat(SYNC_BUFFER_SIZE).as_bytes());
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.take().is_some());
+
+ // Ensure new events are dispatched directly.
+ parser.advance(&mut handler, b"\x1b[31m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.take().is_some());
+ }
+ }
+
+ #[test]
+ fn mixed_sync_escape() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Start synchronized update with immediate SGR.
+ parser.advance(&mut handler, b"\x1b[?2026h\x1b[31m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Terminate synchronized update and check for SGR.
+ parser.advance(&mut handler, b"\x1b[?2026l");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_some());
+ }
+
+ #[test]
+ fn sync_bsu_with_esu() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Start synchronized update with immediate SGR.
+ parser.advance(&mut handler, b"\x1b[?2026h\x1b[1m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Terminate synchronized update, but immediately start a new one.
+ parser.advance(&mut handler, b"\x1b[?2026l\x1b[?2026h\x1b[4m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 2);
+ assert_eq!(handler.attr.take(), Some(Attr::Bold));
+
+ // Terminate again, expecting one buffered SGR.
+ parser.advance(&mut handler, b"\x1b[?2026l");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert_eq!(handler.attr.take(), Some(Attr::Underline));
+ }
+
+ #[test]
#[cfg(not(feature = "no_std"))]
fn contrast() {
- let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff };
+ let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF };
let rgb2 = Rgb { r: 0x00, g: 0x00, b: 0x00 };
assert!((rgb1.contrast(rgb2) - 21.).abs() < f64::EPSILON);
- let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff };
+ let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF };
assert!((rgb1.contrast(rgb1) - 1.).abs() < f64::EPSILON);
- let rgb1 = Rgb { r: 0xff, g: 0x00, b: 0xff };
- let rgb2 = Rgb { r: 0x00, g: 0xff, b: 0x00 };
+ let rgb1 = Rgb { r: 0xFF, g: 0x00, b: 0xFF };
+ let rgb2 = Rgb { r: 0x00, g: 0xFF, b: 0x00 };
assert!((rgb1.contrast(rgb2) - 2.285_543_608_124_253_3).abs() < f64::EPSILON);
let rgb1 = Rgb { r: 0x12, g: 0x34, b: 0x56 };
- let rgb2 = Rgb { r: 0xfe, g: 0xdc, b: 0xba };
+ let rgb2 = Rgb { r: 0xFE, g: 0xDC, b: 0xBA };
assert!((rgb1.contrast(rgb2) - 9.786_558_997_257_74).abs() < f64::EPSILON);
}
}
diff --git a/src/definitions.rs b/src/definitions.rs
index 568a8a8..694c783 100644
--- a/src/definitions.rs
+++ b/src/definitions.rs
@@ -2,54 +2,53 @@ use core::mem;
#[allow(dead_code)]
#[repr(u8)]
-#[derive(Debug, Default, Copy, Clone)]
+#[derive(PartialEq, Eq, Debug, Default, Copy, Clone)]
pub enum State {
- Anywhere = 0,
- CsiEntry = 1,
- CsiIgnore = 2,
- CsiIntermediate = 3,
- CsiParam = 4,
- DcsEntry = 5,
- DcsIgnore = 6,
- DcsIntermediate = 7,
- DcsParam = 8,
- DcsPassthrough = 9,
- Escape = 10,
- EscapeIntermediate = 11,
+ CsiEntry,
+ CsiIgnore,
+ CsiIntermediate,
+ CsiParam,
+ DcsEntry,
+ DcsIgnore,
+ DcsIntermediate,
+ DcsParam,
+ DcsPassthrough,
+ Escape,
+ EscapeIntermediate,
+ OscString,
+ SosPmApcString,
+ Anywhere,
#[default]
- Ground = 12,
- OscString = 13,
- SosPmApcString = 14,
- Utf8 = 15,
+ Ground,
}
+// NOTE: Removing the unused actions prefixed with `_` will reduce performance.
#[allow(dead_code)]
#[repr(u8)]
-#[derive(Debug, Clone, Copy)]
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum Action {
- None = 0,
- Clear = 1,
- Collect = 2,
- CsiDispatch = 3,
- EscDispatch = 4,
- Execute = 5,
- Hook = 6,
- Ignore = 7,
- OscEnd = 8,
- OscPut = 9,
- OscStart = 10,
- Param = 11,
- Print = 12,
- Put = 13,
- Unhook = 14,
- BeginUtf8 = 15,
+ None,
+ _Clear,
+ Collect,
+ CsiDispatch,
+ EscDispatch,
+ Execute,
+ _Hook,
+ _Ignore,
+ _OscEnd,
+ OscPut,
+ _OscStart,
+ Param,
+ _Print,
+ Put,
+ _Unhook,
}
/// Unpack a u8 into a State and Action
///
-/// The implementation of this assumes that there are *precisely* 16 variants for both Action and
-/// State. Furthermore, it assumes that the enums are tag-only; that is, there is no data in any
-/// variant.
+/// The implementation of this assumes that the low and high nibbles of `delta`
+/// hold valid discriminants of State and Action respectively. Furthermore, it
+/// assumes that the enums are tag-only; that is, there is no data in any variant.
///
/// Bad things will happen if those invariants are violated.
#[inline(always)]
@@ -57,7 +56,7 @@ pub fn unpack(delta: u8) -> (State, Action) {
unsafe {
(
// State is stored in bottom 4 bits
- mem::transmute::<u8, State>(delta & 0x0f),
+ mem::transmute::<u8, State>(delta & 0x0F),
// Action is stored in top 4 bits
mem::transmute::<u8, Action>(delta >> 4),
)
@@ -75,37 +74,26 @@ mod tests {
#[test]
fn unpack_state_action() {
- match unpack(0xee) {
- (State::SosPmApcString, Action::Unhook) => (),
+ match unpack(0xEE) {
+ (State::Ground, Action::_Unhook) => (),
_ => panic!("unpack failed"),
}
- match unpack(0x0f) {
- (State::Utf8, Action::None) => (),
+ match unpack(0x0E) {
+ (State::Ground, Action::None) => (),
_ => panic!("unpack failed"),
}
- match unpack(0xff) {
- (State::Utf8, Action::BeginUtf8) => (),
+ match unpack(0xE0) {
+ (State::CsiEntry, Action::_Unhook) => (),
_ => panic!("unpack failed"),
}
}
#[test]
fn pack_state_action() {
- match unpack(0xee) {
- (State::SosPmApcString, Action::Unhook) => (),
- _ => panic!("unpack failed"),
- }
-
- match unpack(0x0f) {
- (State::Utf8, Action::None) => (),
- _ => panic!("unpack failed"),
- }
-
- match unpack(0xff) {
- (State::Utf8, Action::BeginUtf8) => (),
- _ => panic!("unpack failed"),
- }
+ assert_eq!(pack(State::Ground, Action::_Unhook), 0xEE);
+ assert_eq!(pack(State::Ground, Action::None), 0x0E);
+ assert_eq!(pack(State::CsiEntry, Action::_Unhook), 0xE0);
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 0f12902..3c2f863 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,44 +1,39 @@
//! Parser for implementing virtual terminal emulators
//!
-//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
-//! state machine]. The state machine doesn't assign meaning to the parsed data
-//! and is thus not itself sufficient for writing a terminal emulator. Instead,
-//! it is expected that an implementation of [`Perform`] is provided which does
+//! [`Parser`] is implemented according to [Paul Williams' ANSI parser state
+//! machine]. The state machine doesn't assign meaning to the parsed data and is
+//! thus not itself sufficient for writing a terminal emulator. Instead, it is
+//! expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the book
//! keeping, and the [`Perform`] gets to simply handle actions.
//!
//! # Examples
//!
-//! For an example of using the [`Parser`] please see the examples folder. The example included
-//! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to
-//! pipe `vim` into it
+//! For an example of using the [`Parser`] please see the examples folder. The
+//! example included there simply logs all the actions [`Perform`] does. One
+//! quick way to see it in action is to pipe `printf` into it
//!
//! ```sh
-//! cargo build --release --example parselog
-//! vim | target/release/examples/parselog
+//! printf '\x1b[31mExample' | cargo run --example parselog
//! ```
//!
-//! Just type `:q` to exit.
-//!
//! # Differences from original state machine description
//!
//! * UTF-8 Support for Input
//! * OSC Strings can be terminated by 0x07
-//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
-//! all states.
+//! * Only supports 7-bit codes
//!
//! [`Parser`]: struct.Parser.html
//! [`Perform`]: trait.Perform.html
//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)]
-#![cfg_attr(all(feature = "nightly", test), feature(test))]
#![cfg_attr(feature = "no_std", no_std)]
use core::mem::MaybeUninit;
+use core::str;
#[cfg(feature = "no_std")]
use arrayvec::ArrayVec;
-use utf8parse as utf8;
mod definitions;
mod params;
@@ -46,28 +41,13 @@ mod table;
#[cfg(feature = "ansi")]
pub mod ansi;
-pub use params::{Params, ParamsIter};
-
use definitions::{unpack, Action, State};
+pub use params::{Params, ParamsIter};
const MAX_INTERMEDIATES: usize = 2;
const MAX_OSC_PARAMS: usize = 16;
const MAX_OSC_RAW: usize = 1024;
-struct VtUtf8Receiver<'a, P: Perform>(&'a mut P, &'a mut State);
-
-impl<P: Perform> utf8::Receiver for VtUtf8Receiver<'_, P> {
- fn codepoint(&mut self, c: char) {
- self.0.print(c);
- *self.1 = State::Ground;
- }
-
- fn invalid_sequence(&mut self) {
- self.0.print('�');
- *self.1 = State::Ground;
- }
-}
-
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// [`Perform`]: trait.Perform.html
@@ -88,7 +68,8 @@ pub struct Parser<const OSC_RAW_BUF_SIZE: usize = MAX_OSC_RAW> {
osc_params: [(usize, usize); MAX_OSC_PARAMS],
osc_num_params: usize,
ignoring: bool,
- utf8_parser: utf8::Parser,
+ partial_utf8: [u8; 4],
+ partial_utf8_len: usize,
}
impl Parser {
@@ -99,7 +80,8 @@ impl Parser {
}
impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
- /// Create a new Parser with a custom size for the Operating System Command buffer.
+ /// Create a new Parser with a custom size for the Operating System Command
+ /// buffer.
///
/// Call with a const-generic param on `Parser`, like:
///
@@ -121,41 +103,74 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
&self.intermediates[..self.intermediate_idx]
}
- /// Advance the parser state
+ /// Advance the parser state.
///
- /// Requires a [`Perform`] in case `byte` triggers an action
+ /// Requires a [`Perform`] implementation to handle the triggered actions.
///
/// [`Perform`]: trait.Perform.html
#[inline]
- pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
- // Utf8 characters are handled out-of-band.
- if let State::Utf8 = self.state {
- self.process_utf8(performer, byte);
- return;
- }
-
- // Handle state changes in the anywhere state before evaluating changes
- // for current state.
- let mut change = table::STATE_CHANGES[State::Anywhere as usize][byte as usize];
+ pub fn advance<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) {
+ let mut i = 0;
- if change == 0 {
- change = table::STATE_CHANGES[self.state as usize][byte as usize];
+ // Handle partial codepoints from previous calls to `advance`.
+ if self.partial_utf8_len > 0 {
+ i += self.advance_partial_utf8(performer, bytes);
}
- // Unpack into a state and action
- let (state, action) = unpack(change);
+ while i != bytes.len() {
+ match self.state {
+ State::Ground => i += self.advance_ground(performer, &bytes[i..]),
+ _ => {
+ let byte = bytes[i];
+ let change = table::STATE_CHANGES[self.state as usize][byte as usize];
+ let (state, action) = unpack(change);
- self.perform_state_change(performer, state, action, byte);
+ self.perform_state_change(performer, state, action, byte);
+
+ i += 1;
+ },
+ }
+ }
}
+ /// Partially advance the parser state.
+ ///
+ /// This is equivalent to [`Self::advance`], but stops when
+ /// [`Perform::terminated`] is true after reading a byte.
+ ///
+ /// Returns the number of bytes read before termination.
+ ///
+ /// See [`Self::advance`] for more details.
#[inline]
- fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
- where
- P: Perform,
- {
- let mut receiver = VtUtf8Receiver(performer, &mut self.state);
- let utf8_parser = &mut self.utf8_parser;
- utf8_parser.advance(&mut receiver, byte);
+ #[must_use = "Returned value should be used to processs the remaining bytes"]
+ pub fn advance_until_terminated<P: Perform>(
+ &mut self,
+ performer: &mut P,
+ bytes: &[u8],
+ ) -> usize {
+ let mut i = 0;
+
+ // Handle partial codepoints from previous calls to `advance`.
+ if self.partial_utf8_len != 0 {
+ i += self.advance_partial_utf8(performer, bytes);
+ }
+
+ while i != bytes.len() && !performer.terminated() {
+ match self.state {
+ State::Ground => i += self.advance_ground(performer, &bytes[i..]),
+ _ => {
+ let byte = bytes[i];
+ let change = table::STATE_CHANGES[self.state as usize][byte as usize];
+ let (state, action) = unpack(change);
+
+ self.perform_state_change(performer, state, action, byte);
+
+ i += 1;
+ },
+ }
+ }
+
+ i
}
#[inline]
@@ -163,93 +178,75 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
where
P: Perform,
{
- macro_rules! maybe_action {
- ($action:expr, $arg:expr) => {
- match $action {
- Action::None => (),
- action => {
- self.perform_action(performer, action, $arg);
- },
- }
- };
+ if state == State::Anywhere {
+ self.perform_action(performer, action, byte);
+ return;
}
- match state {
- State::Anywhere => {
- // Just run the action
- self.perform_action(performer, action, byte);
- },
- state => {
- match self.state {
- State::DcsPassthrough => {
- self.perform_action(performer, Action::Unhook, byte);
- },
- State::OscString => {
- self.perform_action(performer, Action::OscEnd, byte);
- },
- _ => (),
- }
+ match self.state {
+ State::DcsPassthrough => performer.unhook(),
+ State::OscString => {
+ let param_idx = self.osc_num_params;
+ let idx = self.osc_raw.len();
- maybe_action!(action, byte);
+ match param_idx {
+ // Finish last parameter if not already maxed
+ MAX_OSC_PARAMS => (),
- match state {
- State::CsiEntry | State::DcsEntry | State::Escape => {
- self.perform_action(performer, Action::Clear, byte);
- },
- State::DcsPassthrough => {
- self.perform_action(performer, Action::Hook, byte);
+ // First param is special - 0 to current byte index
+ 0 => {
+ self.osc_params[param_idx] = (0, idx);
+ self.osc_num_params += 1;
},
- State::OscString => {
- self.perform_action(performer, Action::OscStart, byte);
+
+ // All other params depend on previous indexing
+ _ => {
+ let prev = self.osc_params[param_idx - 1];
+ let begin = prev.1;
+ self.osc_params[param_idx] = (begin, idx);
+ self.osc_num_params += 1;
},
- _ => (),
}
-
- // Assume the new state
- self.state = state;
+ self.osc_dispatch(performer, byte);
},
+ _ => (),
}
- }
- /// Separate method for osc_dispatch that borrows self as read-only
- ///
- /// The aliasing is needed here for multiple slices into self.osc_raw
- #[inline]
- fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
- let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
- unsafe { MaybeUninit::uninit().assume_init() };
+ if action == Action::None {
+ match state {
+ State::CsiEntry | State::DcsEntry | State::Escape => self.reset_params(),
+ State::DcsPassthrough => {
+ if self.params.is_full() {
+ self.ignoring = true;
+ } else {
+ self.params.push(self.param);
+ }
- for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
- let indices = self.osc_params[i];
- *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
+ performer.hook(
+ self.params(),
+ self.intermediates(),
+ self.ignoring,
+ byte as char,
+ );
+ },
+ State::OscString => {
+ self.osc_raw.clear();
+ self.osc_num_params = 0;
+ },
+ _ => (),
+ }
+ } else {
+ self.perform_action(performer, action, byte);
}
- unsafe {
- let num_params = self.osc_num_params;
- let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
- performer.osc_dispatch(&*params, byte == 0x07);
- }
+ self.state = state;
}
#[inline]
fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
match action {
- Action::Print => performer.print(byte as char),
Action::Execute => performer.execute(byte),
- Action::Hook => {
- if self.params.is_full() {
- self.ignoring = true;
- } else {
- self.params.push(self.param);
- }
-
- performer.hook(self.params(), self.intermediates(), self.ignoring, byte as char);
- },
Action::Put => performer.put(byte),
- Action::OscStart => {
- self.osc_raw.clear();
- self.osc_num_params = 0;
- },
Action::OscPut => {
#[cfg(feature = "no_std")]
{
@@ -285,31 +282,6 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
self.osc_raw.push(byte);
}
},
- Action::OscEnd => {
- let param_idx = self.osc_num_params;
- let idx = self.osc_raw.len();
-
- match param_idx {
- // Finish last parameter if not already maxed
- MAX_OSC_PARAMS => (),
-
- // First param is special - 0 to current byte index
- 0 => {
- self.osc_params[param_idx] = (0, idx);
- self.osc_num_params += 1;
- },
-
- // All other params depend on previous indexing
- _ => {
- let prev = self.osc_params[param_idx - 1];
- let begin = prev.1;
- self.osc_params[param_idx] = (begin, idx);
- self.osc_num_params += 1;
- },
- }
- self.osc_dispatch(performer, byte);
- },
- Action::Unhook => performer.unhook(),
Action::CsiDispatch => {
if self.params.is_full() {
self.ignoring = true;
@@ -341,37 +313,203 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
return;
}
- if byte == b';' {
- self.params.push(self.param);
- self.param = 0;
- } else if byte == b':' {
- self.params.extend(self.param);
- self.param = 0;
- } else {
- // Continue collecting bytes into param
- self.param = self.param.saturating_mul(10);
- self.param = self.param.saturating_add((byte - b'0') as u16);
+ match byte {
+ b';' => {
+ self.params.push(self.param);
+ self.param = 0;
+ },
+ b':' => {
+ self.params.extend(self.param);
+ self.param = 0;
+ },
+ _ => {
+ // Continue collecting bytes into param
+ self.param = self.param.saturating_mul(10);
+ self.param = self.param.saturating_add((byte - b'0') as u16);
+ },
}
},
- Action::Clear => {
- // Reset everything on ESC/CSI/DCS entry
- self.intermediate_idx = 0;
- self.ignoring = false;
- self.param = 0;
+ _ => (),
+ }
+ }
+
+ /// Reset escape sequence parameters and intermediates.
+ #[inline]
+ fn reset_params(&mut self) {
+ self.intermediate_idx = 0;
+ self.ignoring = false;
+ self.param = 0;
+
+ self.params.clear();
+ }
+
+ /// Separate method for osc_dispatch that borrows self as read-only
+ ///
+ /// The aliasing is needed here for multiple slices into self.osc_raw
+ #[inline]
+ fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
+ let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
+ unsafe { MaybeUninit::uninit().assume_init() };
+
+ for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
+ let indices = self.osc_params[i];
+ *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
+ }
+
+ unsafe {
+ let num_params = self.osc_num_params;
+ let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
+ performer.osc_dispatch(&*params, byte == 0x07);
+ }
+ }
- self.params.clear();
+ /// Advance the parser state from ground.
+ ///
+ /// The ground state is handled separately since it can only be left using
+ /// the escape character (`\x1b`). This allows more efficient parsing by
+ /// using SIMD search with [`memchr`].
+ #[inline]
+ fn advance_ground<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) -> usize {
+ // Find the next escape character.
+ let num_bytes = bytes.len();
+ let plain_chars = memchr::memchr(0x1B, bytes).unwrap_or(num_bytes);
+
+ // If the next character is ESC, just process it and short-circuit.
+ if plain_chars == 0 {
+ self.state = State::Escape;
+ self.reset_params();
+ return 1;
+ }
+
+ match str::from_utf8(&bytes[..plain_chars]) {
+ Ok(parsed) => {
+ Self::ground_dispatch(performer, parsed);
+ let mut processed = plain_chars;
+
+ // If there's another character, it must be escape so process it directly.
+ if processed < num_bytes {
+ self.state = State::Escape;
+ self.reset_params();
+ processed += 1;
+ }
+
+ processed
+ },
+ // Handle invalid and partial utf8.
+ Err(err) => {
+ // Dispatch all the valid bytes.
+ let valid_bytes = err.valid_up_to();
+ let parsed = unsafe { str::from_utf8_unchecked(&bytes[..valid_bytes]) };
+ Self::ground_dispatch(performer, parsed);
+
+ match err.error_len() {
+ Some(len) => {
+ // Execute C1 escapes or emit replacement character.
+ if len == 1 && bytes[valid_bytes] <= 0x9F {
+ performer.execute(bytes[valid_bytes]);
+ } else {
+ performer.print('�');
+ }
+
+ // Restart processing after the invalid bytes.
+ //
+ // While we could theoretically try to just re-parse
+ // `bytes[valid_bytes + len..plain_chars]`, it's easier
+ // to just skip it and invalid utf8 is pretty rare anyway.
+ valid_bytes + len
+ },
+ None => {
+ if plain_chars < num_bytes {
+ // Process bytes cut off by escape.
+ performer.print('�');
+ self.state = State::Escape;
+ self.reset_params();
+ plain_chars + 1
+ } else {
+ // Process bytes cut off by the buffer end.
+ let extra_bytes = num_bytes - valid_bytes;
+ let partial_len = self.partial_utf8_len + extra_bytes;
+ self.partial_utf8[self.partial_utf8_len..partial_len]
+ .copy_from_slice(&bytes[valid_bytes..valid_bytes + extra_bytes]);
+ self.partial_utf8_len = partial_len;
+ num_bytes
+ }
+ },
+ }
+ },
+ }
+ }
+
+ /// Advance the parser while processing a partial utf8 codepoint.
+ #[inline]
+ fn advance_partial_utf8<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) -> usize {
+ // Try to copy up to 3 more bytes, to ensure the codepoint is complete.
+ let old_bytes = self.partial_utf8_len;
+ let to_copy = bytes.len().min(self.partial_utf8.len() - old_bytes);
+ self.partial_utf8[old_bytes..old_bytes + to_copy].copy_from_slice(&bytes[..to_copy]);
+ self.partial_utf8_len += to_copy;
+
+ // Parse the unicode character.
+ match str::from_utf8(&self.partial_utf8[..self.partial_utf8_len]) {
+ // If the entire buffer is valid, use the first character and continue parsing.
+ Ok(parsed) => {
+ let c = unsafe { parsed.chars().next().unwrap_unchecked() };
+ performer.print(c);
+
+ self.partial_utf8_len = 0;
+ c.len_utf8() - old_bytes
+ },
+ Err(err) => {
+ match err.error_len() {
+ // If the partial character was also invalid, emit the replacement
+ // character.
+ Some(invalid_len) => {
+ performer.print('�');
+
+ self.partial_utf8_len = 0;
+ invalid_len - old_bytes
+ },
+ None => {
+ // If we have any valid bytes, that means we partially copied another
+ // utf8 character into `partial_utf8`. Since we only care about the
+ // first character, we just ignore the rest.
+ let valid_bytes = err.valid_up_to();
+ if valid_bytes > 0 {
+ let c = unsafe {
+ let parsed =
+ str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
+ parsed.chars().next().unwrap_unchecked()
+ };
+ performer.print(c);
+
+ self.partial_utf8_len = 0;
+ valid_bytes - old_bytes
+ } else {
+ // If the character still isn't complete, wait for more data.
+ bytes.len()
+ }
+ },
+ }
},
- Action::BeginUtf8 => self.process_utf8(performer, byte),
- Action::Ignore => (),
- Action::None => (),
+ }
+ }
+
+ /// Handle ground dispatch of print/execute for all characters in a string.
+ #[inline]
+ fn ground_dispatch<P: Perform>(performer: &mut P, text: &str) {
+ for c in text.chars() {
+ match c {
+ '\x00'..='\x1f' | '\u{80}'..='\u{9f}' => performer.execute(c as u8),
+ _ => performer.print(c),
+ }
}
}
}
/// Performs actions requested by the Parser
///
-/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
-/// movement, or simply printing characters to the screen.
+/// Actions in this case mean, for example, handling a CSI escape sequence
+/// describing cursor movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
@@ -385,19 +523,21 @@ pub trait Perform {
/// Execute a C0 or C1 control function.
fn execute(&mut self, _byte: u8) {}
- /// Invoked when a final character arrives in first part of device control string.
+ /// Invoked when a final character arrives in first part of device control
+ /// string.
///
- /// The control function should be determined from the private marker, final character, and
- /// execute with a parameter list. A handler should be selected for remaining characters in the
- /// string; the handler function should subsequently be called by `put` for every character in
+ /// The control function should be determined from the private marker, final
+ /// character, and execute with a parameter list. A handler should be
+ /// selected for remaining characters in the string; the handler
+ /// function should subsequently be called by `put` for every character in
/// the control string.
///
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: char) {}
- /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
- /// will also be passed to the handler.
+ /// Pass bytes as part of a device control string to the handler chosen in
+ /// `hook`. C0 controls will also be passed to the handler.
fn put(&mut self, _byte: u8) {}
/// Called when a device control string is terminated.
@@ -411,9 +551,9 @@ pub trait Perform {
/// A final character has arrived for a CSI sequence
///
- /// The `ignore` flag indicates that either more than two intermediates arrived
- /// or the number of parameters exceeded the maximum supported length,
- /// and subsequent characters were ignored.
+ /// The `ignore` flag indicates that either more than two intermediates
+ /// arrived or the number of parameters exceeded the maximum supported
+ /// length, and subsequent characters were ignored.
fn csi_dispatch(
&mut self,
_params: &Params,
@@ -428,6 +568,19 @@ pub trait Perform {
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
+
+ /// Whether the parser should terminate prematurely.
+ ///
+ /// This can be used in conjunction with
+ /// [`Parser::advance_until_terminated`] to terminate the parser after
+ /// receiving certain escape sequences like synchronized updates.
+ ///
+ /// This is checked after every parsed byte, so no expensive computation
+ /// should take place in this function.
+ #[inline(always)]
+ fn terminated(&self) -> bool {
+ false
+ }
}
#[cfg(all(test, feature = "no_std"))]
@@ -436,12 +589,12 @@ extern crate std;
#[cfg(test)]
mod tests {
- use super::*;
-
use std::vec::Vec;
- static OSC_BYTES: &[u8] = &[
- 0x1b, 0x5d, // Begin OSC
+ use super::*;
+
+ const OSC_BYTES: &[u8] = &[
+ 0x1B, 0x5D, // Begin OSC
b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd',
b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o', b'd', b'e', b'/', b'a', b'l', b'a',
b'c', b'r', b'i', b't', b't', b'y', 0x07, // End OSC
@@ -459,6 +612,8 @@ mod tests {
Esc(Vec<u8>, bool, u8),
DcsHook(Vec<Vec<u16>>, Vec<u8>, bool, char),
DcsPut(u8),
+ Print(char),
+ Execute(u8),
DcsUnhook,
}
@@ -492,6 +647,14 @@ mod tests {
fn unhook(&mut self) {
self.dispatched.push(Sequence::DcsUnhook);
}
+
+ fn print(&mut self, c: char) {
+ self.dispatched.push(Sequence::Print(c));
+ }
+
+ fn execute(&mut self, byte: u8) {
+ self.dispatched.push(Sequence::Execute(byte));
+ }
}
#[test]
@@ -499,9 +662,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in OSC_BYTES {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, OSC_BYTES);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -519,9 +680,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in &[0x1b, 0x5d, 0x07] {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, &[0x1B, 0x5D, 0x07]);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -537,9 +696,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -553,13 +710,11 @@ mod tests {
#[test]
fn osc_bell_terminated() {
- static INPUT: &[u8] = b"\x1b]11;ff/00/ff\x07";
+ const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x07";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -570,13 +725,11 @@ mod tests {
#[test]
fn osc_c0_st_terminated() {
- static INPUT: &[u8] = b"\x1b]11;ff/00/ff\x1b\\";
+ const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x1b\\";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 2);
match &dispatcher.dispatched[0] {
@@ -587,37 +740,29 @@ mod tests {
#[test]
fn parse_osc_with_utf8_arguments() {
- static INPUT: &[u8] = &[
- 0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27, 0xc2, 0xaf, 0x5c,
- 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f, 0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26,
- 0x20, 0x73, 0x6c, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07,
+ const INPUT: &[u8] = &[
+ 0x0D, 0x1B, 0x5D, 0x32, 0x3B, 0x65, 0x63, 0x68, 0x6F, 0x20, 0x27, 0xC2, 0xAF, 0x5C,
+ 0x5F, 0x28, 0xE3, 0x83, 0x84, 0x29, 0x5F, 0x2F, 0xC2, 0xAF, 0x27, 0x20, 0x26, 0x26,
+ 0x20, 0x73, 0x6C, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07,
];
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
- assert_eq!(dispatcher.dispatched.len(), 1);
- match &dispatcher.dispatched[0] {
- Sequence::Osc(params, _) => {
- assert_eq!(params[0], &[b'2']);
- assert_eq!(params[1], &INPUT[5..(INPUT.len() - 1)]);
- },
- _ => panic!("expected osc sequence"),
- }
+ assert_eq!(dispatcher.dispatched[0], Sequence::Execute(b'\r'));
+ let osc_data = INPUT[5..(INPUT.len() - 1)].into();
+ assert_eq!(dispatcher.dispatched[1], Sequence::Osc(vec![vec![b'2'], osc_data], true));
+ assert_eq!(dispatcher.dispatched.len(), 2);
}
#[test]
fn osc_containing_string_terminator() {
- static INPUT: &[u8] = b"\x1b]2;\xe6\x9c\xab\x1b\\";
+ const INPUT: &[u8] = b"\x1b]2;\xe6\x9c\xab\x1b\\";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 2);
match &dispatcher.dispatched[0] {
@@ -630,27 +775,21 @@ mod tests {
#[test]
fn exceed_max_buffer_size() {
- static NUM_BYTES: usize = MAX_OSC_RAW + 100;
- static INPUT_START: &[u8] = &[0x1b, b']', b'5', b'2', b';', b's'];
- static INPUT_END: &[u8] = &[b'\x07'];
+ const NUM_BYTES: usize = MAX_OSC_RAW + 100;
+ const INPUT_START: &[u8] = b"\x1b]52;s";
+ const INPUT_END: &[u8] = b"\x07";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
// Create valid OSC escape
- for byte in INPUT_START {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_START);
// Exceed max buffer size
- for _ in 0..NUM_BYTES {
- parser.advance(&mut dispatcher, b'a');
- }
+ parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]);
// Terminate escape for dispatch
- for byte in INPUT_END {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_END);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -679,9 +818,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -704,9 +841,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -723,9 +858,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in b"\x1b[4;m" {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, b"\x1b[4;m");
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -740,9 +873,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in b"\x1b[;4m" {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, b"\x1b[;4m");
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -754,35 +885,31 @@ mod tests {
#[test]
fn parse_long_csi_param() {
// The important part is the parameter, which is (i64::MAX + 1)
- static INPUT: &[u8] = b"\x1b[9223372036854775808m";
+ const INPUT: &[u8] = b"\x1b[9223372036854775808m";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
- Sequence::Csi(params, ..) => assert_eq!(params, &[[std::u16::MAX as u16]]),
+ Sequence::Csi(params, ..) => assert_eq!(params, &[[u16::MAX]]),
_ => panic!("expected csi sequence"),
}
}
#[test]
fn csi_reset() {
- static INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
+ const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
Sequence::Csi(params, intermediates, ignore, _) => {
- assert_eq!(intermediates, &[b'?']);
+ assert_eq!(intermediates, b"?");
assert_eq!(params, &[[1049]]);
assert!(!ignore);
},
@@ -792,13 +919,11 @@ mod tests {
#[test]
fn csi_subparameters() {
- static INPUT: &[u8] = b"\x1b[38:2:255:0:255;1m";
+ const INPUT: &[u8] = b"\x1b[38:2:255:0:255;1m";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -818,9 +943,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -835,18 +958,16 @@ mod tests {
#[test]
fn dcs_reset() {
- static INPUT: &[u8] = b"\x1b[3;1\x1bP1$tx\x9c";
+ const INPUT: &[u8] = b"\x1b[3;1\x1bP1$tx\x9c";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 3);
match &dispatcher.dispatched[0] {
Sequence::DcsHook(params, intermediates, ignore, _) => {
- assert_eq!(intermediates, &[b'$']);
+ assert_eq!(intermediates, b"$");
assert_eq!(params, &[[1]]);
assert!(!ignore);
},
@@ -858,13 +979,11 @@ mod tests {
#[test]
fn parse_dcs() {
- static INPUT: &[u8] = b"\x1bP0;1|17/ab\x9c";
+ const INPUT: &[u8] = b"\x1bP0;1|17/ab\x9c";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 7);
match &dispatcher.dispatched[0] {
@@ -882,35 +1001,31 @@ mod tests {
#[test]
fn intermediate_reset_on_dcs_exit() {
- static INPUT: &[u8] = b"\x1bP=1sZZZ\x1b+\x5c";
+ const INPUT: &[u8] = b"\x1bP=1sZZZ\x1b+\x5c";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 6);
match &dispatcher.dispatched[5] {
- Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, &[b'+']),
+ Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, b"+"),
_ => panic!("expected esc sequence"),
}
}
#[test]
fn esc_reset() {
- static INPUT: &[u8] = b"\x1b[3;1\x1b(A";
+ const INPUT: &[u8] = b"\x1b[3;1\x1b(A";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
Sequence::Esc(intermediates, ignore, byte) => {
- assert_eq!(intermediates, &[b'(']);
+ assert_eq!(intermediates, b"(");
assert_eq!(*byte, b'A');
assert!(!ignore);
},
@@ -919,14 +1034,25 @@ mod tests {
}
#[test]
+ fn esc_reset_intermediates() {
+ const INPUT: &[u8] = b"\x1b[?2004l\x1b#8";
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 2);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Csi(vec![vec![2004]], vec![63], false, 'l'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Esc(vec![35], false, 56));
+ }
+
+ #[test]
fn params_buffer_filled_with_subparam() {
- static INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b";
+ const INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -943,18 +1069,16 @@ mod tests {
#[cfg(feature = "no_std")]
#[test]
fn build_with_fixed_size() {
- static INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
+ const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
let mut dispatcher = Dispatcher::default();
let mut parser: Parser<30> = Parser::new_with_size();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
Sequence::Csi(params, intermediates, ignore, _) => {
- assert_eq!(intermediates, &[b'?']);
+ assert_eq!(intermediates, b"?");
assert_eq!(params, &[[1049]]);
assert!(!ignore);
},
@@ -966,27 +1090,21 @@ mod tests {
#[test]
fn exceed_fixed_osc_buffer_size() {
const OSC_BUFFER_SIZE: usize = 32;
- static NUM_BYTES: usize = OSC_BUFFER_SIZE + 100;
- static INPUT_START: &[u8] = b"\x1b]52;";
- static INPUT_END: &[u8] = b"\x07";
+ const NUM_BYTES: usize = OSC_BUFFER_SIZE + 100;
+ const INPUT_START: &[u8] = b"\x1b]52;";
+ const INPUT_END: &[u8] = b"\x07";
let mut dispatcher = Dispatcher::default();
let mut parser: Parser<OSC_BUFFER_SIZE> = Parser::new_with_size();
// Create valid OSC escape
- for byte in INPUT_START {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_START);
// Exceed max buffer size
- for _ in 0..NUM_BYTES {
- parser.advance(&mut dispatcher, b'a');
- }
+ parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]);
// Terminate escape for dispatch
- for byte in INPUT_END {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_END);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -1005,22 +1123,16 @@ mod tests {
#[cfg(feature = "no_std")]
#[test]
fn fixed_size_osc_containing_string_terminator() {
- static INPUT_START: &[u8] = b"\x1b]2;";
- static INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab";
- static INPUT_END: &[u8] = b"\x1b\\";
+ const INPUT_START: &[u8] = b"\x1b]2;";
+ const INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab";
+ const INPUT_END: &[u8] = b"\x1b\\";
let mut dispatcher = Dispatcher::default();
let mut parser: Parser<5> = Parser::new_with_size();
- for byte in INPUT_START {
- parser.advance(&mut dispatcher, *byte);
- }
- for byte in INPUT_MIDDLE {
- parser.advance(&mut dispatcher, *byte);
- }
- for byte in INPUT_END {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_START);
+ parser.advance(&mut dispatcher, INPUT_MIDDLE);
+ parser.advance(&mut dispatcher, INPUT_END);
assert_eq!(dispatcher.dispatched.len(), 2);
match &dispatcher.dispatched[0] {
@@ -1031,74 +1143,144 @@ mod tests {
_ => panic!("expected osc sequence"),
}
}
-}
-#[cfg(all(feature = "nightly", test))]
-mod bench {
- extern crate std;
- extern crate test;
+ #[test]
+ fn unicode() {
+ const INPUT: &[u8] = b"\xF0\x9F\x8E\x89_\xF0\x9F\xA6\x80\xF0\x9F\xA6\x80_\xF0\x9F\x8E\x89";
- use super::*;
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
- use test::{black_box, Bencher};
+ parser.advance(&mut dispatcher, INPUT);
- static VTE_DEMO: &[u8] = include_bytes!("../tests/demo.vte");
+ assert_eq!(dispatcher.dispatched.len(), 6);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('🎉'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('_'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('🦀'));
+ assert_eq!(dispatcher.dispatched[3], Sequence::Print('🦀'));
+ assert_eq!(dispatcher.dispatched[4], Sequence::Print('_'));
+ assert_eq!(dispatcher.dispatched[5], Sequence::Print('🎉'));
+ }
- struct BenchDispatcher;
- impl Perform for BenchDispatcher {
- fn print(&mut self, c: char) {
- black_box(c);
- }
+ #[test]
+ fn invalid_utf8() {
+ const INPUT: &[u8] = b"a\xEF\xBCb";
- fn execute(&mut self, byte: u8) {
- black_box(byte);
- }
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
- fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
- black_box((params, intermediates, ignore, c));
- }
+ parser.advance(&mut dispatcher, INPUT);
- fn put(&mut self, byte: u8) {
- black_box(byte);
- }
+ assert_eq!(dispatcher.dispatched.len(), 3);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('a'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('b'));
+ }
- fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) {
- black_box((params, bell_terminated));
- }
+ #[test]
+ fn partial_utf8() {
+ const INPUT: &[u8] = b"\xF0\x9F\x9A\x80";
- fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
- black_box((params, intermediates, ignore, c));
- }
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
- fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8) {
- black_box((intermediates, ignore, byte));
- }
+ parser.advance(&mut dispatcher, &INPUT[..1]);
+ parser.advance(&mut dispatcher, &INPUT[1..2]);
+ parser.advance(&mut dispatcher, &INPUT[2..3]);
+ parser.advance(&mut dispatcher, &INPUT[3..]);
+
+ assert_eq!(dispatcher.dispatched.len(), 1);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('🚀'));
}
- #[bench]
- fn testfile(b: &mut Bencher) {
- b.iter(|| {
- let mut dispatcher = BenchDispatcher;
- let mut parser = Parser::new();
+ #[test]
+ fn partial_utf8_separating_utf8() {
+ // This is different from the `partial_utf8` test since it has a multi-byte UTF8
+ // character after the partial UTF8 state, causing a partial byte to be present
+ // in the `partial_utf8` buffer after the 2-byte codepoint.
- for byte in VTE_DEMO {
- parser.advance(&mut dispatcher, *byte);
- }
- });
+ // "ĸ🎉"
+ const INPUT: &[u8] = b"\xC4\xB8\xF0\x9F\x8E\x89";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, &INPUT[..1]);
+ parser.advance(&mut dispatcher, &INPUT[1..]);
+
+ assert_eq!(dispatcher.dispatched.len(), 2);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('ĸ'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('🎉'));
}
- #[bench]
- fn state_changes(b: &mut Bencher) {
- let input = b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\";
- b.iter(|| {
- let mut dispatcher = BenchDispatcher;
- let mut parser = Parser::new();
+ #[test]
+ fn partial_invalid_utf8() {
+ const INPUT: &[u8] = b"a\xEF\xBCb";
- for _ in 0..1_000 {
- for byte in input {
- parser.advance(&mut dispatcher, *byte);
- }
- }
- });
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, &INPUT[..1]);
+ parser.advance(&mut dispatcher, &INPUT[1..2]);
+ parser.advance(&mut dispatcher, &INPUT[2..3]);
+ parser.advance(&mut dispatcher, &INPUT[3..]);
+
+ assert_eq!(dispatcher.dispatched.len(), 3);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('a'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('b'));
+ }
+
+ #[test]
+ fn partial_utf8_into_esc() {
+ const INPUT: &[u8] = b"\xD8\x1b012";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 4);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('�'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Esc(Vec::new(), false, b'0'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('1'));
+ assert_eq!(dispatcher.dispatched[3], Sequence::Print('2'));
+ }
+
+ #[test]
+ fn c1s() {
+ const INPUT: &[u8] = b"\x00\x1f\x80\x90\x98\x9b\x9c\x9d\x9e\x9fa";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 11);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Execute(31));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Execute(128));
+ assert_eq!(dispatcher.dispatched[3], Sequence::Execute(144));
+ assert_eq!(dispatcher.dispatched[4], Sequence::Execute(152));
+ assert_eq!(dispatcher.dispatched[5], Sequence::Execute(155));
+ assert_eq!(dispatcher.dispatched[6], Sequence::Execute(156));
+ assert_eq!(dispatcher.dispatched[7], Sequence::Execute(157));
+ assert_eq!(dispatcher.dispatched[8], Sequence::Execute(158));
+ assert_eq!(dispatcher.dispatched[9], Sequence::Execute(159));
+ assert_eq!(dispatcher.dispatched[10], Sequence::Print('a'));
+ }
+
+ #[test]
+ fn execute_anywhere() {
+ const INPUT: &[u8] = b"\x18\x1a";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 2);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0x18));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Execute(0x1A));
}
}
diff --git a/src/params.rs b/src/params.rs
index 608c040..967befb 100644
--- a/src/params.rs
+++ b/src/params.rs
@@ -8,8 +8,9 @@ pub(crate) const MAX_PARAMS: usize = 32;
pub struct Params {
/// Number of subparameters for each parameter.
///
- /// For each entry in the `params` slice, this stores the length of the param as number of
- /// subparams at the same index as the param in the `params` slice.
+ /// For each entry in the `params` slice, this stores the length of the
+ /// param as number of subparams at the same index as the param in the
+ /// `params` slice.
///
/// At the subparam positions the length will always be `0`.
subparams: [u8; MAX_PARAMS],
diff --git a/src/table.rs b/src/table.rs
index f2c0105..ac288e7 100644
--- a/src/table.rs
+++ b/src/table.rs
@@ -1,39 +1,20 @@
-/// This is the state change table. It's indexed first by current state and then by the next
-/// character in the pty stream.
-use crate::definitions::{pack, Action, State};
-
use vte_generate_state_changes::generate_state_changes;
+/// This is the state change table. It's indexed first by current state and then
+/// by the next character in the pty stream.
+use crate::definitions::{pack, Action, State};
+
// Generate state changes at compile-time
-pub static STATE_CHANGES: [[u8; 256]; 16] = state_changes();
+pub const STATE_CHANGES: [[u8; 256]; 13] = state_changes();
generate_state_changes!(state_changes, {
- Anywhere {
- 0x18 => (Ground, Execute),
- 0x1a => (Ground, Execute),
- 0x1b => (Escape, None),
- },
-
- Ground {
- 0x00..=0x17 => (Anywhere, Execute),
- 0x19 => (Anywhere, Execute),
- 0x1c..=0x1f => (Anywhere, Execute),
- 0x20..=0x7f => (Anywhere, Print),
- 0x80..=0x8f => (Anywhere, Execute),
- 0x91..=0x9a => (Anywhere, Execute),
- 0x9c => (Anywhere, Execute),
- // Beginning of UTF-8 2 byte sequence
- 0xc2..=0xdf => (Utf8, BeginUtf8),
- // Beginning of UTF-8 3 byte sequence
- 0xe0..=0xef => (Utf8, BeginUtf8),
- // Beginning of UTF-8 4 byte sequence
- 0xf0..=0xf4 => (Utf8, BeginUtf8),
- },
-
Escape {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x20..=0x2f => (EscapeIntermediate, Collect),
0x30..=0x4f => (Ground, EscDispatch),
0x51..=0x57 => (Ground, EscDispatch),
@@ -51,18 +32,24 @@ generate_state_changes!(state_changes, {
EscapeIntermediate {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x2f => (Anywhere, Collect),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x30..=0x7e => (Ground, EscDispatch),
},
CsiEntry {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x20..=0x2f => (CsiIntermediate, Collect),
0x30..=0x39 => (CsiParam, Param),
0x3a..=0x3b => (CsiParam, Param),
@@ -72,20 +59,26 @@ generate_state_changes!(state_changes, {
CsiIgnore {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
- 0x20..=0x3f => (Anywhere, Ignore),
- 0x7f => (Anywhere, Ignore),
+ 0x20..=0x3f => (Anywhere, None),
+ 0x7f => (Anywhere, None),
0x40..=0x7e => (Ground, None),
},
CsiParam {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
0x30..=0x39 => (Anywhere, Param),
0x3a..=0x3b => (Anywhere, Param),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x3c..=0x3f => (CsiIgnore, None),
0x20..=0x2f => (CsiIntermediate, Collect),
0x40..=0x7e => (Ground, CsiDispatch),
@@ -93,19 +86,25 @@ generate_state_changes!(state_changes, {
CsiIntermediate {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x2f => (Anywhere, Collect),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x30..=0x3f => (CsiIgnore, None),
0x40..=0x7e => (Ground, CsiDispatch),
},
DcsEntry {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
- 0x7f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
+ 0x7f => (Anywhere, None),
0x20..=0x2f => (DcsIntermediate, Collect),
0x30..=0x39 => (DcsParam, Param),
0x3a..=0x3b => (DcsParam, Param),
@@ -114,30 +113,39 @@ generate_state_changes!(state_changes, {
},
DcsIntermediate {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
0x20..=0x2f => (Anywhere, Collect),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x30..=0x3f => (DcsIgnore, None),
0x40..=0x7e => (DcsPassthrough, None),
},
DcsIgnore {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
- 0x20..=0x7f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
+ 0x20..=0x7f => (Anywhere, None),
0x9c => (Ground, None),
},
DcsParam {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
0x30..=0x39 => (Anywhere, Param),
0x3a..=0x3b => (Anywhere, Param),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x3c..=0x3f => (DcsIgnore, None),
0x20..=0x2f => (DcsIntermediate, Collect),
0x40..=0x7e => (DcsPassthrough, None),
@@ -145,27 +153,36 @@ generate_state_changes!(state_changes, {
DcsPassthrough {
0x00..=0x17 => (Anywhere, Put),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Put),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Put),
0x20..=0x7e => (Anywhere, Put),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x9c => (Ground, None),
},
SosPmApcString {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
- 0x20..=0x7f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
+ 0x20..=0x7f => (Anywhere, None),
0x9c => (Ground, None),
},
OscString {
- 0x00..=0x06 => (Anywhere, Ignore),
+ 0x00..=0x06 => (Anywhere, None),
0x07 => (Ground, None),
- 0x08..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
+ 0x08..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
0x20..=0xff => (Anywhere, OscPut),
}
});
diff --git a/utf8parse/Cargo.toml b/utf8parse/Cargo.toml
deleted file mode 100644
index 71ea44b..0000000
--- a/utf8parse/Cargo.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-[package]
-authors = ["Joe Wilm <joe@jwilm.com>", "Christian Duerr <contact@christianduerr.com>"]
-description = "Table-driven UTF-8 parser"
-documentation = "https://docs.rs/utf8parse/"
-repository = "https://github.com/alacritty/vte"
-keywords = ["utf8", "parse", "table"]
-categories = ["parsing", "no-std"]
-license = "Apache-2.0 OR MIT"
-version = "0.2.2"
-name = "utf8parse"
-edition = "2018"
-
-[features]
-nightly = []
-default = []
diff --git a/utf8parse/LICENSE-APACHE b/utf8parse/LICENSE-APACHE
deleted file mode 120000
index 965b606..0000000
--- a/utf8parse/LICENSE-APACHE
+++ /dev/null
@@ -1 +0,0 @@
-../LICENSE-APACHE \ No newline at end of file
diff --git a/utf8parse/LICENSE-MIT b/utf8parse/LICENSE-MIT
deleted file mode 120000
index 76219eb..0000000
--- a/utf8parse/LICENSE-MIT
+++ /dev/null
@@ -1 +0,0 @@
-../LICENSE-MIT \ No newline at end of file
diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs
deleted file mode 100644
index 093de81..0000000
--- a/utf8parse/src/lib.rs
+++ /dev/null
@@ -1,132 +0,0 @@
-//! A table-driven UTF-8 Parser
-//!
-//! This module implements a table-driven UTF-8 parser which should
-//! theoretically contain the minimal number of branches (1). The only branch is
-//! on the `Action` returned from unpacking a transition.
-#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)]
-#![cfg_attr(all(feature = "nightly", test), feature(test))]
-#![no_std]
-
-use core::char;
-
-mod types;
-
-use types::{Action, State};
-
-/// Handles codepoint and invalid sequence events from the parser.
-pub trait Receiver {
- /// Called whenever a codepoint is parsed successfully
- fn codepoint(&mut self, _: char);
-
- /// Called when an invalid_sequence is detected
- fn invalid_sequence(&mut self);
-}
-
-/// A parser for Utf8 Characters
-///
-/// Repeatedly call `advance` with bytes to emit Utf8 characters
-#[derive(Clone, Default, PartialEq, Eq, Debug)]
-pub struct Parser {
- point: u32,
- state: State,
-}
-
-/// Continuation bytes are masked with this value.
-const CONTINUATION_MASK: u8 = 0b0011_1111;
-
-impl Parser {
- /// Create a new Parser
- pub fn new() -> Parser {
- Parser { point: 0, state: State::Ground }
- }
-
- /// Advance the parser
- ///
- /// The provider receiver will be called whenever a codepoint is completed or an invalid
- /// sequence is detected.
- pub fn advance<R>(&mut self, receiver: &mut R, byte: u8)
- where
- R: Receiver,
- {
- let (state, action) = self.state.advance(byte);
- self.perform_action(receiver, byte, action);
- self.state = state;
- }
-
- fn perform_action<R>(&mut self, receiver: &mut R, byte: u8, action: Action)
- where
- R: Receiver,
- {
- match action {
- Action::InvalidSequence => {
- self.point = 0;
- receiver.invalid_sequence();
- },
- Action::EmitByte => {
- receiver.codepoint(byte as char);
- },
- Action::SetByte1 => {
- let point = self.point | ((byte & CONTINUATION_MASK) as u32);
- let c = unsafe { char::from_u32_unchecked(point) };
- self.point = 0;
-
- receiver.codepoint(c);
- },
- Action::SetByte2 => {
- self.point |= ((byte & CONTINUATION_MASK) as u32) << 6;
- },
- Action::SetByte2Top => {
- self.point |= ((byte & 0b0001_1111) as u32) << 6;
- },
- Action::SetByte3 => {
- self.point |= ((byte & CONTINUATION_MASK) as u32) << 12;
- },
- Action::SetByte3Top => {
- self.point |= ((byte & 0b0000_1111) as u32) << 12;
- },
- Action::SetByte4 => {
- self.point |= ((byte & 0b0000_0111) as u32) << 18;
- },
- }
- }
-}
-
-#[cfg(all(feature = "nightly", test))]
-mod benches {
- extern crate std;
- extern crate test;
-
- use super::{Parser, Receiver};
-
- use self::test::{black_box, Bencher};
-
- static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt");
-
- impl Receiver for () {
- fn codepoint(&mut self, c: char) {
- black_box(c);
- }
-
- fn invalid_sequence(&mut self) {}
- }
-
- #[bench]
- fn parse_bench_utf8_demo(b: &mut Bencher) {
- let mut parser = Parser::new();
-
- b.iter(|| {
- for byte in UTF8_DEMO {
- parser.advance(&mut (), *byte);
- }
- })
- }
-
- #[bench]
- fn std_string_parse_utf8(b: &mut Bencher) {
- b.iter(|| {
- for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() {
- black_box(c);
- }
- });
- }
-}
diff --git a/utf8parse/src/types.rs b/utf8parse/src/types.rs
deleted file mode 100644
index 8a52c67..0000000
--- a/utf8parse/src/types.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-//! Types supporting the UTF-8 parser
-
-/// Action to take when receiving a byte
-#[derive(Debug, Copy, Clone)]
-pub enum Action {
- /// Unexpected byte; sequence is invalid
- InvalidSequence = 0,
- /// Received valid 7-bit ASCII byte which can be directly emitted.
- EmitByte = 1,
- /// Set the bottom continuation byte
- SetByte1 = 2,
- /// Set the 2nd-from-last continuation byte
- SetByte2 = 3,
- /// Set the 2nd-from-last byte which is part of a two byte sequence
- SetByte2Top = 4,
- /// Set the 3rd-from-last continuation byte
- SetByte3 = 5,
- /// Set the 3rd-from-last byte which is part of a three byte sequence
- SetByte3Top = 6,
- /// Set the top byte of a four byte sequence.
- SetByte4 = 7,
-}
-
-/// States the parser can be in.
-///
-/// There is a state for each initial input of the 3 and 4 byte sequences since
-/// the following bytes are subject to different conditions than a tail byte.
-#[allow(non_camel_case_types)]
-#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
-pub enum State {
- /// Ground state; expect anything
- #[default]
- Ground = 0,
- /// 3 tail bytes
- Tail3 = 1,
- /// 2 tail bytes
- Tail2 = 2,
- /// 1 tail byte
- Tail1 = 3,
- /// UTF8-3 starting with E0
- U3_2_e0 = 4,
- /// UTF8-3 starting with ED
- U3_2_ed = 5,
- /// UTF8-4 starting with F0
- Utf8_4_3_f0 = 6,
- /// UTF8-4 starting with F4
- Utf8_4_3_f4 = 7,
-}
-
-impl State {
- /// Advance the parser state.
- ///
- /// This takes the current state and input byte into consideration, to determine the next state
- /// and any action that should be taken.
- #[inline]
- pub fn advance(self, byte: u8) -> (State, Action) {
- match self {
- State::Ground => match byte {
- 0x00..=0x7f => (State::Ground, Action::EmitByte),
- 0xc2..=0xdf => (State::Tail1, Action::SetByte2Top),
- 0xe0 => (State::U3_2_e0, Action::SetByte3Top),
- 0xe1..=0xec => (State::Tail2, Action::SetByte3Top),
- 0xed => (State::U3_2_ed, Action::SetByte3Top),
- 0xee..=0xef => (State::Tail2, Action::SetByte3Top),
- 0xf0 => (State::Utf8_4_3_f0, Action::SetByte4),
- 0xf1..=0xf3 => (State::Tail3, Action::SetByte4),
- 0xf4 => (State::Utf8_4_3_f4, Action::SetByte4),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::U3_2_e0 => match byte {
- 0xa0..=0xbf => (State::Tail1, Action::SetByte2),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::U3_2_ed => match byte {
- 0x80..=0x9f => (State::Tail1, Action::SetByte2),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Utf8_4_3_f0 => match byte {
- 0x90..=0xbf => (State::Tail2, Action::SetByte3),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Utf8_4_3_f4 => match byte {
- 0x80..=0x8f => (State::Tail2, Action::SetByte3),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Tail3 => match byte {
- 0x80..=0xbf => (State::Tail2, Action::SetByte3),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Tail2 => match byte {
- 0x80..=0xbf => (State::Tail1, Action::SetByte2),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Tail1 => match byte {
- 0x80..=0xbf => (State::Ground, Action::SetByte1),
- _ => (State::Ground, Action::InvalidSequence),
- },
- }
- }
-}
diff --git a/utf8parse/tests/UTF-8-demo.txt b/utf8parse/tests/UTF-8-demo.txt
deleted file mode 100644
index 4363f27..0000000
--- a/utf8parse/tests/UTF-8-demo.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-
-UTF-8 encoded sample plain-text file
-‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
-
-Markus Kuhn [ˈmaʳkʊs kuːn] <http://www.cl.cam.ac.uk/~mgk25/> — 2002-07-25
-
-
-The ASCII compatible UTF-8 encoding used in this plain-text file
-is defined in Unicode, ISO 10646-1, and RFC 2279.
-
-
-Using Unicode/UTF-8, you can write in emails and source code things such as
-
-Mathematics and sciences:
-
- ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫
- ⎪⎢⎜│a²+b³ ⎟⎥⎪
- ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪
- ⎪⎢⎜⎷ c₈ ⎟⎥⎪
- ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬
- ⎪⎢⎜ ∞ ⎟⎥⎪
- ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪
- ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪
- 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭
-
-Linguistics and dictionaries:
-
- ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn
- Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]
-
-APL:
-
- ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈
-
-Nicer typography in plain text files:
-
- ╔══════════════════════════════════════════╗
- ║ ║
- ║ • ‘single’ and “double” quotes ║
- ║ ║
- ║ • Curly apostrophes: “We’ve been here” ║
- ║ ║
- ║ • Latin-1 apostrophe and accents: '´` ║
- ║ ║
- ║ • ‚deutsche‘ „Anführungszeichen“ ║
- ║ ║
- ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║
- ║ ║
- ║ • ASCII safety test: 1lI|, 0OD, 8B ║
- ║ ╭─────────╮ ║
- ║ • the euro symbol: │ 14.95 € │ ║
- ║ ╰─────────╯ ║
- ╚══════════════════════════════════════════╝
-
-Combining characters:
-
- STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑
-
-Greek (in Polytonic):
-
- The Greek anthem:
-
- Σὲ γνωρίζω ἀπὸ τὴν κόψη
- τοῦ σπαθιοῦ τὴν τρομερή,
- σὲ γνωρίζω ἀπὸ τὴν ὄψη
- ποὺ μὲ βία μετράει τὴ γῆ.
-
- ᾿Απ᾿ τὰ κόκκαλα βγαλμένη
- τῶν ῾Ελλήνων τὰ ἱερά
- καὶ σὰν πρῶτα ἀνδρειωμένη
- χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά!
-
- From a speech of Demosthenes in the 4th century BC:
-
- Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι,
- ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς
- λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ
- τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿
- εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ
- πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν
- οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι,
- οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν
- ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον
- τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι
- γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν
- προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους
- σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ
- τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ
- τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς
- τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον.
-
- Δημοσθένους, Γ´ ᾿Ολυνθιακὸς
-
-Georgian:
-
- From a Unicode conference invitation:
-
- გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო
- კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს,
- ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს
- ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი,
- ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება
- ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში,
- ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში.
-
-Russian:
-
- From a Unicode conference invitation:
-
- Зарегистрируйтесь сейчас на Десятую Международную Конференцию по
- Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии.
- Конференция соберет широкий круг экспертов по вопросам глобального
- Интернета и Unicode, локализации и интернационализации, воплощению и
- применению Unicode в различных операционных системах и программных
- приложениях, шрифтах, верстке и многоязычных компьютерных системах.
-
-Thai (UCS Level 2):
-
- Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese
- classic 'San Gua'):
-
- [----------------------------|------------------------]
- ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่
- สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา
- ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา
- โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ
- เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ
- ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ
- พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้
- ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ
-
- (The above is a two-column text. If combining characters are handled
- correctly, the lines of the second column should be aligned with the
- | character above.)
-
-Ethiopian:
-
- Proverbs in the Amharic language:
-
- ሰማይ አይታረስ ንጉሥ አይከሰስ።
- ብላ ካለኝ እንደአባቴ በቆመጠኝ።
- ጌጥ ያለቤቱ ቁምጥና ነው።
- ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው።
- የአፍ ወለምታ በቅቤ አይታሽም።
- አይጥ በበላ ዳዋ ተመታ።
- ሲተረጉሙ ይደረግሙ።
- ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል።
- ድር ቢያብር አንበሳ ያስር።
- ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም።
- እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም።
- የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ።
- ሥራ ከመፍታት ልጄን ላፋታት።
- ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል።
- የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ።
- ተንጋሎ ቢተፉ ተመልሶ ባፉ።
- ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው።
- እግርህን በፍራሽህ ልክ ዘርጋ።
-
-Runes:
-
- ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
-
- (Old English, which transcribed into Latin reads 'He cwaeth that he
- bude thaem lande northweardum with tha Westsae.' and means 'He said
- that he lived in the northern land near the Western Sea.')
-
-Braille:
-
- ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌
-
- ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞
- ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎
- ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂
- ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙
- ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑
- ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲
-
- ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
-
- ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹
- ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞
- ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕
- ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹
- ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎
- ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎
- ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳
- ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞
- ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
-
- (The first couple of paragraphs of "A Christmas Carol" by Dickens)
-
-Compact font selection example text:
-
- ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
- abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ
- –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд
- ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა
-
-Greetings in various languages:
-
- Hello world, Καλημέρα κόσμε, コンニチハ
-
-Box drawing alignment tests: █
- ▉
- ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳
- ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳
- ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳
- ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳
- ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎
- ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏
- ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█
- ▝▀▘▙▄▟
diff --git a/utf8parse/tests/utf-8-demo.rs b/utf8parse/tests/utf-8-demo.rs
deleted file mode 100644
index 51df492..0000000
--- a/utf8parse/tests/utf-8-demo.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-use utf8parse::{Parser, Receiver};
-
-static UTF8_DEMO: &[u8] = include_bytes!("UTF-8-demo.txt");
-
-#[derive(Debug, PartialEq)]
-struct StringWrapper(String);
-
-impl Receiver for StringWrapper {
- fn codepoint(&mut self, c: char) {
- self.0.push(c);
- }
-
- fn invalid_sequence(&mut self) {}
-}
-
-#[test]
-fn utf8parse_test() {
- let mut parser = Parser::new();
-
- // utf8parse implementation
- let mut actual = StringWrapper(String::new());
-
- for byte in UTF8_DEMO {
- parser.advance(&mut actual, *byte)
- }
-
- // standard library implementation
- let expected = String::from_utf8_lossy(UTF8_DEMO);
-
- assert_eq!(actual.0, expected);
-}
diff --git a/vte_generate_state_changes/src/lib.rs b/vte_generate_state_changes/src/lib.rs
index b016518..ff8ea49 100644
--- a/vte_generate_state_changes/src/lib.rs
+++ b/vte_generate_state_changes/src/lib.rs
@@ -25,8 +25,8 @@ pub fn generate_state_changes(item: proc_macro::TokenStream) -> proc_macro::Toke
let assignments_stream = states_stream(&mut iter);
quote!(
- const fn #fn_name() -> [[u8; 256]; 16] {
- let mut state_changes = [[0; 256]; 16];
+ const fn #fn_name() -> [[u8; 256]; 13] {
+ let mut state_changes = [[0; 256]; 13];
#assignments_stream
@@ -71,7 +71,8 @@ fn state_entry_stream(iter: &mut Peekable<token_stream::IntoIter>) -> TokenStrea
tokens
}
-/// Generate the array assignment statement for a single byte->target mapping for one state.
+/// Generate the array assignment statement for a single byte->target mapping
+/// for one state.
fn change_stream(iter: &mut Peekable<token_stream::IntoIter>, state: &TokenTree) -> TokenStream {
// Start of input byte range
let start = next_usize(iter);
@@ -101,8 +102,6 @@ fn change_stream(iter: &mut Peekable<token_stream::IntoIter>, state: &TokenTree)
// Create a new entry for every byte in the range
for byte in start..=end {
- // TODO: Force adding `State::` and `Action::`?
- // TODO: Should we really use `pack` here without import?
tokens.extend(quote!(
state_changes[State::#state as usize][#byte] =
pack(State::#target_state, Action::#target_action);
@@ -148,7 +147,8 @@ fn expect_punct(iter: &mut impl Iterator<Item = TokenTree>, c: char) {
///
/// # Panics
///
-/// Panics if the next token is not a [`usize`] in hex or decimal literal format.
+/// Panics if the next token is not a [`usize`] in hex or decimal literal
+/// format.
fn next_usize(iter: &mut impl Iterator<Item = TokenTree>) -> usize {
match iter.next() {
Some(Literal(literal)) => {