aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Duerr <contact@christianduerr.com>2025-01-09 06:27:15 +0000
committerGitHub <noreply@github.com>2025-01-09 06:27:15 +0000
commit7321a442a6fc0fc5b6d6ed7af364477d25e706fd (patch)
tree11ff2608e63a160b8b204b6f78ec3977f019d081
parent89c12df969145ffb5084d1122627d7292c2c638f (diff)
downloadr-alacritty-vte-7321a442a6fc0fc5b6d6ed7af364477d25e706fd.tar.gz
r-alacritty-vte-7321a442a6fc0fc5b6d6ed7af364477d25e706fd.tar.bz2
r-alacritty-vte-7321a442a6fc0fc5b6d6ed7af364477d25e706fd.zip
Switch parser to multi-byte processing
This patch overhauls the `Parser::advance` API to operate on byte slices instead of individual bytes, which allows for additional performance optimizations. VTE does not support C1 escapes and C0 escapes always start with an escape character. This makes it possible to simplify processing if a byte stream is determined to not contain any escapes. The `memchr` crate provides a battle-tested implementation for SIMD-accelerated byte searches, which is why this implementation makes use of it. VTE also only supports UTF-8 characters in the ground state, which means that the new non-escape parsing path is able to rely completely on the standard library's `str::from_utf8` since `memchr` gives us the full length of the plain text character buffer. This allows us to completely remove `utf8parse` and all related code. We also make use of `memchr` in the synchronized escape handling in `ansi.rs`, since it relies heavily on scanning large amounts of text for the extension/termination escape sequences.
-rw-r--r--Cargo.toml21
-rw-r--r--examples/parselog.rs6
-rw-r--r--rustfmt.toml6
-rw-r--r--src/ansi.rs460
-rw-r--r--src/definitions.rs104
-rw-r--r--src/lib.rs910
-rw-r--r--src/params.rs5
-rw-r--r--src/table.rs135
-rw-r--r--utf8parse/Cargo.toml15
l---------utf8parse/LICENSE-APACHE1
l---------utf8parse/LICENSE-MIT1
-rw-r--r--utf8parse/src/lib.rs132
-rw-r--r--utf8parse/src/types.rs100
-rw-r--r--utf8parse/tests/UTF-8-demo.txt212
-rw-r--r--utf8parse/tests/utf-8-demo.rs31
-rw-r--r--vte_generate_state_changes/src/lib.rs12
16 files changed, 1000 insertions, 1151 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 040aa28..aee7453 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,21 +13,20 @@ name = "vte"
edition = "2021"
rust-version = "1.62.1"
-[dependencies]
-arrayvec = { version = "0.7.2", default-features = false, optional = true }
-bitflags = { version = "2.3.3", default-features = false, optional = true }
-cursor-icon = { version = "1.0.0", default-features = false, optional = true }
-log = { version = "0.4.17", optional = true }
-serde = { version = "1.0.160", features = ["derive"], optional = true }
-utf8parse = { version = "0.2.0", path = "utf8parse" }
-vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" }
+[workspace]
+members = ["vte_generate_state_changes"]
[features]
ansi = ["log", "cursor-icon", "bitflags"]
default = ["no_std"]
-nightly = ["utf8parse/nightly"]
no_std = ["arrayvec"]
serde = ["dep:serde"]
-[workspace]
-members = ["utf8parse", "vte_generate_state_changes"]
+[dependencies]
+arrayvec = { version = "0.7.2", default-features = false, optional = true }
+bitflags = { version = "2.3.3", default-features = false, optional = true }
+cursor-icon = { version = "1.0.0", default-features = false, optional = true }
+log = { version = "0.4.17", optional = true }
+memchr = "2.7.4"
+serde = { version = "1.0.160", features = ["derive"], optional = true }
+vte_generate_state_changes = { version = "0.1.0", path = "vte_generate_state_changes" }
diff --git a/examples/parselog.rs b/examples/parselog.rs
index dfd0aee..c41c150 100644
--- a/examples/parselog.rs
+++ b/examples/parselog.rs
@@ -61,11 +61,7 @@ fn main() {
loop {
match handle.read(&mut buf) {
Ok(0) => break,
- Ok(n) => {
- for byte in &buf[..n] {
- statemachine.advance(&mut performer, *byte);
- }
- },
+ Ok(n) => statemachine.advance(&mut performer, &buf[..n]),
Err(err) => {
println!("err: {}", err);
break;
diff --git a/rustfmt.toml b/rustfmt.toml
index 9308ba9..f82517e 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -1,13 +1,17 @@
format_code_in_doc_comments = true
+group_imports = "StdExternalCrate"
match_block_trailing_comma = true
condense_wildcard_suffixes = true
use_field_init_shorthand = true
+normalize_doc_attributes = true
overflow_delimited_expr = true
+imports_granularity = "Module"
+format_macro_matchers = true
use_small_heuristics = "Max"
+hex_literal_case = "Upper"
normalize_comments = true
reorder_impl_items = true
use_try_shorthand = true
newline_style = "Unix"
format_strings = true
wrap_comments = true
-comment_width = 100
diff --git a/src/ansi.rs b/src/ansi.rs
index 8cac26d..fa5b1ed 100644
--- a/src/ansi.rs
+++ b/src/ansi.rs
@@ -11,21 +11,20 @@ extern crate alloc;
use alloc::borrow::ToOwned;
use alloc::string::{String, ToString};
use alloc::vec::Vec;
-use bitflags::bitflags;
-
use core::convert::TryFrom;
use core::fmt::{self, Display, Formatter, Write};
+#[cfg(not(feature = "no_std"))]
+use core::ops::Mul;
use core::ops::{Add, Sub};
use core::str::FromStr;
use core::time::Duration;
-use core::{iter, str};
-
-#[cfg(not(feature = "no_std"))]
-use core::ops::Mul;
-
+use core::{iter, mem, str};
#[cfg(not(feature = "no_std"))]
use std::time::Instant;
+use bitflags::bitflags;
+#[doc(inline)]
+pub use cursor_icon;
use cursor_icon::CursorIcon;
use log::debug;
#[cfg(feature = "serde")]
@@ -33,9 +32,6 @@ use serde::{Deserialize, Serialize};
use crate::{Params, ParamsIter};
-#[doc(inline)]
-pub use cursor_icon;
-
/// Maximum time before a synchronized update is aborted.
const SYNC_UPDATE_TIMEOUT: Duration = Duration::from_millis(150);
@@ -168,9 +164,9 @@ impl FromStr for Rgb {
match u32::from_str_radix(chars, 16) {
Ok(mut color) => {
- let b = (color & 0xff) as u8;
+ let b = (color & 0xFF) as u8;
color >>= 8;
- let g = (color & 0xff) as u8;
+ let g = (color & 0xFF) as u8;
color >>= 8;
let r = color as u8;
Ok(Rgb { r, g, b })
@@ -237,14 +233,8 @@ fn parse_number(input: &[u8]) -> Option<u8> {
let mut num: u8 = 0;
for c in input {
let c = *c as char;
- if let Some(digit) = c.to_digit(10) {
- num = match num.checked_mul(10).and_then(|v| v.checked_add(digit as u8)) {
- Some(v) => v,
- None => return None,
- }
- } else {
- return None;
- }
+ let digit = c.to_digit(10)?;
+ num = num.checked_mul(10).and_then(|v| v.checked_add(digit as u8))?;
}
Some(num)
}
@@ -270,11 +260,12 @@ struct SyncState<T: Timeout> {
impl<T: Timeout> Default for SyncState<T> {
fn default() -> Self {
- Self { buffer: Vec::with_capacity(SYNC_BUFFER_SIZE), timeout: T::default() }
+ Self { buffer: Vec::with_capacity(SYNC_BUFFER_SIZE), timeout: Default::default() }
}
}
-/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler.
+/// The processor wraps a `crate::Parser` to ultimately call methods on a
+/// Handler.
#[cfg(not(feature = "no_std"))]
#[derive(Default)]
pub struct Processor<T: Timeout = StdSyncHandler> {
@@ -282,7 +273,8 @@ pub struct Processor<T: Timeout = StdSyncHandler> {
parser: crate::Parser,
}
-/// The processor wraps a `crate::Parser` to ultimately call methods on a Handler.
+/// The processor wraps a `crate::Parser` to ultimately call methods on a
+/// Handler.
#[cfg(feature = "no_std")]
#[derive(Default)]
pub struct Processor<T: Timeout> {
@@ -303,15 +295,19 @@ impl<T: Timeout> Processor<T> {
/// Process new bytes from the PTY.
#[inline]
- pub fn advance<H>(&mut self, handler: &mut H, byte: u8)
+ pub fn advance<H>(&mut self, handler: &mut H, bytes: &[u8])
where
H: Handler,
{
- if self.state.sync_state.timeout.pending_timeout() {
- self.advance_sync(handler, byte);
- } else {
- let mut performer = Performer::new(&mut self.state, handler);
- self.parser.advance(&mut performer, byte);
+ let mut processed = 0;
+ while processed != bytes.len() {
+ if self.state.sync_state.timeout.pending_timeout() {
+ processed += self.advance_sync(handler, &bytes[processed..]);
+ } else {
+ let mut performer = Performer::new(&mut self.state, handler);
+ processed +=
+ self.parser.advance_until_terminated(&mut performer, &bytes[processed..]);
+ }
}
}
@@ -320,18 +316,45 @@ impl<T: Timeout> Processor<T> {
where
H: Handler,
{
+ self.stop_sync_internal(handler, None);
+ }
+
+ /// End a synchronized update.
+ ///
+ /// The `bsu_offset` parameter should be passed if the sync buffer contains
+ /// a new BSU escape that is not part of the current synchronized
+ /// update.
+ fn stop_sync_internal<H>(&mut self, handler: &mut H, bsu_offset: Option<usize>)
+ where
+ H: Handler,
+ {
// Process all synchronized bytes.
- for i in 0..self.state.sync_state.buffer.len() {
- let byte = self.state.sync_state.buffer[i];
- let mut performer = Performer::new(&mut self.state, handler);
- self.parser.advance(&mut performer, byte);
+ //
+ // NOTE: We do not use `advance_until_terminated` here since BSU sequences are
+ // processed automatically during the synchronized update.
+ let buffer = mem::take(&mut self.state.sync_state.buffer);
+ let offset = bsu_offset.unwrap_or(buffer.len());
+ let mut performer = Performer::new(&mut self.state, handler);
+ self.parser.advance(&mut performer, &buffer[..offset]);
+ self.state.sync_state.buffer = buffer;
+
+ match bsu_offset {
+ // Just clear processed bytes if there is a new BSU.
+ //
+ // NOTE: We do not need to re-process for a new ESU since the `advance_sync`
+ // function checks for BSUs in reverse.
+ Some(bsu_offset) => {
+ let new_len = self.state.sync_state.buffer.len() - bsu_offset;
+ self.state.sync_state.buffer.copy_within(bsu_offset.., 0);
+ self.state.sync_state.buffer.truncate(new_len);
+ },
+ // Report mode and clear state if no new BSU is present.
+ None => {
+ handler.unset_private_mode(NamedPrivateMode::SyncUpdate.into());
+ self.state.sync_state.timeout.clear_timeout();
+ self.state.sync_state.buffer.clear();
+ },
}
-
- // Report that update ended, since we could end due to timeout.
- handler.unset_private_mode(NamedPrivateMode::SyncUpdate.into());
- // Resetting state after processing makes sure we don't interpret buffered sync escapes.
- self.state.sync_state.buffer.clear();
- self.state.sync_state.timeout.clear_timeout();
}
/// Number of bytes in the synchronization buffer.
@@ -341,36 +364,56 @@ impl<T: Timeout> Processor<T> {
}
/// Process new bytes during a synchronized update.
+ ///
+ /// Returns the number of bytes processed.
#[cold]
- fn advance_sync<H>(&mut self, handler: &mut H, byte: u8)
+ fn advance_sync<H>(&mut self, handler: &mut H, bytes: &[u8]) -> usize
where
H: Handler,
{
- self.state.sync_state.buffer.push(byte);
+ // Advance sync parser or stop sync if we'd exceed the maximum buffer size.
+ if self.state.sync_state.buffer.len() + bytes.len() >= SYNC_BUFFER_SIZE - 1 {
+ // Terminate the synchronized update.
+ self.stop_sync_internal(handler, None);
- // Handle sync CSI escape sequences.
- self.advance_sync_csi(handler);
+ // Just parse the bytes normally.
+ let mut performer = Performer::new(&mut self.state, handler);
+ self.parser.advance_until_terminated(&mut performer, bytes)
+ } else {
+ self.state.sync_state.buffer.extend(bytes);
+ self.advance_sync_csi(handler, bytes.len());
+ bytes.len()
+ }
}
/// Handle BSU/ESU CSI sequences during synchronized update.
- fn advance_sync_csi<H>(&mut self, handler: &mut H)
+ fn advance_sync_csi<H>(&mut self, handler: &mut H, new_bytes: usize)
where
H: Handler,
{
- // Get the last few bytes for comparison.
- let len = self.state.sync_state.buffer.len();
- let offset = len.saturating_sub(SYNC_ESCAPE_LEN);
- let end = &self.state.sync_state.buffer[offset..];
+ // Get constraints within which a new escape character might be relevant.
+ let buffer_len = self.state.sync_state.buffer.len();
+ let start_offset = (buffer_len - new_bytes).saturating_sub(SYNC_ESCAPE_LEN - 1);
+ let end_offset = buffer_len.saturating_sub(SYNC_ESCAPE_LEN - 1);
+ let search_buffer = &self.state.sync_state.buffer[start_offset..end_offset];
+ // Search for termination/extension escapes in the added bytes.
+ //
// NOTE: It is technically legal to specify multiple private modes in the same
// escape, but we only allow EXACTLY `\e[?2026h`/`\e[?2026l` to keep the parser
- // reasonable.
- //
- // Check for extension/termination of the synchronized update.
- if end == BSU_CSI {
- self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT);
- } else if end == ESU_CSI || len >= SYNC_BUFFER_SIZE - 1 {
- self.stop_sync(handler);
+ // more simple.
+ let mut bsu_offset = None;
+ for index in memchr::memchr_iter(0x1B, search_buffer).rev() {
+ let offset = start_offset + index;
+ let escape = &self.state.sync_state.buffer[offset..offset + SYNC_ESCAPE_LEN];
+
+ if escape == BSU_CSI {
+ self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT);
+ bsu_offset = Some(offset);
+ } else if escape == ESU_CSI {
+ self.stop_sync_internal(handler, bsu_offset);
+ break;
+ }
}
}
}
@@ -382,13 +425,16 @@ impl<T: Timeout> Processor<T> {
struct Performer<'a, H: Handler, T: Timeout> {
state: &'a mut ProcessorState<T>,
handler: &'a mut H,
+
+ /// Whether the parser should be prematurely terminated.
+ terminated: bool,
}
impl<'a, H: Handler + 'a, T: Timeout> Performer<'a, H, T> {
/// Create a performer.
#[inline]
pub fn new<'b>(state: &'b mut ProcessorState<T>, handler: &'b mut H) -> Performer<'b, H, T> {
- Performer { state, handler }
+ Performer { state, handler, terminated: Default::default() }
}
}
@@ -710,13 +756,14 @@ bitflags! {
///
/// This only applies to keys corresponding to ascii characters.
///
-/// For the details on how to implement the mode handling correctly, consult [`XTerm's
-/// implementation`] and the [`output`] of XTerm's provided [`perl script`]. Some libraries and
-/// implementations also use the [`fixterms`] definition of the `CSI u`.
+/// For the details on how to implement the mode handling correctly, consult
+/// [`XTerm's implementation`] and the [`output`] of XTerm's provided [`perl
+/// script`]. Some libraries and implementations also use the [`fixterms`]
+/// definition of the `CSI u`.
///
-/// The end escape sequence has a `CSI char; modifiers u` form while the original
-/// `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI u`, since it has
-/// more adoption.
+/// The end escape sequence has a `CSI char; modifiers u` form while the
+/// original `CSI 27 ; modifier ; char ~`. The clients should prefer the `CSI
+/// u`, since it has more adoption.
///
/// [`XTerm's implementation`]: https://invisible-island.net/xterm/modified-keys.html
/// [`perl script`]: https://github.com/ThomasDickey/xterm-snapshots/blob/master/vttests/modify-keys.pl
@@ -727,12 +774,14 @@ bitflags! {
pub enum ModifyOtherKeys {
/// Reset the state.
Reset,
- /// Enables this feature except for keys with well-known behavior, e.g., Tab, Backspace and
- /// some special control character cases which are built into the X11 library (e.g.,
- /// Control-Space to make a NUL, or Control-3 to make an Escape character).
+ /// Enables this feature except for keys with well-known behavior, e.g.,
+ /// Tab, Backspace and some special control character cases which are
+ /// built into the X11 library (e.g., Control-Space to make a NUL, or
+ /// Control-3 to make an Escape character).
///
/// Escape sequences shouldn't be emitted under the following circumstances:
- /// - When the key is in range of `[64;127]` and the modifier is either Control or Shift
+ /// - When the key is in range of `[64;127]` and the modifier is either
+ /// Control or Shift
/// - When the key combination is a known control combination alias
///
/// For more details, consult the [`example`] for the suggested translation.
@@ -740,9 +789,10 @@ pub enum ModifyOtherKeys {
/// [`example`]: https://github.com/alacritty/vte/blob/master/doc/modifyOtherKeys-example.txt
EnableExceptWellDefined,
/// Enables this feature for all keys including the exceptions of
- /// [`Self::EnableExceptWellDefined`]. XTerm still ignores the special cases built into the
- /// X11 library. Any shifted (modified) ordinary key send an escape sequence. The Alt- and
- /// Meta- modifiers cause XTerm to send escape sequences.
+ /// [`Self::EnableExceptWellDefined`]. XTerm still ignores the special
+ /// cases built into the X11 library. Any shifted (modified) ordinary
+ /// key send an escape sequence. The Alt- and Meta- modifiers cause
+ /// XTerm to send escape sequences.
///
/// For more details, consult the [`example`] for the suggested translation.
///
@@ -1203,16 +1253,20 @@ impl StandardCharset {
pub enum ScpCharPath {
/// SCP's first parameter value of 0. Behavior is implementation defined.
Default,
- /// SCP's first parameter value of 1 which sets character path to LEFT-TO-RIGHT.
+ /// SCP's first parameter value of 1 which sets character path to
+ /// LEFT-TO-RIGHT.
LTR,
- /// SCP's first parameter value of 2 which sets character path to RIGHT-TO-LEFT.
+ /// SCP's first parameter value of 2 which sets character path to
+ /// RIGHT-TO-LEFT.
RTL,
}
-/// SCP control's second parameter which determines update mode/direction between components.
+/// SCP control's second parameter which determines update mode/direction
+/// between components.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScpUpdateMode {
- /// SCP's second parameter value of 0 (the default). Implementation dependant update.
+ /// SCP's second parameter value of 0 (the default). Implementation
+ /// dependant update.
ImplementationDependant,
/// SCP's second parameter value of 1.
///
@@ -1351,8 +1405,8 @@ where
return;
}
- // Link parameters are in format of `key1=value1:key2=value2`. Currently only key
- // `id` is defined.
+ // Link parameters are in format of `key1=value1:key2=value2`. Currently only
+ // key `id` is defined.
let id = link_params
.split(|&b| b == b':')
.find_map(|kv| kv.strip_prefix(b"id="))
@@ -1547,6 +1601,7 @@ where
// Handle sync updates opaquely.
if param == NamedPrivateMode::SyncUpdate as u16 {
self.state.sync_state.timeout.set_timeout(SYNC_UPDATE_TIMEOUT);
+ self.terminated = true;
}
handler.set_private_mode(PrivateMode::new(param))
@@ -1761,6 +1816,11 @@ where
_ => unhandled!(),
}
}
+
+ #[inline]
+ fn terminated(&self) -> bool {
+ self.terminated
+ }
}
#[inline]
@@ -1943,7 +2003,7 @@ pub mod C0 {
/// Unit Separator.
pub const US: u8 = 0x1F;
/// Delete, should be ignored by terminal.
- pub const DEL: u8 = 0x7f;
+ pub const DEL: u8 = 0x7F;
}
// Tests for parsing escape sequences.
@@ -1954,22 +2014,24 @@ mod tests {
use super::*;
#[derive(Default)]
- pub struct TestSyncHandler;
+ pub struct TestSyncHandler {
+ is_sync: usize,
+ }
impl Timeout for TestSyncHandler {
#[inline]
fn set_timeout(&mut self, _: Duration) {
- unreachable!()
+ self.is_sync += 1;
}
#[inline]
fn clear_timeout(&mut self) {
- unreachable!()
+ self.is_sync = 0;
}
#[inline]
fn pending_timeout(&self) -> bool {
- false
+ self.is_sync != 0
}
}
@@ -2028,72 +2090,60 @@ mod tests {
#[test]
fn parse_control_attribute() {
- static BYTES: &[u8] = &[0x1b, b'[', b'1', b'm'];
+ static BYTES: &[u8] = &[0x1B, b'[', b'1', b'm'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
assert_eq!(handler.attr, Some(Attr::Bold));
}
#[test]
fn parse_terminal_identity_csi() {
- let bytes: &[u8] = &[0x1b, b'[', b'1', b'c'];
+ let bytes: &[u8] = &[0x1B, b'[', b'1', b'c'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(!handler.identity_reported);
handler.reset_state();
- let bytes: &[u8] = &[0x1b, b'[', b'c'];
+ let bytes: &[u8] = &[0x1B, b'[', b'c'];
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(handler.identity_reported);
handler.reset_state();
- let bytes: &[u8] = &[0x1b, b'[', b'0', b'c'];
+ let bytes: &[u8] = &[0x1B, b'[', b'0', b'c'];
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(handler.identity_reported);
}
#[test]
fn parse_terminal_identity_esc() {
- let bytes: &[u8] = &[0x1b, b'Z'];
+ let bytes: &[u8] = &[0x1B, b'Z'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(handler.identity_reported);
handler.reset_state();
- let bytes: &[u8] = &[0x1b, b'#', b'Z'];
+ let bytes: &[u8] = &[0x1B, b'#', b'Z'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert!(!handler.identity_reported);
handler.reset_state();
@@ -2102,16 +2152,14 @@ mod tests {
#[test]
fn parse_truecolor_attr() {
static BYTES: &[u8] = &[
- 0x1b, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';',
+ 0x1B, b'[', b'3', b'8', b';', b'2', b';', b'1', b'2', b'8', b';', b'6', b'6', b';',
b'2', b'5', b'5', b'm',
];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
let spec = Rgb { r: 128, g: 66, b: 255 };
@@ -2122,38 +2170,34 @@ mod tests {
#[test]
fn parse_zsh_startup() {
static BYTES: &[u8] = &[
- 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'7', b'm', b'%', 0x1b, b'[', b'2', b'7', b'm',
- 0x1b, b'[', b'1', b'm', 0x1b, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ',
+ 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'7', b'm', b'%', 0x1B, b'[', b'2', b'7', b'm',
+ 0x1B, b'[', b'1', b'm', 0x1B, b'[', b'0', b'm', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ', b' ',
- b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1b, b'[', b'0', b'm', 0x1b, b'[', b'2',
- b'7', b'm', 0x1b, b'[', b'2', b'4', b'm', 0x1b, b'[', b'J', b'j', b'w', b'i', b'l',
- b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1b,
- b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xe2, 0x9e, 0x9c, b' ', 0x1b, b'[', b'0',
- b'1', b';', b'3', b'2', b'm', b' ', 0x1b, b'[', b'3', b'6', b'm', b'~', b'/', b'c',
+ b' ', b' ', b' ', b'\r', b' ', b'\r', b'\r', 0x1B, b'[', b'0', b'm', 0x1B, b'[', b'2',
+ b'7', b'm', 0x1B, b'[', b'2', b'4', b'm', 0x1B, b'[', b'J', b'j', b'w', b'i', b'l',
+ b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd', b'e', b's', b'k', b' ', 0x1B,
+ b'[', b'0', b'1', b';', b'3', b'2', b'm', 0xE2, 0x9E, 0x9C, b' ', 0x1B, b'[', b'0',
+ b'1', b';', b'3', b'2', b'm', b' ', 0x1B, b'[', b'3', b'6', b'm', b'~', b'/', b'c',
b'o', b'd', b'e',
];
let mut handler = MockHandler::default();
let mut parser = Processor::<TestSyncHandler>::new();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
}
#[test]
fn parse_designate_g0_as_line_drawing() {
- static BYTES: &[u8] = &[0x1b, b'(', b'0'];
+ static BYTES: &[u8] = &[0x1B, b'(', b'0'];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in BYTES {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, BYTES);
assert_eq!(handler.index, CharsetIndex::G0);
assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing);
@@ -2161,37 +2205,35 @@ mod tests {
#[test]
fn parse_designate_g1_as_line_drawing_and_invoke() {
- static BYTES: &[u8] = &[0x1b, b')', b'0', 0x0e];
+ static BYTES: &[u8] = &[0x1B, b')', b'0', 0x0E];
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in &BYTES[..3] {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, &BYTES[..3]);
assert_eq!(handler.index, CharsetIndex::G1);
assert_eq!(handler.charset, StandardCharset::SpecialCharacterAndLineDrawing);
let mut handler = MockHandler::default();
- parser.advance(&mut handler, BYTES[3]);
+ parser.advance(&mut handler, &[BYTES[3]]);
assert_eq!(handler.index, CharsetIndex::G1);
}
#[test]
fn parse_valid_rgb_colors() {
- assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xff, g: 0xee, b: 0xdd }));
- assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
- assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xff, g: 0xec, b: 0xca }));
- assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xff, g: 0x0, b: 0x0 }));
+ assert_eq!(xparse_color(b"rgb:f/e/d"), Some(Rgb { r: 0xFF, g: 0xEE, b: 0xDD }));
+ assert_eq!(xparse_color(b"rgb:11/aa/ff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
+ assert_eq!(xparse_color(b"rgb:f/ed1/cb23"), Some(Rgb { r: 0xFF, g: 0xEC, b: 0xCA }));
+ assert_eq!(xparse_color(b"rgb:ffff/0/0"), Some(Rgb { r: 0xFF, g: 0x0, b: 0x0 }));
}
#[test]
fn parse_valid_legacy_rgb_colors() {
- assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xa0, b: 0xf0 }));
- assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
- assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
- assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xaa, b: 0xff }));
+ assert_eq!(xparse_color(b"#1af"), Some(Rgb { r: 0x10, g: 0xA0, b: 0xF0 }));
+ assert_eq!(xparse_color(b"#11aaff"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
+ assert_eq!(xparse_color(b"#110aa0ff0"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
+ assert_eq!(xparse_color(b"#1100aa00ff00"), Some(Rgb { r: 0x11, g: 0xAA, b: 0xFF }));
}
#[test]
@@ -2228,11 +2270,9 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
- assert_eq!(handler.color, Some(Rgb { r: 0xf0, g: 0xf0, b: 0xf0 }));
+ assert_eq!(handler.color, Some(Rgb { r: 0xF0, g: 0xF0, b: 0xF0 }));
}
#[test]
@@ -2242,9 +2282,7 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
assert_eq!(handler.reset_colors, vec![1]);
}
@@ -2256,9 +2294,7 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
let expected: Vec<usize> = (0..256).collect();
assert_eq!(handler.reset_colors, expected);
@@ -2271,30 +2307,148 @@ mod tests {
let mut parser = Processor::<TestSyncHandler>::new();
let mut handler = MockHandler::default();
- for byte in bytes {
- parser.advance(&mut handler, *byte);
- }
+ parser.advance(&mut handler, bytes);
let expected: Vec<usize> = (0..256).collect();
assert_eq!(handler.reset_colors, expected);
}
#[test]
+ fn partial_sync_updates() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Start synchronized update.
+
+ parser.advance(&mut handler, b"\x1b[?20");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ parser.advance(&mut handler, b"26h");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Dispatch some data.
+
+ parser.advance(&mut handler, b"random \x1b[31m stuff");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Extend synchronized update.
+
+ parser.advance(&mut handler, b"\x1b[?20");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ parser.advance(&mut handler, b"26h");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 2);
+ assert!(handler.attr.is_none());
+
+ // Terminate synchronized update.
+
+ parser.advance(&mut handler, b"\x1b[?20");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 2);
+ assert!(handler.attr.is_none());
+
+ parser.advance(&mut handler, b"26l");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_some());
+ }
+
+ #[test]
+ fn sync_bursts_buffer() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Repeat test twice to ensure internal state is reset properly.
+ for _ in 0..2 {
+ // Start synchronized update.
+ parser.advance(&mut handler, b"\x1b[?2026h");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Ensure sync works.
+ parser.advance(&mut handler, b"\x1b[31m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Exceed sync buffer dimensions.
+ parser.advance(&mut handler, "a".repeat(SYNC_BUFFER_SIZE).as_bytes());
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.take().is_some());
+
+ // Ensure new events are dispatched directly.
+ parser.advance(&mut handler, b"\x1b[31m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.take().is_some());
+ }
+ }
+
+ #[test]
+ fn mixed_sync_escape() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Start synchronized update with immediate SGR.
+ parser.advance(&mut handler, b"\x1b[?2026h\x1b[31m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Terminate synchronized update and check for SGR.
+ parser.advance(&mut handler, b"\x1b[?2026l");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_some());
+ }
+
+ #[test]
+ fn sync_bsu_with_esu() {
+ let mut parser = Processor::<TestSyncHandler>::new();
+ let mut handler = MockHandler::default();
+
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert!(handler.attr.is_none());
+
+ // Start synchronized update with immediate SGR.
+ parser.advance(&mut handler, b"\x1b[?2026h\x1b[1m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 1);
+ assert!(handler.attr.is_none());
+
+ // Terminate synchronized update, but immediately start a new one.
+ parser.advance(&mut handler, b"\x1b[?2026l\x1b[?2026h\x1b[4m");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 2);
+ assert_eq!(handler.attr.take(), Some(Attr::Bold));
+
+ // Terminate again, expecting one buffered SGR.
+ parser.advance(&mut handler, b"\x1b[?2026l");
+ assert_eq!(parser.state.sync_state.timeout.is_sync, 0);
+ assert_eq!(handler.attr.take(), Some(Attr::Underline));
+ }
+
+ #[test]
#[cfg(not(feature = "no_std"))]
fn contrast() {
- let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff };
+ let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF };
let rgb2 = Rgb { r: 0x00, g: 0x00, b: 0x00 };
assert!((rgb1.contrast(rgb2) - 21.).abs() < f64::EPSILON);
- let rgb1 = Rgb { r: 0xff, g: 0xff, b: 0xff };
+ let rgb1 = Rgb { r: 0xFF, g: 0xFF, b: 0xFF };
assert!((rgb1.contrast(rgb1) - 1.).abs() < f64::EPSILON);
- let rgb1 = Rgb { r: 0xff, g: 0x00, b: 0xff };
- let rgb2 = Rgb { r: 0x00, g: 0xff, b: 0x00 };
+ let rgb1 = Rgb { r: 0xFF, g: 0x00, b: 0xFF };
+ let rgb2 = Rgb { r: 0x00, g: 0xFF, b: 0x00 };
assert!((rgb1.contrast(rgb2) - 2.285_543_608_124_253_3).abs() < f64::EPSILON);
let rgb1 = Rgb { r: 0x12, g: 0x34, b: 0x56 };
- let rgb2 = Rgb { r: 0xfe, g: 0xdc, b: 0xba };
+ let rgb2 = Rgb { r: 0xFE, g: 0xDC, b: 0xBA };
assert!((rgb1.contrast(rgb2) - 9.786_558_997_257_74).abs() < f64::EPSILON);
}
}
diff --git a/src/definitions.rs b/src/definitions.rs
index 568a8a8..694c783 100644
--- a/src/definitions.rs
+++ b/src/definitions.rs
@@ -2,54 +2,53 @@ use core::mem;
#[allow(dead_code)]
#[repr(u8)]
-#[derive(Debug, Default, Copy, Clone)]
+#[derive(PartialEq, Eq, Debug, Default, Copy, Clone)]
pub enum State {
- Anywhere = 0,
- CsiEntry = 1,
- CsiIgnore = 2,
- CsiIntermediate = 3,
- CsiParam = 4,
- DcsEntry = 5,
- DcsIgnore = 6,
- DcsIntermediate = 7,
- DcsParam = 8,
- DcsPassthrough = 9,
- Escape = 10,
- EscapeIntermediate = 11,
+ CsiEntry,
+ CsiIgnore,
+ CsiIntermediate,
+ CsiParam,
+ DcsEntry,
+ DcsIgnore,
+ DcsIntermediate,
+ DcsParam,
+ DcsPassthrough,
+ Escape,
+ EscapeIntermediate,
+ OscString,
+ SosPmApcString,
+ Anywhere,
#[default]
- Ground = 12,
- OscString = 13,
- SosPmApcString = 14,
- Utf8 = 15,
+ Ground,
}
+// NOTE: Removing the unused actions prefixed with `_` will reduce performance.
#[allow(dead_code)]
#[repr(u8)]
-#[derive(Debug, Clone, Copy)]
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum Action {
- None = 0,
- Clear = 1,
- Collect = 2,
- CsiDispatch = 3,
- EscDispatch = 4,
- Execute = 5,
- Hook = 6,
- Ignore = 7,
- OscEnd = 8,
- OscPut = 9,
- OscStart = 10,
- Param = 11,
- Print = 12,
- Put = 13,
- Unhook = 14,
- BeginUtf8 = 15,
+ None,
+ _Clear,
+ Collect,
+ CsiDispatch,
+ EscDispatch,
+ Execute,
+ _Hook,
+ _Ignore,
+ _OscEnd,
+ OscPut,
+ _OscStart,
+ Param,
+ _Print,
+ Put,
+ _Unhook,
}
/// Unpack a u8 into a State and Action
///
-/// The implementation of this assumes that there are *precisely* 16 variants for both Action and
-/// State. Furthermore, it assumes that the enums are tag-only; that is, there is no data in any
-/// variant.
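+/// The implementation of this assumes that every 4-bit value passed in maps
+/// to a declared variant of Action and State respectively. Both enums
+/// currently declare 15 variants (discriminants 0 through 14), so a nibble of
+/// 0xF has no matching variant and must never be stored in the table.
+/// Furthermore, it assumes that the enums are tag-only; that is, there is no
+/// data in any variant.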
///
/// Bad things will happen if those invariants are violated.
#[inline(always)]
@@ -57,7 +56,7 @@ pub fn unpack(delta: u8) -> (State, Action) {
unsafe {
(
// State is stored in bottom 4 bits
- mem::transmute::<u8, State>(delta & 0x0f),
+ mem::transmute::<u8, State>(delta & 0x0F),
// Action is stored in top 4 bits
mem::transmute::<u8, Action>(delta >> 4),
)
@@ -75,37 +74,26 @@ mod tests {
#[test]
fn unpack_state_action() {
- match unpack(0xee) {
- (State::SosPmApcString, Action::Unhook) => (),
+ match unpack(0xEE) {
+ (State::Ground, Action::_Unhook) => (),
_ => panic!("unpack failed"),
}
- match unpack(0x0f) {
- (State::Utf8, Action::None) => (),
+ match unpack(0x0E) {
+ (State::Ground, Action::None) => (),
_ => panic!("unpack failed"),
}
- match unpack(0xff) {
- (State::Utf8, Action::BeginUtf8) => (),
+ match unpack(0xE0) {
+ (State::CsiEntry, Action::_Unhook) => (),
_ => panic!("unpack failed"),
}
}
#[test]
fn pack_state_action() {
- match unpack(0xee) {
- (State::SosPmApcString, Action::Unhook) => (),
- _ => panic!("unpack failed"),
- }
-
- match unpack(0x0f) {
- (State::Utf8, Action::None) => (),
- _ => panic!("unpack failed"),
- }
-
- match unpack(0xff) {
- (State::Utf8, Action::BeginUtf8) => (),
- _ => panic!("unpack failed"),
- }
+ assert_eq!(pack(State::Ground, Action::_Unhook), 0xEE);
+ assert_eq!(pack(State::Ground, Action::None), 0x0E);
+ assert_eq!(pack(State::CsiEntry, Action::_Unhook), 0xE0);
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 0f12902..3c2f863 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,44 +1,39 @@
//! Parser for implementing virtual terminal emulators
//!
-//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
-//! state machine]. The state machine doesn't assign meaning to the parsed data
-//! and is thus not itself sufficient for writing a terminal emulator. Instead,
-//! it is expected that an implementation of [`Perform`] is provided which does
+//! [`Parser`] is implemented according to [Paul Williams' ANSI parser state
+//! machine]. The state machine doesn't assign meaning to the parsed data and is
+//! thus not itself sufficient for writing a terminal emulator. Instead, it is
+//! expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the book
//! keeping, and the [`Perform`] gets to simply handle actions.
//!
//! # Examples
//!
-//! For an example of using the [`Parser`] please see the examples folder. The example included
-//! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to
-//! pipe `vim` into it
+//! For an example of using the [`Parser`] please see the examples folder. The
+//! example included there simply logs all the actions [`Perform`] does. One
+//! quick way to see it in action is to pipe `printf` into it
//!
//! ```sh
-//! cargo build --release --example parselog
-//! vim | target/release/examples/parselog
+//! printf '\x1b[31mExample' | cargo run --example parselog
//! ```
//!
-//! Just type `:q` to exit.
-//!
//! # Differences from original state machine description
//!
//! * UTF-8 Support for Input
//! * OSC Strings can be terminated by 0x07
-//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
-//! all states.
+//! * Only supports 7-bit codes
//!
//! [`Parser`]: struct.Parser.html
//! [`Perform`]: trait.Perform.html
//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser
#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)]
-#![cfg_attr(all(feature = "nightly", test), feature(test))]
#![cfg_attr(feature = "no_std", no_std)]
use core::mem::MaybeUninit;
+use core::str;
#[cfg(feature = "no_std")]
use arrayvec::ArrayVec;
-use utf8parse as utf8;
mod definitions;
mod params;
@@ -46,28 +41,13 @@ mod table;
#[cfg(feature = "ansi")]
pub mod ansi;
-pub use params::{Params, ParamsIter};
-
use definitions::{unpack, Action, State};
+pub use params::{Params, ParamsIter};
const MAX_INTERMEDIATES: usize = 2;
const MAX_OSC_PARAMS: usize = 16;
const MAX_OSC_RAW: usize = 1024;
-struct VtUtf8Receiver<'a, P: Perform>(&'a mut P, &'a mut State);
-
-impl<P: Perform> utf8::Receiver for VtUtf8Receiver<'_, P> {
- fn codepoint(&mut self, c: char) {
- self.0.print(c);
- *self.1 = State::Ground;
- }
-
- fn invalid_sequence(&mut self) {
- self.0.print('�');
- *self.1 = State::Ground;
- }
-}
-
/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`]
///
/// [`Perform`]: trait.Perform.html
@@ -88,7 +68,8 @@ pub struct Parser<const OSC_RAW_BUF_SIZE: usize = MAX_OSC_RAW> {
osc_params: [(usize, usize); MAX_OSC_PARAMS],
osc_num_params: usize,
ignoring: bool,
- utf8_parser: utf8::Parser,
+ partial_utf8: [u8; 4],
+ partial_utf8_len: usize,
}
impl Parser {
@@ -99,7 +80,8 @@ impl Parser {
}
impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
- /// Create a new Parser with a custom size for the Operating System Command buffer.
+ /// Create a new Parser with a custom size for the Operating System Command
+ /// buffer.
///
/// Call with a const-generic param on `Parser`, like:
///
@@ -121,41 +103,74 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
&self.intermediates[..self.intermediate_idx]
}
- /// Advance the parser state
+ /// Advance the parser state.
///
- /// Requires a [`Perform`] in case `byte` triggers an action
+ /// Requires a [`Perform`] implementation to handle the triggered actions.
///
/// [`Perform`]: trait.Perform.html
#[inline]
- pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) {
- // Utf8 characters are handled out-of-band.
- if let State::Utf8 = self.state {
- self.process_utf8(performer, byte);
- return;
- }
-
- // Handle state changes in the anywhere state before evaluating changes
- // for current state.
- let mut change = table::STATE_CHANGES[State::Anywhere as usize][byte as usize];
+ pub fn advance<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) {
+ let mut i = 0;
- if change == 0 {
- change = table::STATE_CHANGES[self.state as usize][byte as usize];
+ // Handle partial codepoints from previous calls to `advance`.
+ if self.partial_utf8_len > 0 {
+ i += self.advance_partial_utf8(performer, bytes);
}
- // Unpack into a state and action
- let (state, action) = unpack(change);
+ while i != bytes.len() {
+ match self.state {
+ State::Ground => i += self.advance_ground(performer, &bytes[i..]),
+ _ => {
+ let byte = bytes[i];
+ let change = table::STATE_CHANGES[self.state as usize][byte as usize];
+ let (state, action) = unpack(change);
- self.perform_state_change(performer, state, action, byte);
+ self.perform_state_change(performer, state, action, byte);
+
+ i += 1;
+ },
+ }
+ }
}
+ /// Partially advance the parser state.
+ ///
+ /// This is equivalent to [`Self::advance`], but stops when
+ /// [`Perform::terminated`] is true after reading a byte.
+ ///
+ /// Returns the number of bytes read before termination.
+ ///
+ /// See [`Perform::terminated`] for more details.
#[inline]
- fn process_utf8<P>(&mut self, performer: &mut P, byte: u8)
- where
- P: Perform,
- {
- let mut receiver = VtUtf8Receiver(performer, &mut self.state);
- let utf8_parser = &mut self.utf8_parser;
- utf8_parser.advance(&mut receiver, byte);
+ #[must_use = "Returned value should be used to process the remaining bytes"]
+ pub fn advance_until_terminated<P: Perform>(
+ &mut self,
+ performer: &mut P,
+ bytes: &[u8],
+ ) -> usize {
+ let mut i = 0;
+
+ // Handle partial codepoints from previous calls to `advance`.
+ if self.partial_utf8_len != 0 {
+ i += self.advance_partial_utf8(performer, bytes);
+ }
+
+ while i != bytes.len() && !performer.terminated() {
+ match self.state {
+ State::Ground => i += self.advance_ground(performer, &bytes[i..]),
+ _ => {
+ let byte = bytes[i];
+ let change = table::STATE_CHANGES[self.state as usize][byte as usize];
+ let (state, action) = unpack(change);
+
+ self.perform_state_change(performer, state, action, byte);
+
+ i += 1;
+ },
+ }
+ }
+
+ i
}
#[inline]
@@ -163,93 +178,75 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
where
P: Perform,
{
- macro_rules! maybe_action {
- ($action:expr, $arg:expr) => {
- match $action {
- Action::None => (),
- action => {
- self.perform_action(performer, action, $arg);
- },
- }
- };
+ if state == State::Anywhere {
+ self.perform_action(performer, action, byte);
+ return;
}
- match state {
- State::Anywhere => {
- // Just run the action
- self.perform_action(performer, action, byte);
- },
- state => {
- match self.state {
- State::DcsPassthrough => {
- self.perform_action(performer, Action::Unhook, byte);
- },
- State::OscString => {
- self.perform_action(performer, Action::OscEnd, byte);
- },
- _ => (),
- }
+ match self.state {
+ State::DcsPassthrough => performer.unhook(),
+ State::OscString => {
+ let param_idx = self.osc_num_params;
+ let idx = self.osc_raw.len();
- maybe_action!(action, byte);
+ match param_idx {
+ // Finish last parameter if not already maxed
+ MAX_OSC_PARAMS => (),
- match state {
- State::CsiEntry | State::DcsEntry | State::Escape => {
- self.perform_action(performer, Action::Clear, byte);
- },
- State::DcsPassthrough => {
- self.perform_action(performer, Action::Hook, byte);
+ // First param is special - 0 to current byte index
+ 0 => {
+ self.osc_params[param_idx] = (0, idx);
+ self.osc_num_params += 1;
},
- State::OscString => {
- self.perform_action(performer, Action::OscStart, byte);
+
+ // All other params depend on previous indexing
+ _ => {
+ let prev = self.osc_params[param_idx - 1];
+ let begin = prev.1;
+ self.osc_params[param_idx] = (begin, idx);
+ self.osc_num_params += 1;
},
- _ => (),
}
-
- // Assume the new state
- self.state = state;
+ self.osc_dispatch(performer, byte);
},
+ _ => (),
}
- }
- /// Separate method for osc_dispatch that borrows self as read-only
- ///
- /// The aliasing is needed here for multiple slices into self.osc_raw
- #[inline]
- fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
- let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
- unsafe { MaybeUninit::uninit().assume_init() };
+ if action == Action::None {
+ match state {
+ State::CsiEntry | State::DcsEntry | State::Escape => self.reset_params(),
+ State::DcsPassthrough => {
+ if self.params.is_full() {
+ self.ignoring = true;
+ } else {
+ self.params.push(self.param);
+ }
- for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
- let indices = self.osc_params[i];
- *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
+ performer.hook(
+ self.params(),
+ self.intermediates(),
+ self.ignoring,
+ byte as char,
+ );
+ },
+ State::OscString => {
+ self.osc_raw.clear();
+ self.osc_num_params = 0;
+ },
+ _ => (),
+ }
+ } else {
+ self.perform_action(performer, action, byte);
}
- unsafe {
- let num_params = self.osc_num_params;
- let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
- performer.osc_dispatch(&*params, byte == 0x07);
- }
+ self.state = state;
}
#[inline]
fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) {
match action {
- Action::Print => performer.print(byte as char),
Action::Execute => performer.execute(byte),
- Action::Hook => {
- if self.params.is_full() {
- self.ignoring = true;
- } else {
- self.params.push(self.param);
- }
-
- performer.hook(self.params(), self.intermediates(), self.ignoring, byte as char);
- },
Action::Put => performer.put(byte),
- Action::OscStart => {
- self.osc_raw.clear();
- self.osc_num_params = 0;
- },
Action::OscPut => {
#[cfg(feature = "no_std")]
{
@@ -285,31 +282,6 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
self.osc_raw.push(byte);
}
},
- Action::OscEnd => {
- let param_idx = self.osc_num_params;
- let idx = self.osc_raw.len();
-
- match param_idx {
- // Finish last parameter if not already maxed
- MAX_OSC_PARAMS => (),
-
- // First param is special - 0 to current byte index
- 0 => {
- self.osc_params[param_idx] = (0, idx);
- self.osc_num_params += 1;
- },
-
- // All other params depend on previous indexing
- _ => {
- let prev = self.osc_params[param_idx - 1];
- let begin = prev.1;
- self.osc_params[param_idx] = (begin, idx);
- self.osc_num_params += 1;
- },
- }
- self.osc_dispatch(performer, byte);
- },
- Action::Unhook => performer.unhook(),
Action::CsiDispatch => {
if self.params.is_full() {
self.ignoring = true;
@@ -341,37 +313,203 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
return;
}
- if byte == b';' {
- self.params.push(self.param);
- self.param = 0;
- } else if byte == b':' {
- self.params.extend(self.param);
- self.param = 0;
- } else {
- // Continue collecting bytes into param
- self.param = self.param.saturating_mul(10);
- self.param = self.param.saturating_add((byte - b'0') as u16);
+ match byte {
+ b';' => {
+ self.params.push(self.param);
+ self.param = 0;
+ },
+ b':' => {
+ self.params.extend(self.param);
+ self.param = 0;
+ },
+ _ => {
+ // Continue collecting bytes into param
+ self.param = self.param.saturating_mul(10);
+ self.param = self.param.saturating_add((byte - b'0') as u16);
+ },
}
},
- Action::Clear => {
- // Reset everything on ESC/CSI/DCS entry
- self.intermediate_idx = 0;
- self.ignoring = false;
- self.param = 0;
+ _ => (),
+ }
+ }
+
+ /// Reset escape sequence parameters and intermediates.
+ #[inline]
+ fn reset_params(&mut self) {
+ self.intermediate_idx = 0;
+ self.ignoring = false;
+ self.param = 0;
+
+ self.params.clear();
+ }
+
+ /// Separate method for osc_dispatch that borrows self as read-only
+ ///
+ /// The aliasing is needed here for multiple slices into self.osc_raw
+ #[inline]
+ fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) {
+ let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] =
+ unsafe { MaybeUninit::uninit().assume_init() };
+
+ for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) {
+ let indices = self.osc_params[i];
+ *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]);
+ }
+
+ unsafe {
+ let num_params = self.osc_num_params;
+ let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]];
+ performer.osc_dispatch(&*params, byte == 0x07);
+ }
+ }
- self.params.clear();
+ /// Advance the parser state from ground.
+ ///
+ /// The ground state is handled separately since it can only be left using
+ /// the escape character (`\x1b`). This allows more efficient parsing by
+ /// using SIMD search with [`memchr`].
+ ///
+ /// Returns the number of bytes from the start of `bytes` that were consumed;
+ /// the caller adds this to its index and resumes parsing after them.
+ #[inline]
+ fn advance_ground<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) -> usize {
+ // Find the next escape character.
+ let num_bytes = bytes.len();
+ let plain_chars = memchr::memchr(0x1B, bytes).unwrap_or(num_bytes);
+
+ // If the next character is ESC, just process it and short-circuit.
+ if plain_chars == 0 {
+ self.state = State::Escape;
+ self.reset_params();
+ return 1;
+ }
+
+ match str::from_utf8(&bytes[..plain_chars]) {
+ Ok(parsed) => {
+ Self::ground_dispatch(performer, parsed);
+ let mut processed = plain_chars;
+
+ // If there's another character, it must be escape so process it directly.
+ if processed < num_bytes {
+ self.state = State::Escape;
+ self.reset_params();
+ processed += 1;
+ }
+
+ processed
+ },
+ // Handle invalid and partial utf8.
+ Err(err) => {
+ // Dispatch all the valid bytes.
+ let valid_bytes = err.valid_up_to();
+ // SAFETY: `Utf8Error::valid_up_to` is the length of the prefix which
+ // `str::from_utf8` already verified to be valid UTF-8.
+ let parsed = unsafe { str::from_utf8_unchecked(&bytes[..valid_bytes]) };
+ Self::ground_dispatch(performer, parsed);
+
+ match err.error_len() {
+ Some(len) => {
+ // Execute C1 escapes or emit replacement character.
+ //
+ // A byte rejected by `from_utf8` is never ASCII, so the comparison
+ // below only matches the range 0x80..=0x9F.
+ if len == 1 && bytes[valid_bytes] <= 0x9F {
+ performer.execute(bytes[valid_bytes]);
+ } else {
+ performer.print('�');
+ }
+
+ // Restart processing after the invalid bytes.
+ //
+ // While we could theoretically try to just re-parse
+ // `bytes[valid_bytes + len..plain_chars]`, it's easier
+ // to just skip it and invalid utf8 is pretty rare anyway.
+ valid_bytes + len
+ },
+ // `error_len() == None` means the input ended in the middle of a
+ // codepoint rather than containing an invalid byte.
+ None => {
+ if plain_chars < num_bytes {
+ // Process bytes cut off by escape.
+ performer.print('�');
+ self.state = State::Escape;
+ self.reset_params();
+ plain_chars + 1
+ } else {
+ // Process bytes cut off by the buffer end.
+ //
+ // The incomplete tail is stashed so `advance_partial_utf8` can
+ // complete it with the bytes of the next call.
+ //
+ // NOTE(review): this relies on `partial_utf8_len` being 0 here and
+ // on the incomplete tail (at most 3 bytes) fitting into the 4-byte
+ // buffer; confirm no caller enters ground with pending bytes.
+ let extra_bytes = num_bytes - valid_bytes;
+ let partial_len = self.partial_utf8_len + extra_bytes;
+ self.partial_utf8[self.partial_utf8_len..partial_len]
+ .copy_from_slice(&bytes[valid_bytes..valid_bytes + extra_bytes]);
+ self.partial_utf8_len = partial_len;
+ num_bytes
+ }
+ },
+ }
+ },
+ }
+ }
+
+ /// Advance the parser while processing a partial utf8 codepoint.
+ ///
+ /// The leading bytes of `bytes` are appended to the incomplete sequence held
+ /// in `partial_utf8`. As soon as the first codepoint can be decoded it is
+ /// printed; if the sequence turns out to be invalid, a single replacement
+ /// character is printed instead.
+ ///
+ /// Returns the number of bytes from the start of `bytes` that were consumed.
+ /// Only the bytes belonging to the first codepoint (or invalid sequence) are
+ /// consumed, everything after it is left for the regular parsing path.
+ #[inline]
+ fn advance_partial_utf8<P: Perform>(&mut self, performer: &mut P, bytes: &[u8]) -> usize {
+     // Try to copy up to 3 more characters, to ensure the codepoint is complete.
+     let old_bytes = self.partial_utf8_len;
+     let to_copy = bytes.len().min(self.partial_utf8.len() - old_bytes);
+     self.partial_utf8[old_bytes..old_bytes + to_copy].copy_from_slice(&bytes[..to_copy]);
+     self.partial_utf8_len += to_copy;
+
+     // Parse the unicode character.
+     match str::from_utf8(&self.partial_utf8[..self.partial_utf8_len]) {
+         // If the entire buffer is valid, use the first character and continue parsing.
+         Ok(parsed) => {
+             // SAFETY: Callers only invoke this method while `partial_utf8_len`
+             // is non-zero, so the parsed string holds at least one character.
+             let c = unsafe { parsed.chars().next().unwrap_unchecked() };
+             performer.print(c);
+
+             self.partial_utf8_len = 0;
+             c.len_utf8() - old_bytes
+         },
+         Err(err) => {
+             // If we have any valid bytes, the pending codepoint was completed
+             // and whatever follows it (more text, an incomplete codepoint or an
+             // invalid byte) was copied along. This must be checked before
+             // `error_len`, otherwise a completed character followed by an
+             // invalid byte would be replaced and the consumed length could
+             // underflow.
+             let valid_bytes = err.valid_up_to();
+             if valid_bytes > 0 {
+                 // SAFETY: `valid_up_to` is the length of the prefix verified as
+                 // valid UTF-8, and it is non-zero so a first character exists.
+                 let c = unsafe {
+                     let parsed = str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
+                     parsed.chars().next().unwrap_unchecked()
+                 };
+                 performer.print(c);
+
+                 // Only consume the first character, so that any further valid
+                 // characters in the buffer are parsed again instead of dropped.
+                 self.partial_utf8_len = 0;
+                 return c.len_utf8() - old_bytes;
+             }
+
+             match err.error_len() {
+                 // If the partial character was also invalid, emit the replacement
+                 // character.
+                 Some(invalid_len) => {
+                     performer.print('�');
+
+                     self.partial_utf8_len = 0;
+                     invalid_len - old_bytes
+                 },
+                 // If the character still isn't complete, wait for more data.
+                 //
+                 // All of `bytes` fit into the buffer in this case, so `to_copy`
+                 // equals `bytes.len()`.
+                 None => to_copy,
+             }
+         },
- Action::BeginUtf8 => self.process_utf8(performer, byte),
- Action::Ignore => (),
- Action::None => (),
+     }
+ }
+
+ /// Handle ground dispatch of print/execute for all characters in a string.
+ #[inline]
+ fn ground_dispatch<P: Perform>(performer: &mut P, text: &str) {
+ for c in text.chars() {
+ match c {
+ '\x00'..='\x1f' | '\u{80}'..='\u{9f}' => performer.execute(c as u8),
+ _ => performer.print(c),
+ }
}
}
}
/// Performs actions requested by the Parser
///
-/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor
-/// movement, or simply printing characters to the screen.
+/// Actions in this case mean, for example, handling a CSI escape sequence
+/// describing cursor movement, or simply printing characters to the screen.
///
/// The methods on this type correspond to actions described in
/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in
@@ -385,19 +523,21 @@ pub trait Perform {
/// Execute a C0 or C1 control function.
fn execute(&mut self, _byte: u8) {}
- /// Invoked when a final character arrives in first part of device control string.
+ /// Invoked when a final character arrives in first part of device control
+ /// string.
///
- /// The control function should be determined from the private marker, final character, and
- /// execute with a parameter list. A handler should be selected for remaining characters in the
- /// string; the handler function should subsequently be called by `put` for every character in
+ /// The control function should be determined from the private marker, final
+ /// character, and execute with a parameter list. A handler should be
+ /// selected for remaining characters in the string; the handler
+ /// function should subsequently be called by `put` for every character in
/// the control string.
///
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: char) {}
- /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls
- /// will also be passed to the handler.
+ /// Pass bytes as part of a device control string to the handler chosen in
+ /// `hook`. C0 controls will also be passed to the handler.
fn put(&mut self, _byte: u8) {}
/// Called when a device control string is terminated.
@@ -411,9 +551,9 @@ pub trait Perform {
/// A final character has arrived for a CSI sequence
///
- /// The `ignore` flag indicates that either more than two intermediates arrived
- /// or the number of parameters exceeded the maximum supported length,
- /// and subsequent characters were ignored.
+ /// The `ignore` flag indicates that either more than two intermediates
+ /// arrived or the number of parameters exceeded the maximum supported
+ /// length, and subsequent characters were ignored.
fn csi_dispatch(
&mut self,
_params: &Params,
@@ -428,6 +568,19 @@ pub trait Perform {
/// The `ignore` flag indicates that more than two intermediates arrived and
/// subsequent characters were ignored.
fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {}
+
+ /// Whether the parser should terminate prematurely.
+ ///
+ /// This can be used in conjunction with
+ /// [`Parser::advance_until_terminated`] to terminate the parser after
+ /// receiving certain escape sequences like synchronized updates.
+ ///
+ /// This is checked after every parsed byte, so no expensive computation
+ /// should take place in this function.
+ #[inline(always)]
+ fn terminated(&self) -> bool {
+ false
+ }
}
#[cfg(all(test, feature = "no_std"))]
@@ -436,12 +589,12 @@ extern crate std;
#[cfg(test)]
mod tests {
- use super::*;
-
use std::vec::Vec;
- static OSC_BYTES: &[u8] = &[
- 0x1b, 0x5d, // Begin OSC
+ use super::*;
+
+ const OSC_BYTES: &[u8] = &[
+ 0x1B, 0x5D, // Begin OSC
b'2', b';', b'j', b'w', b'i', b'l', b'm', b'@', b'j', b'w', b'i', b'l', b'm', b'-', b'd',
b'e', b's', b'k', b':', b' ', b'~', b'/', b'c', b'o', b'd', b'e', b'/', b'a', b'l', b'a',
b'c', b'r', b'i', b't', b't', b'y', 0x07, // End OSC
@@ -459,6 +612,8 @@ mod tests {
Esc(Vec<u8>, bool, u8),
DcsHook(Vec<Vec<u16>>, Vec<u8>, bool, char),
DcsPut(u8),
+ Print(char),
+ Execute(u8),
DcsUnhook,
}
@@ -492,6 +647,14 @@ mod tests {
fn unhook(&mut self) {
self.dispatched.push(Sequence::DcsUnhook);
}
+
+ fn print(&mut self, c: char) {
+ self.dispatched.push(Sequence::Print(c));
+ }
+
+ fn execute(&mut self, byte: u8) {
+ self.dispatched.push(Sequence::Execute(byte));
+ }
}
#[test]
@@ -499,9 +662,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in OSC_BYTES {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, OSC_BYTES);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -519,9 +680,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in &[0x1b, 0x5d, 0x07] {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, &[0x1B, 0x5D, 0x07]);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -537,9 +696,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -553,13 +710,11 @@ mod tests {
#[test]
fn osc_bell_terminated() {
- static INPUT: &[u8] = b"\x1b]11;ff/00/ff\x07";
+ const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x07";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -570,13 +725,11 @@ mod tests {
#[test]
fn osc_c0_st_terminated() {
- static INPUT: &[u8] = b"\x1b]11;ff/00/ff\x1b\\";
+ const INPUT: &[u8] = b"\x1b]11;ff/00/ff\x1b\\";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 2);
match &dispatcher.dispatched[0] {
@@ -587,37 +740,29 @@ mod tests {
#[test]
fn parse_osc_with_utf8_arguments() {
- static INPUT: &[u8] = &[
- 0x0d, 0x1b, 0x5d, 0x32, 0x3b, 0x65, 0x63, 0x68, 0x6f, 0x20, 0x27, 0xc2, 0xaf, 0x5c,
- 0x5f, 0x28, 0xe3, 0x83, 0x84, 0x29, 0x5f, 0x2f, 0xc2, 0xaf, 0x27, 0x20, 0x26, 0x26,
- 0x20, 0x73, 0x6c, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07,
+ const INPUT: &[u8] = &[
+ 0x0D, 0x1B, 0x5D, 0x32, 0x3B, 0x65, 0x63, 0x68, 0x6F, 0x20, 0x27, 0xC2, 0xAF, 0x5C,
+ 0x5F, 0x28, 0xE3, 0x83, 0x84, 0x29, 0x5F, 0x2F, 0xC2, 0xAF, 0x27, 0x20, 0x26, 0x26,
+ 0x20, 0x73, 0x6C, 0x65, 0x65, 0x70, 0x20, 0x31, 0x07,
];
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
- assert_eq!(dispatcher.dispatched.len(), 1);
- match &dispatcher.dispatched[0] {
- Sequence::Osc(params, _) => {
- assert_eq!(params[0], &[b'2']);
- assert_eq!(params[1], &INPUT[5..(INPUT.len() - 1)]);
- },
- _ => panic!("expected osc sequence"),
- }
+ assert_eq!(dispatcher.dispatched[0], Sequence::Execute(b'\r'));
+ let osc_data = INPUT[5..(INPUT.len() - 1)].into();
+ assert_eq!(dispatcher.dispatched[1], Sequence::Osc(vec![vec![b'2'], osc_data], true));
+ assert_eq!(dispatcher.dispatched.len(), 2);
}
#[test]
fn osc_containing_string_terminator() {
- static INPUT: &[u8] = b"\x1b]2;\xe6\x9c\xab\x1b\\";
+ const INPUT: &[u8] = b"\x1b]2;\xe6\x9c\xab\x1b\\";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 2);
match &dispatcher.dispatched[0] {
@@ -630,27 +775,21 @@ mod tests {
#[test]
fn exceed_max_buffer_size() {
- static NUM_BYTES: usize = MAX_OSC_RAW + 100;
- static INPUT_START: &[u8] = &[0x1b, b']', b'5', b'2', b';', b's'];
- static INPUT_END: &[u8] = &[b'\x07'];
+ const NUM_BYTES: usize = MAX_OSC_RAW + 100;
+ const INPUT_START: &[u8] = b"\x1b]52;s";
+ const INPUT_END: &[u8] = b"\x07";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
// Create valid OSC escape
- for byte in INPUT_START {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_START);
// Exceed max buffer size
- for _ in 0..NUM_BYTES {
- parser.advance(&mut dispatcher, b'a');
- }
+ parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]);
// Terminate escape for dispatch
- for byte in INPUT_END {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_END);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -679,9 +818,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -704,9 +841,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -723,9 +858,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in b"\x1b[4;m" {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, b"\x1b[4;m");
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -740,9 +873,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in b"\x1b[;4m" {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, b"\x1b[;4m");
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -754,35 +885,31 @@ mod tests {
#[test]
fn parse_long_csi_param() {
// The important part is the parameter, which is (i64::MAX + 1)
- static INPUT: &[u8] = b"\x1b[9223372036854775808m";
+ const INPUT: &[u8] = b"\x1b[9223372036854775808m";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
- Sequence::Csi(params, ..) => assert_eq!(params, &[[std::u16::MAX as u16]]),
+ Sequence::Csi(params, ..) => assert_eq!(params, &[[u16::MAX]]),
_ => panic!("expected csi sequence"),
}
}
#[test]
fn csi_reset() {
- static INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
+ const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
Sequence::Csi(params, intermediates, ignore, _) => {
- assert_eq!(intermediates, &[b'?']);
+ assert_eq!(intermediates, b"?");
assert_eq!(params, &[[1049]]);
assert!(!ignore);
},
@@ -792,13 +919,11 @@ mod tests {
#[test]
fn csi_subparameters() {
- static INPUT: &[u8] = b"\x1b[38:2:255:0:255;1m";
+ const INPUT: &[u8] = b"\x1b[38:2:255:0:255;1m";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -818,9 +943,7 @@ mod tests {
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in input {
- parser.advance(&mut dispatcher, byte);
- }
+ parser.advance(&mut dispatcher, &input);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -835,18 +958,16 @@ mod tests {
#[test]
fn dcs_reset() {
- static INPUT: &[u8] = b"\x1b[3;1\x1bP1$tx\x9c";
+ const INPUT: &[u8] = b"\x1b[3;1\x1bP1$tx\x9c";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 3);
match &dispatcher.dispatched[0] {
Sequence::DcsHook(params, intermediates, ignore, _) => {
- assert_eq!(intermediates, &[b'$']);
+ assert_eq!(intermediates, b"$");
assert_eq!(params, &[[1]]);
assert!(!ignore);
},
@@ -858,13 +979,11 @@ mod tests {
#[test]
fn parse_dcs() {
- static INPUT: &[u8] = b"\x1bP0;1|17/ab\x9c";
+ const INPUT: &[u8] = b"\x1bP0;1|17/ab\x9c";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 7);
match &dispatcher.dispatched[0] {
@@ -882,35 +1001,31 @@ mod tests {
#[test]
fn intermediate_reset_on_dcs_exit() {
- static INPUT: &[u8] = b"\x1bP=1sZZZ\x1b+\x5c";
+ const INPUT: &[u8] = b"\x1bP=1sZZZ\x1b+\x5c";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 6);
match &dispatcher.dispatched[5] {
- Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, &[b'+']),
+ Sequence::Esc(intermediates, ..) => assert_eq!(intermediates, b"+"),
_ => panic!("expected esc sequence"),
}
}
#[test]
fn esc_reset() {
- static INPUT: &[u8] = b"\x1b[3;1\x1b(A";
+ const INPUT: &[u8] = b"\x1b[3;1\x1b(A";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
Sequence::Esc(intermediates, ignore, byte) => {
- assert_eq!(intermediates, &[b'(']);
+ assert_eq!(intermediates, b"(");
assert_eq!(*byte, b'A');
assert!(!ignore);
},
@@ -919,14 +1034,25 @@ mod tests {
}
#[test]
+ fn esc_reset_intermediates() {
+ const INPUT: &[u8] = b"\x1b[?2004l\x1b#8";
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 2);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Csi(vec![vec![2004]], vec![63], false, 'l'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Esc(vec![35], false, 56));
+ }
+
+ #[test]
fn params_buffer_filled_with_subparam() {
- static INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b";
+ const INPUT: &[u8] = b"\x1b[::::::::::::::::::::::::::::::::x\x1b";
let mut dispatcher = Dispatcher::default();
let mut parser = Parser::new();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -943,18 +1069,16 @@ mod tests {
#[cfg(feature = "no_std")]
#[test]
fn build_with_fixed_size() {
- static INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
+ const INPUT: &[u8] = b"\x1b[3;1\x1b[?1049h";
let mut dispatcher = Dispatcher::default();
let mut parser: Parser<30> = Parser::new_with_size();
- for byte in INPUT {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
Sequence::Csi(params, intermediates, ignore, _) => {
- assert_eq!(intermediates, &[b'?']);
+ assert_eq!(intermediates, b"?");
assert_eq!(params, &[[1049]]);
assert!(!ignore);
},
@@ -966,27 +1090,21 @@ mod tests {
#[test]
fn exceed_fixed_osc_buffer_size() {
const OSC_BUFFER_SIZE: usize = 32;
- static NUM_BYTES: usize = OSC_BUFFER_SIZE + 100;
- static INPUT_START: &[u8] = b"\x1b]52;";
- static INPUT_END: &[u8] = b"\x07";
+ const NUM_BYTES: usize = OSC_BUFFER_SIZE + 100;
+ const INPUT_START: &[u8] = b"\x1b]52;";
+ const INPUT_END: &[u8] = b"\x07";
let mut dispatcher = Dispatcher::default();
let mut parser: Parser<OSC_BUFFER_SIZE> = Parser::new_with_size();
// Create valid OSC escape
- for byte in INPUT_START {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_START);
// Exceed max buffer size
- for _ in 0..NUM_BYTES {
- parser.advance(&mut dispatcher, b'a');
- }
+ parser.advance(&mut dispatcher, &[b'a'; NUM_BYTES]);
// Terminate escape for dispatch
- for byte in INPUT_END {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_END);
assert_eq!(dispatcher.dispatched.len(), 1);
match &dispatcher.dispatched[0] {
@@ -1005,22 +1123,16 @@ mod tests {
#[cfg(feature = "no_std")]
#[test]
fn fixed_size_osc_containing_string_terminator() {
- static INPUT_START: &[u8] = b"\x1b]2;";
- static INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab";
- static INPUT_END: &[u8] = b"\x1b\\";
+ const INPUT_START: &[u8] = b"\x1b]2;";
+ const INPUT_MIDDLE: &[u8] = b"s\xe6\x9c\xab";
+ const INPUT_END: &[u8] = b"\x1b\\";
let mut dispatcher = Dispatcher::default();
let mut parser: Parser<5> = Parser::new_with_size();
- for byte in INPUT_START {
- parser.advance(&mut dispatcher, *byte);
- }
- for byte in INPUT_MIDDLE {
- parser.advance(&mut dispatcher, *byte);
- }
- for byte in INPUT_END {
- parser.advance(&mut dispatcher, *byte);
- }
+ parser.advance(&mut dispatcher, INPUT_START);
+ parser.advance(&mut dispatcher, INPUT_MIDDLE);
+ parser.advance(&mut dispatcher, INPUT_END);
assert_eq!(dispatcher.dispatched.len(), 2);
match &dispatcher.dispatched[0] {
@@ -1031,74 +1143,144 @@ mod tests {
_ => panic!("expected osc sequence"),
}
}
-}
-#[cfg(all(feature = "nightly", test))]
-mod bench {
- extern crate std;
- extern crate test;
+ #[test]
+ fn unicode() {
+ const INPUT: &[u8] = b"\xF0\x9F\x8E\x89_\xF0\x9F\xA6\x80\xF0\x9F\xA6\x80_\xF0\x9F\x8E\x89";
- use super::*;
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
- use test::{black_box, Bencher};
+ parser.advance(&mut dispatcher, INPUT);
- static VTE_DEMO: &[u8] = include_bytes!("../tests/demo.vte");
+ assert_eq!(dispatcher.dispatched.len(), 6);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('🎉'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('_'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('🦀'));
+ assert_eq!(dispatcher.dispatched[3], Sequence::Print('🦀'));
+ assert_eq!(dispatcher.dispatched[4], Sequence::Print('_'));
+ assert_eq!(dispatcher.dispatched[5], Sequence::Print('🎉'));
+ }
- struct BenchDispatcher;
- impl Perform for BenchDispatcher {
- fn print(&mut self, c: char) {
- black_box(c);
- }
+ #[test]
+ fn invalid_utf8() {
+ const INPUT: &[u8] = b"a\xEF\xBCb";
- fn execute(&mut self, byte: u8) {
- black_box(byte);
- }
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
- fn hook(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
- black_box((params, intermediates, ignore, c));
- }
+ parser.advance(&mut dispatcher, INPUT);
- fn put(&mut self, byte: u8) {
- black_box(byte);
- }
+ assert_eq!(dispatcher.dispatched.len(), 3);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('a'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('b'));
+ }
- fn osc_dispatch(&mut self, params: &[&[u8]], bell_terminated: bool) {
- black_box((params, bell_terminated));
- }
+ #[test]
+ fn partial_utf8() {
+ const INPUT: &[u8] = b"\xF0\x9F\x9A\x80";
- fn csi_dispatch(&mut self, params: &Params, intermediates: &[u8], ignore: bool, c: char) {
- black_box((params, intermediates, ignore, c));
- }
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
- fn esc_dispatch(&mut self, intermediates: &[u8], ignore: bool, byte: u8) {
- black_box((intermediates, ignore, byte));
- }
+ parser.advance(&mut dispatcher, &INPUT[..1]);
+ parser.advance(&mut dispatcher, &INPUT[1..2]);
+ parser.advance(&mut dispatcher, &INPUT[2..3]);
+ parser.advance(&mut dispatcher, &INPUT[3..]);
+
+ assert_eq!(dispatcher.dispatched.len(), 1);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('🚀'));
}
- #[bench]
- fn testfile(b: &mut Bencher) {
- b.iter(|| {
- let mut dispatcher = BenchDispatcher;
- let mut parser = Parser::new();
+ #[test]
+ fn partial_utf8_separating_utf8() {
+ // This is different from the `partial_utf8` test since it has a multi-byte UTF8
+ // character after the partial UTF8 state, causing a partial byte to be present
+ // in the `partial_utf8` buffer after the 2-byte codepoint.
- for byte in VTE_DEMO {
- parser.advance(&mut dispatcher, *byte);
- }
- });
+ // "ĸ🎉"
+ const INPUT: &[u8] = b"\xC4\xB8\xF0\x9F\x8E\x89";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, &INPUT[..1]);
+ parser.advance(&mut dispatcher, &INPUT[1..]);
+
+ assert_eq!(dispatcher.dispatched.len(), 2);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('ĸ'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('🎉'));
}
- #[bench]
- fn state_changes(b: &mut Bencher) {
- let input = b"\x1b]2;X\x1b\\ \x1b[0m \x1bP0@\x1b\\";
- b.iter(|| {
- let mut dispatcher = BenchDispatcher;
- let mut parser = Parser::new();
+ #[test]
+ fn partial_invalid_utf8() {
+ const INPUT: &[u8] = b"a\xEF\xBCb";
- for _ in 0..1_000 {
- for byte in input {
- parser.advance(&mut dispatcher, *byte);
- }
- }
- });
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, &INPUT[..1]);
+ parser.advance(&mut dispatcher, &INPUT[1..2]);
+ parser.advance(&mut dispatcher, &INPUT[2..3]);
+ parser.advance(&mut dispatcher, &INPUT[3..]);
+
+ assert_eq!(dispatcher.dispatched.len(), 3);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('a'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('b'));
+ }
+
+ #[test]
+ fn partial_utf8_into_esc() {
+ const INPUT: &[u8] = b"\xD8\x1b012";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 4);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('�'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Esc(Vec::new(), false, b'0'));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Print('1'));
+ assert_eq!(dispatcher.dispatched[3], Sequence::Print('2'));
+ }
+
+ #[test]
+ fn c1s() {
+ const INPUT: &[u8] = b"\x00\x1f\x80\x90\x98\x9b\x9c\x9d\x9e\x9fa";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 11);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Execute(31));
+ assert_eq!(dispatcher.dispatched[2], Sequence::Execute(128));
+ assert_eq!(dispatcher.dispatched[3], Sequence::Execute(144));
+ assert_eq!(dispatcher.dispatched[4], Sequence::Execute(152));
+ assert_eq!(dispatcher.dispatched[5], Sequence::Execute(155));
+ assert_eq!(dispatcher.dispatched[6], Sequence::Execute(156));
+ assert_eq!(dispatcher.dispatched[7], Sequence::Execute(157));
+ assert_eq!(dispatcher.dispatched[8], Sequence::Execute(158));
+ assert_eq!(dispatcher.dispatched[9], Sequence::Execute(159));
+ assert_eq!(dispatcher.dispatched[10], Sequence::Print('a'));
+ }
+
+ #[test]
+ fn execute_anywhere() {
+ const INPUT: &[u8] = b"\x18\x1a";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, INPUT);
+
+ assert_eq!(dispatcher.dispatched.len(), 2);
+ assert_eq!(dispatcher.dispatched[0], Sequence::Execute(0x18));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Execute(0x1A));
}
}
diff --git a/src/params.rs b/src/params.rs
index 608c040..967befb 100644
--- a/src/params.rs
+++ b/src/params.rs
@@ -8,8 +8,9 @@ pub(crate) const MAX_PARAMS: usize = 32;
pub struct Params {
/// Number of subparameters for each parameter.
///
- /// For each entry in the `params` slice, this stores the length of the param as number of
- /// subparams at the same index as the param in the `params` slice.
+ /// For each entry in the `params` slice, this stores the length of the
+ /// param as number of subparams at the same index as the param in the
+ /// `params` slice.
///
/// At the subparam positions the length will always be `0`.
subparams: [u8; MAX_PARAMS],
diff --git a/src/table.rs b/src/table.rs
index f2c0105..ac288e7 100644
--- a/src/table.rs
+++ b/src/table.rs
@@ -1,39 +1,20 @@
-/// This is the state change table. It's indexed first by current state and then by the next
-/// character in the pty stream.
-use crate::definitions::{pack, Action, State};
-
use vte_generate_state_changes::generate_state_changes;
+/// This is the state change table. It's indexed first by current state and then
+/// by the next character in the pty stream.
+use crate::definitions::{pack, Action, State};
+
// Generate state changes at compile-time
-pub static STATE_CHANGES: [[u8; 256]; 16] = state_changes();
+pub const STATE_CHANGES: [[u8; 256]; 13] = state_changes();
generate_state_changes!(state_changes, {
- Anywhere {
- 0x18 => (Ground, Execute),
- 0x1a => (Ground, Execute),
- 0x1b => (Escape, None),
- },
-
- Ground {
- 0x00..=0x17 => (Anywhere, Execute),
- 0x19 => (Anywhere, Execute),
- 0x1c..=0x1f => (Anywhere, Execute),
- 0x20..=0x7f => (Anywhere, Print),
- 0x80..=0x8f => (Anywhere, Execute),
- 0x91..=0x9a => (Anywhere, Execute),
- 0x9c => (Anywhere, Execute),
- // Beginning of UTF-8 2 byte sequence
- 0xc2..=0xdf => (Utf8, BeginUtf8),
- // Beginning of UTF-8 3 byte sequence
- 0xe0..=0xef => (Utf8, BeginUtf8),
- // Beginning of UTF-8 4 byte sequence
- 0xf0..=0xf4 => (Utf8, BeginUtf8),
- },
-
Escape {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x20..=0x2f => (EscapeIntermediate, Collect),
0x30..=0x4f => (Ground, EscDispatch),
0x51..=0x57 => (Ground, EscDispatch),
@@ -51,18 +32,24 @@ generate_state_changes!(state_changes, {
EscapeIntermediate {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x2f => (Anywhere, Collect),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x30..=0x7e => (Ground, EscDispatch),
},
CsiEntry {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x20..=0x2f => (CsiIntermediate, Collect),
0x30..=0x39 => (CsiParam, Param),
0x3a..=0x3b => (CsiParam, Param),
@@ -72,20 +59,26 @@ generate_state_changes!(state_changes, {
CsiIgnore {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
- 0x20..=0x3f => (Anywhere, Ignore),
- 0x7f => (Anywhere, Ignore),
+ 0x20..=0x3f => (Anywhere, None),
+ 0x7f => (Anywhere, None),
0x40..=0x7e => (Ground, None),
},
CsiParam {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
0x30..=0x39 => (Anywhere, Param),
0x3a..=0x3b => (Anywhere, Param),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x3c..=0x3f => (CsiIgnore, None),
0x20..=0x2f => (CsiIntermediate, Collect),
0x40..=0x7e => (Ground, CsiDispatch),
@@ -93,19 +86,25 @@ generate_state_changes!(state_changes, {
CsiIntermediate {
0x00..=0x17 => (Anywhere, Execute),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Execute),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Execute),
0x20..=0x2f => (Anywhere, Collect),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x30..=0x3f => (CsiIgnore, None),
0x40..=0x7e => (Ground, CsiDispatch),
},
DcsEntry {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
- 0x7f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
+ 0x7f => (Anywhere, None),
0x20..=0x2f => (DcsIntermediate, Collect),
0x30..=0x39 => (DcsParam, Param),
0x3a..=0x3b => (DcsParam, Param),
@@ -114,30 +113,39 @@ generate_state_changes!(state_changes, {
},
DcsIntermediate {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
0x20..=0x2f => (Anywhere, Collect),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x30..=0x3f => (DcsIgnore, None),
0x40..=0x7e => (DcsPassthrough, None),
},
DcsIgnore {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
- 0x20..=0x7f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
+ 0x20..=0x7f => (Anywhere, None),
0x9c => (Ground, None),
},
DcsParam {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
0x30..=0x39 => (Anywhere, Param),
0x3a..=0x3b => (Anywhere, Param),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x3c..=0x3f => (DcsIgnore, None),
0x20..=0x2f => (DcsIntermediate, Collect),
0x40..=0x7e => (DcsPassthrough, None),
@@ -145,27 +153,36 @@ generate_state_changes!(state_changes, {
DcsPassthrough {
0x00..=0x17 => (Anywhere, Put),
+ 0x18 => (Ground, Execute),
0x19 => (Anywhere, Put),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
0x1c..=0x1f => (Anywhere, Put),
0x20..=0x7e => (Anywhere, Put),
- 0x7f => (Anywhere, Ignore),
+ 0x7f => (Anywhere, None),
0x9c => (Ground, None),
},
SosPmApcString {
- 0x00..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
- 0x20..=0x7f => (Anywhere, Ignore),
+ 0x00..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
+ 0x20..=0x7f => (Anywhere, None),
0x9c => (Ground, None),
},
OscString {
- 0x00..=0x06 => (Anywhere, Ignore),
+ 0x00..=0x06 => (Anywhere, None),
0x07 => (Ground, None),
- 0x08..=0x17 => (Anywhere, Ignore),
- 0x19 => (Anywhere, Ignore),
- 0x1c..=0x1f => (Anywhere, Ignore),
+ 0x08..=0x17 => (Anywhere, None),
+ 0x18 => (Ground, Execute),
+ 0x19 => (Anywhere, None),
+ 0x1a => (Ground, Execute),
+ 0x1b => (Escape, None),
+ 0x1c..=0x1f => (Anywhere, None),
0x20..=0xff => (Anywhere, OscPut),
}
});
diff --git a/utf8parse/Cargo.toml b/utf8parse/Cargo.toml
deleted file mode 100644
index 71ea44b..0000000
--- a/utf8parse/Cargo.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-[package]
-authors = ["Joe Wilm <joe@jwilm.com>", "Christian Duerr <contact@christianduerr.com>"]
-description = "Table-driven UTF-8 parser"
-documentation = "https://docs.rs/utf8parse/"
-repository = "https://github.com/alacritty/vte"
-keywords = ["utf8", "parse", "table"]
-categories = ["parsing", "no-std"]
-license = "Apache-2.0 OR MIT"
-version = "0.2.2"
-name = "utf8parse"
-edition = "2018"
-
-[features]
-nightly = []
-default = []
diff --git a/utf8parse/LICENSE-APACHE b/utf8parse/LICENSE-APACHE
deleted file mode 120000
index 965b606..0000000
--- a/utf8parse/LICENSE-APACHE
+++ /dev/null
@@ -1 +0,0 @@
-../LICENSE-APACHE \ No newline at end of file
diff --git a/utf8parse/LICENSE-MIT b/utf8parse/LICENSE-MIT
deleted file mode 120000
index 76219eb..0000000
--- a/utf8parse/LICENSE-MIT
+++ /dev/null
@@ -1 +0,0 @@
-../LICENSE-MIT \ No newline at end of file
diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs
deleted file mode 100644
index 093de81..0000000
--- a/utf8parse/src/lib.rs
+++ /dev/null
@@ -1,132 +0,0 @@
-//! A table-driven UTF-8 Parser
-//!
-//! This module implements a table-driven UTF-8 parser which should
-//! theoretically contain the minimal number of branches (1). The only branch is
-//! on the `Action` returned from unpacking a transition.
-#![deny(clippy::all, clippy::if_not_else, clippy::enum_glob_use)]
-#![cfg_attr(all(feature = "nightly", test), feature(test))]
-#![no_std]
-
-use core::char;
-
-mod types;
-
-use types::{Action, State};
-
-/// Handles codepoint and invalid sequence events from the parser.
-pub trait Receiver {
- /// Called whenever a codepoint is parsed successfully
- fn codepoint(&mut self, _: char);
-
- /// Called when an invalid_sequence is detected
- fn invalid_sequence(&mut self);
-}
-
-/// A parser for Utf8 Characters
-///
-/// Repeatedly call `advance` with bytes to emit Utf8 characters
-#[derive(Clone, Default, PartialEq, Eq, Debug)]
-pub struct Parser {
- point: u32,
- state: State,
-}
-
-/// Continuation bytes are masked with this value.
-const CONTINUATION_MASK: u8 = 0b0011_1111;
-
-impl Parser {
- /// Create a new Parser
- pub fn new() -> Parser {
- Parser { point: 0, state: State::Ground }
- }
-
- /// Advance the parser
- ///
- /// The provider receiver will be called whenever a codepoint is completed or an invalid
- /// sequence is detected.
- pub fn advance<R>(&mut self, receiver: &mut R, byte: u8)
- where
- R: Receiver,
- {
- let (state, action) = self.state.advance(byte);
- self.perform_action(receiver, byte, action);
- self.state = state;
- }
-
- fn perform_action<R>(&mut self, receiver: &mut R, byte: u8, action: Action)
- where
- R: Receiver,
- {
- match action {
- Action::InvalidSequence => {
- self.point = 0;
- receiver.invalid_sequence();
- },
- Action::EmitByte => {
- receiver.codepoint(byte as char);
- },
- Action::SetByte1 => {
- let point = self.point | ((byte & CONTINUATION_MASK) as u32);
- let c = unsafe { char::from_u32_unchecked(point) };
- self.point = 0;
-
- receiver.codepoint(c);
- },
- Action::SetByte2 => {
- self.point |= ((byte & CONTINUATION_MASK) as u32) << 6;
- },
- Action::SetByte2Top => {
- self.point |= ((byte & 0b0001_1111) as u32) << 6;
- },
- Action::SetByte3 => {
- self.point |= ((byte & CONTINUATION_MASK) as u32) << 12;
- },
- Action::SetByte3Top => {
- self.point |= ((byte & 0b0000_1111) as u32) << 12;
- },
- Action::SetByte4 => {
- self.point |= ((byte & 0b0000_0111) as u32) << 18;
- },
- }
- }
-}
-
-#[cfg(all(feature = "nightly", test))]
-mod benches {
- extern crate std;
- extern crate test;
-
- use super::{Parser, Receiver};
-
- use self::test::{black_box, Bencher};
-
- static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt");
-
- impl Receiver for () {
- fn codepoint(&mut self, c: char) {
- black_box(c);
- }
-
- fn invalid_sequence(&mut self) {}
- }
-
- #[bench]
- fn parse_bench_utf8_demo(b: &mut Bencher) {
- let mut parser = Parser::new();
-
- b.iter(|| {
- for byte in UTF8_DEMO {
- parser.advance(&mut (), *byte);
- }
- })
- }
-
- #[bench]
- fn std_string_parse_utf8(b: &mut Bencher) {
- b.iter(|| {
- for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() {
- black_box(c);
- }
- });
- }
-}
diff --git a/utf8parse/src/types.rs b/utf8parse/src/types.rs
deleted file mode 100644
index 8a52c67..0000000
--- a/utf8parse/src/types.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-//! Types supporting the UTF-8 parser
-
-/// Action to take when receiving a byte
-#[derive(Debug, Copy, Clone)]
-pub enum Action {
- /// Unexpected byte; sequence is invalid
- InvalidSequence = 0,
- /// Received valid 7-bit ASCII byte which can be directly emitted.
- EmitByte = 1,
- /// Set the bottom continuation byte
- SetByte1 = 2,
- /// Set the 2nd-from-last continuation byte
- SetByte2 = 3,
- /// Set the 2nd-from-last byte which is part of a two byte sequence
- SetByte2Top = 4,
- /// Set the 3rd-from-last continuation byte
- SetByte3 = 5,
- /// Set the 3rd-from-last byte which is part of a three byte sequence
- SetByte3Top = 6,
- /// Set the top byte of a four byte sequence.
- SetByte4 = 7,
-}
-
-/// States the parser can be in.
-///
-/// There is a state for each initial input of the 3 and 4 byte sequences since
-/// the following bytes are subject to different conditions than a tail byte.
-#[allow(non_camel_case_types)]
-#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]
-pub enum State {
- /// Ground state; expect anything
- #[default]
- Ground = 0,
- /// 3 tail bytes
- Tail3 = 1,
- /// 2 tail bytes
- Tail2 = 2,
- /// 1 tail byte
- Tail1 = 3,
- /// UTF8-3 starting with E0
- U3_2_e0 = 4,
- /// UTF8-3 starting with ED
- U3_2_ed = 5,
- /// UTF8-4 starting with F0
- Utf8_4_3_f0 = 6,
- /// UTF8-4 starting with F4
- Utf8_4_3_f4 = 7,
-}
-
-impl State {
- /// Advance the parser state.
- ///
- /// This takes the current state and input byte into consideration, to determine the next state
- /// and any action that should be taken.
- #[inline]
- pub fn advance(self, byte: u8) -> (State, Action) {
- match self {
- State::Ground => match byte {
- 0x00..=0x7f => (State::Ground, Action::EmitByte),
- 0xc2..=0xdf => (State::Tail1, Action::SetByte2Top),
- 0xe0 => (State::U3_2_e0, Action::SetByte3Top),
- 0xe1..=0xec => (State::Tail2, Action::SetByte3Top),
- 0xed => (State::U3_2_ed, Action::SetByte3Top),
- 0xee..=0xef => (State::Tail2, Action::SetByte3Top),
- 0xf0 => (State::Utf8_4_3_f0, Action::SetByte4),
- 0xf1..=0xf3 => (State::Tail3, Action::SetByte4),
- 0xf4 => (State::Utf8_4_3_f4, Action::SetByte4),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::U3_2_e0 => match byte {
- 0xa0..=0xbf => (State::Tail1, Action::SetByte2),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::U3_2_ed => match byte {
- 0x80..=0x9f => (State::Tail1, Action::SetByte2),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Utf8_4_3_f0 => match byte {
- 0x90..=0xbf => (State::Tail2, Action::SetByte3),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Utf8_4_3_f4 => match byte {
- 0x80..=0x8f => (State::Tail2, Action::SetByte3),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Tail3 => match byte {
- 0x80..=0xbf => (State::Tail2, Action::SetByte3),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Tail2 => match byte {
- 0x80..=0xbf => (State::Tail1, Action::SetByte2),
- _ => (State::Ground, Action::InvalidSequence),
- },
- State::Tail1 => match byte {
- 0x80..=0xbf => (State::Ground, Action::SetByte1),
- _ => (State::Ground, Action::InvalidSequence),
- },
- }
- }
-}
diff --git a/utf8parse/tests/UTF-8-demo.txt b/utf8parse/tests/UTF-8-demo.txt
deleted file mode 100644
index 4363f27..0000000
--- a/utf8parse/tests/UTF-8-demo.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-
-UTF-8 encoded sample plain-text file
-‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
-
-Markus Kuhn [ˈmaʳkʊs kuːn] <http://www.cl.cam.ac.uk/~mgk25/> — 2002-07-25
-
-
-The ASCII compatible UTF-8 encoding used in this plain-text file
-is defined in Unicode, ISO 10646-1, and RFC 2279.
-
-
-Using Unicode/UTF-8, you can write in emails and source code things such as
-
-Mathematics and sciences:
-
- ∮ E⋅da = Q, n → ∞, ∑ f(i) = ∏ g(i), ⎧⎡⎛┌─────┐⎞⎤⎫
- ⎪⎢⎜│a²+b³ ⎟⎥⎪
- ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β), ⎪⎢⎜│───── ⎟⎥⎪
- ⎪⎢⎜⎷ c₈ ⎟⎥⎪
- ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⎨⎢⎜ ⎟⎥⎬
- ⎪⎢⎜ ∞ ⎟⎥⎪
- ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫), ⎪⎢⎜ ⎲ ⎟⎥⎪
- ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪
- 2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm ⎩⎣⎝i=1 ⎠⎦⎭
-
-Linguistics and dictionaries:
-
- ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn
- Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]
-
-APL:
-
- ((V⍳V)=⍳⍴V)/V←,V ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈
-
-Nicer typography in plain text files:
-
- ╔══════════════════════════════════════════╗
- ║ ║
- ║ • ‘single’ and “double” quotes ║
- ║ ║
- ║ • Curly apostrophes: “We’ve been here” ║
- ║ ║
- ║ • Latin-1 apostrophe and accents: '´` ║
- ║ ║
- ║ • ‚deutsche‘ „Anführungszeichen“ ║
- ║ ║
- ║ • †, ‡, ‰, •, 3–4, —, −5/+5, ™, … ║
- ║ ║
- ║ • ASCII safety test: 1lI|, 0OD, 8B ║
- ║ ╭─────────╮ ║
- ║ • the euro symbol: │ 14.95 € │ ║
- ║ ╰─────────╯ ║
- ╚══════════════════════════════════════════╝
-
-Combining characters:
-
- STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑
-
-Greek (in Polytonic):
-
- The Greek anthem:
-
- Σὲ γνωρίζω ἀπὸ τὴν κόψη
- τοῦ σπαθιοῦ τὴν τρομερή,
- σὲ γνωρίζω ἀπὸ τὴν ὄψη
- ποὺ μὲ βία μετράει τὴ γῆ.
-
- ᾿Απ᾿ τὰ κόκκαλα βγαλμένη
- τῶν ῾Ελλήνων τὰ ἱερά
- καὶ σὰν πρῶτα ἀνδρειωμένη
- χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά!
-
- From a speech of Demosthenes in the 4th century BC:
-
- Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι,
- ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς
- λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ
- τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿
- εἰς τοῦτο προήκοντα, ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ
- πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν
- οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι,
- οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν
- ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον
- τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι
- γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν
- προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους
- σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ
- τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ
- τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς
- τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον.
-
- Δημοσθένους, Γ´ ᾿Ολυνθιακὸς
-
-Georgian:
-
- From a Unicode conference invitation:
-
- გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო
- კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს,
- ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს
- ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი,
- ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება
- ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში,
- ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში.
-
-Russian:
-
- From a Unicode conference invitation:
-
- Зарегистрируйтесь сейчас на Десятую Международную Конференцию по
- Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии.
- Конференция соберет широкий круг экспертов по вопросам глобального
- Интернета и Unicode, локализации и интернационализации, воплощению и
- применению Unicode в различных операционных системах и программных
- приложениях, шрифтах, верстке и многоязычных компьютерных системах.
-
-Thai (UCS Level 2):
-
- Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese
- classic 'San Gua'):
-
- [----------------------------|------------------------]
- ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช พระปกเกศกองบู๊กู้ขึ้นใหม่
- สิบสองกษัตริย์ก่อนหน้าแลถัดไป สององค์ไซร้โง่เขลาเบาปัญญา
- ทรงนับถือขันทีเป็นที่พึ่ง บ้านเมืองจึงวิปริตเป็นนักหนา
- โฮจิ๋นเรียกทัพทั่วหัวเมืองมา หมายจะฆ่ามดชั่วตัวสำคัญ
- เหมือนขับไสไล่เสือจากเคหา รับหมาป่าเข้ามาเลยอาสัญ
- ฝ่ายอ้องอุ้นยุแยกให้แตกกัน ใช้สาวนั้นเป็นชนวนชื่นชวนใจ
- พลันลิฉุยกุยกีกลับก่อเหตุ ช่างอาเพศจริงหนาฟ้าร้องไห้
- ต้องรบราฆ่าฟันจนบรรลัย ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ
-
- (The above is a two-column text. If combining characters are handled
- correctly, the lines of the second column should be aligned with the
- | character above.)
-
-Ethiopian:
-
- Proverbs in the Amharic language:
-
- ሰማይ አይታረስ ንጉሥ አይከሰስ።
- ብላ ካለኝ እንደአባቴ በቆመጠኝ።
- ጌጥ ያለቤቱ ቁምጥና ነው።
- ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው።
- የአፍ ወለምታ በቅቤ አይታሽም።
- አይጥ በበላ ዳዋ ተመታ።
- ሲተረጉሙ ይደረግሙ።
- ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል።
- ድር ቢያብር አንበሳ ያስር።
- ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም።
- እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም።
- የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ።
- ሥራ ከመፍታት ልጄን ላፋታት።
- ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል።
- የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ።
- ተንጋሎ ቢተፉ ተመልሶ ባፉ።
- ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው።
- እግርህን በፍራሽህ ልክ ዘርጋ።
-
-Runes:
-
- ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
-
- (Old English, which transcribed into Latin reads 'He cwaeth that he
- bude thaem lande northweardum with tha Westsae.' and means 'He said
- that he lived in the northern land near the Western Sea.')
-
-Braille:
-
- ⡌⠁⠧⠑ ⠼⠁⠒ ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌
-
- ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞
- ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎
- ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂
- ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙
- ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑
- ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲
-
- ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
-
- ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹
- ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞
- ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕
- ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹
- ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎
- ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎
- ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳
- ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞
- ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
-
- (The first couple of paragraphs of "A Christmas Carol" by Dickens)
-
-Compact font selection example text:
-
- ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
- abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ
- –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд
- ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა
-
-Greetings in various languages:
-
- Hello world, Καλημέρα κόσμε, コンニチハ
-
-Box drawing alignment tests: █
- ▉
- ╔══╦══╗ ┌──┬──┐ ╭──┬──╮ ╭──┬──╮ ┏━━┳━━┓ ┎┒┏┑ ╷ ╻ ┏┯┓ ┌┰┐ ▊ ╱╲╱╲╳╳╳
- ║┌─╨─┐║ │╔═╧═╗│ │╒═╪═╕│ │╓─╁─╖│ ┃┌─╂─┐┃ ┗╃╄┙ ╶┼╴╺╋╸┠┼┨ ┝╋┥ ▋ ╲╱╲╱╳╳╳
- ║│╲ ╱│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╿ │┃ ┍╅╆┓ ╵ ╹ ┗┷┛ └┸┘ ▌ ╱╲╱╲╳╳╳
- ╠╡ ╳ ╞╣ ├╢ ╟┤ ├┼─┼─┼┤ ├╫─╂─╫┤ ┣┿╾┼╼┿┫ ┕┛┖┚ ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳
- ║│╱ ╲│║ │║ ║│ ││ │ ││ │║ ┃ ║│ ┃│ ╽ │┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▎
- ║└─╥─┘║ │╚═╤═╝│ │╘═╪═╛│ │╙─╀─╜│ ┃└─╂─┘┃ ░░▒▒▓▓██ ┊ ┆ ╎ ╏ ┇ ┋ ▏
- ╚══╩══╝ └──┴──┘ ╰──┴──╯ ╰──┴──╯ ┗━━┻━━┛ ▗▄▖▛▀▜ └╌╌┘ ╎ ┗╍╍┛ ┋ ▁▂▃▄▅▆▇█
- ▝▀▘▙▄▟
diff --git a/utf8parse/tests/utf-8-demo.rs b/utf8parse/tests/utf-8-demo.rs
deleted file mode 100644
index 51df492..0000000
--- a/utf8parse/tests/utf-8-demo.rs
+++ /dev/null
@@ -1,31 +0,0 @@
-use utf8parse::{Parser, Receiver};
-
-static UTF8_DEMO: &[u8] = include_bytes!("UTF-8-demo.txt");
-
-#[derive(Debug, PartialEq)]
-struct StringWrapper(String);
-
-impl Receiver for StringWrapper {
- fn codepoint(&mut self, c: char) {
- self.0.push(c);
- }
-
- fn invalid_sequence(&mut self) {}
-}
-
-#[test]
-fn utf8parse_test() {
- let mut parser = Parser::new();
-
- // utf8parse implementation
- let mut actual = StringWrapper(String::new());
-
- for byte in UTF8_DEMO {
- parser.advance(&mut actual, *byte)
- }
-
- // standard library implementation
- let expected = String::from_utf8_lossy(UTF8_DEMO);
-
- assert_eq!(actual.0, expected);
-}
diff --git a/vte_generate_state_changes/src/lib.rs b/vte_generate_state_changes/src/lib.rs
index b016518..ff8ea49 100644
--- a/vte_generate_state_changes/src/lib.rs
+++ b/vte_generate_state_changes/src/lib.rs
@@ -25,8 +25,8 @@ pub fn generate_state_changes(item: proc_macro::TokenStream) -> proc_macro::Toke
let assignments_stream = states_stream(&mut iter);
quote!(
- const fn #fn_name() -> [[u8; 256]; 16] {
- let mut state_changes = [[0; 256]; 16];
+ const fn #fn_name() -> [[u8; 256]; 13] {
+ let mut state_changes = [[0; 256]; 13];
#assignments_stream
@@ -71,7 +71,8 @@ fn state_entry_stream(iter: &mut Peekable<token_stream::IntoIter>) -> TokenStrea
tokens
}
-/// Generate the array assignment statement for a single byte->target mapping for one state.
+/// Generate the array assignment statement for a single byte->target mapping
+/// for one state.
fn change_stream(iter: &mut Peekable<token_stream::IntoIter>, state: &TokenTree) -> TokenStream {
// Start of input byte range
let start = next_usize(iter);
@@ -101,8 +102,6 @@ fn change_stream(iter: &mut Peekable<token_stream::IntoIter>, state: &TokenTree)
// Create a new entry for every byte in the range
for byte in start..=end {
- // TODO: Force adding `State::` and `Action::`?
- // TODO: Should we really use `pack` here without import?
tokens.extend(quote!(
state_changes[State::#state as usize][#byte] =
pack(State::#target_state, Action::#target_action);
@@ -148,7 +147,8 @@ fn expect_punct(iter: &mut impl Iterator<Item = TokenTree>, c: char) {
///
/// # Panics
///
-/// Panics if the next token is not a [`usize`] in hex or decimal literal format.
+/// Panics if the next token is not a [`usize`] in hex or decimal literal
+/// format.
fn next_usize(iter: &mut impl Iterator<Item = TokenTree>) -> usize {
match iter.next() {
Some(Literal(literal)) => {