diff options
author | Joe Wilm <joe@jwilm.com> | 2016-09-17 15:51:45 -0700 |
---|---|---|
committer | Joe Wilm <joe@jwilm.com> | 2016-09-17 17:03:20 -0700 |
commit | cffdb6de59ceb3fd9983a1c19476e5109da8db97 (patch) | |
tree | 26603abf607d21eefd3b9a6ac79a36dab63b5781 /src/lib.rs | |
parent | 930f8cc30a5bc4943c1b56e18cf1a3f8bb00bc2a (diff) | |
download | r-alacritty-vte-cffdb6de59ceb3fd9983a1c19476e5109da8db97.tar.gz r-alacritty-vte-cffdb6de59ceb3fd9983a1c19476e5109da8db97.tar.bz2 r-alacritty-vte-cffdb6de59ceb3fd9983a1c19476e5109da8db97.zip |
Add support for UTF-8
This adds a table-driven UTF-8 parser which only has a single branch for
the entire parser. UTF-8 support is essentially bolted onto the VTE
parser. Not the most elegant, but it does prevent the transition tables
from blowing up.
Instead of refactoring the syntax extension to handle both table
definitions, I've opted to copy/paste for now, both for simplicity's sake
and because I can't see a clear path to a minimal shared solution.
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- | src/lib.rs | 49 |
1 files changed, 42 insertions, 7 deletions
@@ -1,5 +1,6 @@ mod table; mod definitions; +mod utf8; pub use definitions::{Action, State, unpack}; @@ -27,6 +28,20 @@ impl State { const MAX_INTERMEDIATES: usize = 2; const MAX_PARAMS: usize = 16; +struct VtUtf8Receiver<'a, P: Parser + 'a>(&'a mut P, &'a mut State); + +impl<'a, P: Parser> utf8::Receiver for VtUtf8Receiver<'a, P> { + fn codepoint(&mut self, c: char) { + self.0.print(c); + *self.1 = State::Ground; + } + + fn invalid_sequence(&mut self) { + self.0.print('�'); + *self.1 = State::Ground; + } +} + /// ANSI VTE Parser /// /// As described in http://vt100.net/emu/dec_ansi_parser @@ -38,7 +53,8 @@ pub struct StateMachine { intermediate_idx: usize, params: [i64; MAX_PARAMS], num_params: usize, - ignoring: bool + ignoring: bool, + utf8_parser: utf8::Parser, } impl StateMachine { @@ -50,6 +66,7 @@ impl StateMachine { params: [0i64; MAX_PARAMS], num_params: 0, ignoring: false, + utf8_parser: utf8::Parser::new(), } } @@ -62,6 +79,12 @@ impl StateMachine { } pub fn advance<P: Parser>(&mut self, parser: &mut P, byte: u8) { + // Utf8 characters are handled out-of-band. + if let State::Utf8 = self.state { + self.process_utf8(parser, byte); + return; + } + // Handle state changes in the anywhere state before evaluating changes // for current state. let mut change = STATE_CHANGE[State::Anywhere as usize][byte as usize]; @@ -76,13 +99,22 @@ impl StateMachine { self.perform_state_change(parser, state, action, byte); } + #[inline] + fn process_utf8<P>(&mut self, parser: &mut P, byte: u8) + where P: Parser + { + let mut receiver = VtUtf8Receiver(parser, &mut self.state); + let utf8_parser = &mut self.utf8_parser; + utf8_parser.advance(&mut receiver, byte); + } + fn perform_state_change<P>(&mut self, parser: &mut P, state: State, action: Action, byte: u8) where P: Parser { macro_rules! 
maybe_action { ($action:expr, $arg:expr) => { match $action { - Action::None | Action::Unused__ => (), + Action::None => (), action => { self.perform_action(parser, action, $arg); }, @@ -91,7 +123,7 @@ impl StateMachine { } match state { - State::Anywhere | State::Unused__ => { + State::Anywhere => { // Just run the action self.perform_action(parser, action, byte); }, @@ -114,7 +146,7 @@ impl StateMachine { fn perform_action<P: Parser>(&mut self, parser: &mut P, action: Action, byte: u8) { match action { - Action::Print => parser.print(self, byte as char), + Action::Print => parser.print(byte as char), Action::Execute => parser.execute(self, byte), Action::Hook => parser.hook(self, byte), Action::Put => parser.put(self, byte), @@ -124,7 +156,7 @@ impl StateMachine { Action::Unhook => parser.unhook(self, byte), Action::CsiDispatch => parser.csi_dispatch(self, byte as char), Action::EscDispatch => parser.esc_dispatch(self, byte), - Action::Ignore | Action::None | Action::Unused__=> (), + Action::Ignore | Action::None => (), Action::Collect => { if self.intermediate_idx == MAX_INTERMEDIATES { self.ignoring = true; @@ -155,13 +187,16 @@ impl StateMachine { self.intermediate_idx = 0; self.num_params = 0; self.ignoring = false; - } + }, + Action::BeginUtf8 => { + self.process_utf8(parser, byte); + }, } } } pub trait Parser { - fn print(&mut self, &StateMachine, c: char); + fn print(&mut self, c: char); fn execute(&mut self, &StateMachine, byte: u8); fn hook(&mut self, &StateMachine, byte: u8); fn put(&mut self, &StateMachine, byte: u8); |