diff options
author | Joe Wilm <joe@jwilm.com> | 2016-09-17 17:02:29 -0700 |
---|---|---|
committer | Joe Wilm <joe@jwilm.com> | 2016-09-17 17:03:25 -0700 |
commit | 917080a5c27b3310daab135f9bfdbc531cb54186 (patch) | |
tree | 29e73dbde735185a6edbf0e7d3b1c354cf6a75b5 /utf8parse/src/table.rs.in | |
parent | 85388ab070fbc41c8cce3ffbfbcc0d1d917109e0 (diff) | |
download | r-alacritty-vte-917080a5c27b3310daab135f9bfdbc531cb54186.tar.gz r-alacritty-vte-917080a5c27b3310daab135f9bfdbc531cb54186.tar.bz2 r-alacritty-vte-917080a5c27b3310daab135f9bfdbc531cb54186.zip |
Move utf8 parsing into separate crate
Diffstat (limited to 'utf8parse/src/table.rs.in')
-rw-r--r-- | utf8parse/src/table.rs.in | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/utf8parse/src/table.rs.in b/utf8parse/src/table.rs.in new file mode 100644 index 0000000..2acafe7 --- /dev/null +++ b/utf8parse/src/table.rs.in @@ -0,0 +1,60 @@ +//! UTF-8 Parse Transition Table + +/// Transition table for parsing UTF-8. This is built from the grammar described +/// at <https://tools.ietf.org/html/rfc3629#section-4> which I have copied and +/// formatted below. +/// +/// # UTF-8 Grammar +/// +/// ```ignore +/// UTF8-octets = *( UTF8-char ) +/// UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 +/// UTF8-1 = %x00-7F +/// UTF8-2 = %xC2-DF UTF8-tail +/// UTF8-3 = %xE0 %xA0-BF UTF8-tail / +/// %xE1-EC 2( UTF8-tail ) / +/// %xED %x80-9F UTF8-tail / +/// %xEE-EF 2( UTF8-tail ) +/// UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / +/// %xF1-F3 3( UTF8-tail ) / +/// %xF4 %x80-8F 2( UTF8-tail ) +/// UTF8-tail = %x80-BF +/// ``` +/// +/// Not specifying an action in this table is equivalent to specifying +/// `Action::InvalidSequence`. Not specifying a state is equivalent to specifying +/// `State::Ground`. NOTE(review): the `[[u8; 256]; 8]` shape suggests one row per state listed below, indexed by input byte; confirm against `utf8_state_table!`. +pub static TRANSITIONS: [[u8; 256]; 8] = utf8_state_table! 
{ + State::Ground => { + 0x00...0x7f => (State::Ground, Action::EmitByte), + 0xc2...0xdf => (State::Tail1, Action::SetByte2Top), + 0xe0 => (State::U3_2_e0, Action::SetByte3Top), + 0xe1...0xec => (State::Tail2, Action::SetByte3Top), + 0xed => (State::U3_2_ed, Action::SetByte3Top), + 0xee...0xef => (State::Tail2, Action::SetByte3Top), + 0xf0 => (State::Utf8_4_3_f0, Action::SetByte4), + 0xf1...0xf3 => (State::Tail3, Action::SetByte4), + 0xf4 => (State::Utf8_4_3_f4, Action::SetByte4), + }, + State::U3_2_e0 => { + 0xa0...0xbf => (State::Tail1, Action::SetByte2), + }, + State::U3_2_ed => { + 0x80...0x9f => (State::Tail1, Action::SetByte2), + }, + State::Utf8_4_3_f0 => { + 0x90...0xbf => (State::Tail2, Action::SetByte3), + }, + State::Utf8_4_3_f4 => { + 0x80...0x8f => (State::Tail2, Action::SetByte3), + }, + State::Tail3 => { + 0x80...0xbf => (State::Tail2, Action::SetByte3), + }, + State::Tail2 => { + 0x80...0xbf => (State::Tail1, Action::SetByte2), + }, + State::Tail1 => { + 0x80...0xbf => (State::Ground, Action::SetByte1), + }, +}; |