//! UTF-8 Parse Transition Table /// Transition table for parsing UTF-8. This is built from the grammar described /// at https://tools.ietf.org/html/rfc3629#section-4 which I have copied and /// formatted below. /// /// # UTF-8 Grammar /// /// ```ignore /// UTF8-octets = *( UTF8-char ) /// UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 /// UTF8-1 = %x00-7F /// UTF8-2 = %xC2-DF UTF8-tail /// UTF8-3 = %xE0 %xA0-BF UTF8-tail / /// %xE1-EC 2( UTF8-tail ) / /// %xED %x80-9F UTF8-tail / /// %xEE-EF 2( UTF8-tail ) /// UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / /// %xF1-F3 3( UTF8-tail ) / /// %xF4 %x80-8F 2( UTF8-tail ) /// UTF8-tail = %x80-BF /// ``` /// /// Not specifying an action in this table is equivalent to specifying /// Action::InvalidSequence. Not specifying a state is equivalent to specifying /// state::ground. pub static TRANSITIONS: [[u8; 256]; 8] = utf8_state_table! { State::Ground => { 0x00...0x7f => (State::Ground, Action::EmitByte), 0xc2...0xdf => (State::Tail1, Action::SetByte2Top), 0xe0 => (State::U3_2_e0, Action::SetByte3Top), 0xe1...0xec => (State::Tail2, Action::SetByte3Top), 0xed => (State::U3_2_ed, Action::SetByte3Top), 0xee...0xef => (State::Tail2, Action::SetByte3Top), 0xf0 => (State::Utf8_4_3_f0, Action::SetByte4), 0xf1...0xf3 => (State::Tail3, Action::SetByte4), 0xf4 => (State::Utf8_4_3_f4, Action::SetByte4), }, State::U3_2_e0 => { 0xa0...0xbf => (State::Tail1, Action::SetByte2), }, State::U3_2_ed => { 0x80...0x9f => (State::Tail1, Action::SetByte2), }, State::Utf8_4_3_f0 => { 0x90...0xbf => (State::Tail2, Action::SetByte3), }, State::Utf8_4_3_f4 => { 0x80...0x8f => (State::Tail2, Action::SetByte3), }, State::Tail3 => { 0x80...0xbf => (State::Tail2, Action::SetByte3), }, State::Tail2 => { 0x80...0xbf => (State::Tail1, Action::SetByte2), }, State::Tail1 => { 0x80...0xbf => (State::Ground, Action::SetByte1), }, };