aboutsummaryrefslogtreecommitdiff
path: root/utf8parse/src/table.rs.in
diff options
context:
space:
mode:
Diffstat (limited to 'utf8parse/src/table.rs.in')
-rw-r--r--utf8parse/src/table.rs.in60
1 files changed, 60 insertions, 0 deletions
diff --git a/utf8parse/src/table.rs.in b/utf8parse/src/table.rs.in
new file mode 100644
index 0000000..2acafe7
--- /dev/null
+++ b/utf8parse/src/table.rs.in
@@ -0,0 +1,60 @@
+//! UTF-8 Parse Transition Table
+
+/// Transition table for parsing UTF-8. This is built from the grammar described
+/// at https://tools.ietf.org/html/rfc3629#section-4 which I have copied and
+/// formatted below.
+///
+/// # UTF-8 Grammar
+///
+/// ```ignore
+/// UTF8-octets = *( UTF8-char )
+/// UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
+/// UTF8-1 = %x00-7F
+/// UTF8-2 = %xC2-DF UTF8-tail
+/// UTF8-3 = %xE0 %xA0-BF UTF8-tail /
+/// %xE1-EC 2( UTF8-tail ) /
+/// %xED %x80-9F UTF8-tail /
+/// %xEE-EF 2( UTF8-tail )
+/// UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) /
+/// %xF1-F3 3( UTF8-tail ) /
+/// %xF4 %x80-8F 2( UTF8-tail )
+/// UTF8-tail = %x80-BF
+/// ```
+///
+/// Not specifying an action in this table is equivalent to specifying
+/// Action::InvalidSequence. Not specifying a state is equivalent to specifying
+/// state::ground.
+pub static TRANSITIONS: [[u8; 256]; 8] = utf8_state_table! {
+ State::Ground => {
+ 0x00...0x7f => (State::Ground, Action::EmitByte),
+ 0xc2...0xdf => (State::Tail1, Action::SetByte2Top),
+ 0xe0 => (State::U3_2_e0, Action::SetByte3Top),
+ 0xe1...0xec => (State::Tail2, Action::SetByte3Top),
+ 0xed => (State::U3_2_ed, Action::SetByte3Top),
+ 0xee...0xef => (State::Tail2, Action::SetByte3Top),
+ 0xf0 => (State::Utf8_4_3_f0, Action::SetByte4),
+ 0xf1...0xf3 => (State::Tail3, Action::SetByte4),
+ 0xf4 => (State::Utf8_4_3_f4, Action::SetByte4),
+ },
+ State::U3_2_e0 => {
+ 0xa0...0xbf => (State::Tail1, Action::SetByte2),
+ },
+ State::U3_2_ed => {
+ 0x80...0x9f => (State::Tail1, Action::SetByte2),
+ },
+ State::Utf8_4_3_f0 => {
+ 0x90...0xbf => (State::Tail2, Action::SetByte3),
+ },
+ State::Utf8_4_3_f4 => {
+ 0x80...0x8f => (State::Tail2, Action::SetByte3),
+ },
+ State::Tail3 => {
+ 0x80...0xbf => (State::Tail2, Action::SetByte3),
+ },
+ State::Tail2 => {
+ 0x80...0xbf => (State::Tail1, Action::SetByte2),
+ },
+ State::Tail1 => {
+ 0x80...0xbf => (State::Ground, Action::SetByte1),
+ },
+};