aboutsummaryrefslogtreecommitdiff
path: root/src/utf8/mod.rs
diff options
context:
space:
mode:
author Joe Wilm <joe@jwilm.com> 2016-09-17 17:02:29 -0700
committer Joe Wilm <joe@jwilm.com> 2016-09-17 17:03:25 -0700
commit 917080a5c27b3310daab135f9bfdbc531cb54186 (patch)
tree 29e73dbde735185a6edbf0e7d3b1c354cf6a75b5 /src/utf8/mod.rs
parent 85388ab070fbc41c8cce3ffbfbcc0d1d917109e0 (diff)
download r-alacritty-vte-917080a5c27b3310daab135f9bfdbc531cb54186.tar.gz
r-alacritty-vte-917080a5c27b3310daab135f9bfdbc531cb54186.tar.bz2
r-alacritty-vte-917080a5c27b3310daab135f9bfdbc531cb54186.zip
Move utf8 parsing into separate crate
Diffstat (limited to 'src/utf8/mod.rs')
-rw-r--r-- src/utf8/mod.rs 91
1 files changed, 0 insertions, 91 deletions
diff --git a/src/utf8/mod.rs b/src/utf8/mod.rs
deleted file mode 100644
index 3d099b1..0000000
--- a/src/utf8/mod.rs
+++ /dev/null
@@ -1,91 +0,0 @@
-//! A table-driven UTF-8 Parser
-//!
-//! This module implements a table-driven UTF-8 parser which should
-//! theoretically contain the minimal number of branches (1). The only branch is
-//! on the `Action` returned from unpacking a transition.
-use std::char;
-
-mod types;
-use self::types::{State, Action, unpack};
-
-mod table;
-use self::table::TRANSITIONS;
-
-/// Handles codepoint and invalid sequence events from the parser.
-pub trait Receiver {
- /// Code point parsed
- ///
- /// Called with the codepoint
- fn codepoint(&mut self, char);
-
- /// Invalid sequence encountered
- fn invalid_sequence(&mut self);
-}
-
-/// A parser for Utf8 Characters
-///
-/// Repeatedly call `advance` with bytes to emit Utf8 characters
-pub struct Parser {
- point: u32,
- state: State,
-}
-
-/// Continuation bytes are masked with this value.
-const CONTINUATION_MASK: u8 = 0b0011_1111;
-
-impl Parser {
- /// Create a new Parser
- pub fn new() -> Parser {
- Parser {
- point: 0,
- state: State::Ground,
- }
- }
-
- pub fn advance<R>(&mut self, receiver: &mut R, byte: u8)
- where R: Receiver
- {
- let cur = self.state as usize;
- let change = TRANSITIONS[cur][byte as usize];
- let (state, action) = unsafe { unpack(change) };
-
- self.perform_action(receiver, byte, action);
- self.state = state;
- }
-
- fn perform_action<R>(&mut self, receiver: &mut R, byte: u8, action: Action)
- where R: Receiver
- {
- match action {
- Action::InvalidSequence => {
- self.point = 0;
- receiver.invalid_sequence();
- },
- Action::EmitByte => {
- receiver.codepoint(byte as char);
- },
- Action::SetByte1 => {
- let point = self.point | ((byte & CONTINUATION_MASK) as u32);
- let c = unsafe { char::from_u32_unchecked(point) };
- self.point = 0;
-
- receiver.codepoint(c);
- },
- Action::SetByte2 => {
- self.point |= ((byte & CONTINUATION_MASK) as u32) << 6;
- },
- Action::SetByte2Top => {
- self.point |= ((byte & 0b0001_1111) as u32) << 6;
- },
- Action::SetByte3 => {
- self.point |= ((byte & CONTINUATION_MASK) as u32) << 12;
- },
- Action::SetByte3Top => {
- self.point |= ((byte & 0b0000_1111) as u32) << 12;
- },
- Action::SetByte4 => {
- self.point |= ((byte & 0b0000_0111) as u32) << 18;
- },
- }
- }
-}