diff options
author | Christian Duerr <contact@christianduerr.com> | 2019-12-10 19:16:01 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-12-10 19:16:01 +0100 |
commit | 9d37aa7a71801f3569d2a2a55dc82c37935f205a (patch) | |
tree | fd20b01398034934957c0d311209103482836771 /utf8parse/src/lib.rs | |
parent | ea940fcb74abce67b927788e4f9f64fc63073d37 (diff) | |
download | r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.gz r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.bz2 r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.zip |
Remove table generation
This completely removes the `codegen` project, which relied on outdated
libraries to parse DSLs to build the utf8 and vte state tables, to make
the library easier to maintain.
The utf8 table could be completely removed in favor of a `match`
statement, which also led to a performance improvement with the utf8
parser.
The vte table did not benefit from `match` statements at all and instead
had significantly worse performance with them. To replace the old
code generation for vte, the `generate_state_changes` crate has been
created instead, which uses the language's proc_macro feature to create
a `const fn` which will generate the table at compile time.
Diffstat (limited to 'utf8parse/src/lib.rs')
-rw-r--r-- | utf8parse/src/lib.rs | 50 |
1 files changed, 43 insertions, 7 deletions
diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs index 8c866f5..c092647 100644 --- a/utf8parse/src/lib.rs +++ b/utf8parse/src/lib.rs @@ -3,15 +3,14 @@ //! This module implements a table-driven UTF-8 parser which should //! theoretically contain the minimal number of branches (1). The only branch is //! on the `Action` returned from unpacking a transition. +#![cfg_attr(all(feature = "nightly", test), feature(test))] #![no_std] use core::char; -mod table; mod types; -use table::TRANSITIONS; -use types::{unpack, Action, State}; +use types::{Action, State}; /// Handles codepoint and invalid sequence events from the parser. pub trait Receiver { @@ -48,10 +47,7 @@ impl Parser { where R: Receiver, { - let cur = self.state as usize; - let change = TRANSITIONS[cur][byte as usize]; - let (state, action) = unsafe { unpack(change) }; - + let (state, action) = self.state.advance(byte); self.perform_action(receiver, byte, action); self.state = state; } @@ -93,3 +89,43 @@ impl Parser { } } } + +#[cfg(all(feature = "nightly", test))] +mod benches { + extern crate std; + extern crate test; + + use super::{Parser, Receiver}; + + use self::test::{black_box, Bencher}; + + static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt"); + + impl Receiver for () { + fn codepoint(&mut self, c: char) { + black_box(c); + } + + fn invalid_sequence(&mut self) {} + } + + #[bench] + fn parse_bench_utf8_demo(b: &mut Bencher) { + let mut parser = Parser::new(); + + b.iter(|| { + for byte in UTF8_DEMO { + parser.advance(&mut (), *byte); + } + }) + } + + #[bench] + fn std_string_parse_utf8(b: &mut Bencher) { + b.iter(|| { + for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() { + black_box(c); + } + }); + } +} |