aboutsummaryrefslogtreecommitdiff
path: root/utf8parse/src/lib.rs
diff options
context:
space:
mode:
authorChristian Duerr <contact@christianduerr.com>2019-12-10 19:16:01 +0100
committerGitHub <noreply@github.com>2019-12-10 19:16:01 +0100
commit9d37aa7a71801f3569d2a2a55dc82c37935f205a (patch)
treefd20b01398034934957c0d311209103482836771 /utf8parse/src/lib.rs
parentea940fcb74abce67b927788e4f9f64fc63073d37 (diff)
downloadr-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.gz
r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.bz2
r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.zip
Remove table generation
To make the library easier to maintain, this completely removes the `codegen` project, which relied on outdated libraries to parse the DSLs used to build the utf8 and vte state tables. The utf8 table could be removed entirely in favor of a `match` statement, which also led to a performance improvement in the utf8 parser. The vte table did not benefit from `match` statements at all and instead performed significantly worse with them. To replace the old code generation for vte, the `generate_state_changes` crate has been created, which uses the language's proc_macro feature to create a `const fn` that generates the table at compile time.
Diffstat (limited to 'utf8parse/src/lib.rs')
-rw-r--r--utf8parse/src/lib.rs50
1 files changed, 43 insertions, 7 deletions
diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs
index 8c866f5..c092647 100644
--- a/utf8parse/src/lib.rs
+++ b/utf8parse/src/lib.rs
@@ -3,15 +3,14 @@
//! This module implements a table-driven UTF-8 parser which should
//! theoretically contain the minimal number of branches (1). The only branch is
//! on the `Action` returned from unpacking a transition.
+#![cfg_attr(all(feature = "nightly", test), feature(test))]
#![no_std]
use core::char;
-mod table;
mod types;
-use table::TRANSITIONS;
-use types::{unpack, Action, State};
+use types::{Action, State};
/// Handles codepoint and invalid sequence events from the parser.
pub trait Receiver {
@@ -48,10 +47,7 @@ impl Parser {
where
R: Receiver,
{
- let cur = self.state as usize;
- let change = TRANSITIONS[cur][byte as usize];
- let (state, action) = unsafe { unpack(change) };
-
+ let (state, action) = self.state.advance(byte);
self.perform_action(receiver, byte, action);
self.state = state;
}
@@ -93,3 +89,43 @@ impl Parser {
}
}
}
+
+#[cfg(all(feature = "nightly", test))]
+mod benches {
+ extern crate std;
+ extern crate test;
+
+ use super::{Parser, Receiver};
+
+ use self::test::{black_box, Bencher};
+
+ static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt");
+
+ impl Receiver for () {
+ fn codepoint(&mut self, c: char) {
+ black_box(c);
+ }
+
+ fn invalid_sequence(&mut self) {}
+ }
+
+ #[bench]
+ fn parse_bench_utf8_demo(b: &mut Bencher) {
+ let mut parser = Parser::new();
+
+ b.iter(|| {
+ for byte in UTF8_DEMO {
+ parser.advance(&mut (), *byte);
+ }
+ })
+ }
+
+ #[bench]
+ fn std_string_parse_utf8(b: &mut Bencher) {
+ b.iter(|| {
+ for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() {
+ black_box(c);
+ }
+ });
+ }
+}