Remove table generation

This completely removes the `codegen` project, which relied on outdated libraries to parse DSLs to build the utf8 and vte state tables, to make the library easier to maintain. The utf8 table could be completely removed in favor of a `match` statement, which also led to a performance improvement in the utf8 parser. The vte table did not benefit from `match` statements at all and instead had significantly worse performance with them. To replace the old code generation for vte, the `generate_state_changes` crate has been created instead, which uses the language's proc_macro feature to create a `const fn` which will generate the table at compile time.
author: Christian Duerr <contact@christianduerr.com> 2019-12-10 19:16:01 +0100
committer: GitHub <noreply@github.com> 2019-12-10 19:16:01 +0100
commit: 9d37aa7a71801f3569d2a2a55dc82c37935f205a (patch)
tree: fd20b01398034934957c0d311209103482836771 /utf8parse/src/lib.rs
parent: ea940fcb74abce67b927788e4f9f64fc63073d37 (diff)
download: r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.gz
r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.bz2
r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.zip
1 files changed, 43 insertions, 7 deletions
diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs
index 8c866f5..c092647 100644
--- a/utf8parse/src/lib.rs
+++ b/utf8parse/src/lib.rs
@@ -3,15 +3,14 @@
 //! This module implements a table-driven UTF-8 parser which should
 //! theoretically contain the minimal number of branches (1). The only branch is
 //! on the `Action` returned from unpacking a transition.
+#![cfg_attr(all(feature = "nightly", test), feature(test))]
 #![no_std]
 
 use core::char;
 
-mod table;
 mod types;
 
-use table::TRANSITIONS;
-use types::{unpack, Action, State};
+use types::{Action, State};
 
 /// Handles codepoint and invalid sequence events from the parser.
 pub trait Receiver {
@@ -48,10 +47,7 @@ impl Parser {
     where
         R: Receiver,
     {
-        let cur = self.state as usize;
-        let change = TRANSITIONS[cur][byte as usize];
-        let (state, action) = unsafe { unpack(change) };
-
+        let (state, action) = self.state.advance(byte);
         self.perform_action(receiver, byte, action);
         self.state = state;
     }
@@ -93,3 +89,43 @@ impl Parser {
         }
     }
 }
+
+#[cfg(all(feature = "nightly", test))]
+mod benches {
+    extern crate std;
+    extern crate test;
+
+    use super::{Parser, Receiver};
+
+    use self::test::{black_box, Bencher};
+
+    static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt");
+
+    impl Receiver for () {
+        fn codepoint(&mut self, c: char) {
+            black_box(c);
+        }
+
+        fn invalid_sequence(&mut self) {}
+    }
+
+    #[bench]
+    fn parse_bench_utf8_demo(b: &mut Bencher) {
+        let mut parser = Parser::new();
+
+        b.iter(|| {
+            for byte in UTF8_DEMO {
+                parser.advance(&mut (), *byte);
+            }
+        })
+    }
+
+    #[bench]
+    fn std_string_parse_utf8(b: &mut Bencher) {
+        b.iter(|| {
+            for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() {
+                black_box(c);
+            }
+        });
+    }
+}
author	Christian Duerr <contact@christianduerr.com>	2019-12-10 19:16:01 +0100
committer	GitHub <noreply@github.com>	2019-12-10 19:16:01 +0100
commit	9d37aa7a71801f3569d2a2a55dc82c37935f205a (patch)
tree	fd20b01398034934957c0d311209103482836771 /utf8parse/src/lib.rs
parent	ea940fcb74abce67b927788e4f9f64fc63073d37 (diff)
download	r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.gz r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.tar.bz2 r-alacritty-vte-9d37aa7a71801f3569d2a2a55dc82c37935f205a.zip