aboutsummaryrefslogtreecommitdiff
path: root/utf8parse/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'utf8parse/src/lib.rs')
-rw-r--r--utf8parse/src/lib.rs50
1 files changed, 43 insertions, 7 deletions
diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs
index 8c866f5..c092647 100644
--- a/utf8parse/src/lib.rs
+++ b/utf8parse/src/lib.rs
@@ -3,15 +3,14 @@
//! This module implements a table-driven UTF-8 parser which should
//! theoretically contain the minimal number of branches (1). The only branch is
//! on the `Action` returned from unpacking a transition.
+#![cfg_attr(all(feature = "nightly", test), feature(test))]
#![no_std]
use core::char;
-mod table;
mod types;
-use table::TRANSITIONS;
-use types::{unpack, Action, State};
+use types::{Action, State};
/// Handles codepoint and invalid sequence events from the parser.
pub trait Receiver {
@@ -48,10 +47,7 @@ impl Parser {
where
R: Receiver,
{
- let cur = self.state as usize;
- let change = TRANSITIONS[cur][byte as usize];
- let (state, action) = unsafe { unpack(change) };
-
+ let (state, action) = self.state.advance(byte);
self.perform_action(receiver, byte, action);
self.state = state;
}
@@ -93,3 +89,43 @@ impl Parser {
}
}
}
+
+#[cfg(all(feature = "nightly", test))]
+mod benches {
+ extern crate std;
+ extern crate test;
+
+ use super::{Parser, Receiver};
+
+ use self::test::{black_box, Bencher};
+
+ static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt");
+
+ impl Receiver for () {
+ fn codepoint(&mut self, c: char) {
+ black_box(c);
+ }
+
+ fn invalid_sequence(&mut self) {}
+ }
+
+ #[bench]
+ fn parse_bench_utf8_demo(b: &mut Bencher) {
+ let mut parser = Parser::new();
+
+ b.iter(|| {
+ for byte in UTF8_DEMO {
+ parser.advance(&mut (), *byte);
+ }
+ })
+ }
+
+ #[bench]
+ fn std_string_parse_utf8(b: &mut Bencher) {
+ b.iter(|| {
+ for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() {
+ black_box(c);
+ }
+ });
+ }
+}