diff options
Diffstat (limited to 'utf8parse/src/lib.rs')
-rw-r--r-- | utf8parse/src/lib.rs | 50 |
1 files changed, 43 insertions, 7 deletions
diff --git a/utf8parse/src/lib.rs b/utf8parse/src/lib.rs index 8c866f5..c092647 100644 --- a/utf8parse/src/lib.rs +++ b/utf8parse/src/lib.rs @@ -3,15 +3,14 @@ //! This module implements a table-driven UTF-8 parser which should //! theoretically contain the minimal number of branches (1). The only branch is //! on the `Action` returned from unpacking a transition. +#![cfg_attr(all(feature = "nightly", test), feature(test))] #![no_std] use core::char; -mod table; mod types; -use table::TRANSITIONS; -use types::{unpack, Action, State}; +use types::{Action, State}; /// Handles codepoint and invalid sequence events from the parser. pub trait Receiver { @@ -48,10 +47,7 @@ impl Parser { where R: Receiver, { - let cur = self.state as usize; - let change = TRANSITIONS[cur][byte as usize]; - let (state, action) = unsafe { unpack(change) }; - + let (state, action) = self.state.advance(byte); self.perform_action(receiver, byte, action); self.state = state; } @@ -93,3 +89,43 @@ impl Parser { } } } + +#[cfg(all(feature = "nightly", test))] +mod benches { + extern crate std; + extern crate test; + + use super::{Parser, Receiver}; + + use self::test::{black_box, Bencher}; + + static UTF8_DEMO: &[u8] = include_bytes!("../tests/UTF-8-demo.txt"); + + impl Receiver for () { + fn codepoint(&mut self, c: char) { + black_box(c); + } + + fn invalid_sequence(&mut self) {} + } + + #[bench] + fn parse_bench_utf8_demo(b: &mut Bencher) { + let mut parser = Parser::new(); + + b.iter(|| { + for byte in UTF8_DEMO { + parser.advance(&mut (), *byte); + } + }) + } + + #[bench] + fn std_string_parse_utf8(b: &mut Bencher) { + b.iter(|| { + for c in std::str::from_utf8(UTF8_DEMO).unwrap().chars() { + black_box(c); + } + }); + } +} |