aboutsummaryrefslogtreecommitdiff
path: root/src/io.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/io.rs')
-rw-r--r--src/io.rs194
1 files changed, 0 insertions, 194 deletions
diff --git a/src/io.rs b/src/io.rs
deleted file mode 100644
index 5801efaf..00000000
--- a/src/io.rs
+++ /dev/null
@@ -1,194 +0,0 @@
-// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-//! Unmerged utf8 chars iterator vendored from std::io
-//!
-use std::io::{BufRead, ErrorKind, Error};
-use std::fmt;
-use std::error as std_error;
-use std::result;
-use std::char;
-
-static UTF8_CHAR_WIDTH: [u8; 256] = [
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
-0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
-3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
-4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
-];
-
-/// Given a first byte, determine how many bytes are in this UTF-8 character
-#[inline]
-pub fn utf8_char_width(b: u8) -> usize {
- return UTF8_CHAR_WIDTH[b as usize] as usize;
-}
-
-/// An iterator over the `char`s of a reader.
-///
-/// This struct is generally created by calling [`utf8_chars()`][utf8_chars] on a reader.
-/// Please see the documentation of `utf8_chars()` for more details.
-///
-/// [utf8_chars]: trait.BufRead.html#method.utf8_chars
-pub struct Utf8Chars<R> {
- inner: R,
-}
-
-impl<R> Utf8Chars<R> {
- pub fn new(inner: R) -> Utf8Chars<R> {
- Utf8Chars { inner: inner }
- }
-}
-
-/// An enumeration of possible errors that can be generated from the `Utf8Chars`
-/// adapter.
-#[derive(Debug)]
-pub enum Utf8CharsError {
- /// Variant representing that the underlying stream was read successfully
- /// but contains a byte sequence ill-formed in UTF-8.
- InvalidUtf8,
-
- /// Variant representing that the underlying stream contains the start
- /// of a byte sequence well-formed in UTF-8, but ends prematurely.
- ///
- /// Contains number of unused bytes
- IncompleteUtf8(usize),
-
- /// Variant representing that an I/O error occurred.
- Io(Error),
-}
-
-impl<R: BufRead> Iterator for Utf8Chars<R> {
- type Item = result::Result<char, Utf8CharsError>;
-
- // allow(unused_assignments) because consumed += 1 is not recognized as being used
- #[allow(unused_assignments)]
- fn next(&mut self) -> Option<result::Result<char, Utf8CharsError>> {
- macro_rules! read_byte {
- (EOF => $on_eof: expr) => {
- {
- let byte;
- loop {
- match self.inner.fill_buf() {
- Ok(buffer) => {
- if let Some(&b) = buffer.first() {
- byte = b;
- break
- } else {
- $on_eof
- }
- }
- Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
- Err(e) => return Some(Err(Utf8CharsError::Io(e))),
- }
- }
- byte
- }
- }
- }
-
- let first = read_byte!(EOF => return None);
- self.inner.consume(1);
-
- let mut consumed = 1;
-
- macro_rules! continuation_byte {
- ($range: pat) => {
- {
- match read_byte!(EOF => return Some(Err(Utf8CharsError::IncompleteUtf8(consumed)))) {
- byte @ $range => {
- self.inner.consume(1);
- consumed += 1;
- (byte & 0b0011_1111) as u32
- }
- _ => return Some(Err(Utf8CharsError::InvalidUtf8))
- }
- }
- }
- }
-
- // Ranges can be checked against https://tools.ietf.org/html/rfc3629#section-4
- let code_point = match utf8_char_width(first) {
- 1 => return Some(Ok(first as char)),
- 2 => {
- let second = continuation_byte!(0x80...0xBF);
- ((first & 0b0001_1111) as u32) << 6 | second
- }
- 3 => {
- let second = match first {
- 0xE0 => continuation_byte!(0xA0...0xBF),
- 0xE1...0xEC => continuation_byte!(0x80...0xBF),
- 0xED => continuation_byte!(0x80...0x9F),
- 0xEE...0xEF => continuation_byte!(0x80...0xBF),
- _ => unreachable!(),
- };
- let third = continuation_byte!(0x80...0xBF);
- ((first & 0b0000_1111) as u32) << 12 | second << 6 | third
- }
- 4 => {
- let second = match first {
- 0xF0 => continuation_byte!(0x90...0xBF),
- 0xF0...0xF3 => continuation_byte!(0x80...0xBF),
- 0xF4 => continuation_byte!(0x80...0x8F),
- _ => unreachable!(),
- };
- let third = continuation_byte!(0x80...0xBF);
- let fourth = continuation_byte!(0x80...0xBF);
- ((first & 0b0000_0111) as u32) << 18 | second << 12 | third << 6 | fourth
- }
- _ => return Some(Err(Utf8CharsError::InvalidUtf8))
- };
- unsafe {
- Some(Ok(char::from_u32_unchecked(code_point)))
- }
- }
-}
-
-impl std_error::Error for Utf8CharsError {
- fn description(&self) -> &str {
- match *self {
- Utf8CharsError::InvalidUtf8 => "invalid UTF-8 byte sequence",
- Utf8CharsError::IncompleteUtf8(_) => {
- "stream ended in the middle of an UTF-8 byte sequence"
- }
- Utf8CharsError::Io(ref e) => std_error::Error::description(e),
- }
- }
- fn cause(&self) -> Option<&std_error::Error> {
- match *self {
- Utf8CharsError::InvalidUtf8 | Utf8CharsError::IncompleteUtf8(_) => None,
- Utf8CharsError::Io(ref e) => e.cause(),
- }
- }
-}
-
-impl fmt::Display for Utf8CharsError {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- Utf8CharsError::InvalidUtf8 => {
- "invalid UTF-8 byte sequence".fmt(f)
- }
- Utf8CharsError::IncompleteUtf8(_) => {
- "stream ended in the middle of an UTF-8 byte sequence".fmt(f)
- }
- Utf8CharsError::Io(ref e) => e.fmt(f),
- }
- }
-}