aboutsummaryrefslogtreecommitdiff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r--src/lib.rs52
1 files changed, 32 insertions, 20 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 3c2f863..1f229a3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -460,6 +460,22 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
c.len_utf8() - old_bytes
},
Err(err) => {
+ let valid_bytes = err.valid_up_to();
+ // If we have any valid bytes, that means we partially copied another
+ // utf8 character into `partial_utf8`. Since we only care about the
+ // first character, we just ignore the rest.
+ if valid_bytes > 0 {
+ let c = unsafe {
+ let parsed = str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
+ parsed.chars().next().unwrap_unchecked()
+ };
+
+ performer.print(c);
+
+ self.partial_utf8_len = 0;
+ return valid_bytes - old_bytes;
+ }
+
match err.error_len() {
// If the partial character was also invalid, emit the replacement
// character.
@@ -469,26 +485,8 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
self.partial_utf8_len = 0;
invalid_len - old_bytes
},
- None => {
- // If we have any valid bytes, that means we partially copied another
- // utf8 character into `partial_utf8`. Since we only care about the
- // first character, we just ignore the rest.
- let valid_bytes = err.valid_up_to();
- if valid_bytes > 0 {
- let c = unsafe {
- let parsed =
- str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
- parsed.chars().next().unwrap_unchecked()
- };
- performer.print(c);
-
- self.partial_utf8_len = 0;
- valid_bytes - old_bytes
- } else {
- // If the character still isn't complete, wait for more data.
- bytes.len()
- }
- },
+ // If the character still isn't complete, wait for more data.
+ None => to_copy,
}
},
}
@@ -1232,6 +1230,20 @@ mod tests {
}
#[test]
+ fn partial_invalid_utf8_split() {
+ const INPUT: &[u8] = b"\xE4\xBF\x99\xB5";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, &INPUT[..2]);
+ parser.advance(&mut dispatcher, &INPUT[2..]);
+
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('俙'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
+ }
+
+ #[test]
fn partial_utf8_into_esc() {
const INPUT: &[u8] = b"\xD8\x1b012";