aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kirill Chibisov <contact@kchibisov.com> 2025-01-12 11:59:01 +0300
committer GitHub <noreply@github.com> 2025-01-12 11:59:01 +0300
commit c18ef2206af630f729835da237381628650005aa (patch)
tree 2ff73566723077075bfabe5d206a17fbb31c4537
parent ff21c30b7a376e93b9a4df4812a05567f27e73fa (diff)
download r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.gz
r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.bz2
r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.zip
Fix crash when valid char was split
If a valid character was split across reads of partial UTF-8 and got terminated by an invalid byte, we should print it and advance, instead of trying to discard it entirely.
-rw-r--r-- src/lib.rs 52
1 files changed, 32 insertions, 20 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 3c2f863..1f229a3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -460,6 +460,22 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
c.len_utf8() - old_bytes
},
Err(err) => {
+ let valid_bytes = err.valid_up_to();
+ // If we have any valid bytes, that means we partially copied another
+ // utf8 character into `partial_utf8`. Since we only care about the
+ // first character, we just ignore the rest.
+ if valid_bytes > 0 {
+ let c = unsafe {
+ let parsed = str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
+ parsed.chars().next().unwrap_unchecked()
+ };
+
+ performer.print(c);
+
+ self.partial_utf8_len = 0;
+ return valid_bytes - old_bytes;
+ }
+
match err.error_len() {
// If the partial character was also invalid, emit the replacement
// character.
@@ -469,26 +485,8 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
self.partial_utf8_len = 0;
invalid_len - old_bytes
},
- None => {
- // If we have any valid bytes, that means we partially copied another
- // utf8 character into `partial_utf8`. Since we only care about the
- // first character, we just ignore the rest.
- let valid_bytes = err.valid_up_to();
- if valid_bytes > 0 {
- let c = unsafe {
- let parsed =
- str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
- parsed.chars().next().unwrap_unchecked()
- };
- performer.print(c);
-
- self.partial_utf8_len = 0;
- valid_bytes - old_bytes
- } else {
- // If the character still isn't complete, wait for more data.
- bytes.len()
- }
- },
+ // If the character still isn't complete, wait for more data.
+ None => to_copy,
}
},
}
@@ -1232,6 +1230,20 @@ mod tests {
}
#[test]
+ fn partial_invalid_utf8_split() {
+ const INPUT: &[u8] = b"\xE4\xBF\x99\xB5";
+
+ let mut dispatcher = Dispatcher::default();
+ let mut parser = Parser::new();
+
+ parser.advance(&mut dispatcher, &INPUT[..2]);
+ parser.advance(&mut dispatcher, &INPUT[2..]);
+
+ assert_eq!(dispatcher.dispatched[0], Sequence::Print('俙'));
+ assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
+ }
+
+ #[test]
fn partial_utf8_into_esc() {
const INPUT: &[u8] = b"\xD8\x1b012";