diff options
author | Kirill Chibisov <contact@kchibisov.com> | 2025-01-12 11:59:01 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-12 11:59:01 +0300 |
commit | c18ef2206af630f729835da237381628650005aa (patch) | |
tree | 2ff73566723077075bfabe5d206a17fbb31c4537 | |
parent | ff21c30b7a376e93b9a4df4812a05567f27e73fa (diff) | |
download | r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.gz r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.bz2 r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.zip |
Fix crash when valid char was split
If a valid character was split across reads, buffered as partial UTF-8, and
then followed by an invalid byte, we should print it and advance instead of
trying to discard it entirely.
-rw-r--r-- | src/lib.rs | 52 |
1 file changed, 32 insertions, 20 deletions
@@ -460,6 +460,22 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> { c.len_utf8() - old_bytes }, Err(err) => { + let valid_bytes = err.valid_up_to(); + // If we have any valid bytes, that means we partially copied another + // utf8 character into `partial_utf8`. Since we only care about the + // first character, we just ignore the rest. + if valid_bytes > 0 { + let c = unsafe { + let parsed = str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]); + parsed.chars().next().unwrap_unchecked() + }; + + performer.print(c); + + self.partial_utf8_len = 0; + return valid_bytes - old_bytes; + } + match err.error_len() { // If the partial character was also invalid, emit the replacement // character. @@ -469,26 +485,8 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> { self.partial_utf8_len = 0; invalid_len - old_bytes }, - None => { - // If we have any valid bytes, that means we partially copied another - // utf8 character into `partial_utf8`. Since we only care about the - // first character, we just ignore the rest. - let valid_bytes = err.valid_up_to(); - if valid_bytes > 0 { - let c = unsafe { - let parsed = - str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]); - parsed.chars().next().unwrap_unchecked() - }; - performer.print(c); - - self.partial_utf8_len = 0; - valid_bytes - old_bytes - } else { - // If the character still isn't complete, wait for more data. - bytes.len() - } - }, + // If the character still isn't complete, wait for more data. 
+ None => to_copy, } }, } @@ -1232,6 +1230,20 @@ mod tests { } #[test] + fn partial_invalid_utf8_split() { + const INPUT: &[u8] = b"\xE4\xBF\x99\xB5"; + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + + parser.advance(&mut dispatcher, &INPUT[..2]); + parser.advance(&mut dispatcher, &INPUT[2..]); + + assert_eq!(dispatcher.dispatched[0], Sequence::Print('俙')); + assert_eq!(dispatcher.dispatched[1], Sequence::Print('�')); + } + + #[test] fn partial_utf8_into_esc() { const INPUT: &[u8] = b"\xD8\x1b012"; |