Fix crash when valid char was split

If a valid character was split across reads of partial UTF-8 and was terminated by an invalid byte, we should print it and advance instead of trying to discard it entirely.
author: Kirill Chibisov <contact@kchibisov.com> 2025-01-12 11:59:01 +0300
committer: GitHub <noreply@github.com> 2025-01-12 11:59:01 +0300
commit: c18ef2206af630f729835da237381628650005aa (patch)
tree: 2ff73566723077075bfabe5d206a17fbb31c4537
parent: ff21c30b7a376e93b9a4df4812a05567f27e73fa (diff)
download: r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.gz
r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.bz2
r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.zip
1 files changed, 32 insertions, 20 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 3c2f863..1f229a3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -460,6 +460,22 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
                 c.len_utf8() - old_bytes
             },
             Err(err) => {
+                let valid_bytes = err.valid_up_to();
+                // If we have any valid bytes, that means we partially copied another
+                // utf8 character into `partial_utf8`. Since we only care about the
+                // first character, we just ignore the rest.
+                if valid_bytes > 0 {
+                    let c = unsafe {
+                        let parsed = str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
+                        parsed.chars().next().unwrap_unchecked()
+                    };
+
+                    performer.print(c);
+
+                    self.partial_utf8_len = 0;
+                    return valid_bytes - old_bytes;
+                }
+
                 match err.error_len() {
                     // If the partial character was also invalid, emit the replacement
                     // character.
@@ -469,26 +485,8 @@ impl<const OSC_RAW_BUF_SIZE: usize> Parser<OSC_RAW_BUF_SIZE> {
                         self.partial_utf8_len = 0;
                         invalid_len - old_bytes
                     },
-                    None => {
-                        // If we have any valid bytes, that means we partially copied another
-                        // utf8 character into `partial_utf8`. Since we only care about the
-                        // first character, we just ignore the rest.
-                        let valid_bytes = err.valid_up_to();
-                        if valid_bytes > 0 {
-                            let c = unsafe {
-                                let parsed =
-                                    str::from_utf8_unchecked(&self.partial_utf8[..valid_bytes]);
-                                parsed.chars().next().unwrap_unchecked()
-                            };
-                            performer.print(c);
-
-                            self.partial_utf8_len = 0;
-                            valid_bytes - old_bytes
-                        } else {
-                            // If the character still isn't complete, wait for more data.
-                            bytes.len()
-                        }
-                    },
+                    // If the character still isn't complete, wait for more data.
+                    None => to_copy,
                 }
             },
         }
@@ -1232,6 +1230,20 @@ mod tests {
     }
 
     #[test]
+    fn partial_invalid_utf8_split() {
+        const INPUT: &[u8] = b"\xE4\xBF\x99\xB5";
+
+        let mut dispatcher = Dispatcher::default();
+        let mut parser = Parser::new();
+
+        parser.advance(&mut dispatcher, &INPUT[..2]);
+        parser.advance(&mut dispatcher, &INPUT[2..]);
+
+        assert_eq!(dispatcher.dispatched[0], Sequence::Print('俙'));
+        assert_eq!(dispatcher.dispatched[1], Sequence::Print('�'));
+    }
+
+    #[test]
     fn partial_utf8_into_esc() {
         const INPUT: &[u8] = b"\xD8\x1b012";
author	Kirill Chibisov <contact@kchibisov.com>	2025-01-12 11:59:01 +0300
committer	GitHub <noreply@github.com>	2025-01-12 11:59:01 +0300
commit	c18ef2206af630f729835da237381628650005aa (patch)
tree	2ff73566723077075bfabe5d206a17fbb31c4537
parent	ff21c30b7a376e93b9a4df4812a05567f27e73fa (diff)
download	r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.gz r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.tar.bz2 r-alacritty-vte-c18ef2206af630f729835da237381628650005aa.zip