From fa99afe35eb5d8cf01d875e12b53165bf1104a60 Mon Sep 17 00:00:00 2001 From: bfredl Date: Wed, 4 Sep 2024 12:09:42 +0200 Subject: fix(multibyte): handle backspace of wide clusters in replace mode Make utf_head_off more robust against invalid sequences and embedded NUL chars --- test/unit/mbyte_spec.lua | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'test/unit') diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua index 787a8862ae..62390c8794 100644 --- a/test/unit/mbyte_spec.lua +++ b/test/unit/mbyte_spec.lua @@ -4,7 +4,6 @@ local itp = t.gen_itp(it) local ffi = t.ffi local eq = t.eq local to_cstr = t.to_cstr -local ok = t.ok local lib = t.cimport( './src/nvim/mbyte.h', @@ -302,7 +301,10 @@ describe('mbyte', function() local mb_glyphs = {} while pos < len do local clen = lib.utfc_ptr2len(cstr + pos) - ok(clen > 0) -- otherwise we get stuck + if clen == 0 then + eq(0, string.byte(str, pos + 1)) -- only NUL bytes can have length zero + clen = 1 -- but skip it, otherwise we get stuck + end if clen > 1 then table.insert(mb_glyphs, string.sub(str, pos + 1, pos + clen)) end @@ -325,13 +327,18 @@ describe('mbyte', function() -- stylua doesn't like ZWJ chars.. 
-- stylua: ignore start check('hej och hå 🧑‍🌾!', { 'å', '🧑‍🌾' }) - -- emoji only (various kinds of combinations, use g8 to see them) + + -- emoji (various kinds of combinations, use g8 to see them) check("🏳️‍⚧️🧑‍🌾❤️😂🏴‍☠️", {"🏳️‍⚧️", "🧑‍🌾", "❤️", "😂", "🏴‍☠️"}) check('🏳️‍⚧️xy🧑‍🌾\r❤️😂å🏴‍☠️¤', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', 'å', '🏴‍☠️', '¤' }) + check('🏳️‍⚧️\000🧑‍🌾\000❤️\000😂\000å\000🏴‍☠️\000¤', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', 'å', '🏴‍☠️', '¤' }) + check('\195🏳️‍⚧️\198🧑‍🌾\165❤️\168\195😂\255🏴‍☠️\129¤\165', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', '🏴‍☠️', '¤' }) check('🇦🅱️ 🇦🇽 🇦🇨🇦 🇲🇽🇹🇱',{'🇦', '🅱️', '🇦🇽', '🇦🇨', '🇦', '🇲🇽', '🇹🇱'}) check('🏴󠁧󠁢󠁳󠁣󠁴󠁿🏴󠁧󠁢󠁷󠁬󠁳󠁿', {'🏴󠁧󠁢󠁳󠁣󠁴󠁿', '🏴󠁧󠁢󠁷󠁬󠁳󠁿'}) + check('å\165ü\195aëq\168β\000\169本\255', {'å', 'ü', 'ë', 'β', '本'}) + lib.p_arshape = true -- default check('سلام', { 'س', 'لا', 'م' }) lib.p_arshape = false -- cgit