From fa99afe35eb5d8cf01d875e12b53165bf1104a60 Mon Sep 17 00:00:00 2001 From: bfredl Date: Wed, 4 Sep 2024 12:09:42 +0200 Subject: fix(multibyte): handle backspace of wide clusters in replace mode Make utf_head_off more robust against invalid sequences and embedded NUL chars --- test/functional/editor/mode_insert_spec.lua | 93 +++++++++++++++++++++++++++++ test/unit/mbyte_spec.lua | 13 +++- 2 files changed, 103 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/functional/editor/mode_insert_spec.lua b/test/functional/editor/mode_insert_spec.lua index fc1e6c4ee4..87d5c46134 100644 --- a/test/functional/editor/mode_insert_spec.lua +++ b/test/functional/editor/mode_insert_spec.lua @@ -351,4 +351,97 @@ describe('insert-mode', function() eq(2, api.nvim_win_get_cursor(0)[1]) end) end) + + it('backspace after replacing multibyte chars', function() + local screen = Screen.new(30, 3) + screen:attach() + api.nvim_buf_set_lines(0, 0, -1, true, { 'test ȧ̟̜̝̅̚m̆̉̐̐̇̈ å' }) + feed('^Rabcdefghi') + screen:expect([[ + abcdefghi^ | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + screen:expect([[ + abcdefgh^å | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + screen:expect([[ + abcdefg^ å | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + screen:expect([[ + abcdef^m̆̉̐̐̇̈ å | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + screen:expect([[ + abcde^ȧ̟̜̝̅̚m̆̉̐̐̇̈ å | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + screen:expect([[ + abcd^ ȧ̟̜̝̅̚m̆̉̐̐̇̈ å | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + + api.nvim_buf_set_lines(0, 0, -1, true, { 'wow 🧑‍🌾🏳️‍⚧️x' }) + feed('^Rabcd') + + screen:expect([[ + abcd^🧑‍🌾🏳️‍⚧️x | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('e') + screen:expect([[ + abcde^🏳️‍⚧️x | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('f') + screen:expect([[ + abcdef^x | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + screen:expect([[ + abcde^🏳️‍⚧️x | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + 
screen:expect([[ + abcd^🧑‍🌾🏳️‍⚧️x | + {1:~ }| + {5:-- REPLACE --} | + ]]) + + feed('') + screen:expect([[ + abc^ 🧑‍🌾🏳️‍⚧️x | + {1:~ }| + {5:-- REPLACE --} | + ]]) + end) end) diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua index 787a8862ae..62390c8794 100644 --- a/test/unit/mbyte_spec.lua +++ b/test/unit/mbyte_spec.lua @@ -4,7 +4,6 @@ local itp = t.gen_itp(it) local ffi = t.ffi local eq = t.eq local to_cstr = t.to_cstr -local ok = t.ok local lib = t.cimport( './src/nvim/mbyte.h', @@ -302,7 +301,10 @@ describe('mbyte', function() local mb_glyphs = {} while pos < len do local clen = lib.utfc_ptr2len(cstr + pos) - ok(clen > 0) -- otherwise we get stuck + if clen == 0 then + eq(0, string.byte(str, pos + 1)) -- only NUL bytes can have length zero + clen = 1 -- but skip it, otherwise we get stuck + end if clen > 1 then table.insert(mb_glyphs, string.sub(str, pos + 1, pos + clen)) end @@ -325,13 +327,18 @@ describe('mbyte', function() -- stylua doesn't like ZWJ chars.. -- stylua: ignore start check('hej och hå 🧑‍🌾!', { 'å', '🧑‍🌾' }) - -- emoji only (various kinds of combinations, use g8 to see them) + + -- emoji (various kinds of combinations, use g8 to see them) check("🏳️‍⚧️🧑‍🌾❤️😂🏴‍☠️", {"🏳️‍⚧️", "🧑‍🌾", "❤️", "😂", "🏴‍☠️"}) check('🏳️‍⚧️xy🧑‍🌾\r❤️😂å🏴‍☠️€', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', 'å', '🏴‍☠️', '€' }) + check('🏳️‍⚧️\000🧑‍🌾\000❤️\000😂\000å\000🏴‍☠️\000€', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', 'å', '🏴‍☠️', '€' }) + check('\195🏳️‍⚧️\198🧑‍🌾\165❤️\168\195😂\255🏴‍☠️\129€\165', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', '🏴‍☠️', '€' }) check('🇦🅱️ 🇦🇽 🇦🇨🇦 🇲🇽🇹🇱',{'🇦', '🅱️', '🇦🇽', '🇦🇨', '🇦', '🇲🇽', '🇹🇱'}) check('🏴󠁧󠁢󠁳󠁣󠁴󠁿🏴󠁧󠁢󠁷󠁬󠁳󠁿', {'🏴󠁧󠁢󠁳󠁣󠁴󠁿', '🏴󠁧󠁢󠁷󠁬󠁳󠁿'}) + check('å\165ü\195aëq\168β\000\169本\255', {'å', 'ü', 'ë', 'β', '本'}) + lib.p_arshape = true -- default check('سلام', { 'س', 'لا', 'م' }) lib.p_arshape = false -- cgit