From cfdf68a7acde16597fbd896674af68c42361102c Mon Sep 17 00:00:00 2001 From: bfredl Date: Thu, 8 Aug 2024 10:42:08 +0200 Subject: feat(mbyte): support extended grapheme clusters including more emoji Use the grapheme break algorithm from utf8proc to support grapheme clusters from recent unicode versions. Handle variant selector VS16 turning some codepoints into double-width emoji. This means we need to use ptr2cells rather than char2cells when possible. --- test/functional/api/vim_spec.lua | 22 ++++++ test/functional/ui/decorations_spec.lua | 21 ++++++ test/functional/ui/messages_spec.lua | 35 +++++++++ test/functional/ui/multibyte_spec.lua | 122 +++++++++++++++++++++++++++++--- 4 files changed, 191 insertions(+), 9 deletions(-) (limited to 'test/functional') diff --git a/test/functional/api/vim_spec.lua b/test/functional/api/vim_spec.lua index 4210b7ecf0..074d3ac0a3 100644 --- a/test/functional/api/vim_spec.lua +++ b/test/functional/api/vim_spec.lua @@ -1435,6 +1435,28 @@ describe('API', function() it('cannot handle NULs', function() eq(0, api.nvim_strwidth('\0abc')) end) + + it('can handle emoji with variant selectors and ZWJ', function() + local selector = '❤️' + eq(2, fn.strchars(selector)) + eq(1, fn.strcharlen(selector)) + eq(2, api.nvim_strwidth(selector)) + + local no_selector = '❤' + eq(1, fn.strchars(no_selector)) + eq(1, fn.strcharlen(no_selector)) + eq(1, api.nvim_strwidth(no_selector)) + + local selector_zwj_selector = '🏳️‍⚧️' + eq(5, fn.strchars(selector_zwj_selector)) + eq(1, fn.strcharlen(selector_zwj_selector)) + eq(2, api.nvim_strwidth(selector_zwj_selector)) + + local emoji_zwj_emoji = '🧑‍🌾' + eq(3, fn.strchars(emoji_zwj_emoji)) + eq(1, fn.strcharlen(emoji_zwj_emoji)) + eq(2, api.nvim_strwidth(emoji_zwj_emoji)) + end) end) describe('nvim_get_current_line, nvim_set_current_line', function() diff --git a/test/functional/ui/decorations_spec.lua b/test/functional/ui/decorations_spec.lua index 1709819575..61a5e1d6f7 100644 ---
a/test/functional/ui/decorations_spec.lua +++ b/test/functional/ui/decorations_spec.lua @@ -5620,6 +5620,27 @@ l5 ]] }) end) + + it('supports emoji as signs', function() + insert(example_test3) + feed 'gg' + api.nvim_buf_set_extmark(0, ns, 1, 0, {sign_text='🧑‍🌾'}) + -- VS16 can change width of character + api.nvim_buf_set_extmark(0, ns, 2, 0, {sign_text='❤️'}) + api.nvim_buf_set_extmark(0, ns, 3, 0, {sign_text='❤'}) + api.nvim_buf_set_extmark(0, ns, 4, 0, {sign_text='❤x'}) + screen:expect([[ + {7: }^l1 | + 🧑‍🌾l2 | + ❤️l3 | + ❤ l4 | + ❤xl5 | + {7: } | + {1:~ }|*3 + | + ]]) + eq("Invalid 'sign_text'", pcall_err(api.nvim_buf_set_extmark, 0, ns, 5, 0, {sign_text='❤️x'})) + end) end) describe('decorations: virt_text', function() diff --git a/test/functional/ui/messages_spec.lua b/test/functional/ui/messages_spec.lua index 07192800e5..036b5ceefc 100644 --- a/test/functional/ui/messages_spec.lua +++ b/test/functional/ui/messages_spec.lua @@ -1436,6 +1436,41 @@ vimComment xxx match /\s"[^\-:.%#=*].*$/ms=s+1,lc=1 excludenl contains=@vim } end) + it('supports nvim_echo messages with emoji', function() + -- stylua: ignore + async_meths.nvim_echo( + { { 'wow, 🏳️‍⚧️🧑‍🌾❤️😂🏴‍☠️\nvariant ❤️ one\nvariant ❤ two' } }, true, {} + ) + + screen:expect([[ + | + {1:~ }| + {3: }| + wow, 🏳️‍⚧️🧑‍🌾❤️😂🏴‍☠️ | + variant ❤️ one | + variant ❤ two | + {6:Press ENTER or type command to continue}^ | + ]]) + + feed '' + screen:expect([[ + ^ | + {1:~ }|*5 + | + ]]) + + feed ':messages' + screen:expect([[ + | + {1:~ }| + {3: }| + wow, 🏳️‍⚧️🧑‍🌾❤️😂🏴‍☠️ | + variant ❤️ one | + variant ❤ two | + {6:Press ENTER or type command to continue}^ | + ]]) + end) + it('prints lines in Ex mode correctly with a burst of carriage returns #19341', function() command('set number') api.nvim_buf_set_lines(0, 0, 0, true, { 'aaa', 'bbb', 'ccc' }) diff --git a/test/functional/ui/multibyte_spec.lua
b/test/functional/ui/multibyte_spec.lua index dc25a09d0d..f16f750ea1 100644 --- a/test/functional/ui/multibyte_spec.lua +++ b/test/functional/ui/multibyte_spec.lua @@ -296,6 +296,86 @@ describe('multibyte rendering', function() ]], } end) + + it('supports emoji with variant selectors and ZWJ', function() + command('set ruler') + insert('🏳️‍⚧️') + screen:expect([[ + ^🏳️‍⚧️ | + {1:~ }|*4 + 1,1 All | + ]]) + + feed('a word') + screen:expect([[ + 🏳️‍⚧️ wor^d | + {1:~ }|*4 + 1,21-7 All | + ]]) + + feed('0') + screen:expect([[ + ^🏳️‍⚧️ word | + {1:~ }|*4 + 1,1 All | + ]]) + + feed('l') + screen:expect([[ + 🏳️‍⚧️^ word | + {1:~ }|*4 + 1,17-3 All | + ]]) + + feed('h') + screen:expect([[ + ^🏳️‍⚧️ word | + {1:~ }|*4 + 1,1 All | + ]]) + + feed('o❤️ variant selected') + screen:expect([[ + 🏳️‍⚧️ word | + ❤️ variant selecte^d | + {1:~ }|*3 + 2,23-19 All | + ]]) + + feed('0') + screen:expect([[ + 🏳️‍⚧️ word | + ^❤️ variant selected | + {1:~ }|*3 + 2,1 All | + ]]) + + feed('l') + screen:expect([[ + 🏳️‍⚧️ word | + ❤️^ variant selected | + {1:~ }|*3 + 2,7-3 All | + ]]) + + feed('h') + screen:expect([[ + 🏳️‍⚧️ word | + ^❤️ variant selected | + {1:~ }|*3 + 2,1 All | + ]]) + + -- without selector: single width (note column 18 and not 19) + feed('o❤ variant selected') + screen:expect([[ + 🏳️‍⚧️ word | + ❤️ variant selected | + ❤ variant selecte^d | + {1:~ }|*2 + 3,20-18 All | + ]]) + end) end) describe('multibyte rendering: statusline', function() @@ -348,11 +428,12 @@ describe('multibyte rendering: statusline', function() it('non-printable followed by MAX_MCO unicode combination points', function() command('set statusline≠⃯ᷰ⃐⃧⃝') -- U+9F + U+1DF0 + U+20EF + U+0338 + U+20D0 + U+20E7 + U+20DD + -- TODO: not ideal, better with plain ">" and then space+combining screen:expect([[ - ^ | - {1:~ }| - {3:<9f><1df0><20ef><0338><20d0><20e7><20dd>}| - | + ^ | +
{1:~ }| + {3:<9f≯⃯ᷰ⃐⃧⃝ }| + | ]]) end) @@ -368,9 +449,20 @@ describe('multibyte rendering: statusline', function() } end) - it('unprintable chars in filename with default stl', function() + it('emoji with ZWJ in filename with default stl', function() command('file 🧑‍💻') - -- TODO: this is wrong but avoids a crash + screen:expect { + grid = [[ + ^ | + {1:~ }| + {3:🧑‍💻 }| + | + ]], + } + end) + + it('unprintable chars in filename with default stl', function() + command('file 🧑​💻') screen:expect { grid = [[ ^ | @@ -381,15 +473,27 @@ describe('multibyte rendering: statusline', function() } end) - it('unprintable chars in filename with custom stl', function() + it('emoji with ZWJ in filename with custom stl', function() command('set statusline=xx%#ErrorMsg#%f%##yy') command('file 🧑‍💻') - -- TODO: this is also wrong but also avoids a crash screen:expect { grid = [[ ^ | {1:~ }| - {3:xx}{9:🧑<200d>💻}{3:yy }| + {3:xx}{9:🧑‍💻}{3:yy }| + | + ]], + } + end) + + it('unprintable chars in filename with custom stl', function() + command('set statusline=xx%#ErrorMsg#%f%##yy') + command('file 🧑​💻') + screen:expect { + grid = [[ + ^ | + {1:~ }| + {3:xx}{9:🧑<200b>💻}{3:yy }| | ]], } -- cgit