From 39f8aaeb815c2e31cffec12ef36ad4f25df91602 Mon Sep 17 00:00:00 2001 From: Björn Linse Date: Fri, 24 Jan 2020 09:48:58 +0100 Subject: fix(status): handle unprintable chars in the statusline --- test/functional/ui/multibyte_spec.lua | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'test/functional/ui/multibyte_spec.lua') diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua index d4e237bcb4..9f413f8bff 100644 --- a/test/functional/ui/multibyte_spec.lua +++ b/test/functional/ui/multibyte_spec.lua @@ -158,6 +158,7 @@ describe('multibyte rendering: statusline', function() screen:set_default_attr_ids({ [1] = {bold = true, foreground = Screen.colors.Blue1}, [2] = {bold = true, reverse = true}, + [3] = {background = Screen.colors.Red, foreground = Screen.colors.Gray100}; }) screen:attach() command('set laststatus=2') @@ -220,4 +221,27 @@ describe('multibyte rendering: statusline', function() | ]]} end) + + it('unprintable chars in filename with default stl', function() + command("file 🧑‍💻") + -- TODO: this is wrong but avoids a crash + screen:expect{grid=[[ + ^ | + {1:~ }| + {2:🧑�💻 }| + | + ]]} + end) + + it('unprintable chars in filename with custom stl', function() + command('set statusline=xx%#ErrorMsg#%f%##yy') + command("file 🧑‍💻") + -- TODO: this is also wrong but also avoids a crash + screen:expect{grid=[[ + ^ | + {1:~ }| + {2:xx}{3:🧑<200d>💻}{2:yy }| + | + ]]} + end) end) -- cgit From cd6458123fb4859e47ffcd144ebbddf389a09610 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Mon, 26 Jun 2023 11:52:52 +0800 Subject: fix(charset): fix wrong display of 0xffff (#24158) --- test/functional/ui/multibyte_spec.lua | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'test/functional/ui/multibyte_spec.lua') diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua index 9f413f8bff..5cecd423d7 100644 --- a/test/functional/ui/multibyte_spec.lua +++ b/test/functional/ui/multibyte_spec.lua @@ -17,10 +17,11 @@ describe("multibyte rendering", function() screen = Screen.new(60, 6) screen:attach({rgb=true}) screen:set_default_attr_ids({ - [1] = {bold = true, foreground = Screen.colors.Blue1}, + [1] = {bold = true, foreground = Screen.colors.Blue}, [2] = {background = Screen.colors.WebGray}, [3] = {background = Screen.colors.LightMagenta}, [4] = {bold = true}, + [5] = {foreground = Screen.colors.Blue}, }) end) @@ -119,6 +120,19 @@ describe("multibyte rendering", function() ]]) end) + it('0xffff is shown as 4 hex digits', function() + command([[call setline(1, "\uFFFF!!!")]]) + feed('$') + screen:expect{grid=[[ + {5:}!!^! | + {1:~ }| + {1:~ }| + {1:~ }| + {1:~ }| + | + ]]} + end) + it('works with a lot of unicode (zalgo) text', function() screen:try_resize(65, 10) meths.buf_set_lines(0,0,-1,true, split(dedent [[ -- cgit From 8da986ea877b07a5eb117446f410f2a7fc8cd9cb Mon Sep 17 00:00:00 2001 From: bfredl Date: Wed, 13 Sep 2023 13:39:18 +0200 Subject: refactor(grid): change schar_T representation to be more compact Previously, a screen cell would occupy 28+4=32 bytes per cell as we always made space for up to MAX_MCO+1 codepoints in a cell. As an example, even a pretty modest 50*80 screen would consume 50*80*2*32 = 256000, i e a quarter megabyte With the factor of two due to the TUI side buffer, and even more when using msg_grid and/or ext_multigrid. This instead stores a 4-byte union of either: - a valid UTF-8 sequence up to 4 bytes - an escape char which is invalid UTF-8 (0xFF) plus a 24-bit index to a glyph cache This avoids allocating space for huge composed glyphs _upfront_, while still keeping rendering such glyphs reasonably fast (1 hash table lookup + one plain index lookup). If the same large glyphs are using repeatedly on the screen, this is still a net reduction of memory/cache consumption. The only case which really gets worse is if you blast the screen full with crazy emojis and zalgo text and even this case only leads to 4 extra bytes per char. When only <= 4-byte glyphs are used, plus the 4-byte attribute code, i e 8 bytes in total there is a factor of four reduction of memory use. Memory which will be quite hot in cache as the screen buffer is scanned over in win_line() buffer text drawing A slight complication is that the representation depends on host byte order. I've tested this manually by compling and running this in qemu-s390x and it works fine. We might add a qemu based solution to CI at some point. --- test/functional/ui/multibyte_spec.lua | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'test/functional/ui/multibyte_spec.lua') diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua index 5cecd423d7..417c7b797c 100644 --- a/test/functional/ui/multibyte_spec.lua +++ b/test/functional/ui/multibyte_spec.lua @@ -160,6 +160,22 @@ describe("multibyte rendering", function() {1:~ }| | ]]} + + -- nvim will reset the zalgo text^W^W glyph cache if it gets too full. + -- this should be exceedingly rare, but fake it to make sure it works + meths._invalidate_glyph_cache() + screen:expect{grid=[[ + ^L̓̉̑̒̌̚ơ̗̌̒̄̀ŕ̈̈̎̐̕è̇̅̄̄̐m̖̟̟̅̄̚ ̛̓̑̆̇̍i̗̟̞̜̅̐p̗̞̜̉̆̕s̟̜̘̍̑̏ū̟̞̎̃̉ḿ̘̙́́̐ ̖̍̌̇̉̚d̞̄̃̒̉̎ò́̌̌̂̐l̞̀̄̆̌̚ȯ̖̞̋̀̐r̓̇̌̃̃̚ ̗̘̀̏̍́s̜̀̎̎̑̕i̟̗̐̄̄̚t̝̎̆̓̐̒ ̘̇̔̓̊̚ȃ̛̟̗̏̅m̜̟̙̞̈̓é̘̞̟̔̆t̝̂̂̈̑̔,̜̜̖̅̄̍ ̛̗̊̓̆̚c̟̍̆̍̈̔ȯ̖̖̝̑̀n̜̟̎̊̃̚s̟̏̇̎̒̚e̙̐̈̓̌̚c̙̍̈̏̅̕ť̇̄̇̆̓e̛̓̌̈̓̈t̟̍̀̉̆̅u̝̞̎̂̄̚r̘̀̅̈̅̐ ̝̞̓́̇̉ã̏̀̆̅̕d̛̆̐̉̆̋ȉ̞̟̍̃̚p̛̜̊̍̂̓ȋ̏̅̃̋̚ṥ̛̏̃̕č̛̞̝̀̂í̗̘̌́̎n̔̎́̒̂̕ǧ̗̜̋̇̂ ̛̜̔̄̎̃ê̛̔̆̇̕l̘̝̏̐̊̏ĩ̛̍̏̏̄t̟̐́̀̐̎,̙̘̍̆̉̐ ̋̂̏̄̌̅s̙̓̌̈́̇e̛̗̋̒̎̏d̜̗̊̍̊̚ | + ď̘̋̌̌̕ǒ̝̗̔̇̕ ̙̍́̄̄̉è̛̛̞̌̌i̜̖̐̈̆̚ȕ̇̈̓̃̓ŝ̛̞̙̉̋m̜̐̂̄̋̂ȯ̈̎̎̅̕d̜̙̓̔̋̑ ̞̗̄̂̂̚t̝̊́̃́̄e̛̘̜̞̓̑m̊̅̏̉̌̕p̛̈̂̇̀̐ỏ̙̘̈̉̔r̘̞̋̍̃̚ ̝̄̀̇̅̇ỉ̛̖̍̓̈n̛̛̝̎̕̕c̛̛̊̅́̐ĭ̗̓̀̍̐d̞̜̋̐̅̚i̟̙̇̄̊̄d̞̊̂̀̇̚ủ̝̉̑̃̕n̜̏̇̄̐̋ť̗̜̞̋̉ ̝̒̓̌̓̚ȕ̖̙̀̚̕t̖̘̎̉̂̌ ̛̝̄̍̌̂l̛̟̝̃̑̋á̛̝̝̔̅b̝̙̜̗̅̒ơ̖̌̒̄̆r̒̇̓̎̈̄e̛̛̖̅̏̇ ̖̗̜̝̃́e̛̛̘̅̔̌ẗ̛̙̗̐̕ ̖̟̇̋̌̈d̞̙̀̉̑̕ŏ̝̂́̐̑l̞̟̗̓̓̀ơ̘̎̃̄̂r̗̗̖̔̆̍ẻ̖̝̞̋̅ ̜̌̇̍̈̊m̈̉̇̄̒̀a̜̞̘̔̅̆g̗̖̈̃̈̉n̙̖̄̈̉̄â̛̝̜̄̃ ̛́̎̕̕̚ā̊́́̆̌l̟̙̞̃̒́i̖̇̎̃̀̋q̟̇̒̆́̊ủ́̌̇̑̚ã̛̘̉̐̚.̛́̏̐̍̊ | + U̝̙̎̈̐̆t̜̍̌̀̔̏ ̞̉̍̇̈̃e̟̟̊̄̕̕n̝̜̒̓̆̕i̖̒̌̅̇̚m̞̊̃̔̊̂ ̛̜̊̎̄̂a̘̜̋̒̚̚d̟̊̎̇̂̍ ̜̖̏̑̉̕m̜̒̎̅̄̚i̝̖̓̂̍̕n̙̉̒̑̀̔ỉ̖̝̌̒́m̛̖̘̅̆̎ ̖̉̎̒̌̕v̖̞̀̔́̎e̖̙̗̒̎̉n̛̗̝̎̀̂ȉ̞̗̒̕̚ȧ̟̜̝̅̚m̆̉̐̐̇̈,̏̐̎́̍́ ̜̞̙̘̏̆q̙̖̙̅̓̂ủ̇́̀̔̚í̙̟̟̏̐s̖̝̍̏̂̇ ̛̘̋̈̕̕ń̛̞̜̜̎o̗̜̔̔̈̆s̞̘̘̄̒̋t̛̅̋́̔̈ȓ̓̒́̇̅ủ̜̄̃̒̍d̙̝̘̊̏̚ ̛̟̞̄́̔e̛̗̝̍̃̀x̞̖̃̄̂̅e̖̅̇̐̔̃r̗̞̖̔̎̚c̘̜̖̆̊̏ï̙̝̙̂̕t̖̏́̓̋̂ă̖̄̆̑̒t̜̟̍̉̑̏i̛̞̞̘̒̑ǒ̜̆̅̃̉ṅ̖̜̒̎̚ | + u̗̞̓̔̈̏ĺ̟̝́̎̚l̛̜̅̌̎̆a̒̑̆̔̇̃m̜̗̈̊̎̚ċ̘̋̇̂̚ơ̟̖̊́̕ ̖̟̍̉̏̚l̙̔̓̀̅̏ä̞̗̘̙̅ḃ̟̎̄̃̕o̞̎̓̓̓̚r̗̜̊̓̈̒ï̗̜̃̃̅s̀̒̌̂̎̂ ̖̗̗̋̎̐n̝̟̝̘̄̚i̜̒̀̒̐̕s̘̘̄̊̃̀ī̘̜̏̌̕ ̗̖̞̐̈̒ư̙̞̄́̌t̟̘̖̙̊̚ ̌̅̋̆̚̚ä̇̊̇̕̕l̝̞̘̋̔̅i̍̋́̆̑̈q̛̆̐̈̐̚ư̏̆̊́̚î̜̝̑́̊p̗̓̅̑̆̏ ̆́̓̔̋̋e̟̊̋̏̓̚x̗̍̑̊̎̈ ̟̞̆̄̂̍ë̄̎̄̃̅a̛̜̅́̃̈ ̔̋̀̎̐̀c̖̖̍̀̒̂ơ̛̙̖̄̒m̘̔̍̏̆̕ḿ̖̙̝̏̂ȍ̓̋̈̀̕d̆̂̊̅̓̚o̖̔̌̑̚̕ ̙̆́̔̊̒c̖̘̖̀̄̍o̓̄̑̐̓̒ñ̞̒̎̈̚s̞̜̘̈̄̄e̙̊̀̇̌̋q̐̒̓́̔̃ư̗̟̔̔̚å̖̙̞̄̏t̛̙̟̒̇̏.̙̗̓̃̓̎ | + D̜̖̆̏̌̌ư̑̃̌̍̕i̝̊̊̊̊̄s̛̙̒́̌̇ ̛̃̔̄̆̌ă̘̔̅̅̀ú̟̟̟̃̃t̟̂̄̈̈̃e̘̅̌̒̂̆ ̖̟̐̉̉̌î̟̟̙̜̇r̛̙̞̗̄̌ú̗̗̃̌̎r̛̙̘̉̊̕e̒̐̔̃̓̋ ̊̊̍̋̑̉d̛̝̙̉̀̓o̘̜̐̐̓̐l̞̋̌̆̍́o̊̊̐̃̃̚ṙ̛̖̘̃̕ ̞̊̀̍̒̕ȉ́̑̐̇̅ǹ̜̗̜̞̏ ̛̜̐̄̄̚r̜̖̈̇̅̋ĕ̗̉̃̔̚p̟̝̀̓̔̆r̜̈̆̇̃̃e̘̔̔̏̎̓h̗̒̉̑̆̚ė̛̘̘̈̐n̘̂̀̒̕̕d̗̅̂̋̅́ê̗̜̜̜̕r̟̋̄̐̅̂i̛̔̌̒̂̕t̛̗̓̎̀̎ ̙̗̀̉̂̚ȉ̟̗̐̓̚n̙̂̍̏̓̉ ̙̘̊̋̍̕v̜̖̀̎̆̐ő̜̆̉̃̎l̑̋̒̉̔̆ư̙̓̓́̚p̝̘̖̎̏̒t̛̘̝̞̂̓ȁ̘̆̔́̊t̖̝̉̒̐̎e̞̟̋̀̅̄ ̆̌̃̀̑̔v̝̘̝̍̀̇ȅ̝̊̄̓̕l̞̝̑̔̂̋ĭ̝̄̅̆̍t̝̜̉̂̈̇ | + ē̟̊̇̕̚s̖̘̘̒̄̑s̛̘̀̊̆̇e̛̝̘̒̏̚ ̉̅̑̂̐̎c̛̟̙̎̋̓i̜̇̒̏̆̆l̟̄́̆̊̌l̍̊̋̃̆̌ủ̗̙̒̔̚m̛̘̘̖̅̍ ̖̙̈̎̂̕d̞̟̏̋̈̔ơ̟̝̌̃̄l̗̙̝̂̉̒õ̒̃̄̄̚ŕ̗̏̏̊̍ê̞̝̞̋̈ ̜̔̒̎̃̚e̞̟̞̒̃̄ư̖̏̄̑̃ ̛̗̜̄̓̎f̛̖̞̅̓̃ü̞̏̆̋̕g̜̝̞̑̑̆i̛̘̐̐̅̚à̜̖̌̆̎t̙̙̎̉̂̍ ̋̔̈̎̎̉n̞̓́̔̊̕ư̘̅̋̔̚l̗̍̒̄̀̚l̞̗̘̙̓̍â̘̔̒̎̚ ̖̓̋̉̃̆p̛̛̘̋̌̀ä̙̔́̒̕r̟̟̖̋̐̋ì̗̙̎̓̓ȃ̔̋̑̚̕t̄́̎̓̂̋ư̏̈̂̑̃r̖̓̋̊̚̚.̒̆̑̆̊̎ ̘̜̍̐̂̚E̞̅̐̇́̂x̄́̈̌̉̕ć̘̃̉̃̕è̘̂̑̏̑p̝̘̑̂̌̆t̔̐̅̍̌̂ȇ̞̈̐̚̕ű̝̞̜́̚ŕ̗̝̉̆́ | + š̟́̔̏̀ȉ̝̟̝̏̅n̑̆̇̒̆̚t̝̒́̅̋̏ ̗̑̌̋̇̚ơ̙̗̟̆̅c̙̞̙̎̊̎c̘̟̍̔̊̊a̛̒̓̉́̐e̜̘̙̒̅̇ć̝̝̂̇̕ả̓̍̎̂̚t̗̗̗̟̒̃ ̘̒̓̐̇́c̟̞̉̐̓̄ȕ̙̗̅́̏p̛̍̋̈́̅i̖̓̒̍̈̄d̞̃̈̌̆̐a̛̗̝̎̋̉t̞̙̀̊̆̇a̛̙̒̆̉̚t̜̟̘̉̓̚ ̝̘̗̐̇̕n̛̘̑̏̂́ō̑̋̉̏́ň̞̊̆̄̃ ̙̙̙̜̄̏p̒̆̋̋̓̏r̖̖̅̉́̚ơ̜̆̑̈̚i̟̒̀̃̂̌d̛̏̃̍̋̚ë̖̞̙̗̓n̛̘̓̒̅̎t̟̗̙̊̆̚,̘̙̔̊̚̕ ̟̗̘̜̑̔s̜̝̍̀̓̌û̞̙̅̇́n̘̗̝̒̃̎t̗̅̀̅̊̈ ̗̖̅̅̀̄i̛̖̍̅̋̂n̙̝̓̓̎̚ ̞̋̅̋̃̚c̗̒̀̆̌̎ū̞̂̑̌̓ĺ̛̐̍̑́p̝̆̌̎̈̚a̖̙̒̅̈̌ ̝̝̜̂̈̀q̝̖̔̍̒̚ư̔̐̂̎̊ǐ̛̟̖̘̕ | + o̖̜̔̋̅̚f̛̊̀̉́̕f̏̉̀̔̃̃i̘̍̎̐̔̎c̙̅̑̂̐̅ȋ̛̜̀̒̚a̋̍̇̏̀̋ ̖̘̒̅̃̒d̗̘̓̈̇̋é̝́̎̒̄š̙̒̊̉̋e̖̓̐̀̍̕r̗̞̂̅̇̄ù̘̇̐̉̀n̐̑̀̄̍̐t̟̀̂̊̄̚ ̟̝̂̍̏́m̜̗̈̂̏̚ő̞̊̑̇̒l̘̑̏́̔̄l̛̛̇̃̋̊i̓̋̒̃̉̌t̛̗̜̏̀̋ ̙̟̒̂̌̐a̙̝̔̆̏̅n̝̙̙̗̆̅i̍̔́̊̃̕m̖̝̟̒̍̚ ̛̃̃̑̌́ǐ̘̉̔̅̚d̝̗̀̌̏̒ ̖̝̓̑̊̚ȇ̞̟̖̌̕š̙̙̈̔̀t̂̉̒̍̄̄ ̝̗̊̋̌̄l̛̞̜̙̘̔å̝̍̂̍̅b̜̆̇̈̉̌ǒ̜̙̎̃̆r̝̀̄̍́̕ư̋̊́̊̕m̜̗̒̐̕̚.̟̘̀̒̌̚ | + {1:~ }| + | + ]], reset=true} end) end) -- cgit From ddef39299f357d3131644647379e88a69749bf40 Mon Sep 17 00:00:00 2001 From: bfredl Date: Tue, 19 Sep 2023 14:30:02 +0200 Subject: refactor(grid): do arabic shaping in one place The 'arabicshape' feature of vim is a transformation of unicode text to make arabic and some related scripts look better at display time. In particular the content of a cell will be adjusted depending on the (original) content of the cells just before and after it. This is implemented by the arabic_shape() function in nvim. Before this commit, shaping was invoked in four different contexts: - when rendering buffer text in win_line() - in line_putchar() for rendering virtual text - as part of grid_line_puts, used by messages and statuslines and similar - as part of draw_cmdline() for drawing the cmdline This replaces all these with a post-processing step in grid_put_linebuf(), which has become the entry point for all text rendering after recent refactors. An aim of this is to make the handling of multibyte text yet simpler. One of the main reasons multibyte chars needs to be "parsed" into codepoint arrays of composing chars is so that these could be inspected for the purpose of shaping. This can likely be vastly simplified in many contexts where only the total length (in bytes) and width of composed char is needed. --- test/functional/ui/multibyte_spec.lua | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'test/functional/ui/multibyte_spec.lua') diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua index 417c7b797c..077dd1a779 100644 --- a/test/functional/ui/multibyte_spec.lua +++ b/test/functional/ui/multibyte_spec.lua @@ -177,6 +177,57 @@ describe("multibyte rendering", function() | ]], reset=true} end) + + it('works with arabic input and arabicshape', function() + command('set arabic') + + command('set noarabicshape') + feed('isghl!') + screen:expect{grid=[[ + ^!مالس| + {1: ~}| + {1: ~}| + {1: ~}| + {1: ~}| + | + ]]} + + command('set arabicshape') + screen:expect{grid=[[ + ^!ﻡﻼﺳ| + {1: ~}| + {1: ~}| + {1: ~}| + {1: ~}| + | + ]]} + end) + + it('works with arabic input and arabicshape and norightleft', function() + command('set arabic norightleft') + + command('set noarabicshape') + feed('isghl!') + screen:expect{grid=[[ + سلام^! | + {1:~ }| + {1:~ }| + {1:~ }| + {1:~ }| + | + ]]} + + command('set arabicshape') + screen:expect{grid=[[ + ﺱﻼﻣ^! | + {1:~ }| + {1:~ }| + {1:~ }| + {1:~ }| + | + ]]} + + end) end) describe('multibyte rendering: statusline', function() -- cgit From b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254 Mon Sep 17 00:00:00 2001 From: bfredl Date: Mon, 6 Nov 2023 14:52:27 +0100 Subject: refactor(grid): make screen rendering more multibyte than ever before Problem: buffer text with composing chars are converted from UTF-8 to an array of up to seven UTF-32 values and then converted back to UTF-8 strings. Solution: Convert buffer text directly to UTF-8 based schar_T values. The limit of the text size is now in schar_T bytes, which is currently 31+1 but easily could be raised as it no longer multiplies the size of the entire screen grid when not used, the full size is only required for temporary scratch buffers. Also does some general cleanup to win_line text handling, which was unnecessarily complicated due to multibyte rendering being an "opt-in" feature long ago. Nowadays, a char is just a char, regardless if it consists of one ASCII byte or multiple bytes. --- test/functional/ui/multibyte_spec.lua | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'test/functional/ui/multibyte_spec.lua') diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua index 077dd1a779..d72bf27d6b 100644 --- a/test/functional/ui/multibyte_spec.lua +++ b/test/functional/ui/multibyte_spec.lua @@ -228,6 +228,36 @@ describe("multibyte rendering", function() ]]} end) + + it('works with arabicshape and multiple composing chars', function() + -- this tests an important edge case: arabicshape might increase the byte size of the base + -- character in a way so that the last composing char no longer fits. use "g8" on the text + -- to observe what is happening (the final E1 80 B7 gets deleted with 'arabicshape') + -- If we would increase the schar_t size, say from 32 to 64 bytes, we need to extend the + -- test text with even more zalgo energy to still touch this edge case. + + meths.buf_set_lines(0,0,-1,true, {"سلام့̀́̂̃̄̅̆̇̈̉̊̋̌"}) + command('set noarabicshape') + + screen:expect{grid=[[ + ^سلام့̀́̂̃̄̅̆̇̈̉̊̋̌ | + {1:~ }| + {1:~ }| + {1:~ }| + {1:~ }| + | + ]]} + + command('set arabicshape') + screen:expect{grid=[[ + ^ﺱﻼﻣ̀́̂̃̄̅̆̇̈̉̊̋̌ | + {1:~ }| + {1:~ }| + {1:~ }| + {1:~ }| + | + ]]} + end) end) describe('multibyte rendering: statusline', function() -- cgit