From 39f8aaeb815c2e31cffec12ef36ad4f25df91602 Mon Sep 17 00:00:00 2001
From: Björn Linse <bjorn.linse@gmail.com>
Date: Fri, 24 Jan 2020 09:48:58 +0100
Subject: fix(status): handle unprintable chars in the statusline

---
 test/functional/ui/multibyte_spec.lua | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'test/functional/ui/multibyte_spec.lua')
diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index d4e237bcb4..9f413f8bff 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -158,6 +158,7 @@ describe('multibyte rendering: statusline', function()
     screen:set_default_attr_ids({
       [1] = {bold = true, foreground = Screen.colors.Blue1},
       [2] = {bold = true, reverse = true},
+      [3] = {background = Screen.colors.Red, foreground = Screen.colors.Gray100};
     })
     screen:attach()
     command('set laststatus=2')
@@ -220,4 +221,27 @@ describe('multibyte rendering: statusline', function()
                                               |
     ]]}
   end)
+
+  it('unprintable chars in filename with default stl', function()
+    command("file 🧑‍💻")
+    -- TODO: this is wrong but avoids a crash
+    screen:expect{grid=[[
+      ^                                        |
+      {1:~                                       }|
+      {2:🧑�💻                                   }|
+                                              |
+    ]]}
+  end)
+
+  it('unprintable chars in filename with custom stl', function()
+    command('set statusline=xx%#ErrorMsg#%f%##yy')
+    command("file 🧑‍💻")
+    -- TODO: this is also wrong but also avoids a crash
+    screen:expect{grid=[[
+      ^                                        |
+      {1:~                                       }|
+      {2:xx}{3:🧑<200d>💻}{2:yy                          }|
+                                              |
+    ]]}
+  end)
 end)
-- 
cgit 


From cd6458123fb4859e47ffcd144ebbddf389a09610 Mon Sep 17 00:00:00 2001
From: zeertzjq <zeertzjq@outlook.com>
Date: Mon, 26 Jun 2023 11:52:52 +0800
Subject: fix(charset): fix wrong display of 0xffff (#24158)

---
 test/functional/ui/multibyte_spec.lua | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'test/functional/ui/multibyte_spec.lua')

diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index 9f413f8bff..5cecd423d7 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -17,10 +17,11 @@ describe("multibyte rendering", function()
     screen = Screen.new(60, 6)
     screen:attach({rgb=true})
     screen:set_default_attr_ids({
-      [1] = {bold = true, foreground = Screen.colors.Blue1},
+      [1] = {bold = true, foreground = Screen.colors.Blue},
       [2] = {background = Screen.colors.WebGray},
       [3] = {background = Screen.colors.LightMagenta},
       [4] = {bold = true},
+      [5] = {foreground = Screen.colors.Blue},
     })
   end)
 
@@ -119,6 +120,19 @@ describe("multibyte rendering", function()
     ]])
   end)
 
+  it('0xffff is shown as 4 hex digits', function()
+    command([[call setline(1, "\uFFFF!!!")]])
+    feed('$')
+    screen:expect{grid=[[
+      {5:<ffff>}!!^!                                                   |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+  end)
+
   it('works with a lot of unicode (zalgo) text', function()
     screen:try_resize(65, 10)
     meths.buf_set_lines(0,0,-1,true, split(dedent [[
-- 
cgit 


From 8da986ea877b07a5eb117446f410f2a7fc8cd9cb Mon Sep 17 00:00:00 2001
From: bfredl <bjorn.linse@gmail.com>
Date: Wed, 13 Sep 2023 13:39:18 +0200
Subject: refactor(grid): change schar_T representation to be more compact

Previously, a screen cell would occupy 28+4=32 bytes per cell
as we always made space for up to MAX_MCO+1 codepoints in a cell.

As an example, even a pretty modest 50*80 screen would consume

50*80*2*32 = 256000, i e a quarter megabyte

With the factor of two due to the TUI side buffer, and even more when
using msg_grid and/or ext_multigrid.

This instead stores a 4-byte union of either:
- a valid UTF-8 sequence up to 4 bytes
- an escape char which is invalid UTF-8 (0xFF) plus a 24-bit index to a
  glyph cache

This avoids allocating space for huge composed glyphs _upfront_, while
still keeping rendering such glyphs reasonably fast (1 hash table lookup
+ one plain index lookup). If the same large glyphs are using repeatedly
on the screen, this is still a net reduction of memory/cache
consumption. The only case which really gets worse is if you blast
the screen full with crazy emojis and zalgo text and even this case
only leads to 4 extra bytes per char.

When only <= 4-byte glyphs are used, plus the 4-byte attribute code,
i e 8 bytes in total there is a factor of four reduction of memory use.
Memory which will be quite hot in cache as the screen buffer is scanned
over in win_line() buffer text drawing

A slight complication is that the representation depends on host byte
order. I've tested this manually by compling and running this
in qemu-s390x and it works fine. We might add a qemu based solution
to CI at some point.
---
 test/functional/ui/multibyte_spec.lua | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'test/functional/ui/multibyte_spec.lua')

diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index 5cecd423d7..417c7b797c 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -160,6 +160,22 @@ describe("multibyte rendering", function()
       {1:~                                                                }|
                                                                        |
     ]]}
+
+    -- nvim will reset the zalgo text^W^W glyph cache if it gets too full.
+    -- this should be exceedingly rare, but fake it to make sure it works
+    meths._invalidate_glyph_cache()
+    screen:expect{grid=[[
+      ^L̓̉̑̒̌̚ơ̗̌̒̄̀ŕ̈̈̎̐̕è̇̅̄̄̐m̖̟̟̅̄̚ ̛̓̑̆̇̍i̗̟̞̜̅̐p̗̞̜̉̆̕s̟̜̘̍̑̏ū̟̞̎̃̉ḿ̘̙́́̐ ̖̍̌̇̉̚d̞̄̃̒̉̎ò́̌̌̂̐l̞̀̄̆̌̚ȯ̖̞̋̀̐r̓̇̌̃̃̚ ̗̘̀̏̍́s̜̀̎̎̑̕i̟̗̐̄̄̚t̝̎̆̓̐̒ ̘̇̔̓̊̚ȃ̛̟̗̏̅m̜̟̙̞̈̓é̘̞̟̔̆t̝̂̂̈̑̔,̜̜̖̅̄̍ ̛̗̊̓̆̚c̟̍̆̍̈̔ȯ̖̖̝̑̀n̜̟̎̊̃̚s̟̏̇̎̒̚e̙̐̈̓̌̚c̙̍̈̏̅̕ť̇̄̇̆̓e̛̓̌̈̓̈t̟̍̀̉̆̅u̝̞̎̂̄̚r̘̀̅̈̅̐ ̝̞̓́̇̉ã̏̀̆̅̕d̛̆̐̉̆̋ȉ̞̟̍̃̚p̛̜̊̍̂̓ȋ̏̅̃̋̚ṥ̛̏̃̕č̛̞̝̀̂í̗̘̌́̎n̔̎́̒̂̕ǧ̗̜̋̇̂ ̛̜̔̄̎̃ê̛̔̆̇̕l̘̝̏̐̊̏ĩ̛̍̏̏̄t̟̐́̀̐̎,̙̘̍̆̉̐ ̋̂̏̄̌̅s̙̓̌̈́̇e̛̗̋̒̎̏d̜̗̊̍̊̚     |
+      ď̘̋̌̌̕ǒ̝̗̔̇̕ ̙̍́̄̄̉è̛̛̞̌̌i̜̖̐̈̆̚ȕ̇̈̓̃̓ŝ̛̞̙̉̋m̜̐̂̄̋̂ȯ̈̎̎̅̕d̜̙̓̔̋̑ ̞̗̄̂̂̚t̝̊́̃́̄e̛̘̜̞̓̑m̊̅̏̉̌̕p̛̈̂̇̀̐ỏ̙̘̈̉̔r̘̞̋̍̃̚ ̝̄̀̇̅̇ỉ̛̖̍̓̈n̛̛̝̎̕̕c̛̛̊̅́̐ĭ̗̓̀̍̐d̞̜̋̐̅̚i̟̙̇̄̊̄d̞̊̂̀̇̚ủ̝̉̑̃̕n̜̏̇̄̐̋ť̗̜̞̋̉ ̝̒̓̌̓̚ȕ̖̙̀̚̕t̖̘̎̉̂̌ ̛̝̄̍̌̂l̛̟̝̃̑̋á̛̝̝̔̅b̝̙̜̗̅̒ơ̖̌̒̄̆r̒̇̓̎̈̄e̛̛̖̅̏̇ ̖̗̜̝̃́e̛̛̘̅̔̌ẗ̛̙̗̐̕ ̖̟̇̋̌̈d̞̙̀̉̑̕ŏ̝̂́̐̑l̞̟̗̓̓̀ơ̘̎̃̄̂r̗̗̖̔̆̍ẻ̖̝̞̋̅ ̜̌̇̍̈̊m̈̉̇̄̒̀a̜̞̘̔̅̆g̗̖̈̃̈̉n̙̖̄̈̉̄â̛̝̜̄̃ ̛́̎̕̕̚ā̊́́̆̌l̟̙̞̃̒́i̖̇̎̃̀̋q̟̇̒̆́̊ủ́̌̇̑̚ã̛̘̉̐̚.̛́̏̐̍̊   |
+      U̝̙̎̈̐̆t̜̍̌̀̔̏ ̞̉̍̇̈̃e̟̟̊̄̕̕n̝̜̒̓̆̕i̖̒̌̅̇̚m̞̊̃̔̊̂ ̛̜̊̎̄̂a̘̜̋̒̚̚d̟̊̎̇̂̍ ̜̖̏̑̉̕m̜̒̎̅̄̚i̝̖̓̂̍̕n̙̉̒̑̀̔ỉ̖̝̌̒́m̛̖̘̅̆̎ ̖̉̎̒̌̕v̖̞̀̔́̎e̖̙̗̒̎̉n̛̗̝̎̀̂ȉ̞̗̒̕̚ȧ̟̜̝̅̚m̆̉̐̐̇̈,̏̐̎́̍́ ̜̞̙̘̏̆q̙̖̙̅̓̂ủ̇́̀̔̚í̙̟̟̏̐s̖̝̍̏̂̇ ̛̘̋̈̕̕ń̛̞̜̜̎o̗̜̔̔̈̆s̞̘̘̄̒̋t̛̅̋́̔̈ȓ̓̒́̇̅ủ̜̄̃̒̍d̙̝̘̊̏̚ ̛̟̞̄́̔e̛̗̝̍̃̀x̞̖̃̄̂̅e̖̅̇̐̔̃r̗̞̖̔̎̚c̘̜̖̆̊̏ï̙̝̙̂̕t̖̏́̓̋̂ă̖̄̆̑̒t̜̟̍̉̑̏i̛̞̞̘̒̑ǒ̜̆̅̃̉ṅ̖̜̒̎̚               |
+      u̗̞̓̔̈̏ĺ̟̝́̎̚l̛̜̅̌̎̆a̒̑̆̔̇̃m̜̗̈̊̎̚ċ̘̋̇̂̚ơ̟̖̊́̕ ̖̟̍̉̏̚l̙̔̓̀̅̏ä̞̗̘̙̅ḃ̟̎̄̃̕o̞̎̓̓̓̚r̗̜̊̓̈̒ï̗̜̃̃̅s̀̒̌̂̎̂ ̖̗̗̋̎̐n̝̟̝̘̄̚i̜̒̀̒̐̕s̘̘̄̊̃̀ī̘̜̏̌̕ ̗̖̞̐̈̒ư̙̞̄́̌t̟̘̖̙̊̚ ̌̅̋̆̚̚ä̇̊̇̕̕l̝̞̘̋̔̅i̍̋́̆̑̈q̛̆̐̈̐̚ư̏̆̊́̚î̜̝̑́̊p̗̓̅̑̆̏ ̆́̓̔̋̋e̟̊̋̏̓̚x̗̍̑̊̎̈ ̟̞̆̄̂̍ë̄̎̄̃̅a̛̜̅́̃̈ ̔̋̀̎̐̀c̖̖̍̀̒̂ơ̛̙̖̄̒m̘̔̍̏̆̕ḿ̖̙̝̏̂ȍ̓̋̈̀̕d̆̂̊̅̓̚o̖̔̌̑̚̕ ̙̆́̔̊̒c̖̘̖̀̄̍o̓̄̑̐̓̒ñ̞̒̎̈̚s̞̜̘̈̄̄e̙̊̀̇̌̋q̐̒̓́̔̃ư̗̟̔̔̚å̖̙̞̄̏t̛̙̟̒̇̏.̙̗̓̃̓̎         |
+      D̜̖̆̏̌̌ư̑̃̌̍̕i̝̊̊̊̊̄s̛̙̒́̌̇ ̛̃̔̄̆̌ă̘̔̅̅̀ú̟̟̟̃̃t̟̂̄̈̈̃e̘̅̌̒̂̆ ̖̟̐̉̉̌î̟̟̙̜̇r̛̙̞̗̄̌ú̗̗̃̌̎r̛̙̘̉̊̕e̒̐̔̃̓̋ ̊̊̍̋̑̉d̛̝̙̉̀̓o̘̜̐̐̓̐l̞̋̌̆̍́o̊̊̐̃̃̚ṙ̛̖̘̃̕ ̞̊̀̍̒̕ȉ́̑̐̇̅ǹ̜̗̜̞̏ ̛̜̐̄̄̚r̜̖̈̇̅̋ĕ̗̉̃̔̚p̟̝̀̓̔̆r̜̈̆̇̃̃e̘̔̔̏̎̓h̗̒̉̑̆̚ė̛̘̘̈̐n̘̂̀̒̕̕d̗̅̂̋̅́ê̗̜̜̜̕r̟̋̄̐̅̂i̛̔̌̒̂̕t̛̗̓̎̀̎ ̙̗̀̉̂̚ȉ̟̗̐̓̚n̙̂̍̏̓̉ ̙̘̊̋̍̕v̜̖̀̎̆̐ő̜̆̉̃̎l̑̋̒̉̔̆ư̙̓̓́̚p̝̘̖̎̏̒t̛̘̝̞̂̓ȁ̘̆̔́̊t̖̝̉̒̐̎e̞̟̋̀̅̄ ̆̌̃̀̑̔v̝̘̝̍̀̇ȅ̝̊̄̓̕l̞̝̑̔̂̋ĭ̝̄̅̆̍t̝̜̉̂̈̇        |
+      ē̟̊̇̕̚s̖̘̘̒̄̑s̛̘̀̊̆̇e̛̝̘̒̏̚ ̉̅̑̂̐̎c̛̟̙̎̋̓i̜̇̒̏̆̆l̟̄́̆̊̌l̍̊̋̃̆̌ủ̗̙̒̔̚m̛̘̘̖̅̍ ̖̙̈̎̂̕d̞̟̏̋̈̔ơ̟̝̌̃̄l̗̙̝̂̉̒õ̒̃̄̄̚ŕ̗̏̏̊̍ê̞̝̞̋̈ ̜̔̒̎̃̚e̞̟̞̒̃̄ư̖̏̄̑̃ ̛̗̜̄̓̎f̛̖̞̅̓̃ü̞̏̆̋̕g̜̝̞̑̑̆i̛̘̐̐̅̚à̜̖̌̆̎t̙̙̎̉̂̍ ̋̔̈̎̎̉n̞̓́̔̊̕ư̘̅̋̔̚l̗̍̒̄̀̚l̞̗̘̙̓̍â̘̔̒̎̚ ̖̓̋̉̃̆p̛̛̘̋̌̀ä̙̔́̒̕r̟̟̖̋̐̋ì̗̙̎̓̓ȃ̔̋̑̚̕t̄́̎̓̂̋ư̏̈̂̑̃r̖̓̋̊̚̚.̒̆̑̆̊̎ ̘̜̍̐̂̚E̞̅̐̇́̂x̄́̈̌̉̕ć̘̃̉̃̕è̘̂̑̏̑p̝̘̑̂̌̆t̔̐̅̍̌̂ȇ̞̈̐̚̕ű̝̞̜́̚ŕ̗̝̉̆́           |
+      š̟́̔̏̀ȉ̝̟̝̏̅n̑̆̇̒̆̚t̝̒́̅̋̏ ̗̑̌̋̇̚ơ̙̗̟̆̅c̙̞̙̎̊̎c̘̟̍̔̊̊a̛̒̓̉́̐e̜̘̙̒̅̇ć̝̝̂̇̕ả̓̍̎̂̚t̗̗̗̟̒̃ ̘̒̓̐̇́c̟̞̉̐̓̄ȕ̙̗̅́̏p̛̍̋̈́̅i̖̓̒̍̈̄d̞̃̈̌̆̐a̛̗̝̎̋̉t̞̙̀̊̆̇a̛̙̒̆̉̚t̜̟̘̉̓̚ ̝̘̗̐̇̕n̛̘̑̏̂́ō̑̋̉̏́ň̞̊̆̄̃ ̙̙̙̜̄̏p̒̆̋̋̓̏r̖̖̅̉́̚ơ̜̆̑̈̚i̟̒̀̃̂̌d̛̏̃̍̋̚ë̖̞̙̗̓n̛̘̓̒̅̎t̟̗̙̊̆̚,̘̙̔̊̚̕ ̟̗̘̜̑̔s̜̝̍̀̓̌û̞̙̅̇́n̘̗̝̒̃̎t̗̅̀̅̊̈ ̗̖̅̅̀̄i̛̖̍̅̋̂n̙̝̓̓̎̚ ̞̋̅̋̃̚c̗̒̀̆̌̎ū̞̂̑̌̓ĺ̛̐̍̑́p̝̆̌̎̈̚a̖̙̒̅̈̌ ̝̝̜̂̈̀q̝̖̔̍̒̚ư̔̐̂̎̊ǐ̛̟̖̘̕          |
+      o̖̜̔̋̅̚f̛̊̀̉́̕f̏̉̀̔̃̃i̘̍̎̐̔̎c̙̅̑̂̐̅ȋ̛̜̀̒̚a̋̍̇̏̀̋ ̖̘̒̅̃̒d̗̘̓̈̇̋é̝́̎̒̄š̙̒̊̉̋e̖̓̐̀̍̕r̗̞̂̅̇̄ù̘̇̐̉̀n̐̑̀̄̍̐t̟̀̂̊̄̚ ̟̝̂̍̏́m̜̗̈̂̏̚ő̞̊̑̇̒l̘̑̏́̔̄l̛̛̇̃̋̊i̓̋̒̃̉̌t̛̗̜̏̀̋ ̙̟̒̂̌̐a̙̝̔̆̏̅n̝̙̙̗̆̅i̍̔́̊̃̕m̖̝̟̒̍̚ ̛̃̃̑̌́ǐ̘̉̔̅̚d̝̗̀̌̏̒ ̖̝̓̑̊̚ȇ̞̟̖̌̕š̙̙̈̔̀t̂̉̒̍̄̄ ̝̗̊̋̌̄l̛̞̜̙̘̔å̝̍̂̍̅b̜̆̇̈̉̌ǒ̜̙̎̃̆r̝̀̄̍́̕ư̋̊́̊̕m̜̗̒̐̕̚.̟̘̀̒̌̚                     |
+      {1:~                                                                }|
+                                                                       |
+    ]], reset=true}
   end)
 end)
 
-- 
cgit 


From ddef39299f357d3131644647379e88a69749bf40 Mon Sep 17 00:00:00 2001
From: bfredl <bjorn.linse@gmail.com>
Date: Tue, 19 Sep 2023 14:30:02 +0200
Subject: refactor(grid): do arabic shaping in one place

The 'arabicshape' feature of vim is a transformation of unicode text to
make arabic and some related scripts look better at display time. In
particular the content of a cell will be adjusted depending on the
(original) content of the cells just before and after it.

This is implemented by the arabic_shape() function in nvim. Before this
commit, shaping was invoked in four different contexts:

- when rendering buffer text in win_line()
- in line_putchar() for rendering virtual text
- as part of grid_line_puts, used by messages and statuslines and
  similar
- as part of draw_cmdline() for drawing the cmdline

This replaces all these with a post-processing step in grid_put_linebuf(),
which has become the entry point for all text rendering after recent
refactors.

An aim of this is to make the handling of multibyte text yet simpler.
One of the main reasons multibyte chars needs to be "parsed" into
codepoint arrays of composing chars is so that these could be inspected
for the purpose of shaping. This can likely be vastly simplified in many
contexts where only the total length (in bytes) and width of composed
char is needed.
---
 test/functional/ui/multibyte_spec.lua | 51 +++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'test/functional/ui/multibyte_spec.lua')

diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index 417c7b797c..077dd1a779 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -177,6 +177,57 @@ describe("multibyte rendering", function()
                                                                        |
     ]], reset=true}
   end)
+
+  it('works with arabic input and arabicshape', function()
+    command('set arabic')
+
+    command('set noarabicshape')
+    feed('isghl!<esc>')
+    screen:expect{grid=[[
+                                                             ^!مالس|
+      {1:                                                           ~}|
+      {1:                                                           ~}|
+      {1:                                                           ~}|
+      {1:                                                           ~}|
+                                                                  |
+    ]]}
+
+    command('set arabicshape')
+    screen:expect{grid=[[
+                                                              ^!ﻡﻼﺳ|
+      {1:                                                           ~}|
+      {1:                                                           ~}|
+      {1:                                                           ~}|
+      {1:                                                           ~}|
+                                                                  |
+    ]]}
+  end)
+
+  it('works with arabic input and arabicshape and norightleft', function()
+    command('set arabic norightleft')
+
+    command('set noarabicshape')
+    feed('isghl!<esc>')
+    screen:expect{grid=[[
+      سلام^!                                                       |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+
+    command('set arabicshape')
+    screen:expect{grid=[[
+      ﺱﻼﻣ^!                                                        |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+
+  end)
 end)
 
 describe('multibyte rendering: statusline', function()
-- 
cgit 


From b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254 Mon Sep 17 00:00:00 2001
From: bfredl <bjorn.linse@gmail.com>
Date: Mon, 6 Nov 2023 14:52:27 +0100
Subject: refactor(grid): make screen rendering more multibyte than ever before

Problem: buffer text with composing chars are converted from UTF-8
to an array of up to seven UTF-32 values and then converted back
to UTF-8 strings.

Solution: Convert buffer text directly to UTF-8 based schar_T values.

The limit of the text size is now in schar_T bytes, which is currently
31+1 but easily could be raised as it no longer multiplies the size
of the entire screen grid when not used, the full size is only required
for temporary scratch buffers.

Also does some general cleanup to win_line text handling, which was
unnecessarily complicated due to multibyte rendering being an "opt-in"
feature long ago. Nowadays, a char is just a char, regardless if it consists
of one ASCII byte or multiple bytes.
---
 test/functional/ui/multibyte_spec.lua | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'test/functional/ui/multibyte_spec.lua')

diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index 077dd1a779..d72bf27d6b 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -228,6 +228,36 @@ describe("multibyte rendering", function()
     ]]}
 
   end)
+
+  it('works with arabicshape and multiple composing chars', function()
+    -- this tests an important edge case: arabicshape might increase the byte size of the base
+    -- character in a way so that the last composing char no longer fits. use "g8" on the text
+    -- to observe what is happening (the final E1 80 B7 gets deleted with 'arabicshape')
+    -- If we would increase the schar_t size, say from 32 to 64 bytes, we need to extend the
+    -- test text with even more zalgo energy to still touch this edge case.
+
+    meths.buf_set_lines(0,0,-1,true, {"سلام့̀́̂̃̄̅̆̇̈̉̊̋̌"})
+    command('set noarabicshape')
+
+    screen:expect{grid=[[
+      ^سلام့̀́̂̃̄̅̆̇̈̉̊̋̌                                                        |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+
+    command('set arabicshape')
+    screen:expect{grid=[[
+      ^ﺱﻼﻣ̀́̂̃̄̅̆̇̈̉̊̋̌                                                         |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+  end)
 end)
 
 describe('multibyte rendering: statusline', function()
-- 
cgit