4 files changed, 114 insertions, 201 deletions
diff --git a/test/functional/ui/fold_spec.lua b/test/functional/ui/fold_spec.lua
index 9a0182ea29..1addf7088e 100644
--- a/test/functional/ui/fold_spec.lua
+++ b/test/functional/ui/fold_spec.lua
@@ -1102,8 +1102,6 @@ describe("folded lines", function()
     end)
 
     it("works with multibyte text", function()
-      -- Currently the only allowed value of 'maxcombine'
-      eq(6, meths.get_option_value('maxcombine', {}))
       eq(true, meths.get_option_value('arabicshape', {}))
       insert([[
         å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢͟ العَرَبِيَّة
@@ -1120,7 +1118,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
+          å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
           möre tex^t                                    |
           {1:~                                            }|
           {1:~                                            }|
@@ -1132,7 +1130,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
+          å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
           möre tex^t                                    |
           {1:~                                            }|
           {1:~                                            }|
@@ -1156,7 +1154,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1168,7 +1166,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1192,7 +1190,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1204,7 +1202,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1228,7 +1226,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
+          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1240,7 +1238,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
+          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1265,7 +1263,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}{8: 1  }{7: +}|
+          {5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}{8: 1  }{7: +}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1277,7 +1275,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}{8: 1  }{7: +}|
+          {5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}{8: 1  }{7: +}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1301,7 +1299,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1313,7 +1311,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1337,7 +1335,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1349,7 +1347,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1373,7 +1371,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
+                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1385,7 +1383,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
+                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1409,7 +1407,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-                                         ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
+                                         ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1421,7 +1419,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-                                         ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
+                                         ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index 077dd1a779..d72bf27d6b 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -228,6 +228,36 @@ describe("multibyte rendering", function()
     ]]}
 
   end)
+
+  it('works with arabicshape and multiple composing chars', function()
+    -- This tests an important edge case: 'arabicshape' might increase the byte size of the base
+    -- character so that the last composing char no longer fits. Use "g8" on the text to
+    -- observe what is happening (the final E1 80 B7 gets deleted with 'arabicshape').
+    -- If we were to increase the schar_t size, say from 32 to 64 bytes, we would need to extend
+    -- the test text with even more zalgo energy to still touch this edge case.
+
+    meths.buf_set_lines(0,0,-1,true, {"سلام့̀́̂̃̄̅̆̇̈̉̊̋̌"})
+    command('set noarabicshape')
+
+    screen:expect{grid=[[
+      ^سلام့̀́̂̃̄̅̆̇̈̉̊̋̌                                                        |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+
+    command('set arabicshape')
+    screen:expect{grid=[[
+      ^ﺱﻼﻣ̀́̂̃̄̅̆̇̈̉̊̋̌                                                         |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+  end)
 end)
 
 describe('multibyte rendering: statusline', function()
diff --git a/test/functional/ui/output_spec.lua b/test/functional/ui/output_spec.lua
index 0dd1f0325c..7b93b74eac 100644
--- a/test/functional/ui/output_spec.lua
+++ b/test/functional/ui/output_spec.lua
@@ -225,8 +225,8 @@ describe("shell command :!", function()
         å                                                    |
         ref: å̲                                               |
         1: å̲                                                 |
-        2: å ̲                                               |
-        3: å ̲                                               |
+        2: å ̲                                                |
+        3: å ̲                                                |
                                                              |
         {3:Press ENTER or type command to continue}^              |
       ]])
diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua
index fdb1bceab0..cd94624570 100644
--- a/test/unit/mbyte_spec.lua
+++ b/test/unit/mbyte_spec.lua
@@ -4,17 +4,9 @@ local itp = helpers.gen_itp(it)
 local ffi     = helpers.ffi
 local eq      = helpers.eq
 
-local mbyte = helpers.cimport("./src/nvim/mbyte.h")
-local charset = helpers.cimport('./src/nvim/charset.h')
+local lib = helpers.cimport('./src/nvim/mbyte.h', './src/nvim/charset.h', './src/nvim/grid.h')
 
 describe('mbyte', function()
-  -- Array for composing characters
-  local intp = ffi.typeof('int[?]')
-  local function to_intp()
-    -- how to get MAX_MCO from globals.h?
-    return intp(7, 1)
-  end
-
   -- Convert from bytes to string
   local function to_string(bytes)
     local s = {}
@@ -30,14 +22,14 @@ describe('mbyte', function()
   itp('utf_ptr2char', function()
     -- For strings with length 1 the first byte is returned.
     for c = 0, 255 do
-      eq(c, mbyte.utf_ptr2char(to_string({c, 0})))
+      eq(c, lib.utf_ptr2char(to_string({c, 0})))
     end
 
     -- Some ill formed byte sequences that should not be recognized as UTF-8
     -- First byte: 0xc0 or 0xc1
     -- Second byte: 0x80 .. 0xbf
-    --eq(0x00c0, mbyte.utf_ptr2char(to_string({0xc0, 0x80})))
-    --eq(0x00c1, mbyte.utf_ptr2char(to_string({0xc1, 0xbf})))
+    --eq(0x00c0, lib.utf_ptr2char(to_string({0xc0, 0x80})))
+    --eq(0x00c1, lib.utf_ptr2char(to_string({0xc1, 0xbf})))
     --
     -- Sequences with more than four bytes
   end)
@@ -47,240 +39,133 @@ describe('mbyte', function()
       local char_p = ffi.typeof('char[?]')
       for c = n * 0x1000, n * 0x1000 + 0xFFF do
         local p = char_p(4, 0)
-        mbyte.utf_char2bytes(c, p)
-        eq(c, mbyte.utf_ptr2char(p))
-        eq(charset.vim_iswordc(c), charset.vim_iswordp(p))
+        lib.utf_char2bytes(c, p)
+        eq(c, lib.utf_ptr2char(p))
+        eq(lib.vim_iswordc(c), lib.vim_iswordp(p))
       end
     end)
   end
 
-  describe('utfc_ptr2char_len', function()
+  describe('utfc_ptr2schar_len', function()
+    local function test_seq(seq)
+      local firstc = ffi.new("int[1]")
+      local buf = ffi.new("char[32]")
+      lib.schar_get(buf, lib.utfc_ptr2schar_len(to_string(seq), #seq, firstc))
+      return {ffi.string(buf), firstc[0]}
+    end
+
+    local function byte(val)
+      return {string.char(val), val}
+    end
 
     itp('1-byte sequences', function()
-      local pcc = to_intp()
-      for c = 0, 255 do
-        eq(c, mbyte.utfc_ptr2char_len(to_string({c}), pcc, 1))
-        eq(0, pcc[0])
+      eq({'', 0}, test_seq{0})
+      for c = 1, 127 do
+        eq(byte(c), test_seq{c})
+      end
+      for c = 128, 255 do
+        eq({'', c}, test_seq{c})
       end
     end)
 
     itp('2-byte sequences', function()
-      local pcc = to_intp()
       -- No combining characters
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f}), pcc, 2))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x7f})
       -- No combining characters
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80}), pcc, 2))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x80})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f}), pcc, 2))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f})
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80}), pcc, 2))
-      eq(0, pcc[0])
+      eq({'\xc2\x80', 0x80}, test_seq{0xc2, 0x80})
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0xc0}), pcc, 2))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0xc0})
     end)
 
     itp('3-byte sequences', function()
-      local pcc = to_intp()
-
       -- No second UTF-8 character
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80, 0x80}), pcc, 3))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x80, 0x80})
       -- No combining character
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0x80}), pcc, 3))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0xc2, 0x80})
 
       -- Combining character is U+0300
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80}), pcc, 3))
-      eq(0x0300, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc}), pcc, 3))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc})
       -- Incomplete combining character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc}), pcc, 3))
-      eq(0, pcc[0])
+      eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc})
 
-      -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x20d0, mbyte.utfc_ptr2char_len(to_string({0xe2, 0x83, 0x90}), pcc, 3))
-      eq(0, pcc[0])
+      -- One UTF-8 character (composing only); the expected text is prefixed with a space
+      eq({" \xe2\x83\x90", 0x20d0}, test_seq{0xe2, 0x83, 0x90})
     end)
 
     itp('4-byte sequences', function()
-      local pcc = to_intp()
 
       -- No following combining character
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80})
       -- No second UTF-8 character
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80})
 
       -- Combining character U+0300
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 4))
-      eq(0x0300, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80})
       -- No following UTF-8 character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc}), pcc, 4))
-      eq(0, pcc[0])
+      eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc})
       -- Combining character U+0301
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81}), pcc, 4))
-      eq(0x0301, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81})
 
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80})
     end)
 
     itp('5+-byte sequences', function()
-      local pcc = to_intp()
-
       -- No following combining character
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80, 0x80})
       -- No second UTF-8 character
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80, 0x80})
 
       -- Combining character U+0300
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 5))
-      eq(0x0300, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x00})
 
       -- Combining characters U+0300 and U+0301
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81}), pcc, 5))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0000, pcc[2])
+      eq({"\x7f\xcc\x80\xcc\x81", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81})
       -- Combining characters U+0300, U+0301, U+0302
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}), pcc, 7))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0000, pcc[3])
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82})
       -- Combining characters U+0300, U+0301, U+0302, U+0303
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}), pcc, 9))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0000, pcc[4])
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83})
       -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84}), pcc, 11))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0304, pcc[4])
-      eq(0x0000, pcc[5])
-      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
-      -- U+0305
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0304, pcc[4])
-      eq(0x0305, pcc[5])
-      eq(1, pcc[6])
-
-      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
-      -- U+0305, U+0306, but only save six (= MAX_MCO).
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86}), pcc, 15))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0304, pcc[4])
-      eq(0x0305, pcc[5])
-      eq(0x0001, pcc[6])
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84})
+      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85})
 
-      -- Only three following combining characters U+0300, U+0301, U+0302
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0000, pcc[3])
+      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305, U+0306 (all kept)
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85\xcc\x86", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86})
 
+      -- Only three following combining characters U+0300, U+0301, U+0302
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80, 0x80})
       -- No following UTF-8 character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc, 0x80})
       -- Combining character U+0301
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0x7f}), pcc, 5))
-      eq(0x0301, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0x7f})
       -- Combining character U+0301
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0xcc}), pcc, 5))
-      eq(0x0301, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0xcc})
 
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x7f}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x7f})
 
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x80})
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0xcc}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xcc})
 
       -- Combining characters U+1AB0 and U+0301
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string(
-        {0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}), pcc, 9))
-      eq(0x1ab0, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0000, pcc[2])
+      eq({"\xf4\x80\x80\x80\xe1\xaa\xb0\xcc\x81", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81})
     end)
 
   end)