refactor(grid): make screen rendering more multibyte than ever before

Problem: Buffer text with composing chars is converted from UTF-8 to an array of up to seven UTF-32 values and then converted back to UTF-8 strings. Solution: Convert buffer text directly to UTF-8 based schar_T values. The limit of the text size is now in schar_T bytes, which is currently 31+1 but could easily be raised, as it no longer multiplies the size of the entire screen grid when not used; the full size is only required for temporary scratch buffers. Also does some general cleanup to win_line text handling, which was unnecessarily complicated due to multibyte rendering being an "opt-in" feature long ago. Nowadays, a char is just a char, regardless of whether it consists of one ASCII byte or multiple bytes.
author: bfredl <bjorn.linse@gmail.com> 2023-11-06 14:52:27 +0100
committer: bfredl <bjorn.linse@gmail.com> 2023-11-17 12:58:57 +0100
commit: b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254 (patch)
tree: 434ec27e069ba57406ce9f6d194627e95c3d315c /test
parent: 20ec4c776a07492c2e3b995e10b40b1cdb52bc7a (diff)
download: rneovim-b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254.tar.gz
rneovim-b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254.tar.bz2
rneovim-b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254.zip
4 files changed, 114 insertions, 201 deletions
diff --git a/test/functional/ui/fold_spec.lua b/test/functional/ui/fold_spec.lua
index 9a0182ea29..1addf7088e 100644
--- a/test/functional/ui/fold_spec.lua
+++ b/test/functional/ui/fold_spec.lua
@@ -1102,8 +1102,6 @@ describe("folded lines", function()
     end)
 
     it("works with multibyte text", function()
-      -- Currently the only allowed value of 'maxcombine'
-      eq(6, meths.get_option_value('maxcombine', {}))
       eq(true, meths.get_option_value('arabicshape', {}))
       insert([[
         å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢͟ العَرَبِيَّة
@@ -1120,7 +1118,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
+          å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
           möre tex^t                                    |
           {1:~                                            }|
           {1:~                                            }|
@@ -1132,7 +1130,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
+          å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ                               |
           möre tex^t                                    |
           {1:~                                            }|
           {1:~                                            }|
@@ -1156,7 +1154,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1168,7 +1166,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1192,7 +1190,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1204,7 +1202,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
+          {5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1228,7 +1226,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
+          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1240,7 +1238,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
+          {7:+ }{8:  1 }{5:^+--  2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
           {1:~                                            }|
           {1:~                                            }|
           {1:~                                            }|
@@ -1265,7 +1263,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}{8: 1  }{7: +}|
+          {5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}{8: 1  }{7: +}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1277,7 +1275,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}{8: 1  }{7: +}|
+          {5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}{8: 1  }{7: +}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1301,7 +1299,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1313,7 +1311,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1337,7 +1335,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1349,7 +1347,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2  --^+}|
+          {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2  --^+}|
           {1:                                            ~}|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1373,7 +1371,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
+                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1385,7 +1383,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
+                                         ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1409,7 +1407,7 @@ describe("folded lines", function()
           [2:---------------------------------------------]|
           [3:---------------------------------------------]|
         ## grid 2
-                                         ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
+                                         ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
@@ -1421,7 +1419,7 @@ describe("folded lines", function()
         ]])
       else
         screen:expect([[
-                                         ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
+                                         ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
                                               txet eröm|
           {1:                                            ~}|
           {1:                                            ~}|
diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index 077dd1a779..d72bf27d6b 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -228,6 +228,36 @@ describe("multibyte rendering", function()
     ]]}
 
   end)
+
+  it('works with arabicshape and multiple composing chars', function()
+    -- this tests an important edge case: arabicshape might increase the byte size of the base
+    -- character in a way so that the last composing char no longer fits. use "g8" on the text
+    -- to observe what is happening (the final E1 80 B7 gets deleted with 'arabicshape')
+    -- If we would increase the schar_t size, say from 32 to 64 bytes, we need to extend the
+    -- test text with even more zalgo energy to still touch this edge case.
+
+    meths.buf_set_lines(0,0,-1,true, {"سلام့̀́̂̃̄̅̆̇̈̉̊̋̌"})
+    command('set noarabicshape')
+
+    screen:expect{grid=[[
+      ^سلام့̀́̂̃̄̅̆̇̈̉̊̋̌                                                        |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+
+    command('set arabicshape')
+    screen:expect{grid=[[
+      ^ﺱﻼﻣ̀́̂̃̄̅̆̇̈̉̊̋̌                                                         |
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+      {1:~                                                           }|
+                                                                  |
+    ]]}
+  end)
 end)
 
 describe('multibyte rendering: statusline', function()
diff --git a/test/functional/ui/output_spec.lua b/test/functional/ui/output_spec.lua
index 0dd1f0325c..7b93b74eac 100644
--- a/test/functional/ui/output_spec.lua
+++ b/test/functional/ui/output_spec.lua
@@ -225,8 +225,8 @@ describe("shell command :!", function()
         å                                                    |
         ref: å̲                                               |
         1: å̲                                                 |
-        2: å ̲                                               |
-        3: å ̲                                               |
+        2: å ̲                                                |
+        3: å ̲                                                |
                                                              |
         {3:Press ENTER or type command to continue}^              |
       ]])
diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua
index fdb1bceab0..cd94624570 100644
--- a/test/unit/mbyte_spec.lua
+++ b/test/unit/mbyte_spec.lua
@@ -4,17 +4,9 @@ local itp = helpers.gen_itp(it)
 local ffi     = helpers.ffi
 local eq      = helpers.eq
 
-local mbyte = helpers.cimport("./src/nvim/mbyte.h")
-local charset = helpers.cimport('./src/nvim/charset.h')
+local lib = helpers.cimport('./src/nvim/mbyte.h', './src/nvim/charset.h', './src/nvim/grid.h')
 
 describe('mbyte', function()
-  -- Array for composing characters
-  local intp = ffi.typeof('int[?]')
-  local function to_intp()
-    -- how to get MAX_MCO from globals.h?
-    return intp(7, 1)
-  end
-
   -- Convert from bytes to string
   local function to_string(bytes)
     local s = {}
@@ -30,14 +22,14 @@ describe('mbyte', function()
   itp('utf_ptr2char', function()
     -- For strings with length 1 the first byte is returned.
     for c = 0, 255 do
-      eq(c, mbyte.utf_ptr2char(to_string({c, 0})))
+      eq(c, lib.utf_ptr2char(to_string({c, 0})))
     end
 
     -- Some ill formed byte sequences that should not be recognized as UTF-8
     -- First byte: 0xc0 or 0xc1
     -- Second byte: 0x80 .. 0xbf
-    --eq(0x00c0, mbyte.utf_ptr2char(to_string({0xc0, 0x80})))
-    --eq(0x00c1, mbyte.utf_ptr2char(to_string({0xc1, 0xbf})))
+    --eq(0x00c0, lib.utf_ptr2char(to_string({0xc0, 0x80})))
+    --eq(0x00c1, lib.utf_ptr2char(to_string({0xc1, 0xbf})))
     --
     -- Sequences with more than four bytes
   end)
@@ -47,240 +39,133 @@ describe('mbyte', function()
       local char_p = ffi.typeof('char[?]')
       for c = n * 0x1000, n * 0x1000 + 0xFFF do
         local p = char_p(4, 0)
-        mbyte.utf_char2bytes(c, p)
-        eq(c, mbyte.utf_ptr2char(p))
-        eq(charset.vim_iswordc(c), charset.vim_iswordp(p))
+        lib.utf_char2bytes(c, p)
+        eq(c, lib.utf_ptr2char(p))
+        eq(lib.vim_iswordc(c), lib.vim_iswordp(p))
       end
     end)
   end
 
-  describe('utfc_ptr2char_len', function()
+  describe('utfc_ptr2schar_len', function()
+    local function test_seq(seq)
+      local firstc = ffi.new("int[1]")
+      local buf = ffi.new("char[32]")
+      lib.schar_get(buf, lib.utfc_ptr2schar_len(to_string(seq), #seq, firstc))
+      return {ffi.string(buf), firstc[0]}
+    end
+
+    local function byte(val)
+      return {string.char(val), val}
+    end
 
     itp('1-byte sequences', function()
-      local pcc = to_intp()
-      for c = 0, 255 do
-        eq(c, mbyte.utfc_ptr2char_len(to_string({c}), pcc, 1))
-        eq(0, pcc[0])
+      eq({'', 0}, test_seq{0})
+      for c = 1, 127 do
+        eq(byte(c), test_seq{c})
+      end
+      for c = 128, 255 do
+        eq({'', c}, test_seq{c})
       end
     end)
 
     itp('2-byte sequences', function()
-      local pcc = to_intp()
       -- No combining characters
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f}), pcc, 2))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x7f})
       -- No combining characters
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80}), pcc, 2))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x80})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f}), pcc, 2))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f})
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80}), pcc, 2))
-      eq(0, pcc[0])
+      eq({'\xc2\x80', 0x80}, test_seq{0xc2, 0x80})
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0xc0}), pcc, 2))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0xc0})
     end)
 
     itp('3-byte sequences', function()
-      local pcc = to_intp()
-
       -- No second UTF-8 character
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80, 0x80}), pcc, 3))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x80, 0x80})
       -- No combining character
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0x80}), pcc, 3))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0xc2, 0x80})
 
       -- Combining character is U+0300
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80}), pcc, 3))
-      eq(0x0300, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc}), pcc, 3))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc})
       -- Incomplete combining character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc}), pcc, 3))
-      eq(0, pcc[0])
+      eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc})
 
-      -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x20d0, mbyte.utfc_ptr2char_len(to_string({0xe2, 0x83, 0x90}), pcc, 3))
-      eq(0, pcc[0])
+      -- One UTF-8 character (composing only)
+      eq({" \xe2\x83\x90", 0x20d0}, test_seq{0xe2, 0x83, 0x90})
     end)
 
     itp('4-byte sequences', function()
-      local pcc = to_intp()
 
       -- No following combining character
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80})
       -- No second UTF-8 character
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80})
 
       -- Combining character U+0300
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 4))
-      eq(0x0300, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80})
       -- No following UTF-8 character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc}), pcc, 4))
-      eq(0, pcc[0])
+      eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc})
       -- Combining character U+0301
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81}), pcc, 4))
-      eq(0x0301, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81})
 
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80}), pcc, 4))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80})
     end)
 
     itp('5+-byte sequences', function()
-      local pcc = to_intp()
-
       -- No following combining character
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80, 0x80})
       -- No second UTF-8 character
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80, 0x80})
 
       -- Combining character U+0300
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 5))
-      eq(0x0300, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x00})
 
       -- Combining characters U+0300 and U+0301
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81}), pcc, 5))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0000, pcc[2])
+      eq({"\x7f\xcc\x80\xcc\x81", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81})
       -- Combining characters U+0300, U+0301, U+0302
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}), pcc, 7))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0000, pcc[3])
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82})
       -- Combining characters U+0300, U+0301, U+0302, U+0303
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}), pcc, 9))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0000, pcc[4])
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83})
       -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84}), pcc, 11))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0304, pcc[4])
-      eq(0x0000, pcc[5])
-      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
-      -- U+0305
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0304, pcc[4])
-      eq(0x0305, pcc[5])
-      eq(1, pcc[6])
-
-      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
-      -- U+0305, U+0306, but only save six (= MAX_MCO).
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86}), pcc, 15))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0303, pcc[3])
-      eq(0x0304, pcc[4])
-      eq(0x0305, pcc[5])
-      eq(0x0001, pcc[6])
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84})
+      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85})
 
-      -- Only three following combining characters U+0300, U+0301, U+0302
-      pcc = to_intp()
-      eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
-        {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
-      eq(0x0300, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0302, pcc[2])
-      eq(0x0000, pcc[3])
+      -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305, U+0306
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85\xcc\x86", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86})
 
+      -- Only three following combining characters U+0300, U+0301, U+0302
+      eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85})
 
       -- No UTF-8 sequence
-      pcc = to_intp()
-      eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80, 0x80})
       -- No following UTF-8 character
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc, 0x80})
       -- Combining character U+0301
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0x7f}), pcc, 5))
-      eq(0x0301, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0x7f})
       -- Combining character U+0301
-      pcc = to_intp()
-      eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0xcc}), pcc, 5))
-      eq(0x0301, pcc[0])
-      eq(0x0000, pcc[1])
+      eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0xcc})
 
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x7f}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x7f})
 
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x80}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x80})
       -- One UTF-8 character
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0xcc}), pcc, 5))
-      eq(0, pcc[0])
+      eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xcc})
 
       -- Combining characters U+1AB0 and U+0301
-      pcc = to_intp()
-      eq(0x100000, mbyte.utfc_ptr2char_len(to_string(
-        {0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}), pcc, 9))
-      eq(0x1ab0, pcc[0])
-      eq(0x0301, pcc[1])
-      eq(0x0000, pcc[2])
+      eq({"\xf4\x80\x80\x80\xe1\xaa\xb0\xcc\x81", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81})
     end)
 
   end)
author	bfredl <bjorn.linse@gmail.com>	2023-11-06 14:52:27 +0100
committer	bfredl <bjorn.linse@gmail.com>	2023-11-17 12:58:57 +0100
commit	b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254 (patch)
tree	434ec27e069ba57406ce9f6d194627e95c3d315c /test
parent	20ec4c776a07492c2e3b995e10b40b1cdb52bc7a (diff)
download	rneovim-b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254.tar.gz rneovim-b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254.tar.bz2 rneovim-b522cb1ac3fbdf6e68eed5d0b6e1cbeaf3ac2254.zip