aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r-- test/functional/ui/fold_spec.lua 38
-rw-r--r-- test/functional/ui/multibyte_spec.lua 30
-rw-r--r-- test/functional/ui/output_spec.lua 4
-rw-r--r-- test/unit/mbyte_spec.lua 243
4 files changed, 114 insertions, 201 deletions
diff --git a/test/functional/ui/fold_spec.lua b/test/functional/ui/fold_spec.lua
index 9a0182ea29..1addf7088e 100644
--- a/test/functional/ui/fold_spec.lua
+++ b/test/functional/ui/fold_spec.lua
@@ -1102,8 +1102,6 @@ describe("folded lines", function()
end)
it("works with multibyte text", function()
- -- Currently the only allowed value of 'maxcombine'
- eq(6, meths.get_option_value('maxcombine', {}))
eq(true, meths.get_option_value('arabicshape', {}))
insert([[
å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢͟ العَرَبِيَّة
@@ -1120,7 +1118,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
+ å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
möre tex^t |
{1:~ }|
{1:~ }|
@@ -1132,7 +1130,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
+ å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ |
möre tex^t |
{1:~ }|
{1:~ }|
@@ -1156,7 +1154,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- {5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
+ {5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
{1:~ }|
{1:~ }|
{1:~ }|
@@ -1168,7 +1166,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- {5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
+ {5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ ﺎﻠﻋَﺮَﺒِﻳَّﺓ·················}|
{1:~ }|
{1:~ }|
{1:~ }|
@@ -1192,7 +1190,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- {5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
+ {5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
{1:~ }|
{1:~ }|
{1:~ }|
@@ -1204,7 +1202,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- {5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة·················}|
+ {5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة·················}|
{1:~ }|
{1:~ }|
{1:~ }|
@@ -1228,7 +1226,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- {7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
+ {7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
{1:~ }|
{1:~ }|
{1:~ }|
@@ -1240,7 +1238,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- {7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̎͂̀̂͛͛ العَرَبِيَّة···········}|
+ {7:+ }{8: 1 }{5:^+-- 2 lines: å 语 x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ العَرَبِيَّة···········}|
{1:~ }|
{1:~ }|
{1:~ }|
@@ -1265,7 +1263,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- {5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}{8: 1 }{7: +}|
+ {5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}{8: 1 }{7: +}|
{1: ~}|
{1: ~}|
{1: ~}|
@@ -1277,7 +1275,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- {5:···········ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}{8: 1 }{7: +}|
+ {5:···········ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}{8: 1 }{7: +}|
{1: ~}|
{1: ~}|
{1: ~}|
@@ -1301,7 +1299,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- {5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
+ {5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
{1: ~}|
{1: ~}|
{1: ~}|
@@ -1313,7 +1311,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- {5:·················ةيَّبِرَعَلا x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
+ {5:·················ةيَّبِرَعَلا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
{1: ~}|
{1: ~}|
{1: ~}|
@@ -1337,7 +1335,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
+ {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
{1: ~}|
{1: ~}|
{1: ~}|
@@ -1349,7 +1347,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̎͂̀̂͛͛ 语 å :senil 2 --^+}|
+ {5:·················ﺔﻴَّﺑِﺮَﻌَﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å :senil 2 --^+}|
{1: ~}|
{1: ~}|
{1: ~}|
@@ -1373,7 +1371,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
+ ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
txet eröm|
{1: ~}|
{1: ~}|
@@ -1385,7 +1383,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̎͂̀̂͛͛ 语 å|
+ ﺔﻴَّﺑِﺮَﻌَ^ﻟﺍ x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
txet eröm|
{1: ~}|
{1: ~}|
@@ -1409,7 +1407,7 @@ describe("folded lines", function()
[2:---------------------------------------------]|
[3:---------------------------------------------]|
## grid 2
- ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
+ ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
txet eröm|
{1: ~}|
{1: ~}|
@@ -1421,7 +1419,7 @@ describe("folded lines", function()
]])
else
screen:expect([[
- ةيَّبِرَعَ^لا x̎͂̀̂͛͛ 语 å|
+ ةيَّبِرَعَ^لا x̨̣̘̫̲͚͎̎͂̀̂͛͛̾͢ 语 å|
txet eröm|
{1: ~}|
{1: ~}|
diff --git a/test/functional/ui/multibyte_spec.lua b/test/functional/ui/multibyte_spec.lua
index 077dd1a779..d72bf27d6b 100644
--- a/test/functional/ui/multibyte_spec.lua
+++ b/test/functional/ui/multibyte_spec.lua
@@ -228,6 +228,36 @@ describe("multibyte rendering", function()
]]}
end)
+
+ it('works with arabicshape and multiple composing chars', function()
+ -- This tests an important edge case: 'arabicshape' might increase the byte size of the base
+ -- character so that the last composing char no longer fits. Use "g8" on the text to
+ -- observe what is happening (the final E1 80 B7 gets deleted with 'arabicshape').
+ -- If we were to increase the schar_t size, say from 32 to 64 bytes, we would need to extend
+ -- the test text with even more zalgo energy to still touch this edge case.
+
+ meths.buf_set_lines(0,0,-1,true, {"سلام့̀́̂̃̄̅̆̇̈̉̊̋̌"})
+ command('set noarabicshape')
+
+ screen:expect{grid=[[
+ ^سلام့̀́̂̃̄̅̆̇̈̉̊̋̌ |
+ {1:~ }|
+ {1:~ }|
+ {1:~ }|
+ {1:~ }|
+ |
+ ]]}
+
+ command('set arabicshape')
+ screen:expect{grid=[[
+ ^ﺱﻼﻣ̀́̂̃̄̅̆̇̈̉̊̋̌ |
+ {1:~ }|
+ {1:~ }|
+ {1:~ }|
+ {1:~ }|
+ |
+ ]]}
+ end)
end)
describe('multibyte rendering: statusline', function()
diff --git a/test/functional/ui/output_spec.lua b/test/functional/ui/output_spec.lua
index 0dd1f0325c..7b93b74eac 100644
--- a/test/functional/ui/output_spec.lua
+++ b/test/functional/ui/output_spec.lua
@@ -225,8 +225,8 @@ describe("shell command :!", function()
å |
ref: å̲ |
1: å̲ |
- 2: å ̲ |
- 3: å ̲ |
+ 2: å ̲ |
+ 3: å ̲ |
|
{3:Press ENTER or type command to continue}^ |
]])
diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua
index fdb1bceab0..cd94624570 100644
--- a/test/unit/mbyte_spec.lua
+++ b/test/unit/mbyte_spec.lua
@@ -4,17 +4,9 @@ local itp = helpers.gen_itp(it)
local ffi = helpers.ffi
local eq = helpers.eq
-local mbyte = helpers.cimport("./src/nvim/mbyte.h")
-local charset = helpers.cimport('./src/nvim/charset.h')
+local lib = helpers.cimport('./src/nvim/mbyte.h', './src/nvim/charset.h', './src/nvim/grid.h')
describe('mbyte', function()
- -- Array for composing characters
- local intp = ffi.typeof('int[?]')
- local function to_intp()
- -- how to get MAX_MCO from globals.h?
- return intp(7, 1)
- end
-
-- Convert from bytes to string
local function to_string(bytes)
local s = {}
@@ -30,14 +22,14 @@ describe('mbyte', function()
itp('utf_ptr2char', function()
-- For strings with length 1 the first byte is returned.
for c = 0, 255 do
- eq(c, mbyte.utf_ptr2char(to_string({c, 0})))
+ eq(c, lib.utf_ptr2char(to_string({c, 0})))
end
-- Some ill formed byte sequences that should not be recognized as UTF-8
-- First byte: 0xc0 or 0xc1
-- Second byte: 0x80 .. 0xbf
- --eq(0x00c0, mbyte.utf_ptr2char(to_string({0xc0, 0x80})))
- --eq(0x00c1, mbyte.utf_ptr2char(to_string({0xc1, 0xbf})))
+ --eq(0x00c0, lib.utf_ptr2char(to_string({0xc0, 0x80})))
+ --eq(0x00c1, lib.utf_ptr2char(to_string({0xc1, 0xbf})))
--
-- Sequences with more than four bytes
end)
@@ -47,240 +39,133 @@ describe('mbyte', function()
local char_p = ffi.typeof('char[?]')
for c = n * 0x1000, n * 0x1000 + 0xFFF do
local p = char_p(4, 0)
- mbyte.utf_char2bytes(c, p)
- eq(c, mbyte.utf_ptr2char(p))
- eq(charset.vim_iswordc(c), charset.vim_iswordp(p))
+ lib.utf_char2bytes(c, p)
+ eq(c, lib.utf_ptr2char(p))
+ eq(lib.vim_iswordc(c), lib.vim_iswordp(p))
end
end)
end
- describe('utfc_ptr2char_len', function()
+ describe('utfc_ptr2schar_len', function()
+ local function test_seq(seq)
+ local firstc = ffi.new("int[1]")
+ local buf = ffi.new("char[32]")
+ lib.schar_get(buf, lib.utfc_ptr2schar_len(to_string(seq), #seq, firstc))
+ return {ffi.string(buf), firstc[0]}
+ end
+
+ local function byte(val)
+ return {string.char(val), val}
+ end
itp('1-byte sequences', function()
- local pcc = to_intp()
- for c = 0, 255 do
- eq(c, mbyte.utfc_ptr2char_len(to_string({c}), pcc, 1))
- eq(0, pcc[0])
+ eq({'', 0}, test_seq{0})
+ for c = 1, 127 do
+ eq(byte(c), test_seq{c})
+ end
+ for c = 128, 255 do
+ eq({'', c}, test_seq{c})
end
end)
itp('2-byte sequences', function()
- local pcc = to_intp()
-- No combining characters
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f}), pcc, 2))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0x7f})
-- No combining characters
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80}), pcc, 2))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0x80})
-- No UTF-8 sequence
- pcc = to_intp()
- eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f}), pcc, 2))
- eq(0, pcc[0])
+ eq({'', 0xc2}, test_seq{0xc2, 0x7f})
-- One UTF-8 character
- pcc = to_intp()
- eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80}), pcc, 2))
- eq(0, pcc[0])
+ eq({'\xc2\x80', 0x80}, test_seq{0xc2, 0x80})
-- No UTF-8 sequence
- pcc = to_intp()
- eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0xc0}), pcc, 2))
- eq(0, pcc[0])
+ eq({'', 0xc2}, test_seq{0xc2, 0xc0})
end)
itp('3-byte sequences', function()
- local pcc = to_intp()
-
-- No second UTF-8 character
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80, 0x80}), pcc, 3))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0x80, 0x80})
-- No combining character
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0x80}), pcc, 3))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0xc2, 0x80})
-- Combining character is U+0300
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80}), pcc, 3))
- eq(0x0300, pcc[0])
- eq(0x0000, pcc[1])
+ eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80})
-- No UTF-8 sequence
- pcc = to_intp()
- eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc}), pcc, 3))
- eq(0, pcc[0])
+ eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc})
-- Incomplete combining character
- pcc = to_intp()
- eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc}), pcc, 3))
- eq(0, pcc[0])
+ eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc})
- -- One UTF-8 character
- pcc = to_intp()
- eq(0x20d0, mbyte.utfc_ptr2char_len(to_string({0xe2, 0x83, 0x90}), pcc, 3))
- eq(0, pcc[0])
+ -- One UTF-8 character (composing only)
+ eq({" \xe2\x83\x90", 0x20d0}, test_seq{0xe2, 0x83, 0x90})
end)
itp('4-byte sequences', function()
- local pcc = to_intp()
-- No following combining character
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80}), pcc, 4))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80})
-- No second UTF-8 character
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80}), pcc, 4))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80})
-- Combining character U+0300
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 4))
- eq(0x0300, pcc[0])
- eq(0x0000, pcc[1])
+ eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc})
-- No UTF-8 sequence
- pcc = to_intp()
- eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80}), pcc, 4))
- eq(0, pcc[0])
+ eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80})
-- No following UTF-8 character
- pcc = to_intp()
- eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc}), pcc, 4))
- eq(0, pcc[0])
+ eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc})
-- Combining character U+0301
- pcc = to_intp()
- eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81}), pcc, 4))
- eq(0x0301, pcc[0])
- eq(0x0000, pcc[1])
+ eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81})
-- One UTF-8 character
- pcc = to_intp()
- eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80}), pcc, 4))
- eq(0, pcc[0])
+ eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80})
end)
itp('5+-byte sequences', function()
- local pcc = to_intp()
-
-- No following combining character
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80, 0x80})
-- No second UTF-8 character
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80, 0x80}), pcc, 5))
- eq(0, pcc[0])
+ eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80, 0x80})
-- Combining character U+0300
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 5))
- eq(0x0300, pcc[0])
- eq(0x0000, pcc[1])
+ eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x00})
-- Combining characters U+0300 and U+0301
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81}), pcc, 5))
- eq(0x0300, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0000, pcc[2])
+ eq({"\x7f\xcc\x80\xcc\x81", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81})
-- Combining characters U+0300, U+0301, U+0302
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}), pcc, 7))
- eq(0x0300, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0302, pcc[2])
- eq(0x0000, pcc[3])
+ eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82})
-- Combining characters U+0300, U+0301, U+0302, U+0303
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}), pcc, 9))
- eq(0x0300, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0302, pcc[2])
- eq(0x0303, pcc[3])
- eq(0x0000, pcc[4])
+ eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83})
-- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
- {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84}), pcc, 11))
- eq(0x0300, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0302, pcc[2])
- eq(0x0303, pcc[3])
- eq(0x0304, pcc[4])
- eq(0x0000, pcc[5])
- -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
- -- U+0305
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
- {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
- eq(0x0300, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0302, pcc[2])
- eq(0x0303, pcc[3])
- eq(0x0304, pcc[4])
- eq(0x0305, pcc[5])
- eq(1, pcc[6])
-
- -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
- -- U+0305, U+0306, but only save six (= MAX_MCO).
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
- {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86}), pcc, 15))
- eq(0x0300, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0302, pcc[2])
- eq(0x0303, pcc[3])
- eq(0x0304, pcc[4])
- eq(0x0305, pcc[5])
- eq(0x0001, pcc[6])
+ eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84})
+ -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305
+ eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85})
- -- Only three following combining characters U+0300, U+0301, U+0302
- pcc = to_intp()
- eq(0x007f, mbyte.utfc_ptr2char_len(to_string(
- {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13))
- eq(0x0300, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0302, pcc[2])
- eq(0x0000, pcc[3])
+ -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305, U+0306
+ eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85\xcc\x86", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86})
+ -- Only three following combining characters U+0300, U+0301, U+0302
+ eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85})
-- No UTF-8 sequence
- pcc = to_intp()
- eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5))
- eq(0, pcc[0])
+ eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80, 0x80})
-- No following UTF-8 character
- pcc = to_intp()
- eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc, 0x80}), pcc, 5))
- eq(0, pcc[0])
+ eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc, 0x80})
-- Combining character U+0301
- pcc = to_intp()
- eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0x7f}), pcc, 5))
- eq(0x0301, pcc[0])
- eq(0x0000, pcc[1])
+ eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0x7f})
-- Combining character U+0301
- pcc = to_intp()
- eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0xcc}), pcc, 5))
- eq(0x0301, pcc[0])
- eq(0x0000, pcc[1])
+ eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0xcc})
-- One UTF-8 character
- pcc = to_intp()
- eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x7f}), pcc, 5))
- eq(0, pcc[0])
+ eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x7f})
-- One UTF-8 character
- pcc = to_intp()
- eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x80}), pcc, 5))
- eq(0, pcc[0])
+ eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x80})
-- One UTF-8 character
- pcc = to_intp()
- eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0xcc}), pcc, 5))
- eq(0, pcc[0])
+ eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xcc})
-- Combining characters U+1AB0 and U+0301
- pcc = to_intp()
- eq(0x100000, mbyte.utfc_ptr2char_len(to_string(
- {0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}), pcc, 9))
- eq(0x1ab0, pcc[0])
- eq(0x0301, pcc[1])
- eq(0x0000, pcc[2])
+ eq({"\xf4\x80\x80\x80\xe1\xaa\xb0\xcc\x81", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81})
end)
end)