diff options
author | Josh Rahm <joshuarahm@gmail.com> | 2023-11-30 20:35:25 +0000 |
---|---|---|
committer | Josh Rahm <joshuarahm@gmail.com> | 2023-11-30 20:35:25 +0000 |
commit | 1b7b916b7631ddf73c38e3a0070d64e4636cb2f3 (patch) | |
tree | cd08258054db80bb9a11b1061bb091c70b76926a /test/unit/mbyte_spec.lua | |
parent | eaa89c11d0f8aefbb512de769c6c82f61a8baca3 (diff) | |
parent | 4a8bf24ac690004aedf5540fa440e788459e5e34 (diff) | |
download | rneovim-aucmd_textputpost.tar.gz rneovim-aucmd_textputpost.tar.bz2 rneovim-aucmd_textputpost.zip |
Merge remote-tracking branch 'upstream/master' into aucmd_textputpost (aucmd_textputpost)
Diffstat (limited to 'test/unit/mbyte_spec.lua')
-rw-r--r-- | test/unit/mbyte_spec.lua | 243 |
1 files changed, 64 insertions, 179 deletions
diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua index fdb1bceab0..cd94624570 100644 --- a/test/unit/mbyte_spec.lua +++ b/test/unit/mbyte_spec.lua @@ -4,17 +4,9 @@ local itp = helpers.gen_itp(it) local ffi = helpers.ffi local eq = helpers.eq -local mbyte = helpers.cimport("./src/nvim/mbyte.h") -local charset = helpers.cimport('./src/nvim/charset.h') +local lib = helpers.cimport('./src/nvim/mbyte.h', './src/nvim/charset.h', './src/nvim/grid.h') describe('mbyte', function() - -- Array for composing characters - local intp = ffi.typeof('int[?]') - local function to_intp() - -- how to get MAX_MCO from globals.h? - return intp(7, 1) - end - -- Convert from bytes to string local function to_string(bytes) local s = {} @@ -30,14 +22,14 @@ describe('mbyte', function() itp('utf_ptr2char', function() -- For strings with length 1 the first byte is returned. for c = 0, 255 do - eq(c, mbyte.utf_ptr2char(to_string({c, 0}))) + eq(c, lib.utf_ptr2char(to_string({c, 0}))) end -- Some ill formed byte sequences that should not be recognized as UTF-8 -- First byte: 0xc0 or 0xc1 -- Second byte: 0x80 .. 
0xbf - --eq(0x00c0, mbyte.utf_ptr2char(to_string({0xc0, 0x80}))) - --eq(0x00c1, mbyte.utf_ptr2char(to_string({0xc1, 0xbf}))) + --eq(0x00c0, lib.utf_ptr2char(to_string({0xc0, 0x80}))) + --eq(0x00c1, lib.utf_ptr2char(to_string({0xc1, 0xbf}))) -- -- Sequences with more than four bytes end) @@ -47,240 +39,133 @@ describe('mbyte', function() local char_p = ffi.typeof('char[?]') for c = n * 0x1000, n * 0x1000 + 0xFFF do local p = char_p(4, 0) - mbyte.utf_char2bytes(c, p) - eq(c, mbyte.utf_ptr2char(p)) - eq(charset.vim_iswordc(c), charset.vim_iswordp(p)) + lib.utf_char2bytes(c, p) + eq(c, lib.utf_ptr2char(p)) + eq(lib.vim_iswordc(c), lib.vim_iswordp(p)) end end) end - describe('utfc_ptr2char_len', function() + describe('utfc_ptr2schar_len', function() + local function test_seq(seq) + local firstc = ffi.new("int[1]") + local buf = ffi.new("char[32]") + lib.schar_get(buf, lib.utfc_ptr2schar_len(to_string(seq), #seq, firstc)) + return {ffi.string(buf), firstc[0]} + end + + local function byte(val) + return {string.char(val), val} + end itp('1-byte sequences', function() - local pcc = to_intp() - for c = 0, 255 do - eq(c, mbyte.utfc_ptr2char_len(to_string({c}), pcc, 1)) - eq(0, pcc[0]) + eq({'', 0}, test_seq{0}) + for c = 1, 127 do + eq(byte(c), test_seq{c}) + end + for c = 128, 255 do + eq({'', c}, test_seq{c}) end end) itp('2-byte sequences', function() - local pcc = to_intp() -- No combining characters - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f}), pcc, 2)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0x7f}) -- No combining characters - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80}), pcc, 2)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0x80}) -- No UTF-8 sequence - pcc = to_intp() - eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f}), pcc, 2)) - eq(0, pcc[0]) + eq({'', 0xc2}, test_seq{0xc2, 0x7f}) -- One UTF-8 character - pcc = to_intp() - eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80}), pcc, 2)) - eq(0, 
pcc[0]) + eq({'\xc2\x80', 0x80}, test_seq{0xc2, 0x80}) -- No UTF-8 sequence - pcc = to_intp() - eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0xc0}), pcc, 2)) - eq(0, pcc[0]) + eq({'', 0xc2}, test_seq{0xc2, 0xc0}) end) itp('3-byte sequences', function() - local pcc = to_intp() - -- No second UTF-8 character - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x80, 0x80}), pcc, 3)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0x80, 0x80}) -- No combining character - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0x80}), pcc, 3)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0xc2, 0x80}) -- Combining character is U+0300 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80}), pcc, 3)) - eq(0x0300, pcc[0]) - eq(0x0000, pcc[1]) + eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80}) -- No UTF-8 sequence - pcc = to_intp() - eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc}), pcc, 3)) - eq(0, pcc[0]) + eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc}) -- Incomplete combining character - pcc = to_intp() - eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc}), pcc, 3)) - eq(0, pcc[0]) + eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc}) - -- One UTF-8 character - pcc = to_intp() - eq(0x20d0, mbyte.utfc_ptr2char_len(to_string({0xe2, 0x83, 0x90}), pcc, 3)) - eq(0, pcc[0]) + -- One UTF-8 character (composing only) + eq({" \xe2\x83\x90", 0x20d0}, test_seq{0xe2, 0x83, 0x90}) end) itp('4-byte sequences', function() - local pcc = to_intp() -- No following combining character - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80}), pcc, 4)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80}) -- No second UTF-8 character - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80}), pcc, 4)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80}) -- Combining character U+0300 - pcc = to_intp() - eq(0x007f, 
mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 4)) - eq(0x0300, pcc[0]) - eq(0x0000, pcc[1]) + eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc}) -- No UTF-8 sequence - pcc = to_intp() - eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80}), pcc, 4)) - eq(0, pcc[0]) + eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80}) -- No following UTF-8 character - pcc = to_intp() - eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc}), pcc, 4)) - eq(0, pcc[0]) + eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc}) -- Combining character U+0301 - pcc = to_intp() - eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81}), pcc, 4)) - eq(0x0301, pcc[0]) - eq(0x0000, pcc[1]) + eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81}) -- One UTF-8 character - pcc = to_intp() - eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80}), pcc, 4)) - eq(0, pcc[0]) + eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80}) end) itp('5+-byte sequences', function() - local pcc = to_intp() - -- No following combining character - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0x7f, 0xcc, 0x80, 0x80}) -- No second UTF-8 character - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xc2, 0xcc, 0x80, 0x80}), pcc, 5)) - eq(0, pcc[0]) + eq(byte(0x7f), test_seq{0x7f, 0xc2, 0xcc, 0x80, 0x80}) -- Combining character U+0300 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc}), pcc, 5)) - eq(0x0300, pcc[0]) - eq(0x0000, pcc[1]) + eq({"\x7f\xcc\x80", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x00}) -- Combining characters U+0300 and U+0301 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81}), pcc, 5)) - eq(0x0300, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0000, pcc[2]) + eq({"\x7f\xcc\x80\xcc\x81", 0x7f}, 
test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81}) -- Combining characters U+0300, U+0301, U+0302 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}), pcc, 7)) - eq(0x0300, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0302, pcc[2]) - eq(0x0000, pcc[3]) + eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}) -- Combining characters U+0300, U+0301, U+0302, U+0303 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}), pcc, 9)) - eq(0x0300, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0302, pcc[2]) - eq(0x0303, pcc[3]) - eq(0x0000, pcc[4]) + eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}) -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string( - {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84}), pcc, 11)) - eq(0x0300, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0302, pcc[2]) - eq(0x0303, pcc[3]) - eq(0x0304, pcc[4]) - eq(0x0000, pcc[5]) - -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, - -- U+0305 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string( - {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13)) - eq(0x0300, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0302, pcc[2]) - eq(0x0303, pcc[3]) - eq(0x0304, pcc[4]) - eq(0x0305, pcc[5]) - eq(1, pcc[6]) - - -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, - -- U+0305, U+0306, but only save six (= MAX_MCO). 
- pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string( - {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86}), pcc, 15)) - eq(0x0300, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0302, pcc[2]) - eq(0x0303, pcc[3]) - eq(0x0304, pcc[4]) - eq(0x0305, pcc[5]) - eq(0x0001, pcc[6]) + eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84}) + -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305 + eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85}) - -- Only three following combining characters U+0300, U+0301, U+0302 - pcc = to_intp() - eq(0x007f, mbyte.utfc_ptr2char_len(to_string( - {0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85}), pcc, 13)) - eq(0x0300, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0302, pcc[2]) - eq(0x0000, pcc[3]) + -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304, U+0305, U+0306 + eq({"\x7f\xcc\x80\xcc\x81\xcc\x82\xcc\x83\xcc\x84\xcc\x85\xcc\x86", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84, 0xcc, 0x85, 0xcc, 0x86}) + -- Only three following combining characters U+0300, U+0301, U+0302 + eq({"\x7f\xcc\x80\xcc\x81\xcc\x82", 0x7f}, test_seq{0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84, 0xcc, 0x85}) -- No UTF-8 sequence - pcc = to_intp() - eq(0x00c2, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x7f, 0xcc, 0x80, 0x80}), pcc, 5)) - eq(0, pcc[0]) + eq({'', 0xc2}, test_seq{0xc2, 0x7f, 0xcc, 0x80, 0x80}) -- No following UTF-8 character - pcc = to_intp() - eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0xcc, 0x80}), pcc, 5)) - eq(0, pcc[0]) + eq({"\xc2\x80", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0xcc, 0x80}) -- Combining character U+0301 - pcc = to_intp() - eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 
0x7f}), pcc, 5)) - eq(0x0301, pcc[0]) - eq(0x0000, pcc[1]) + eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0x7f}) -- Combining character U+0301 - pcc = to_intp() - eq(0x0080, mbyte.utfc_ptr2char_len(to_string({0xc2, 0x80, 0xcc, 0x81, 0xcc}), pcc, 5)) - eq(0x0301, pcc[0]) - eq(0x0000, pcc[1]) + eq({"\xc2\x80\xcc\x81", 0x80}, test_seq{0xc2, 0x80, 0xcc, 0x81, 0xcc}) -- One UTF-8 character - pcc = to_intp() - eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x7f}), pcc, 5)) - eq(0, pcc[0]) + eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x7f}) -- One UTF-8 character - pcc = to_intp() - eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0x80}), pcc, 5)) - eq(0, pcc[0]) + eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0x80}) -- One UTF-8 character - pcc = to_intp() - eq(0x100000, mbyte.utfc_ptr2char_len(to_string({0xf4, 0x80, 0x80, 0x80, 0xcc}), pcc, 5)) - eq(0, pcc[0]) + eq({"\xf4\x80\x80\x80", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xcc}) -- Combining characters U+1AB0 and U+0301 - pcc = to_intp() - eq(0x100000, mbyte.utfc_ptr2char_len(to_string( - {0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}), pcc, 9)) - eq(0x1ab0, pcc[0]) - eq(0x0301, pcc[1]) - eq(0x0000, pcc[2]) + eq({"\xf4\x80\x80\x80\xe1\xaa\xb0\xcc\x81", 0x100000}, test_seq{0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}) end) end) |