diff options
-rw-r--r-- | runtime/doc/if_lua.txt | 18 | ||||
-rw-r--r-- | src/nvim/lua/executor.c | 65 | ||||
-rw-r--r-- | src/nvim/mbyte.c | 25 | ||||
-rw-r--r-- | test/functional/lua/utility_functions_spec.lua | 55 |
4 files changed, 148 insertions, 15 deletions
diff --git a/runtime/doc/if_lua.txt b/runtime/doc/if_lua.txt index 7ddcb6cc92..a9b8c5fae8 100644 --- a/runtime/doc/if_lua.txt +++ b/runtime/doc/if_lua.txt @@ -459,6 +459,24 @@ vim.stricmp({a}, {b}) *vim.stricmp()* are equal, {a} is greater than {b} or {a} is lesser than {b}, respectively. +vim.str_utfindex({str}[, {index}]) *vim.str_utfindex()* + Convert byte index to UTF-32 and UTF-16 indices. If {index} is not + supplied, the length of the string is used. All indices are zero-based. + Returns two values: the UTF-32 and UTF-16 indices respectively. + + Embedded NUL bytes are treated as terminating the string. Invalid + UTF-8 bytes, and embedded surrogates are counted as one code + point each. An {index} in the middle of a UTF-8 sequence is rounded + upwards to the end of that sequence. + +vim.str_byteindex({str}, {index}[, {use_utf16}]) *vim.str_byteindex()* + Convert UTF-32 or UTF-16 {index} to byte index. If {use_utf16} is not + supplied, it defaults to false (use UTF-32). Returns the byte index. + + Invalid UTF-8 and NUL are treated as in |vim.str_utfindex()|. An {index} + in the middle of a UTF-16 sequence is rounded upwards to the end of that + sequence. + vim.schedule({callback}) *vim.schedule()* Schedules {callback} to be invoked soon by the main event-loop. Useful to avoid |textlock| or other temporary restrictions. diff --git a/src/nvim/lua/executor.c b/src/nvim/lua/executor.c index 4051354d65..29682e8add 100644 --- a/src/nvim/lua/executor.c +++ b/src/nvim/lua/executor.c @@ -112,6 +112,65 @@ static int nlua_stricmp(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL return 1; } +/// convert byte index to UTF-32 and UTF-16 indices +/// +/// Expects a string and an optional index. If no index is supplied, the length +/// of the string is used as the index. +/// +/// Returns two values: the UTF-32 and UTF-16 indices.
+static int nlua_str_utfindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL +{ + size_t s1_len; + const char *s1 = luaL_checklstring(lstate, 1, &s1_len); + intptr_t idx; + if (lua_gettop(lstate) >= 2) { + idx = luaL_checkinteger(lstate, 2); + if (idx < 0 || idx > (intptr_t)s1_len) { + return luaL_error(lstate, "index out of range"); + } + } else { + idx = (intptr_t)s1_len; + } + + size_t codepoints = 0, codeunits = 0; + mb_utflen((const char_u *)s1, (size_t)idx, &codepoints, &codeunits); + + lua_pushinteger(lstate, (long)codepoints); + lua_pushinteger(lstate, (long)codeunits); + + return 2; +} + +/// convert UTF-32 or UTF-16 indicies to byte index. +/// +/// Expects up to three args: string, index and use_utf16. +/// If use_utf16 is not supplied it defaults to false (use UTF-32) +/// +/// Returns the byte index. +static int nlua_str_byteindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL +{ + size_t s1_len; + const char *s1 = luaL_checklstring(lstate, 1, &s1_len); + intptr_t idx = luaL_checkinteger(lstate, 2); + if (idx < 0) { + return luaL_error(lstate, "index out of range"); + } + bool use_utf16 = false; + if (lua_gettop(lstate) >= 3) { + use_utf16 = lua_toboolean(lstate, 3); + } + + ssize_t byteidx = mb_utf_index_to_bytes((const char_u *)s1, s1_len, + (size_t)idx, use_utf16); + if (byteidx == -1) { + return luaL_error(lstate, "index out of range"); + } + + lua_pushinteger(lstate, (long)byteidx); + + return 1; +} + static void nlua_luv_error_event(void **argv) { char *error = (char *)argv[0]; @@ -220,6 +279,12 @@ static int nlua_state_init(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL // stricmp lua_pushcfunction(lstate, &nlua_stricmp); lua_setfield(lstate, -2, "stricmp"); + // str_utfindex + lua_pushcfunction(lstate, &nlua_str_utfindex); + lua_setfield(lstate, -2, "str_utfindex"); + // str_byteindex + lua_pushcfunction(lstate, &nlua_str_byteindex); + lua_setfield(lstate, -2, "str_byteindex"); // schedule lua_pushcfunction(lstate, &nlua_schedule); 
lua_setfield(lstate, -2, "schedule"); diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index bf8ce46113..c9ac335f7b 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1470,6 +1470,31 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints, *codeunits += count + extra; } +ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len, + size_t index, bool use_utf16_units) + FUNC_ATTR_NONNULL_ALL +{ + size_t count = 0; + size_t clen, i; + if (index == 0) { + return 0; + } + for (i = 0; i < len && s[i] != NUL; i += clen) { + clen = utf_ptr2len_len(s+i, len-i); + // NB: gets the byte value of invalid sequence bytes. + // we only care whether the char fits in the BMP or not + int c = (clen > 1) ? utf_ptr2char(s+i) : s[i]; + count++; + if (use_utf16_units && c > 0xFFFF) { + count++; + } + if (count >= index) { + return i+clen; + } + } + return -1; +} + /* * Version of strnicmp() that handles multi-byte characters. diff --git a/test/functional/lua/utility_functions_spec.lua b/test/functional/lua/utility_functions_spec.lua index 780d3a1565..0d93914119 100644 --- a/test/functional/lua/utility_functions_spec.lua +++ b/test/functional/lua/utility_functions_spec.lua @@ -2,12 +2,12 @@ local helpers = require('test.functional.helpers')(after_each) local funcs = helpers.funcs -local meths = helpers.meths local clear = helpers.clear local eq = helpers.eq local eval = helpers.eval local feed = helpers.feed local meth_pcall = helpers.meth_pcall +local exec_lua = helpers.exec_lua before_each(clear) @@ -110,28 +110,53 @@ describe('lua function', function() eq(1, funcs.luaeval('vim.stricmp("\\0C\\0", "\\0B\\0")')) end) + it("vim.str_utfindex/str_byteindex", function() + exec_lua([[_G.test_text = "xy åäö ɧ 汉语 ↥ 🤦x🦄 å بِيَّ"]]) + local indicies32 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,29,33,34,35,37,38,40,42,44,46,48} + local indicies16 = {[0]=0,1,2,3,5,7,9,10,12,13,16,19,20,23,24,28,28,29,33,33,34,35,37,38,40,42,44,46,48} + for i,k in pairs(indicies32) do + eq(k, 
exec_lua("return vim.str_byteindex(_G.test_text, ...)", i), i) + end + for i,k in pairs(indicies16) do + eq(k, exec_lua("return vim.str_byteindex(_G.test_text, ..., true)", i), i) + end + local i32, i16 = 0, 0 + for k = 0,48 do + if indicies32[i32] < k then + i32 = i32 + 1 + end + if indicies16[i16] < k then + i16 = i16 + 1 + if indicies16[i16+1] == indicies16[i16] then + i16 = i16 + 1 + end + end + eq({i32, i16}, exec_lua("return {vim.str_utfindex(_G.test_text, ...)}", k), k) + end + end) + it("vim.schedule", function() - meths.execute_lua([[ + exec_lua([[ test_table = {} vim.schedule(function() table.insert(test_table, "xx") end) table.insert(test_table, "yy") - ]], {}) - eq({"yy","xx"}, meths.execute_lua("return test_table", {})) + ]]) + eq({"yy","xx"}, exec_lua("return test_table")) -- type checked args eq({false, 'Error executing lua: vim.schedule: expected function'}, - meth_pcall(meths.execute_lua, "vim.schedule('stringly')", {})) + meth_pcall(exec_lua, "vim.schedule('stringly')")) eq({false, 'Error executing lua: vim.schedule: expected function'}, - meth_pcall(meths.execute_lua, "vim.schedule()", {})) + meth_pcall(exec_lua, "vim.schedule()")) - meths.execute_lua([[ + exec_lua([[ vim.schedule(function() error("big failure\nvery async") end) - ]], {}) + ]]) feed("<cr>") eq('Error executing vim.schedule lua callback: [string "<nvim>"]:2: big failure\nvery async', eval("v:errmsg")) @@ -139,7 +164,7 @@ describe('lua function', function() it("vim.split", function() local split = function(str, sep) - return meths.execute_lua('return vim.split(...)', {str, sep}) + return exec_lua('return vim.split(...)', str, sep) end local tests = { @@ -172,7 +197,7 @@ describe('lua function', function() it('vim.trim', function() local trim = function(s) - return meths.execute_lua('return vim.trim(...)', { s }) + return exec_lua('return vim.trim(...)', s) end local trims = { @@ -194,7 +219,7 @@ describe('lua function', function() it('vim.inspect', function() -- just make sure it 
basically works, it has its own test suite local inspect = function(t, opts) - return meths.execute_lua('return vim.inspect(...)', { t, opts }) + return exec_lua('return vim.inspect(...)', t, opts) end eq('2', inspect(2)) @@ -202,18 +227,18 @@ describe('lua function', function() inspect({ a = { b = 1 } }, { newline = '+', indent = '' })) -- special value vim.inspect.KEY works - eq('{ KEY_a = "x", KEY_b = "y"}', meths.execute_lua([[ + eq('{ KEY_a = "x", KEY_b = "y"}', exec_lua([[ return vim.inspect({a="x", b="y"}, {newline = '', process = function(item, path) if path[#path] == vim.inspect.KEY then return 'KEY_'..item end return item end}) - ]], {})) + ]])) end) it("vim.deepcopy", function() - local is_dc = meths.execute_lua([[ + local is_dc = exec_lua([[ local a = { x = { 1, 2 }, y = 5} local b = vim.deepcopy(a) @@ -222,7 +247,7 @@ describe('lua function', function() return b.x[1] == 1 and b.x[2] == 2 and b.y == 5 and count == 2 and tostring(a) ~= tostring(b) - ]], {}) + ]]) assert(is_dc) end) |