diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/lua/executor.c | 65 | ||||
-rw-r--r-- | src/nvim/mbyte.c | 25 |
2 files changed, 90 insertions, 0 deletions
diff --git a/src/nvim/lua/executor.c b/src/nvim/lua/executor.c index 4051354d65..29682e8add 100644 --- a/src/nvim/lua/executor.c +++ b/src/nvim/lua/executor.c @@ -112,6 +112,65 @@ static int nlua_stricmp(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL return 1; } +/// convert byte index to UTF-32 and UTF-16 indices +/// +/// Expects a string and an optional index. If no index is supplied, the length +/// of the string is returned. +/// +/// Returns two values: the UTF-32 and UTF-16 indices. +static int nlua_str_utfindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL +{ + size_t s1_len; + const char *s1 = luaL_checklstring(lstate, 1, &s1_len); + intptr_t idx; + if (lua_gettop(lstate) >= 2) { + idx = luaL_checkinteger(lstate, 2); + if (idx < 0 || idx > (intptr_t)s1_len) { + return luaL_error(lstate, "index out of range"); + } + } else { + idx = (intptr_t)s1_len; + } + + size_t codepoints = 0, codeunits = 0; + mb_utflen((const char_u *)s1, (size_t)idx, &codepoints, &codeunits); + + lua_pushinteger(lstate, (long)codepoints); + lua_pushinteger(lstate, (long)codeunits); + + return 2; +} + +/// convert UTF-32 or UTF-16 indices to byte index. +/// +/// Expects up to three args: string, index and use_utf16. +/// If use_utf16 is not supplied it defaults to false (use UTF-32) +/// +/// Returns the byte index. 
+static int nlua_str_byteindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL +{ + size_t s1_len; + const char *s1 = luaL_checklstring(lstate, 1, &s1_len); + intptr_t idx = luaL_checkinteger(lstate, 2); + if (idx < 0) { + return luaL_error(lstate, "index out of range"); + } + bool use_utf16 = false; + if (lua_gettop(lstate) >= 3) { + use_utf16 = lua_toboolean(lstate, 3); + } + + ssize_t byteidx = mb_utf_index_to_bytes((const char_u *)s1, s1_len, + (size_t)idx, use_utf16); + if (byteidx == -1) { + return luaL_error(lstate, "index out of range"); + } + + lua_pushinteger(lstate, (long)byteidx); + + return 1; +} + static void nlua_luv_error_event(void **argv) { char *error = (char *)argv[0]; @@ -220,6 +279,12 @@ static int nlua_state_init(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL // stricmp lua_pushcfunction(lstate, &nlua_stricmp); lua_setfield(lstate, -2, "stricmp"); + // str_utfindex + lua_pushcfunction(lstate, &nlua_str_utfindex); + lua_setfield(lstate, -2, "str_utfindex"); + // str_byteindex + lua_pushcfunction(lstate, &nlua_str_byteindex); + lua_setfield(lstate, -2, "str_byteindex"); // schedule lua_pushcfunction(lstate, &nlua_schedule); lua_setfield(lstate, -2, "schedule"); diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index bf8ce46113..c9ac335f7b 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1470,6 +1470,31 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints, *codeunits += count + extra; } +ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len, + size_t index, bool use_utf16_units) + FUNC_ATTR_NONNULL_ALL +{ + size_t count = 0; + size_t clen, i; + if (index == 0) { + return 0; + } + for (i = 0; i < len && s[i] != NUL; i += clen) { + clen = utf_ptr2len_len(s+i, len-i); + // NB: gets the byte value of invalid sequence bytes. + // we only care whether the char fits in the BMP or not + int c = (clen > 1) ? 
utf_ptr2char(s+i) : s[i]; + count++; + if (use_utf16_units && c > 0xFFFF) { + count++; + } + if (count >= index) { + return i+clen; + } + } + return -1; +} + /* * Version of strnicmp() that handles multi-byte characters. |