aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/nvim/lua/executor.c65
-rw-r--r--src/nvim/mbyte.c25
2 files changed, 90 insertions, 0 deletions
diff --git a/src/nvim/lua/executor.c b/src/nvim/lua/executor.c
index 4051354d65..29682e8add 100644
--- a/src/nvim/lua/executor.c
+++ b/src/nvim/lua/executor.c
@@ -112,6 +112,65 @@ static int nlua_stricmp(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
return 1;
}
+/// convert byte index to UTF-32 and UTF-16 indicies
+///
+/// Expects a string and an optional index. If no index is supplied, the length
+/// of the string is returned.
+///
+/// Returns two values: the UTF-32 and UTF-16 indicies.
+static int nlua_str_utfindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
+{
+ size_t s1_len;
+ const char *s1 = luaL_checklstring(lstate, 1, &s1_len);
+ intptr_t idx;
+ if (lua_gettop(lstate) >= 2) {
+ idx = luaL_checkinteger(lstate, 2);
+ if (idx < 0 || idx > (intptr_t)s1_len) {
+ return luaL_error(lstate, "index out of range");
+ }
+ } else {
+ idx = (intptr_t)s1_len;
+ }
+
+ size_t codepoints = 0, codeunits = 0;
+ mb_utflen((const char_u *)s1, (size_t)idx, &codepoints, &codeunits);
+
+ lua_pushinteger(lstate, (long)codepoints);
+ lua_pushinteger(lstate, (long)codeunits);
+
+ return 2;
+}
+
+/// convert UTF-32 or UTF-16 indicies to byte index.
+///
+/// Expects up to three args: string, index and use_utf16.
+/// If use_utf16 is not supplied it defaults to false (use UTF-32)
+///
+/// Returns the byte index.
+static int nlua_str_byteindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
+{
+ size_t s1_len;
+ const char *s1 = luaL_checklstring(lstate, 1, &s1_len);
+ intptr_t idx = luaL_checkinteger(lstate, 2);
+ if (idx < 0) {
+ return luaL_error(lstate, "index out of range");
+ }
+ bool use_utf16 = false;
+ if (lua_gettop(lstate) >= 3) {
+ use_utf16 = lua_toboolean(lstate, 3);
+ }
+
+ ssize_t byteidx = mb_utf_index_to_bytes((const char_u *)s1, s1_len,
+ (size_t)idx, use_utf16);
+ if (byteidx == -1) {
+ return luaL_error(lstate, "index out of range");
+ }
+
+ lua_pushinteger(lstate, (long)byteidx);
+
+ return 1;
+}
+
static void nlua_luv_error_event(void **argv)
{
char *error = (char *)argv[0];
@@ -220,6 +279,12 @@ static int nlua_state_init(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
// stricmp
lua_pushcfunction(lstate, &nlua_stricmp);
lua_setfield(lstate, -2, "stricmp");
+ // str_utfindex
+ lua_pushcfunction(lstate, &nlua_str_utfindex);
+ lua_setfield(lstate, -2, "str_utfindex");
+ // str_byteindex
+ lua_pushcfunction(lstate, &nlua_str_byteindex);
+ lua_setfield(lstate, -2, "str_byteindex");
// schedule
lua_pushcfunction(lstate, &nlua_schedule);
lua_setfield(lstate, -2, "schedule");
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index bf8ce46113..c9ac335f7b 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1470,6 +1470,31 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints,
*codeunits += count + extra;
}
+ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len,
+ size_t index, bool use_utf16_units)
+ FUNC_ATTR_NONNULL_ALL
+{
+ size_t count = 0;
+ size_t clen, i;
+ if (index == 0) {
+ return 0;
+ }
+ for (i = 0; i < len && s[i] != NUL; i += clen) {
+ clen = utf_ptr2len_len(s+i, len-i);
+ // NB: gets the byte value of invalid sequence bytes.
+ // we only care whether the char fits in the BMP or not
+ int c = (clen > 1) ? utf_ptr2char(s+i) : s[i];
+ count++;
+ if (use_utf16_units && c > 0xFFFF) {
+ count++;
+ }
+ if (count >= index) {
+ return i+clen;
+ }
+ }
+ return -1;
+}
+
/*
* Version of strnicmp() that handles multi-byte characters.