aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Lingelbach <m.j.lbach@gmail.com>2021-10-30 10:30:40 -0700
committerGitHub <noreply@github.com>2021-10-30 10:30:40 -0700
commit2230b578d1d36728d156f82fb0e1a44c1f810b8c (patch)
treea4f94d41d9b89fc54b5b58ffdcbd1826757ed860 /src
parent97ae0ab4d8e4cdc5be2dab43e328f0a9d248b30a (diff)
downloadrneovim-2230b578d1d36728d156f82fb0e1a44c1f810b8c.tar.gz
rneovim-2230b578d1d36728d156f82fb0e1a44c1f810b8c.tar.bz2
rneovim-2230b578d1d36728d156f82fb0e1a44c1f810b8c.zip
feat: add vim.str_utf_{start,end} (#16129)
vim.str_utf_{start,end} return the byte offset from the given byte position to the start and end, respectively, of the UTF-8 character (codepoint) that the position falls in.
Diffstat (limited to 'src')
-rw-r--r--src/nvim/lua/stdlib.c45
-rw-r--r--src/nvim/mbyte.c34
2 files changed, 79 insertions, 0 deletions
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
index 2d969357b4..788819ab03 100644
--- a/src/nvim/lua/stdlib.c
+++ b/src/nvim/lua/stdlib.c
@@ -216,6 +216,45 @@ static int nlua_str_utf_pos(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
return 1;
}
+/// Return the offset from the 1-indexed byte position to the first byte of the
+/// current character.
+///
+/// Expects a string and an int (a 1-indexed byte position inside the string).
+///
+/// Returns the byte offset to the first byte of the current character
+/// pointed into by the offset, as computed by mb_head_off() (zero or
+/// negative). Raises the Lua error "index out of range" when the position is
+/// not within [1, length of the string].
+static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
+{
+  size_t s1_len;
+  const char *s1 = luaL_checklstring(lstate, 1, &s1_len);
+  // Keep the full width of the Lua integer; a narrower type could truncate.
+  const lua_Integer offset = luaL_checkinteger(lstate, 2);
+  // Position 0 is rejected: it would address the byte before the string.
+  if (offset < 1 || offset > (lua_Integer)s1_len) {
+    return luaL_error(lstate, "index out of range");
+  }
+  // The offset must not be narrowed to char_u here: that would wrap every
+  // position above 255 and inspect the wrong byte.
+  int head_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (offset - 1));
+  lua_pushinteger(lstate, head_offset);
+  return 1;
+}
+
+/// Return the offset from the 1-indexed byte position to the last
+/// byte of the current character.
+///
+/// Expects a string and an int (a 1-indexed byte position inside the string).
+///
+/// Returns the byte offset to the last byte of the current character
+/// pointed into by the offset, as computed by mb_tail_off(). Raises the Lua
+/// error "index out of range" when the position is not within
+/// [1, length of the string].
+static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
+{
+  size_t s1_len;
+  const char *s1 = luaL_checklstring(lstate, 1, &s1_len);
+  // Keep the full width of the Lua integer; a narrower type could truncate.
+  const lua_Integer offset = luaL_checkinteger(lstate, 2);
+  // Position 0 is rejected: it would address the byte before the string.
+  if (offset < 1 || offset > (lua_Integer)s1_len) {
+    return luaL_error(lstate, "index out of range");
+  }
+  // The offset must not be narrowed to char_u here: that would wrap every
+  // position above 255 and inspect the wrong byte.
+  int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (offset - 1));
+  lua_pushinteger(lstate, tail_offset);
+  return 1;
+}
/// convert UTF-32 or UTF-16 indices to byte index.
///
@@ -439,6 +478,12 @@ void nlua_state_add_stdlib(lua_State *const lstate)
// str_utf_pos
lua_pushcfunction(lstate, &nlua_str_utf_pos);
lua_setfield(lstate, -2, "str_utf_pos");
+ // str_utf_start
+ lua_pushcfunction(lstate, &nlua_str_utf_start);
+ lua_setfield(lstate, -2, "str_utf_start");
+ // str_utf_end
+ lua_pushcfunction(lstate, &nlua_str_utf_end);
+ lua_setfield(lstate, -2, "str_utf_end");
// regex
lua_pushcfunction(lstate, &nlua_regex);
lua_setfield(lstate, -2, "regex");
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index bd680330ca..7ce4e2b4f5 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1883,6 +1883,40 @@ int mb_tail_off(char_u *base, char_u *p)
return i;
}
+
+/// Return the offset from "p" to the first byte of the character it points
+/// into. Can start anywhere in a stream of bytes.
+///
+/// @param[in] base  Pointer to start of string
+/// @param[in] p     Pointer to the byte for which to return the offset to the
+///                  first byte of its character
+///
+/// @return 0 if "p" points at NUL, is already on the first byte, or the
+///         sequence is invalid; else the (negative) offset from "p" back to
+///         the first byte of the character
+int mb_head_off(char_u *base, char_u *p)
+{
+  if (*p == NUL) {
+    return 0;
+  }
+
+  // Step back over continuation bytes (10xx.xxxx). "i" is zero or negative,
+  // so the bound is "p + i": the scan must never move before "base".
+  int i = 0;
+  while (p + i > base && (p[i] & 0xc0) == 0x80) {
+    i--;
+  }
+
+  // Check for illegal sequence: the byte the scan stopped on has to be able
+  // to start a multi-byte character.
+  if (utf8len_tab[p[i]] == 1) {
+    return 0;
+  }
+  return i;
+}
+
/*
* Find the next illegal byte sequence.
*/