diff options
author | Michael Lingelbach <m.j.lbach@gmail.com> | 2021-10-30 10:30:40 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-30 10:30:40 -0700 |
commit | 2230b578d1d36728d156f82fb0e1a44c1f810b8c (patch) | |
tree | a4f94d41d9b89fc54b5b58ffdcbd1826757ed860 /src/nvim/mbyte.c | |
parent | 97ae0ab4d8e4cdc5be2dab43e328f0a9d248b30a (diff) | |
download | rneovim-2230b578d1d36728d156f82fb0e1a44c1f810b8c.tar.gz rneovim-2230b578d1d36728d156f82fb0e1a44c1f810b8c.tar.bz2 rneovim-2230b578d1d36728d156f82fb0e1a44c1f810b8c.zip |
feat: add vim.str_utf_{start,end} (#16129)
vim.str_utf_{start,end} return the byte offset from the given position to
the first and last byte, respectively, of the UTF-8 character (codepoint)
that the position points into.
Diffstat (limited to 'src/nvim/mbyte.c')
-rw-r--r-- | src/nvim/mbyte.c | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index bd680330ca..7ce4e2b4f5 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1883,6 +1883,40 @@ int mb_tail_off(char_u *base, char_u *p)
   return i;
 }
 
+
+/// Return the offset from "p" to the first byte of the character it points
+/// into. Can start anywhere in a stream of bytes.
+///
+/// @param[in] base  Pointer to start of string, bytes before it are not read
+/// @param[in] p  Pointer to byte for which to return the offset, "p" >= "base"
+///
+/// @return 0 if invalid sequence, else offset (<= 0) to first byte of character
+int mb_head_off(char_u *base, char_u *p)
+{
+  int i;
+  int j;
+
+  if (*p == NUL) {
+    return 0;
+  }
+
+  // Find the first byte that is not 10xx.xxxx, never stepping below "base".
+  for (i = 0; p + i > base; i--) {
+    if ((p[i] & 0xc0) != 0x80) {
+      break;
+    }
+  }
+
+  // Find the last byte that is 10xx.xxxx
+  for (j = 0; (p[j + 1] & 0xc0) == 0x80; j++) {}
+
+  // Illegal unless the lead byte announces exactly the bytes found.
+  if (utf8len_tab[p[i]] != j - i + 1) {
+    return 0;
+  }
+  return i;
+}
+
 /*
  * Find the next illegal byte sequence.
  */