aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorzeertzjq <zeertzjq@outlook.com>2023-04-26 09:50:37 +0800
committerGitHub <noreply@github.com>2023-04-26 09:50:37 +0800
commit191e8b40625731a652bade7000911554834afe5f (patch)
treeba2267f67dc2fc833de914708efde646d9df466b /src
parent8af97ecefa71c6391a52ab799d354e058cb470be (diff)
downloadrneovim-191e8b40625731a652bade7000911554834afe5f.tar.gz
rneovim-191e8b40625731a652bade7000911554834afe5f.tar.bz2
rneovim-191e8b40625731a652bade7000911554834afe5f.zip
vim-patch:9.0.1485: no functions for converting from/to UTF-16 index (#23318)
Problem: no functions for converting from/to UTF-16 index. Solution: Add UTF-16 flag to existing functions and add strutf16len() and utf16idx(). (Yegappan Lakshmanan, closes vim/vim#12216) https://github.com/vim/vim/commit/67672ef097dd708244ff042a8364994da2b91e75 Co-authored-by: Yegappan Lakshmanan <yegappan@yahoo.com>
Diffstat (limited to 'src')
-rw-r--r--src/nvim/eval.lua8
-rw-r--r--src/nvim/strings.c150
2 files changed, 139 insertions, 19 deletions
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index 357ecd5575..09705148d0 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -65,8 +65,8 @@ return {
bufwinid={args=1, base=1},
bufwinnr={args=1, base=1},
byte2line={args=1, base=1},
- byteidx={args=2, base=1, fast=true},
- byteidxcomp={args=2, base=1, fast=true},
+ byteidx={args={2, 3}, base=1, fast=true},
+ byteidxcomp={args={2, 3}, base=1, fast=true},
call={args={2, 3}, base=1},
ceil={args=1, base=1, float_func="ceil"},
changenr={},
@@ -75,7 +75,7 @@ return {
char2nr={args={1, 2}, base=1, fast=true},
charclass={args=1, base=1},
charcol={args={1, 2}, base=1},
- charidx={args={2, 3}, base=1},
+ charidx={args={2, 4}, base=1},
chdir={args=1, base=1},
cindent={args=1, base=1},
clearmatches={args={0, 1}, base=1},
@@ -397,6 +397,7 @@ return {
strptime={args=2, base=1},
strridx={args={2, 3}, base=1},
strtrans={args=1, base=1, fast=true},
+ strutf16len={args={1, 2}, base=1},
strwidth={args=1, base=1, fast=true},
submatch={args={1, 2}, base=1},
substitute={args=4, base=1},
@@ -435,6 +436,7 @@ return {
undofile={args=1, base=1},
undotree={},
uniq={args={1, 3}, base=1},
+ utf16idx={args={2, 4}, base=1},
values={args=1, base=1},
virtcol={args={1, 2}, base=1},
virtcol2col={args=3, base=1},
diff --git a/src/nvim/strings.c b/src/nvim/strings.c
index d5d7d62c38..e8c04aa5c7 100644
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@@ -1504,22 +1504,44 @@ char *strrep(const char *src, const char *what, const char *rep)
static void byteidx(typval_T *argvars, typval_T *rettv, int comp)
{
+ rettv->vval.v_number = -1;
+
const char *const str = tv_get_string_chk(&argvars[0]);
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
- rettv->vval.v_number = -1;
if (str == NULL || idx < 0) {
return;
}
+ varnumber_T utf16idx = false;
+ if (argvars[2].v_type != VAR_UNKNOWN) {
+ utf16idx = tv_get_bool(&argvars[2]);
+ if (utf16idx < 0 || utf16idx > 1) {
+ semsg(_(e_using_number_as_bool_nr), utf16idx);
+ return;
+ }
+ }
+
+ int (*ptr2len)(const char *);
+ if (comp) {
+ ptr2len = utf_ptr2len;
+ } else {
+ ptr2len = utfc_ptr2len;
+ }
+
const char *t = str;
for (; idx > 0; idx--) {
if (*t == NUL) { // EOL reached.
return;
}
- if (comp) {
- t += utf_ptr2len(t);
- } else {
- t += utfc_ptr2len(t);
+ if (utf16idx) {
+ const int clen = ptr2len(t);
+ const int c = (clen > 1) ? utf_ptr2char(t) : *t;
+ if (c > 0xFFFF) {
+ idx--;
+ }
+ }
+ if (idx > 0) {
+ t += ptr2len(t);
}
}
rettv->vval.v_number = (varnumber_T)(t - str);
@@ -1542,24 +1564,27 @@ void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
rettv->vval.v_number = -1;
- if ((tv_check_for_string_arg(argvars, 0) == FAIL
- || tv_check_for_number_arg(argvars, 1) == FAIL
- || tv_check_for_opt_bool_arg(argvars, 2) == FAIL)) {
+ if (tv_check_for_string_arg(argvars, 0) == FAIL
+ || tv_check_for_number_arg(argvars, 1) == FAIL
+ || tv_check_for_opt_bool_arg(argvars, 2) == FAIL
+ || (argvars[2].v_type != VAR_UNKNOWN
+ && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
return;
}
- const char *str = tv_get_string_chk(&argvars[0]);
+ const char *const str = tv_get_string_chk(&argvars[0]);
varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
if (str == NULL || idx < 0) {
return;
}
- int countcc = 0;
+
+ varnumber_T countcc = false;
+ varnumber_T utf16idx = false;
if (argvars[2].v_type != VAR_UNKNOWN) {
- countcc = (int)tv_get_number(&argvars[2]);
- }
- if (countcc < 0 || countcc > 1) {
- semsg(_(e_using_number_as_bool_nr), countcc);
- return;
+ countcc = tv_get_bool(&argvars[2]);
+ if (argvars[3].v_type != VAR_UNKNOWN) {
+ utf16idx = tv_get_bool(&argvars[3]);
+ }
}
int (*ptr2len)(const char *);
@@ -1571,10 +1596,18 @@ void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
const char *p;
int len;
- for (p = str, len = 0; p <= str + idx; len++) {
+ for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) {
if (*p == NUL) {
return;
}
+ if (utf16idx) {
+ idx--;
+ const int clen = ptr2len(p);
+ const int c = (clen > 1) ? utf_ptr2char(p) : *p;
+ if (c > 0xFFFF) {
+ idx--;
+ }
+ }
p += ptr2len(p);
}
@@ -1743,6 +1776,36 @@ void f_strchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
}
}
+/// "strutf16len()" function: stores in rettv the number of UTF-16 code units of the string argvars[0]; the optional bool argvars[1] ("countcc") selects which character-advance helper is used.
+void f_strutf16len(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->vval.v_number = -1;  // result when the argument checks below fail
+
+ if (tv_check_for_string_arg(argvars, 0) == FAIL
+ || tv_check_for_opt_bool_arg(argvars, 1) == FAIL) {
+ return;
+ }
+
+ varnumber_T countcc = false;  // stays false when the optional argument is omitted
+ if (argvars[1].v_type != VAR_UNKNOWN) {
+ countcc = tv_get_bool(&argvars[1]);
+ }
+
+ const char *s = tv_get_string(&argvars[0]);
+ varnumber_T len = 0;  // running count of UTF-16 code units
+ int (*func_mb_ptr2char_adv)(const char **pp);
+
+ func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;  // NOTE(review): presumably the "c" variant returns composing characters as separate characters -- confirm in mbyte.c
+ while (*s != NUL) {
+ const int ch = func_mb_ptr2char_adv(&s);  // the helper receives &s so it can move s forward; the loop itself never changes s
+ if (ch > 0xFFFF) {  // a code point above U+FFFF takes two UTF-16 code units, so count one extra
+ len++;
+ }
+ len++;
+ }
+ rettv->vval.v_number = len;
+}
+
/// "strdisplaywidth()" function
void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
@@ -1914,6 +1977,61 @@ void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true);
}
+/// "utf16idx()" function: converts the index argvars[1] into the string argvars[0] to a UTF-16 code unit index stored in rettv; the result is -1 when an argument check fails, the index is negative, or the end of the string is reached first.
+void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ rettv->vval.v_number = -1;  // result for every early return below
+
+ if (tv_check_for_string_arg(argvars, 0) == FAIL
+ || tv_check_for_opt_number_arg(argvars, 1) == FAIL
+ || tv_check_for_opt_bool_arg(argvars, 2) == FAIL
+ || (argvars[2].v_type != VAR_UNKNOWN
+ && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {  // argvars[3] is only checked when argvars[2] is present
+ return;
+ }
+
+ const char *const str = tv_get_string_chk(&argvars[0]);
+ varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
+ if (str == NULL || idx < 0) {
+ return;
+ }
+
+ varnumber_T countcc = false;  // optional argvars[2]; selects the length helper below
+ varnumber_T charidx = false;  // optional argvars[3]; when set, idx is counted down once per character instead of being used as a byte offset
+ if (argvars[2].v_type != VAR_UNKNOWN) {
+ countcc = tv_get_bool(&argvars[2]);
+ if (argvars[3].v_type != VAR_UNKNOWN) {
+ charidx = tv_get_bool(&argvars[3]);
+ }
+ }
+
+ int (*ptr2len)(const char *);
+ if (countcc) {  // NOTE(review): presumably utf_ptr2len() measures one code point and utfc_ptr2len() also spans following composing characters -- confirm in mbyte.c
+ ptr2len = utf_ptr2len;
+ } else {
+ ptr2len = utfc_ptr2len;
+ }
+
+ const char *p;
+ int len;  // UTF-16 code units counted so far
+ for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) {  // visits every character up to and including the one idx refers to
+ if (*p == NUL) {  // idx lies beyond the string contents: leave -1 in rettv
+ return;
+ }
+ const int clen = ptr2len(p);
+ const int c = (clen > 1) ? utf_ptr2char(p) : *p;  // single-byte characters are read directly from *p
+ if (c > 0xFFFF) {  // a code point above U+FFFF takes two UTF-16 code units, so count one extra
+ len++;
+ }
+ p += ptr2len(p);
+ if (charidx) {
+ idx--;
+ }
+ }
+
+ rettv->vval.v_number = len > 0 ? len - 1 : 0;  // len is a count that includes the target character; minus one gives a zero-based index (the second code unit when that character is above U+FFFF)
+}
+
/// "tolower(string)" function
void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{