diff options
-rw-r--r-- | runtime/doc/builtin.txt | 29 | ||||
-rw-r--r-- | runtime/doc/eval.txt | 8 | ||||
-rw-r--r-- | runtime/doc/usr_41.txt | 3 | ||||
-rw-r--r-- | src/nvim/eval.c | 185 | ||||
-rw-r--r-- | src/nvim/eval.lua | 3 | ||||
-rw-r--r-- | src/nvim/eval/typval.c | 75 | ||||
-rw-r--r-- | src/nvim/generators/gen_eval.lua | 1 | ||||
-rw-r--r-- | src/nvim/strings.c | 22 | ||||
-rw-r--r-- | test/old/testdir/test_expr_utf8.vim | 10 | ||||
-rw-r--r-- | test/old/testdir/test_visual.vim | 2 |
10 files changed, 260 insertions, 78 deletions
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index b37ac117f3..c2dc5ddd5b 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -464,6 +464,8 @@ sign_unplacelist({list}) List unplace a list of signs simplify({filename}) String simplify filename as much as possible sin({expr}) Float sine of {expr} sinh({expr}) Float hyperbolic sine of {expr} +slice({expr}, {start} [, {end}]) String, List or Blob + slice of a String, List or Blob sockconnect({mode}, {address} [, {opts}]) Number Connects to socket sort({list} [, {func} [, {dict}]]) @@ -484,7 +486,7 @@ str2list({expr} [, {utf8}]) List convert each character of {expr} to str2nr({expr} [, {base} [, {quoted}]]) Number convert String to Number strcharlen({expr}) Number character length of the String {expr} -strcharpart({str}, {start} [, {len}]) +strcharpart({str}, {start} [, {len} [, {skipcc}]]) String {len} characters of {str} at character {start} strchars({expr} [, {skipcc}]) Number character count of the String {expr} @@ -7773,6 +7775,19 @@ sinh({expr}) *sinh()* Can also be used as a |method|: > Compute()->sinh() +slice({expr}, {start} [, {end}]) *slice()* + Similar to using a |slice| "expr[start : end]", but "end" is + used exclusive. And for a string the indexes are used as + character indexes instead of byte indexes. + Also, composing characters are not counted. + When {end} is omitted the slice continues to the last item. + When {end} is -1 the last item is omitted. + Returns an empty value if {start} or {end} are invalid. + + Can also be used as a |method|: > + GetList()->slice(offset) + + sockconnect({mode}, {address} [, {opts}]) *sockconnect()* Connect a socket to an address. If {mode} is "pipe" then {address} should be the path of a local domain socket (on @@ -8115,12 +8130,16 @@ strcharlen({string}) *strcharlen()* GetText()->strcharlen() -strcharpart({src}, {start} [, {len}]) *strcharpart()* +strcharpart({src}, {start} [, {len} [, {skipcc}]]) *strcharpart()* Like |strpart()| but using character index and length instead - of byte index and length. Composing characters are counted - separately. + of byte index and length. + When {skipcc} is omitted or zero, composing characters are + counted separately. + When {skipcc} set to 1, Composing characters are ignored, + similar to |slice()|. When a character index is used where a character does not - exist it is assumed to be one character. For example: > + exist it is omitted and counted as one character. For + example: > strcharpart('abc', -1, 2) < results in 'a'. diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index 0c18fd5b4e..05fdf2f5bb 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -270,6 +270,9 @@ similar to -1. > :let shortlist = mylist[2:2] " List with one item: [3] :let otherlist = mylist[:] " make a copy of the List +Notice that the last index is inclusive. If you prefer using an exclusive +index use the |slice()| method. + If the first index is beyond the last item of the List or the second item is before the first item, the result is an empty list. There is no error message. @@ -1152,6 +1155,8 @@ text column numbers start with one! Example, to get the byte under the cursor: > :let c = getline(".")[col(".") - 1] +Index zero gives the first byte. Careful: text column numbers start with one! + If the length of the String is less than the index, the result is an empty String. A negative index always results in an empty string (reason: backward compatibility). Use [-1:] to get the last byte. @@ -1176,6 +1181,9 @@ In legacy Vim script the indexes are byte indexes. This doesn't recognize multibyte encodings, see |byteidx()| for computing the indexes. If expr8 is a Number it is first converted to a String. +The item at index expr1b is included, it is inclusive. For an exclusive index +use the |slice()| function. + If expr1a is omitted zero is used. If expr1b is omitted the length of the string minus one is used. diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index 8e1b72eadc..9075d60b1b 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -630,6 +630,8 @@ String manipulation: *string-functions* submatch() get a specific match in ":s" and substitute() strpart() get part of a string using byte index strcharpart() get part of a string using char index + slice() take a slice of a string, using char index in + Vim9 script strgetchar() get character from a string using char index expand() expand special keywords expandcmd() expand a command like done for `:edit` @@ -659,6 +661,7 @@ List manipulation: *list-functions* filter() remove selected items from a List map() change each List item reduce() reduce a List to a value + slice() take a slice of a List sort() sort a List reverse() reverse the order of a List or Blob uniq() remove copies of repeated adjacent items diff --git a/src/nvim/eval.c b/src/nvim/eval.c index 941bb8b2b3..c0756c1ed6 100644 --- a/src/nvim/eval.c +++ b/src/nvim/eval.c @@ -1692,7 +1692,7 @@ void set_var_lval(lval_T *lp, char *endp, typval_T *rettv, int copy, const bool lp->ll_n2 = tv_blob_len(lp->ll_blob) - 1; } - if (tv_blob_set_range(lp->ll_blob, (int)lp->ll_n1, (int)lp->ll_n2, rettv) == FAIL) { + if (tv_blob_set_range(lp->ll_blob, lp->ll_n1, lp->ll_n2, rettv) == FAIL) { return; } } else { @@ -3542,7 +3542,7 @@ static int eval_index(char **arg, typval_T *rettv, evalarg_T *const evalarg, boo if (evaluate) { int res = eval_index_inner(rettv, range, - empty1 ? NULL : &var1, empty2 ? NULL : &var2, + empty1 ? NULL : &var1, empty2 ? NULL : &var2, false, key, keylen, verbose); if (!empty1) { tv_clear(&var1); @@ -3592,17 +3592,31 @@ static int check_can_index(typval_T *rettv, bool evaluate, bool verbose) return OK; } +/// slice() function +void f_slice(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + if (check_can_index(argvars, true, false) == OK) { + tv_copy(argvars, rettv); + eval_index_inner(rettv, true, argvars + 1, + argvars[2].v_type == VAR_UNKNOWN ? NULL : argvars + 2, + true, NULL, 0, false); + } +} + /// Apply index or range to "rettv". -/// "var1" is the first index, NULL for [:expr]. -/// "var2" is the second index, NULL for [expr] and [expr: ] +/// +/// @param var1 the first index, NULL for [:expr]. +/// @param var2 the second index, NULL for [expr] and [expr: ] +/// @param exclusive true for slice(): second index is exclusive, use character +/// index for string. /// Alternatively, "key" is not NULL, then key[keylen] is the dict index. static int eval_index_inner(typval_T *rettv, bool is_range, typval_T *var1, typval_T *var2, - const char *key, ptrdiff_t keylen, bool verbose) + bool exclusive, const char *key, ptrdiff_t keylen, bool verbose) { - int n1 = 0; - int n2 = 0; + varnumber_T n1 = 0; + varnumber_T n2 = 0; if (var1 != NULL && rettv->v_type != VAR_DICT) { - n1 = (int)tv_get_number(var1); + n1 = tv_get_number(var1); } if (is_range) { @@ -3612,10 +3626,10 @@ static int eval_index_inner(typval_T *rettv, bool is_range, typval_T *var1, typv } return FAIL; } - if (var2 == NULL) { - n2 = -1; + if (var2 != NULL) { + n2 = tv_get_number(var2); } else { - n2 = (int)tv_get_number(var2); + n2 = VARNUMBER_MAX; } } @@ -3627,12 +3641,19 @@ static int eval_index_inner(typval_T *rettv, bool is_range, typval_T *var1, typv case VAR_PARTIAL: case VAR_UNKNOWN: break; // Not evaluating, skipping over subscript + case VAR_NUMBER: case VAR_STRING: { const char *const s = tv_get_string(rettv); char *v; int len = (int)strlen(s); - if (is_range) { + if (exclusive) { + if (is_range) { + v = string_slice(s, n1, n2, exclusive); + } else { + v = char_from_string(s, n1); + } + } else if (is_range) { // The resulting variable is a substring. If the indexes // are out of range the result is empty. if (n1 < 0) { @@ -3646,6 +3667,9 @@ static int eval_index_inner(typval_T *rettv, bool is_range, typval_T *var1, typv } else if (n2 >= len) { n2 = len; } + if (exclusive) { + n2--; + } if (n1 >= len || n2 < 0 || n1 > n2) { v = NULL; } else { @@ -3666,65 +3690,24 @@ static int eval_index_inner(typval_T *rettv, bool is_range, typval_T *var1, typv rettv->vval.v_string = v; break; } - case VAR_BLOB: { - int len = tv_blob_len(rettv->vval.v_blob); - if (is_range) { - // The resulting variable is a sub-blob. If the indexes - // are out of range the result is empty. - if (n1 < 0) { - n1 = len + n1; - if (n1 < 0) { - n1 = 0; - } - } - if (n2 < 0) { - n2 = len + n2; - } else if (n2 >= len) { - n2 = len - 1; - } - if (n1 >= len || n2 < 0 || n1 > n2) { - tv_clear(rettv); - rettv->v_type = VAR_BLOB; - rettv->vval.v_blob = NULL; - } else { - blob_T *const blob = tv_blob_alloc(); - ga_grow(&blob->bv_ga, n2 - n1 + 1); - blob->bv_ga.ga_len = n2 - n1 + 1; - for (int i = n1; i <= n2; i++) { - tv_blob_set(blob, i - n1, tv_blob_get(rettv->vval.v_blob, i)); - } - tv_clear(rettv); - tv_blob_set_ret(rettv, blob); - } - } else { - // The resulting variable is a byte value. - // If the index is too big or negative that is an error. - if (n1 < 0) { - n1 = len + n1; - } - if (n1 < len && n1 >= 0) { - const int v = (int)tv_blob_get(rettv->vval.v_blob, n1); - tv_clear(rettv); - rettv->v_type = VAR_NUMBER; - rettv->vval.v_number = v; - } else { - semsg(_(e_blobidx), (int64_t)n1); - } - } + + case VAR_BLOB: + tv_blob_slice_or_index(rettv->vval.v_blob, is_range, n1, n2, exclusive, rettv); break; - } + case VAR_LIST: if (var1 == NULL) { n1 = 0; } if (var2 == NULL) { - n2 = -1; + n2 = VARNUMBER_MAX; } if (tv_list_slice_or_index(rettv->vval.v_list, - is_range, n1, n2, rettv, verbose) == FAIL) { + is_range, n1, n2, exclusive, rettv, verbose) == FAIL) { return FAIL; } break; + case VAR_DICT: { if (key == NULL) { key = tv_get_string_chk(var1); @@ -7264,6 +7247,88 @@ int check_luafunc_name(const char *const str, const bool paren) return (int)(p - str); } +/// Return the character "str[index]" where "index" is the character index. If +/// "index" is out of range NULL is returned. +char *char_from_string(const char *str, varnumber_T index) +{ + size_t nbyte = 0; + varnumber_T nchar = index; + + if (str == NULL || index < 0) { + return NULL; + } + size_t slen = strlen(str); + while (nchar > 0 && nbyte < slen) { + nbyte += (size_t)utf_ptr2len(str + nbyte); + nchar--; + } + if (nbyte >= slen) { + return NULL; + } + return xstrnsave(str + nbyte, (size_t)utf_ptr2len(str + nbyte)); +} + +/// Get the byte index for character index "idx" in string "str" with length +/// "str_len". +/// If going over the end return "str_len". +/// If "idx" is negative count from the end, -1 is the last character. +/// When going over the start return -1. +static ssize_t char_idx2byte(const char *str, size_t str_len, varnumber_T idx) +{ + varnumber_T nchar = idx; + size_t nbyte = 0; + + if (nchar >= 0) { + while (nchar > 0 && nbyte < str_len) { + nbyte += (size_t)utf_ptr2len(str + nbyte); + nchar--; + } + } else { + nbyte = str_len; + while (nchar < 0 && nbyte > 0) { + nbyte--; + nbyte -= (size_t)utf_head_off(str, str + nbyte); + nchar++; + } + if (nchar < 0) { + return -1; + } + } + return (ssize_t)nbyte; +} + +/// Return the slice "str[first:last]" using character indexes. +/// +/// @param exclusive true for slice(). +/// +/// Return NULL when the result is empty. +char *string_slice(const char *str, varnumber_T first, varnumber_T last, bool exclusive) +{ + if (str == NULL) { + return NULL; + } + size_t slen = strlen(str); + ssize_t start_byte = char_idx2byte(str, slen, first); + if (start_byte < 0) { + start_byte = 0; // first index very negative: use zero + } + ssize_t end_byte; + if ((last == -1 && !exclusive) || last == VARNUMBER_MAX) { + end_byte = (ssize_t)slen; + } else { + end_byte = char_idx2byte(str, slen, last); + if (!exclusive && end_byte >= 0 && end_byte < (ssize_t)slen) { + // end index is inclusive + end_byte += utf_ptr2len(str + end_byte); + } + } + + if (start_byte >= (ssize_t)slen || end_byte <= start_byte) { + return NULL; + } + return xstrnsave(str + start_byte, (size_t)(end_byte - start_byte)); +} + /// Handle: /// - expr[expr], expr[expr:expr] subscript /// - ".name" lookup diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua index 09705148d0..d9c7208c02 100644 --- a/src/nvim/eval.lua +++ b/src/nvim/eval.lua @@ -371,6 +371,7 @@ return { simplify={args=1, base=1}, sin={args=1, base=1, float_func="sin"}, sinh={args=1, base=1, float_func="sinh"}, + slice={args={2, 3}, base=1}, sockconnect={args={2,3}}, sort={args={1, 3}, base=1}, soundfold={args=1, base=1}, @@ -385,7 +386,7 @@ return { str2list={args={1, 2}, base=1}, str2nr={args={1, 3}, base=1}, strcharlen={args=1, base=1}, - strcharpart={args={2, 3}, base=1, fast=true}, + strcharpart={args={2, 4}, base=1, fast=true}, strchars={args={1, 2}, base=1}, strdisplaywidth={args={1, 2}, base=1}, strftime={args={1, 2}, base=1}, diff --git a/src/nvim/eval/typval.c b/src/nvim/eval/typval.c index 0b2be3074f..8b51500e18 100644 --- a/src/nvim/eval/typval.c +++ b/src/nvim/eval/typval.c @@ -765,10 +765,10 @@ int tv_list_concat(list_T *const l1, list_T *const l2, typval_T *const tv) return OK; } -static list_T *tv_list_slice(list_T *ol, int n1, int n2) +static list_T *tv_list_slice(list_T *ol, varnumber_T n1, varnumber_T n2) { list_T *l = tv_list_alloc(n2 - n1 + 1); - listitem_T *item = tv_list_find(ol, n1); + listitem_T *item = tv_list_find(ol, (int)n1); for (; n1 <= n2; n1++) { tv_list_append_tv(l, TV_LIST_ITEM_TV(item)); item = TV_LIST_ITEM_NEXT(rettv->vval.v_list, item); @@ -776,12 +776,12 @@ static list_T *tv_list_slice(list_T *ol, int n1, int n2) return l; } -int tv_list_slice_or_index(list_T *list, bool range, int n1_arg, int n2_arg, typval_T *rettv, - bool verbose) +int tv_list_slice_or_index(list_T *list, bool range, varnumber_T n1_arg, varnumber_T n2_arg, + bool exclusive, typval_T *rettv, bool verbose) { int len = tv_list_len(rettv->vval.v_list); - int n1 = n1_arg; - int n2 = n2_arg; + varnumber_T n1 = n1_arg; + varnumber_T n2 = n2_arg; if (n1 < 0) { n1 = len + n1; @@ -801,7 +801,10 @@ int tv_list_slice_or_index(list_T *list, bool range, int n1_arg, int n2_arg, typ if (n2 < 0) { n2 = len + n2; } else if (n2 >= len) { - n2 = len - 1; + n2 = len - (exclusive ? 0 : 1); + } + if (exclusive) { + n2--; } if (n2 < 0 || n2 + 1 < n1) { n2 = -1; @@ -2773,6 +2776,60 @@ bool tv_blob_equal(const blob_T *const b1, const blob_T *const b2) return true; } +int tv_blob_slice_or_index(const blob_T *blob, int is_range, varnumber_T n1, varnumber_T n2, + bool exclusive, typval_T *rettv) +{ + int len = tv_blob_len(rettv->vval.v_blob); + if (is_range) { + // The resulting variable is a sub-blob. If the indexes + // are out of range the result is empty. + if (n1 < 0) { + n1 = len + n1; + if (n1 < 0) { + n1 = 0; + } + } + if (n2 < 0) { + n2 = len + n2; + } else if (n2 >= len) { + n2 = len - (exclusive ? 0 : 1); + } + if (exclusive) { + n2--; + } + if (n1 >= len || n2 < 0 || n1 > n2) { + tv_clear(rettv); + rettv->v_type = VAR_BLOB; + rettv->vval.v_blob = NULL; + } else { + blob_T *const new_blob = tv_blob_alloc(); + ga_grow(&new_blob->bv_ga, (int)(n2 - n1 + 1)); + new_blob->bv_ga.ga_len = (int)(n2 - n1 + 1); + for (int i = (int)n1; i <= (int)n2; i++) { + tv_blob_set(new_blob, i - (int)n1, tv_blob_get(rettv->vval.v_blob, i)); + } + tv_clear(rettv); + tv_blob_set_ret(rettv, new_blob); + } + } else { + // The resulting variable is a byte value. + // If the index is too big or negative that is an error. + if (n1 < 0) { + n1 = len + n1; + } + if (n1 < len && n1 >= 0) { + const int v = (int)tv_blob_get(rettv->vval.v_blob, (int)n1); + tv_clear(rettv); + rettv->v_type = VAR_NUMBER; + rettv->vval.v_number = v; + } else { + semsg(_(e_blobidx), (int64_t)n1); + return FAIL; + } + } + return OK; +} + /// Check if "n1" is a valid index for a blob with length "bloblen". int tv_blob_check_index(int bloblen, varnumber_T n1, bool quiet) { @@ -2800,14 +2857,14 @@ int tv_blob_check_range(int bloblen, varnumber_T n1, varnumber_T n2, bool quiet) /// Set bytes "n1" to "n2" (inclusive) in "dest" to the value of "src". /// Caller must make sure "src" is a blob. /// Returns FAIL if the number of bytes does not match. -int tv_blob_set_range(blob_T *dest, int n1, int n2, typval_T *src) +int tv_blob_set_range(blob_T *dest, varnumber_T n1, varnumber_T n2, typval_T *src) { if (n2 - n1 + 1 != tv_blob_len(src->vval.v_blob)) { emsg(_("E972: Blob value does not have the right number of bytes")); return FAIL; } - for (int il = n1, ir = 0; il <= n2; il++) { + for (int il = (int)n1, ir = 0; il <= (int)n2; il++) { tv_blob_set(dest, il, tv_blob_get(src->vval.v_blob, ir++)); } return OK; diff --git a/src/nvim/generators/gen_eval.lua b/src/nvim/generators/gen_eval.lua index 7d531bc228..e93e9a8d02 100644 --- a/src/nvim/generators/gen_eval.lua +++ b/src/nvim/generators/gen_eval.lua @@ -17,6 +17,7 @@ hashpipe:write([[ #include "nvim/cmdexpand.h" #include "nvim/cmdhist.h" #include "nvim/digraph.h" +#include "nvim/eval.h" #include "nvim/eval/buffer.h" #include "nvim/eval/funcs.h" #include "nvim/eval/typval.h" diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 61e00f85dc..5231ec0841 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -1834,12 +1834,26 @@ void f_strcharpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) const size_t slen = strlen(p); int nbyte = 0; + varnumber_T skipcc = false; bool error = false; varnumber_T nchar = tv_get_number_chk(&argvars[1], &error); if (!error) { + if (argvars[2].v_type != VAR_UNKNOWN + && argvars[3].v_type != VAR_UNKNOWN) { + skipcc = tv_get_bool(&argvars[3]); + if (skipcc < 0 || skipcc > 1) { + semsg(_(e_using_number_as_bool_nr), skipcc); + return; + } + } + if (nchar > 0) { while (nchar > 0 && (size_t)nbyte < slen) { - nbyte += utf_ptr2len(p + nbyte); + if (skipcc) { + nbyte += utfc_ptr2len(p + nbyte); + } else { + nbyte += utf_ptr2len(p + nbyte); + } nchar--; } } else { @@ -1855,7 +1869,11 @@ void f_strcharpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) if (off < 0) { len += 1; } else { - len += utf_ptr2len(p + off); + if (skipcc) { + len += utfc_ptr2len(p + off); + } else { + len += utf_ptr2len(p + off); + } } charlen--; } diff --git a/test/old/testdir/test_expr_utf8.vim b/test/old/testdir/test_expr_utf8.vim index fad725d2e5..c6d2e4ed7e 100644 --- a/test/old/testdir/test_expr_utf8.vim +++ b/test/old/testdir/test_expr_utf8.vim @@ -31,4 +31,14 @@ func Test_strcharpart() call assert_equal('a', strcharpart('àxb', 0, 1)) call assert_equal('̀', strcharpart('àxb', 1, 1)) call assert_equal('x', strcharpart('àxb', 2, 1)) + + + call assert_equal('a', strcharpart('àxb', 0, 1, 0)) + call assert_equal('à', strcharpart('àxb', 0, 1, 1)) + call assert_equal('x', strcharpart('àxb', 1, 1, 1)) + + call assert_fails("let v = strcharpart('abc', 0, 0, [])", 'E745:') + call assert_fails("let v = strcharpart('abc', 0, 0, 2)", 'E1023:') endfunc + +" vim: shiftwidth=2 sts=2 expandtab diff --git a/test/old/testdir/test_visual.vim b/test/old/testdir/test_visual.vim index d10a946200..04282416bb 100644 --- a/test/old/testdir/test_visual.vim +++ b/test/old/testdir/test_visual.vim @@ -935,7 +935,7 @@ func Test_visual_block_mode() endfunc func Test_visual_force_motion_feedkeys() - onoremap <expr> i- execute('let g:mode = mode(1)') + onoremap <expr> i- execute('let g:mode = mode(1)')->slice(0, 0) call feedkeys('dvi-', 'x') call assert_equal('nov', g:mode) call feedkeys('di-', 'x') |