diff options
author | zeertzjq <zeertzjq@outlook.com> | 2023-06-09 17:43:46 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-09 17:43:46 +0800 |
commit | 106922898ad1510954737d38e7f8db78559ae6bd (patch) | |
tree | 05c95cb03be4dbfa9b9bc21bc20fe13c85885ccb | |
parent | e5e0bda41b640d324350c5147b956e37e9f8b32c (diff) | |
download | rneovim-106922898ad1510954737d38e7f8db78559ae6bd.tar.gz rneovim-106922898ad1510954737d38e7f8db78559ae6bd.tar.bz2 rneovim-106922898ad1510954737d38e7f8db78559ae6bd.zip |
vim-patch:9.0.1617: charidx() result is not consistent with byteidx() (#23963)
Problem: charidx() and utf16idx() result is not consistent with byteidx().
Solution: When the index is equal to the length of the text return the
length of the text instead of -1. (Yegappan Lakshmanan,
closes vim/vim#12503)
https://github.com/vim/vim/commit/577922b917e48285a7a312daf7b5bbc6e272939c
Co-authored-by: Yegappan Lakshmanan <yegappan@yahoo.com>
-rw-r--r-- | runtime/doc/builtin.txt | 20 | ||||
-rw-r--r-- | src/nvim/strings.c | 10 | ||||
-rw-r--r-- | test/old/testdir/test_functions.vim | 116 |
3 files changed, 99 insertions, 47 deletions
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 27d52b7ac6..bdd9f2fd3a 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -1167,11 +1167,13 @@ charidx({string}, {idx} [, {countcc} [, {utf16}]]) When {utf16} is present and TRUE, {idx} is used as the UTF-16 index in the String {expr} instead of as the byte index. - Returns -1 if the arguments are invalid or if {idx} is greater - than the index of the last byte in {string}. An error is - given if the first argument is not a string, the second - argument is not a number or when the third argument is present - and is not zero or one. + Returns -1 if the arguments are invalid or if there are less + than {idx} bytes. If there are exactly {idx} bytes the length + of the string in characters is returned. + + An error is given and -1 is returned if the first argument is + not a string, the second argument is not a number or when the + third argument is present and is not zero or one. See |byteidx()| and |byteidxcomp()| for getting the byte index from the character index and |utf16idx()| for getting the @@ -9138,8 +9140,8 @@ uniq({list} [, {func} [, {dict}]]) *uniq()* *E882* < *utf16idx()* utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) - Same as |charidx()| but returns the UTF-16 index of the byte - at {idx} in {string} (after converting it to UTF-16). + Same as |charidx()| but returns the UTF-16 code unit index of + the byte at {idx} in {string} (after converting it to UTF-16). When {charidx} is present and TRUE, {idx} is used as the character index in the String {string} instead of as the byte @@ -9147,6 +9149,10 @@ utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) An {idx} in the middle of a UTF-8 sequence is rounded upwards to the end of that sequence. + Returns -1 if the arguments are invalid or if there are less + than {idx} bytes in {string}. If there are exactly {idx} bytes + the length of the string in UTF-16 code units is returned. 
+ See |byteidx()| and |byteidxcomp()| for getting the byte index from the UTF-16 index and |charidx()| for getting the character index from the UTF-16 index. diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 4e521b14f7..a0d62f5df5 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -1603,6 +1603,11 @@ void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) int len; for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) { if (*p == NUL) { + // If the index is exactly the number of bytes or utf-16 code units + // in the string then return the length of the string in characters. + if (utf16idx ? (idx == 0) : (p == (str + idx))) { + rettv->vval.v_number = len; + } return; } if (utf16idx) { @@ -2047,6 +2052,11 @@ void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) int len; for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) { if (*p == NUL) { + // If the index is exactly the number of bytes or characters in the + // string then return the length of the string in utf-16 code units. + if (charidx ? 
(idx == 0) : (p == (str + idx))) { + rettv->vval.v_number = len; + } return; } const int clen = ptr2len(p); diff --git a/test/old/testdir/test_functions.vim b/test/old/testdir/test_functions.vim index 177fef9e99..39d994a2df 100644 --- a/test/old/testdir/test_functions.vim +++ b/test/old/testdir/test_functions.vim @@ -1267,7 +1267,8 @@ func Test_charidx() call assert_equal(1, charidx(a, 3)) call assert_equal(2, charidx(a, 4)) call assert_equal(3, charidx(a, 7)) - call assert_equal(-1, charidx(a, 8)) + call assert_equal(4, charidx(a, 8)) + call assert_equal(-1, charidx(a, 9)) call assert_equal(-1, charidx(a, -1)) " count composing characters @@ -1275,14 +1276,18 @@ func Test_charidx() call assert_equal(2, a->charidx(2, 1)) call assert_equal(3, a->charidx(4, 1)) call assert_equal(5, a->charidx(7, 1)) - call assert_equal(-1, a->charidx(8, 1)) + call assert_equal(6, a->charidx(8, 1)) + call assert_equal(-1, a->charidx(9, 1)) " empty string - call assert_equal(-1, charidx('', 0)) - call assert_equal(-1, charidx('', 0, 1)) + call assert_equal(0, charidx('', 0)) + call assert_equal(-1, charidx('', 1)) + call assert_equal(0, charidx('', 0, 1)) + call assert_equal(-1, charidx('', 1, 1)) " error cases - call assert_equal(-1, charidx(v:_null_string, 0)) + call assert_equal(0, charidx(v:_null_string, 0)) + call assert_equal(-1, charidx(v:_null_string, 1)) call assert_fails('let x = charidx([], 1)', 'E1174:') call assert_fails('let x = charidx("abc", [])', 'E1210:') call assert_fails('let x = charidx("abc", 1, [])', 'E1212:') @@ -1294,10 +1299,10 @@ endfunc func Test_charidx_from_utf16_index() " string with single byte characters let str = "abc" - for i in range(3) + for i in range(4) call assert_equal(i, charidx(str, i, v:false, v:true)) endfor - call assert_equal(-1, charidx(str, 3, v:false, v:true)) + call assert_equal(-1, charidx(str, 4, v:false, v:true)) " string with two byte characters let str = "a漏漏b" @@ -1305,7 +1310,8 @@ func Test_charidx_from_utf16_index() call 
assert_equal(1, charidx(str, 1, v:false, v:true)) call assert_equal(2, charidx(str, 2, v:false, v:true)) call assert_equal(3, charidx(str, 3, v:false, v:true)) - call assert_equal(-1, charidx(str, 4, v:false, v:true)) + call assert_equal(4, charidx(str, 4, v:false, v:true)) + call assert_equal(-1, charidx(str, 5, v:false, v:true)) " string with four byte characters let str = "a馃槉馃槉b" @@ -1315,38 +1321,48 @@ func Test_charidx_from_utf16_index() call assert_equal(2, charidx(str, 3, v:false, v:true)) call assert_equal(2, charidx(str, 4, v:false, v:true)) call assert_equal(3, charidx(str, 5, v:false, v:true)) - call assert_equal(-1, charidx(str, 6, v:false, v:true)) + call assert_equal(4, charidx(str, 6, v:false, v:true)) + call assert_equal(-1, charidx(str, 7, v:false, v:true)) " string with composing characters let str = '-a虂-b虂' for i in str->strcharlen()->range() call assert_equal(i, charidx(str, i, v:false, v:true)) endfor - call assert_equal(-1, charidx(str, 4, v:false, v:true)) + call assert_equal(4, charidx(str, 4, v:false, v:true)) + call assert_equal(-1, charidx(str, 5, v:false, v:true)) for i in str->strchars()->range() call assert_equal(i, charidx(str, i, v:true, v:true)) endfor - call assert_equal(-1, charidx(str, 6, v:true, v:true)) + call assert_equal(6, charidx(str, 6, v:true, v:true)) + call assert_equal(-1, charidx(str, 7, v:true, v:true)) " string with multiple composing characters let str = '-a台虂-a台虂' for i in str->strcharlen()->range() call assert_equal(i, charidx(str, i, v:false, v:true)) endfor - call assert_equal(-1, charidx(str, 4, v:false, v:true)) + call assert_equal(4, charidx(str, 4, v:false, v:true)) + call assert_equal(-1, charidx(str, 5, v:false, v:true)) for i in str->strchars()->range() call assert_equal(i, charidx(str, i, v:true, v:true)) endfor - call assert_equal(-1, charidx(str, 8, v:true, v:true)) + call assert_equal(8, charidx(str, 8, v:true, v:true)) + call assert_equal(-1, charidx(str, 9, v:true, v:true)) " empty string - call 
assert_equal(-1, charidx('', 0, v:false, v:true)) - call assert_equal(-1, charidx('', 0, v:true, v:true)) + call assert_equal(0, charidx('', 0, v:false, v:true)) + call assert_equal(-1, charidx('', 1, v:false, v:true)) + call assert_equal(0, charidx('', 0, v:true, v:true)) + call assert_equal(-1, charidx('', 1, v:true, v:true)) " error cases - call assert_equal(-1, charidx('', 0, v:false, v:true)) - call assert_equal(-1, charidx('', 0, v:true, v:true)) - call assert_equal(-1, charidx(v:_null_string, 0, v:false, v:true)) + call assert_equal(0, charidx('', 0, v:false, v:true)) + call assert_equal(-1, charidx('', 1, v:false, v:true)) + call assert_equal(0, charidx('', 0, v:true, v:true)) + call assert_equal(-1, charidx('', 1, v:true, v:true)) + call assert_equal(0, charidx(v:_null_string, 0, v:false, v:true)) + call assert_equal(-1, charidx(v:_null_string, 1, v:false, v:true)) call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:') call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:') endfunc @@ -1355,10 +1371,10 @@ endfunc func Test_utf16idx_from_byteidx() " UTF-16 index of a string with single byte characters let str = "abc" - for i in range(3) + for i in range(4) call assert_equal(i, utf16idx(str, i)) endfor - call assert_equal(-1, utf16idx(str, 3)) + call assert_equal(-1, utf16idx(str, 4)) " UTF-16 index of a string with two byte characters let str = 'a漏漏b' @@ -1368,7 +1384,8 @@ func Test_utf16idx_from_byteidx() call assert_equal(2, str->utf16idx(3)) call assert_equal(2, str->utf16idx(4)) call assert_equal(3, str->utf16idx(5)) - call assert_equal(-1, str->utf16idx(6)) + call assert_equal(4, str->utf16idx(6)) + call assert_equal(-1, str->utf16idx(7)) " UTF-16 index of a string with four byte characters let str = 'a馃槉馃槉b' @@ -1382,7 +1399,8 @@ func Test_utf16idx_from_byteidx() call assert_equal(4, utf16idx(str, 7)) call assert_equal(4, utf16idx(str, 8)) call assert_equal(5, utf16idx(str, 9)) - call assert_equal(-1, utf16idx(str, 10)) + 
call assert_equal(6, utf16idx(str, 10)) + call assert_equal(-1, utf16idx(str, 11)) " UTF-16 index of a string with composing characters let str = '-a虂-b虂' @@ -1394,7 +1412,8 @@ func Test_utf16idx_from_byteidx() call assert_equal(3, utf16idx(str, 5)) call assert_equal(3, utf16idx(str, 6)) call assert_equal(3, utf16idx(str, 7)) - call assert_equal(-1, utf16idx(str, 8)) + call assert_equal(4, utf16idx(str, 8)) + call assert_equal(-1, utf16idx(str, 9)) call assert_equal(0, utf16idx(str, 0, v:true)) call assert_equal(1, utf16idx(str, 1, v:true)) call assert_equal(2, utf16idx(str, 2, v:true)) @@ -1403,7 +1422,8 @@ func Test_utf16idx_from_byteidx() call assert_equal(4, utf16idx(str, 5, v:true)) call assert_equal(5, utf16idx(str, 6, v:true)) call assert_equal(5, utf16idx(str, 7, v:true)) - call assert_equal(-1, utf16idx(str, 8, v:true)) + call assert_equal(6, utf16idx(str, 8, v:true)) + call assert_equal(-1, utf16idx(str, 9, v:true)) " string with multiple composing characters let str = '-a台虂-a台虂' @@ -1419,7 +1439,8 @@ func Test_utf16idx_from_byteidx() call assert_equal(3, utf16idx(str, 9)) call assert_equal(3, utf16idx(str, 10)) call assert_equal(3, utf16idx(str, 11)) - call assert_equal(-1, utf16idx(str, 12)) + call assert_equal(4, utf16idx(str, 12)) + call assert_equal(-1, utf16idx(str, 13)) call assert_equal(0, utf16idx(str, 0, v:true)) call assert_equal(1, utf16idx(str, 1, v:true)) call assert_equal(2, utf16idx(str, 2, v:true)) @@ -1432,16 +1453,21 @@ func Test_utf16idx_from_byteidx() call assert_equal(6, utf16idx(str, 9, v:true)) call assert_equal(7, utf16idx(str, 10, v:true)) call assert_equal(7, utf16idx(str, 11, v:true)) - call assert_equal(-1, utf16idx(str, 12, v:true)) + call assert_equal(8, utf16idx(str, 12, v:true)) + call assert_equal(-1, utf16idx(str, 13, v:true)) " empty string - call assert_equal(-1, utf16idx('', 0)) - call assert_equal(-1, utf16idx('', 0, v:true)) + call assert_equal(0, utf16idx('', 0)) + call assert_equal(-1, utf16idx('', 1)) + call 
assert_equal(0, utf16idx('', 0, v:true)) + call assert_equal(-1, utf16idx('', 1, v:true)) " error cases - call assert_equal(-1, utf16idx("", 0)) + call assert_equal(0, utf16idx("", 0)) + call assert_equal(-1, utf16idx("", 1)) call assert_equal(-1, utf16idx("abc", -1)) - call assert_equal(-1, utf16idx(v:_null_string, 0)) + call assert_equal(0, utf16idx(v:_null_string, 0)) + call assert_equal(-1, utf16idx(v:_null_string, 1)) call assert_fails('let l = utf16idx([], 0)', 'E1174:') call assert_fails('let l = utf16idx("ab", [])', 'E1210:') call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:') @@ -1453,14 +1479,16 @@ func Test_utf16idx_from_charidx() for i in str->strcharlen()->range() call assert_equal(i, utf16idx(str, i, v:false, v:true)) endfor - call assert_equal(-1, utf16idx(str, 3, v:false, v:true)) + call assert_equal(3, utf16idx(str, 3, v:false, v:true)) + call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) " UTF-16 index of a string with two byte characters let str = "a漏漏b" for i in str->strcharlen()->range() call assert_equal(i, utf16idx(str, i, v:false, v:true)) endfor - call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + call assert_equal(4, utf16idx(str, 4, v:false, v:true)) + call assert_equal(-1, utf16idx(str, 5, v:false, v:true)) " UTF-16 index of a string with four byte characters let str = "a馃槉馃槉b" @@ -1468,36 +1496,44 @@ func Test_utf16idx_from_charidx() call assert_equal(2, utf16idx(str, 1, v:false, v:true)) call assert_equal(4, utf16idx(str, 2, v:false, v:true)) call assert_equal(5, utf16idx(str, 3, v:false, v:true)) - call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + call assert_equal(6, utf16idx(str, 4, v:false, v:true)) + call assert_equal(-1, utf16idx(str, 5, v:false, v:true)) " UTF-16 index of a string with composing characters let str = '-a虂-b虂' for i in str->strcharlen()->range() call assert_equal(i, utf16idx(str, i, v:false, v:true)) endfor - call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + call 
assert_equal(4, utf16idx(str, 4, v:false, v:true)) + call assert_equal(-1, utf16idx(str, 5, v:false, v:true)) for i in str->strchars()->range() call assert_equal(i, utf16idx(str, i, v:true, v:true)) endfor - call assert_equal(-1, utf16idx(str, 6, v:true, v:true)) + call assert_equal(6, utf16idx(str, 6, v:true, v:true)) + call assert_equal(-1, utf16idx(str, 7, v:true, v:true)) " string with multiple composing characters let str = '-a台虂-a台虂' for i in str->strcharlen()->range() call assert_equal(i, utf16idx(str, i, v:false, v:true)) endfor - call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) + call assert_equal(4, utf16idx(str, 4, v:false, v:true)) + call assert_equal(-1, utf16idx(str, 5, v:false, v:true)) for i in str->strchars()->range() call assert_equal(i, utf16idx(str, i, v:true, v:true)) endfor - call assert_equal(-1, utf16idx(str, 8, v:true, v:true)) + call assert_equal(8, utf16idx(str, 8, v:true, v:true)) + call assert_equal(-1, utf16idx(str, 9, v:true, v:true)) " empty string - call assert_equal(-1, utf16idx('', 0, v:false, v:true)) - call assert_equal(-1, utf16idx('', 0, v:true, v:true)) + call assert_equal(0, utf16idx('', 0, v:false, v:true)) + call assert_equal(-1, utf16idx('', 1, v:false, v:true)) + call assert_equal(0, utf16idx('', 0, v:true, v:true)) + call assert_equal(-1, utf16idx('', 1, v:true, v:true)) " error cases - call assert_equal(-1, utf16idx(v:_null_string, 0, v:true, v:true)) + call assert_equal(0, utf16idx(v:_null_string, 0, v:true, v:true)) + call assert_equal(-1, utf16idx(v:_null_string, 1, v:true, v:true)) call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:') endfunc |