fix(lsp): str_byteindex_enc bounds checking #30747

Problem: Previously the index was only checked against the UTF-8 length. This could cause unexpected behaviours for strings containing multibyte chars. Solution: Check indices correctly against their max value before returning the fallback length.
author: Tristan Knight <admin@snappeh.com> 2024-10-16 17:12:19 +0100
committer: GitHub <noreply@github.com> 2024-10-16 09:12:19 -0700
commit: 80e37aa533573ef1ad96bcccc006b8d45dc963b9 (patch)
tree: 774f2f40bfe8364dca6169fd52d4fe1552347e73 /runtime/lua/vim/lsp
parent: f72dc2b4c805f309f23aff62b3e7ba7b71a554d2 (diff)
download: rneovim-80e37aa533573ef1ad96bcccc006b8d45dc963b9.tar.gz
rneovim-80e37aa533573ef1ad96bcccc006b8d45dc963b9.tar.bz2
rneovim-80e37aa533573ef1ad96bcccc006b8d45dc963b9.zip
1 files changed, 10 insertions, 11 deletions
diff --git a/runtime/lua/vim/lsp/util.lua b/runtime/lua/vim/lsp/util.lua
index fc822f1403..2b9e734c18 100644
--- a/runtime/lua/vim/lsp/util.lua
+++ b/runtime/lua/vim/lsp/util.lua
@@ -171,25 +171,24 @@ end
 ---@param encoding string utf-8|utf-16|utf-32| defaults to utf-16
 ---@return integer byte (utf-8) index of `encoding` index `index` in `line`
 function M._str_byteindex_enc(line, index, encoding)
-  local len = #line
-  if index > len then
-    -- LSP spec: if character > line length, default to the line length.
-    -- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
-    return len
-  end
+  -- LSP spec: if character > line length, default to the line length.
+  -- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
+  local len8 = #line
   if not encoding then
     encoding = 'utf-16'
   end
   if encoding == 'utf-8' then
-    if index then
+    if index and index <= len8 then
       return index
     else
-      return len
+      return len8
     end
-  elseif encoding == 'utf-16' then
-    return vim.str_byteindex(line, index, true)
+  end
+  local len32, len16 = vim.str_utfindex(line)
+  if encoding == 'utf-16' then
+    return index <= len16 and vim.str_byteindex(line, index, true) or len8
   elseif encoding == 'utf-32' then
-    return vim.str_byteindex(line, index)
+    return index <= len32 and vim.str_byteindex(line, index) or len8
   else
     error('Invalid encoding: ' .. vim.inspect(encoding))
   end
author	Tristan Knight <admin@snappeh.com>	2024-10-16 17:12:19 +0100
committer	GitHub <noreply@github.com>	2024-10-16 09:12:19 -0700
commit	80e37aa533573ef1ad96bcccc006b8d45dc963b9 (patch)
tree	774f2f40bfe8364dca6169fd52d4fe1552347e73 /runtime/lua/vim/lsp
parent	f72dc2b4c805f309f23aff62b3e7ba7b71a554d2 (diff)
download	rneovim-80e37aa533573ef1ad96bcccc006b8d45dc963b9.tar.gz rneovim-80e37aa533573ef1ad96bcccc006b8d45dc963b9.tar.bz2 rneovim-80e37aa533573ef1ad96bcccc006b8d45dc963b9.zip