refactor(lsp): drop str_byteindex/str_utfindex wrappers #30915

* deprecate old signatures
* move to new str_byteindex/str_utfindex signature
* use single-underscore name (double-underscore is reserved for Lua itself)
author: Tristan Knight <admin@snappeh.com> 2024-10-26 15:38:25 +0100
committer: GitHub <noreply@github.com> 2024-10-26 07:38:25 -0700
commit: 25b53b593ef6f229fbec5b3dc205a7539579d13a (patch)
tree: 8c13b6b78e22437b8fd22ac8b639ecc65417fff3
parent: b922b7d6d7889cce863540df7b0da7d512f8a2a1 (diff)
download: rneovim-25b53b593ef6f229fbec5b3dc205a7539579d13a.tar.gz
rneovim-25b53b593ef6f229fbec5b3dc205a7539579d13a.tar.bz2
rneovim-25b53b593ef6f229fbec5b3dc205a7539579d13a.zip
12 files changed, 54 insertions, 176 deletions
diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt
index cde98e7593..6211f9b4e4 100644
--- a/runtime/doc/news.txt
+++ b/runtime/doc/news.txt
@@ -208,6 +208,8 @@ LUA
 • |vim.fs.rm()| can delete files and directories.
 • |vim.validate()| now has a new signature which uses less tables,
   is more peformant and easier to read.
+• |vim.str_byteindex()| and |vim.str_utfindex()| gained overload signatures
+  supporting two new parameters, `encoding` and `strict_indexing`.
 
 OPTIONS
 
diff --git a/runtime/lua/man.lua b/runtime/lua/man.lua
index 6f60bf1cef..114c94f9e5 100644
--- a/runtime/lua/man.lua
+++ b/runtime/lua/man.lua
@@ -305,7 +305,7 @@ local function matchstr(text, pat_or_re)
     return
   end
 
-  return text:sub(vim.str_utfindex(text, s) + 1, vim.str_utfindex(text, e))
+  return text:sub(vim.str_utfindex(text, 'utf-32', s) + 1, vim.str_utfindex(text, 'utf-32', e))
 end
 
 -- attempt to extract the name and sect out of 'name(sect)'
diff --git a/runtime/lua/vim/_editor.lua b/runtime/lua/vim/_editor.lua
index 496bbf747c..c6aa303124 100644
--- a/runtime/lua/vim/_editor.lua
+++ b/runtime/lua/vim/_editor.lua
@@ -545,7 +545,7 @@ function vim.region(bufnr, pos1, pos2, regtype, inclusive)
   -- TODO: handle double-width characters
   if regtype:byte() == 22 then
     local bufline = vim.api.nvim_buf_get_lines(bufnr, pos1[1], pos1[1] + 1, true)[1]
-    pos1[2] = vim.str_utfindex(bufline, pos1[2])
+    pos1[2] = vim.str_utfindex(bufline, 'utf-32', pos1[2])
   end
 
   local region = {}
@@ -557,14 +557,14 @@ function vim.region(bufnr, pos1, pos2, regtype, inclusive)
       c2 = c1 + tonumber(regtype:sub(2))
       -- and adjust for non-ASCII characters
       local bufline = vim.api.nvim_buf_get_lines(bufnr, l, l + 1, true)[1]
-      local utflen = vim.str_utfindex(bufline, #bufline)
+      local utflen = vim.str_utfindex(bufline, 'utf-32', #bufline)
       if c1 <= utflen then
-        c1 = assert(tonumber(vim.str_byteindex(bufline, c1)))
+        c1 = assert(tonumber(vim.str_byteindex(bufline, 'utf-32', c1)))
       else
         c1 = #bufline + 1
       end
       if c2 <= utflen then
-        c2 = assert(tonumber(vim.str_byteindex(bufline, c2)))
+        c2 = assert(tonumber(vim.str_byteindex(bufline, 'utf-32', c2)))
       else
         c2 = #bufline + 1
       end
@@ -740,9 +740,14 @@ function vim.str_byteindex(s, encoding, index, strict_indexing)
     --   • {str}        (`string`)
     --   • {index}      (`integer`)
     --   • {use_utf16}  (`boolean?`)
+    vim.deprecate(
+      'vim.str_byteindex',
+      'vim.str_byteindex(s, encoding, index, strict_indexing)',
+      '1.0'
+    )
     local old_index = encoding
     local use_utf16 = index or false
-    return vim.__str_byteindex(s, old_index, use_utf16) or error('index out of range')
+    return vim._str_byteindex(s, old_index, use_utf16) or error('index out of range')
   end
 
   vim.validate('s', s, 'string')
@@ -769,7 +774,7 @@ function vim.str_byteindex(s, encoding, index, strict_indexing)
     end
     return index
   end
-  return vim.__str_byteindex(s, index, encoding == 'utf-16')
+  return vim._str_byteindex(s, index, encoding == 'utf-16')
     or strict_indexing and error('index out of range')
     or len
 end
@@ -793,8 +798,13 @@ function vim.str_utfindex(s, encoding, index, strict_indexing)
     -- Parameters: ~
     --   • {str}    (`string`)
     --   • {index}  (`integer?`)
+    vim.deprecate(
+      'vim.str_utfindex',
+      'vim.str_utfindex(s, encoding, index, strict_indexing)',
+      '1.0'
+    )
     local old_index = encoding
-    local col32, col16 = vim.__str_utfindex(s, old_index) --[[@as integer,integer]]
+    local col32, col16 = vim._str_utfindex(s, old_index) --[[@as integer,integer]]
     if not col32 or not col16 then
       error('index out of range')
     end
@@ -828,7 +838,7 @@ function vim.str_utfindex(s, encoding, index, strict_indexing)
     local len = #s
     return index <= len and index or (strict_indexing and error('index out of range') or len)
   end
-  local col32, col16 = vim.__str_utfindex(s, index) --[[@as integer?,integer?]]
+  local col32, col16 = vim._str_utfindex(s, index) --[[@as integer?,integer?]]
   local col = encoding == 'utf-16' and col16 or col32
   if col then
     return col
@@ -836,7 +846,7 @@ function vim.str_utfindex(s, encoding, index, strict_indexing)
   if strict_indexing then
     error('index out of range')
   end
-  local max32, max16 = vim.__str_utfindex(s)--[[@as integer integer]]
+  local max32, max16 = vim._str_utfindex(s)--[[@as integer integer]]
   return encoding == 'utf-16' and max16 or max32
 end
 
diff --git a/runtime/lua/vim/lsp.lua b/runtime/lua/vim/lsp.lua
index 4f13ad5721..125238a8e9 100644
--- a/runtime/lua/vim/lsp.lua
+++ b/runtime/lua/vim/lsp.lua
@@ -1049,7 +1049,7 @@ function lsp.formatexpr(opts)
     if client.supports_method(ms.textDocument_rangeFormatting) then
       local params = util.make_formatting_params()
       local end_line = vim.fn.getline(end_lnum) --[[@as string]]
-      local end_col = util._str_utfindex_enc(end_line, nil, client.offset_encoding)
+      local end_col = vim.str_utfindex(end_line, client.offset_encoding)
       --- @cast params +lsp.DocumentRangeFormattingParams
       params.range = {
         start = {
diff --git a/runtime/lua/vim/lsp/completion.lua b/runtime/lua/vim/lsp/completion.lua
index e36d329dc5..10086fa49e 100644
--- a/runtime/lua/vim/lsp/completion.lua
+++ b/runtime/lua/vim/lsp/completion.lua
@@ -315,7 +315,7 @@ local function adjust_start_col(lnum, line, items, encoding)
     end
   end
   if min_start_char then
-    return lsp.util._str_byteindex_enc(line, min_start_char, encoding)
+    return vim.str_byteindex(line, encoding, min_start_char, false)
   else
     return nil
   end
diff --git a/runtime/lua/vim/lsp/diagnostic.lua b/runtime/lua/vim/lsp/diagnostic.lua
index bf72222536..c59e2db901 100644
--- a/runtime/lua/vim/lsp/diagnostic.lua
+++ b/runtime/lua/vim/lsp/diagnostic.lua
@@ -33,25 +33,6 @@ local function severity_vim_to_lsp(severity)
   return severity
 end
 
----@param lines string[]?
----@param lnum integer
----@param col integer
----@param offset_encoding string
----@return integer
-local function line_byte_from_position(lines, lnum, col, offset_encoding)
-  if not lines or offset_encoding == 'utf-8' then
-    return col
-  end
-
-  local line = lines[lnum + 1]
-  local ok, result = pcall(vim.str_byteindex, line, col, offset_encoding == 'utf-16')
-  if ok then
-    return result --- @type integer
-  end
-
-  return col
-end
-
 ---@param bufnr integer
 ---@return string[]?
 local function get_buf_lines(bufnr)
@@ -118,12 +99,13 @@ local function diagnostic_lsp_to_vim(diagnostics, bufnr, client_id)
       )
       message = diagnostic.message.value
     end
+    local line = buf_lines and buf_lines[start.line + 1] or ''
     --- @type vim.Diagnostic
     return {
       lnum = start.line,
-      col = line_byte_from_position(buf_lines, start.line, start.character, offset_encoding),
+      col = vim.str_byteindex(line, offset_encoding, start.character, false),
       end_lnum = _end.line,
-      end_col = line_byte_from_position(buf_lines, _end.line, _end.character, offset_encoding),
+      end_col = vim.str_byteindex(line, offset_encoding, _end.character, false),
       severity = severity_lsp_to_vim(diagnostic.severity),
       message = message,
       source = diagnostic.source,
diff --git a/runtime/lua/vim/lsp/inlay_hint.lua b/runtime/lua/vim/lsp/inlay_hint.lua
index 61d119e653..e5892928cf 100644
--- a/runtime/lua/vim/lsp/inlay_hint.lua
+++ b/runtime/lua/vim/lsp/inlay_hint.lua
@@ -70,20 +70,12 @@ function M.on_inlayhint(err, result, ctx, _)
   end
 
   local lines = api.nvim_buf_get_lines(bufnr, 0, -1, false)
-  ---@param position lsp.Position
-  ---@return integer
-  local function pos_to_byte(position)
-    local col = position.character
-    if col > 0 then
-      local line = lines[position.line + 1] or ''
-      return util._str_byteindex_enc(line, col, client.offset_encoding)
-    end
-    return col
-  end
 
   for _, hint in ipairs(result) do
     local lnum = hint.position.line
-    hint.position.character = pos_to_byte(hint.position)
+    local line = lines and lines[lnum + 1] or ''
+    hint.position.character =
+      vim.str_byteindex(line, client.offset_encoding, hint.position.character, false)
     table.insert(new_lnum_hints[lnum], hint)
   end
 
diff --git a/runtime/lua/vim/lsp/semantic_tokens.lua b/runtime/lua/vim/lsp/semantic_tokens.lua
index 0f6e45c330..d680522592 100644
--- a/runtime/lua/vim/lsp/semantic_tokens.lua
+++ b/runtime/lua/vim/lsp/semantic_tokens.lua
@@ -137,16 +137,10 @@ local function tokens_to_ranges(data, bufnr, client, request)
     local token_type = token_types[data[i + 3] + 1]
     local modifiers = modifiers_from_number(data[i + 4], token_modifiers)
 
-    local function _get_byte_pos(col)
-      if col > 0 then
-        local buf_line = lines[line + 1] or ''
-        return util._str_byteindex_enc(buf_line, col, client.offset_encoding)
-      end
-      return col
-    end
-
-    local start_col = _get_byte_pos(start_char)
-    local end_col = _get_byte_pos(start_char + data[i + 2])
+    local end_char = start_char + data[i + 2]
+    local buf_line = lines and lines[line + 1] or ''
+    local start_col = vim.str_byteindex(buf_line, client.offset_encoding, start_char, false)
+    local end_col = vim.str_byteindex(buf_line, client.offset_encoding, end_char, false)
 
     if token_type then
       ranges[#ranges + 1] = {
diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
index bdfe8d51b8..3df45ebff0 100644
--- a/runtime/lua/vim/lsp/sync.lua
+++ b/runtime/lua/vim/lsp/sync.lua
@@ -48,45 +48,6 @@ local str_utfindex = vim.str_utfindex
 local str_utf_start = vim.str_utf_start
 local str_utf_end = vim.str_utf_end
 
--- Given a line, byte idx, and offset_encoding convert to the
--- utf-8, utf-16, or utf-32 index.
----@param line string the line to index into
----@param byte integer the byte idx
----@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
----@return integer utf_idx for the given encoding
-local function byte_to_utf(line, byte, offset_encoding)
-  -- convert to 0 based indexing for str_utfindex
-  byte = byte - 1
-
-  local utf_idx, _ --- @type integer, integer
-  -- Convert the byte range to utf-{8,16,32} and convert 1-based (lua) indexing to 0-based
-  if offset_encoding == 'utf-16' then
-    _, utf_idx = str_utfindex(line, byte)
-  elseif offset_encoding == 'utf-32' then
-    utf_idx, _ = str_utfindex(line, byte)
-  else
-    utf_idx = byte
-  end
-
-  -- convert to 1 based indexing
-  return utf_idx + 1
-end
-
----@param line string
----@param offset_encoding string
----@return integer
-local function compute_line_length(line, offset_encoding)
-  local length, _ --- @type integer, integer
-  if offset_encoding == 'utf-16' then
-    _, length = str_utfindex(line)
-  elseif offset_encoding == 'utf-32' then
-    length, _ = str_utfindex(line)
-  else
-    length = #line
-  end
-  return length
-end
-
 -- Given a line, byte idx, alignment, and offset_encoding convert to the aligned
 -- utf-8 index and either the utf-16, or utf-32 index.
 ---@param line string the line to index into
@@ -101,7 +62,7 @@ local function align_end_position(line, byte, offset_encoding)
     char = byte
     -- Called in the case of extending an empty line "" -> "a"
   elseif byte == #line + 1 then
-    char = compute_line_length(line, offset_encoding) + 1
+    char = str_utfindex(line, offset_encoding) + 1
   else
     -- Modifying line, find the nearest utf codepoint
     local offset = str_utf_start(line, byte)
@@ -111,9 +72,10 @@ local function align_end_position(line, byte, offset_encoding)
       byte = byte + str_utf_end(line, byte) + 1
     end
     if byte <= #line then
-      char = byte_to_utf(line, byte, offset_encoding)
+      --- Convert to 0 based for input, and from 0 based for output
+      char = str_utfindex(line, offset_encoding, byte - 1) + 1
     else
-      char = compute_line_length(line, offset_encoding) + 1
+      char = str_utfindex(line, offset_encoding) + 1
     end
     -- Extending line, find the nearest utf codepoint for the last valid character
   end
@@ -153,7 +115,7 @@ local function compute_start_range(
     if line then
       line_idx = firstline - 1
       byte_idx = #line + 1
-      char_idx = compute_line_length(line, offset_encoding) + 1
+      char_idx = str_utfindex(line, offset_encoding) + 1
     else
       line_idx = firstline
       byte_idx = 1
@@ -190,10 +152,11 @@ local function compute_start_range(
     char_idx = 1
   elseif start_byte_idx == #prev_line + 1 then
     byte_idx = start_byte_idx
-    char_idx = compute_line_length(prev_line, offset_encoding) + 1
+    char_idx = str_utfindex(prev_line, offset_encoding) + 1
   else
     byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
-    char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
+    --- Convert to 0 based for input, and from 0 based for output
+    char_idx = vim.str_utfindex(prev_line, offset_encoding, byte_idx - 1) + 1
   end
 
   -- Return the start difference (shared for new and prev lines)
@@ -230,7 +193,7 @@ local function compute_end_range(
     return {
       line_idx = lastline - 1,
       byte_idx = #prev_line + 1,
-      char_idx = compute_line_length(prev_line, offset_encoding) + 1,
+      char_idx = str_utfindex(prev_line, offset_encoding) + 1,
     }, { line_idx = 1, byte_idx = 1, char_idx = 1 }
   end
   -- If firstline == new_lastline, the first change occurred on a line that was deleted.
@@ -376,7 +339,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi
   local start_line = lines[start_range.line_idx]
   local range_length --- @type integer
   if start_line and #start_line > 0 then
-    range_length = compute_line_length(start_line, offset_encoding)
+    range_length = str_utfindex(start_line, offset_encoding)
       - start_range.char_idx
       + 1
       + line_ending_length
@@ -389,7 +352,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi
   for idx = start_range.line_idx + 1, end_range.line_idx - 1 do
     -- Length full line plus newline character
     if #lines[idx] > 0 then
-      range_length = range_length + compute_line_length(lines[idx], offset_encoding) + #line_ending
+      range_length = range_length + str_utfindex(lines[idx], offset_encoding) + #line_ending
     else
       range_length = range_length + line_ending_length
     end
diff --git a/runtime/lua/vim/lsp/util.lua b/runtime/lua/vim/lsp/util.lua
index 2e9c71cf38..9646f4d571 100644
--- a/runtime/lua/vim/lsp/util.lua
+++ b/runtime/lua/vim/lsp/util.lua
@@ -116,71 +116,6 @@ local function create_window_without_focus()
   return new
 end
 
---- Convert byte index to `encoding` index.
---- Convenience wrapper around vim.str_utfindex
----@param line string line to be indexed
----@param index integer? byte index (utf-8), or `nil` for length
----@param encoding 'utf-8'|'utf-16'|'utf-32'? defaults to utf-16
----@return integer `encoding` index of `index` in `line`
-function M._str_utfindex_enc(line, index, encoding)
-  local len32, len16 = vim.str_utfindex(line)
-  if not encoding then
-    encoding = 'utf-16'
-  end
-  if encoding == 'utf-8' then
-    if index then
-      return index
-    else
-      return #line
-    end
-  elseif encoding == 'utf-16' then
-    if not index or index > len16 then
-      return len16
-    end
-    local _, col16 = vim.str_utfindex(line, index)
-    return col16
-  elseif encoding == 'utf-32' then
-    if not index or index > len32 then
-      return len32
-    end
-    local col32, _ = vim.str_utfindex(line, index)
-    return col32
-  else
-    error('Invalid encoding: ' .. vim.inspect(encoding))
-  end
-end
-
---- Convert UTF index to `encoding` index.
---- Convenience wrapper around vim.str_byteindex
----Alternative to vim.str_byteindex that takes an encoding.
----@param line string line to be indexed
----@param index integer UTF index
----@param encoding string utf-8|utf-16|utf-32| defaults to utf-16
----@return integer byte (utf-8) index of `encoding` index `index` in `line`
-function M._str_byteindex_enc(line, index, encoding)
-  -- LSP spec: if character > line length, default to the line length.
-  -- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
-  local len8 = #line
-  if not encoding then
-    encoding = 'utf-16'
-  end
-  if encoding == 'utf-8' then
-    if index and index <= len8 then
-      return index
-    else
-      return len8
-    end
-  end
-  local len32, len16 = vim.str_utfindex(line)
-  if encoding == 'utf-16' then
-    return index <= len16 and vim.str_byteindex(line, index, true) or len8
-  elseif encoding == 'utf-32' then
-    return index <= len32 and vim.str_byteindex(line, index) or len8
-  else
-    error('Invalid encoding: ' .. vim.inspect(encoding))
-  end
-end
-
 --- Replaces text in a range with new text.
 ---
 --- CAUTION: Changes in-place!
@@ -352,7 +287,7 @@ local function get_line_byte_from_position(bufnr, position, offset_encoding)
   -- character
   if col > 0 then
     local line = get_line(bufnr, position.line) or ''
-    return M._str_byteindex_enc(line, col, offset_encoding)
+    return vim.str_byteindex(line, offset_encoding, col, false)
   end
   return col
 end
@@ -1787,8 +1722,8 @@ function M.locations_to_items(locations, offset_encoding)
       local end_row = end_pos.line
       local line = lines[row] or ''
       local end_line = lines[end_row] or ''
-      local col = M._str_byteindex_enc(line, pos.character, offset_encoding)
-      local end_col = M._str_byteindex_enc(end_line, end_pos.character, offset_encoding)
+      local col = vim.str_byteindex(line, offset_encoding, pos.character, false)
+      local end_col = vim.str_byteindex(end_line, offset_encoding, end_pos.character, false)
 
       items[#items + 1] = {
         filename = filename,
@@ -1911,7 +1846,7 @@ local function make_position_param(window, offset_encoding)
     return { line = 0, character = 0 }
   end
 
-  col = M._str_utfindex_enc(line, col, offset_encoding)
+  col = vim.str_utfindex(line, offset_encoding, col, false)
 
   return { line = row, character = col }
 end
@@ -2092,7 +2027,7 @@ function M.character_offset(buf, row, col, offset_encoding)
     )
     offset_encoding = vim.lsp.get_clients({ bufnr = buf })[1].offset_encoding
   end
-  return M._str_utfindex_enc(line, col, offset_encoding)
+  return vim.str_utfindex(line, offset_encoding, col, false)
 end
 
 --- Helper function to return nested values in language server settings
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
index bf8b085458..e719d99640 100644
--- a/src/nvim/lua/stdlib.c
+++ b/src/nvim/lua/stdlib.c
@@ -699,10 +699,10 @@ void nlua_state_add_stdlib(lua_State *const lstate, bool is_thread)
     lua_setfield(lstate, -2, "stricmp");
     // str_utfindex
     lua_pushcfunction(lstate, &nlua_str_utfindex);
-    lua_setfield(lstate, -2, "__str_utfindex");
+    lua_setfield(lstate, -2, "_str_utfindex");
     // str_byteindex
     lua_pushcfunction(lstate, &nlua_str_byteindex);
-    lua_setfield(lstate, -2, "__str_byteindex");
+    lua_setfield(lstate, -2, "_str_byteindex");
     // str_utf_pos
     lua_pushcfunction(lstate, &nlua_str_utf_pos);
     lua_setfield(lstate, -2, "str_utf_pos");
diff --git a/test/functional/plugin/lsp/diagnostic_spec.lua b/test/functional/plugin/lsp/diagnostic_spec.lua
index 78c684083b..b7e292cad0 100644
--- a/test/functional/plugin/lsp/diagnostic_spec.lua
+++ b/test/functional/plugin/lsp/diagnostic_spec.lua
@@ -219,13 +219,13 @@ describe('vim.lsp.diagnostic', function()
       eq(1, #result)
       eq(
         exec_lua(function()
-          return vim.str_byteindex(line, 7, true)
+          return vim.str_byteindex(line, 'utf-16', 7)
         end),
         result[1].col
       )
       eq(
         exec_lua(function()
-          return vim.str_byteindex(line, 8, true)
+          return vim.str_byteindex(line, 'utf-16', 8)
         end),
         result[1].end_col
       )
author	Tristan Knight <admin@snappeh.com>	2024-10-26 15:38:25 +0100
committer	GitHub <noreply@github.com>	2024-10-26 07:38:25 -0700
commit	25b53b593ef6f229fbec5b3dc205a7539579d13a (patch)
tree	8c13b6b78e22437b8fd22ac8b639ecc65417fff3
parent	b922b7d6d7889cce863540df7b0da7d512f8a2a1 (diff)
download	rneovim-25b53b593ef6f229fbec5b3dc205a7539579d13a.tar.gz rneovim-25b53b593ef6f229fbec5b3dc205a7539579d13a.tar.bz2 rneovim-25b53b593ef6f229fbec5b3dc205a7539579d13a.zip