From 36c401db243da08a60bde7f2c42e1ed9f5204c85 Mon Sep 17 00:00:00 2001
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
Date: Fri, 17 Dec 2021 18:05:00 -0800
Subject: fix(lsp): correctly align start and end range to codepoints during
 incremental sync (#16670)

Closes #16624

Fixes two issues with aligning the start position and end position to
codepoints when calculating the start and end range.

When aligning the start position:
* use the aligned byte index to calculate the character index rather than
  the unadjusted byte index

When aligning the end position:
* do not adjust the end byte if it already falls on the first byte of a
  UTF-8 codepoint
* align byte to the first byte of the next codepoint rather than the
  last byte of the current codepoint
* compute the character end range from the aligned byte index

This commit also adds test coverage, including multibyte operations
that failed before this commit.
---
 runtime/lua/vim/lsp/sync.lua | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'runtime/lua/vim')

diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
index 5df2a4d144..d01f45ad8f 100644
--- a/runtime/lua/vim/lsp/sync.lua
+++ b/runtime/lua/vim/lsp/sync.lua
@@ -105,15 +105,16 @@ local function align_end_position(line, byte, offset_encoding)
     char = compute_line_length(line, offset_encoding) + 1
   else
     -- Modifying line, find the nearest utf codepoint
-    local offset = str_utf_end(line, byte)
+    local offset = str_utf_start(line, byte)
     -- If the byte does not fall on the start of the character, then
     -- align to the start of the next character.
-    if offset > 0 then
-      char = byte_to_utf(line, byte, offset_encoding) + 1
-      byte = byte + offset
-    else
+    if offset < 0 then
+      byte = byte + str_utf_end(line, byte) + 1
+    end
+    if byte <= #line then
       char = byte_to_utf(line, byte, offset_encoding)
-      byte = byte + offset
+    else
+      char = compute_line_length(line, offset_encoding) + 1
     end
     -- Extending line, find the nearest utf codepoint for the last valid character
   end
@@ -167,7 +168,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
     char_idx = compute_line_length(prev_line, offset_encoding)  + 1
   else
     byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
-    char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
+    char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
   end
 
   -- Return the start difference (shared for new and prev lines)
-- 
cgit