From 36c401db243da08a60bde7f2c42e1ed9f5204c85 Mon Sep 17 00:00:00 2001 From: Rishikesh Vaishnav Date: Fri, 17 Dec 2021 18:05:00 -0800 Subject: fix(lsp): correctly align start and end range to codepoints during incremental sync (#16670) Closes #16624 Fixes two issues with aligning the start position and end position to codepoints when calculating the start and end range. When aligning the start position: * use aligned byte index to calculate character index rather than the unadjusted byte When aligning the end position: * do not adjust the end byte if it falls on a UTF-8 codepoint * align byte to the first byte of the next codepoint rather than the last byte of the current codepoint * compute the character end range on the aligned byte index This commit also adds additional test coverage, including multibyte operations that failed before this commit. --- runtime/lua/vim/lsp/sync.lua | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'runtime/lua/vim') diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua index 5df2a4d144..d01f45ad8f 100644 --- a/runtime/lua/vim/lsp/sync.lua +++ b/runtime/lua/vim/lsp/sync.lua @@ -105,15 +105,16 @@ local function align_end_position(line, byte, offset_encoding) char = compute_line_length(line, offset_encoding) + 1 else -- Modifying line, find the nearest utf codepoint - local offset = str_utf_end(line, byte) + local offset = str_utf_start(line, byte) -- If the byte does not fall on the start of the character, then -- align to the start of the next character. 
- if offset > 0 then - char = byte_to_utf(line, byte, offset_encoding) + 1 - byte = byte + offset - else + if offset < 0 then + byte = byte + str_utf_end(line, byte) + 1 + end + if byte <= #line then char = byte_to_utf(line, byte, offset_encoding) - byte = byte + offset + else + char = compute_line_length(line, offset_encoding) + 1 end -- Extending line, find the nearest utf codepoint for the last valid character end @@ -167,7 +168,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline, char_idx = compute_line_length(prev_line, offset_encoding) + 1 else byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx) - char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding) + char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding) end -- Return the start difference (shared for new and prev lines) -- cgit