aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzeertzjq <zeertzjq@outlook.com>2025-04-05 09:42:00 +0800
committerGitHub <noreply@github.com>2025-04-05 09:42:00 +0800
commite8785c2e94508eeabf6ff63e1fe1bcaecceef946 (patch)
tree95550a4242171634e6fda593be9c757ffeb70ecc
parent1e1384b6dd29c071ed76e4e2ed233511404bae72 (diff)
downloadrneovim-e8785c2e94508eeabf6ff63e1fe1bcaecceef946.tar.gz
rneovim-e8785c2e94508eeabf6ff63e1fe1bcaecceef946.tar.bz2
rneovim-e8785c2e94508eeabf6ff63e1fe1bcaecceef946.zip
vim-patch:9.1.1276: inline word diff treats multibyte chars as word char (#33323)
Problem: inline word diff treats multibyte chars as word char (after 9.1.1243) Solution: treat all non-alphanumeric characters as non-word characters (Yee Cheng Chin) Previously inline word diff simply used Vim's definition of keyword to determine what is a word, which leads to multi-byte character classes such as emojis and CJK (Chinese/Japanese/Korean) characters all classifying as word characters, leading to entire sentences being grouped as a single word which does not provide meaningful information in a diff highlight. Fix this by treating all non-alphanumeric characters (with class number above 2) as non-word characters, as there is usually no benefit in using word diff on them. These include CJK characters, emojis, and also subscript/superscript numbers. Meanwhile, multi-byte characters like Cyrillic and Greek letters will still continue to be considered as words. Note that this is slightly inconsistent with how words are defined elsewhere, as Vim usually considers any character with class >=2 to be a "word". related: vim/vim#16881 (diff inline highlight) closes: vim/vim#17050 https://github.com/vim/vim/commit/9aa120f7ada592ed03b37f4de8ee413c5385f123 Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>
-rw-r--r--runtime/doc/options.txt5
-rw-r--r--runtime/lua/vim/_meta/options.lua5
-rw-r--r--src/nvim/diff.c9
-rw-r--r--src/nvim/mbyte.c10
-rw-r--r--src/nvim/options.lua5
-rw-r--r--test/functional/ui/diff_spec.lua16
-rw-r--r--test/old/testdir/test_diffmode.vim5
7 files changed, 45 insertions, 10 deletions
diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt
index 972d2cce51..e94b72a260 100644
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -2132,7 +2132,10 @@ A jump table for the options with a short description can be found at |Q_op|.
difference.
word Use internal diff to perform a
|word|-wise diff and highlight the
- difference.
+ difference. Non-alphanumeric
+ multi-byte characters such as emoji
+ and CJK characters are considered
+ individual words.
internal Use the internal diff library. This is
ignored when 'diffexpr' is set. *E960*
diff --git a/runtime/lua/vim/_meta/options.lua b/runtime/lua/vim/_meta/options.lua
index 0fc8518aa4..c032edf43c 100644
--- a/runtime/lua/vim/_meta/options.lua
+++ b/runtime/lua/vim/_meta/options.lua
@@ -1729,7 +1729,10 @@ vim.go.dex = vim.go.diffexpr
--- difference.
--- word Use internal diff to perform a
--- `word`-wise diff and highlight the
---- difference.
+--- difference. Non-alphanumeric
+--- multi-byte characters such as emoji
+--- and CJK characters are considered
+--- individual words.
---
--- internal Use the internal diff library. This is
--- ignored when 'diffexpr' is set. *E960*
diff --git a/src/nvim/diff.c b/src/nvim/diff.c
index 6309aa6c5e..ad3e093dde 100644
--- a/src/nvim/diff.c
+++ b/src/nvim/diff.c
@@ -2990,10 +2990,15 @@ static void diff_find_change_inline_diff(diff_T *dp)
char *s = curline;
while (*s != NUL) {
- // Always use the first buffer's 'iskeyword' to have a consistent diff
bool new_in_keyword = false;
if (diff_flags & DIFF_INLINE_WORD) {
- new_in_keyword = vim_iswordp_buf(s, curtab->tp_diffbuf[file1_idx]);
+ // Always use the first buffer's 'iskeyword' to have a
+ // consistent diff.
+ // For multibyte chars, only treat alphanumeric chars
+ // (class 2) as "word", as other classes such as emojis and
+ // CJK ideographs do not usually benefit from word diff as
+ // Vim doesn't have a good way to segment them.
+ new_in_keyword = (mb_get_class_tab(s, curtab->tp_diffbuf[file1_idx]->b_chartab) == 2);
}
if (in_keyword && !new_in_keyword) {
ga_append(curstr, NL);
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index add650e7a9..18df6b7e76 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -417,11 +417,11 @@ void remove_bom(char *s)
}
}
-// Get class of pointer:
-// 0 for blank or NUL
-// 1 for punctuation
-// 2 for an (ASCII) word character
-// >2 for other word characters
+/// Get class of pointer:
+/// 0 for blank or NUL
+/// 1 for punctuation
+/// 2 for an alphanumeric word character
+/// >2 for other word characters, including CJK and emoji
int mb_get_class(const char *p)
FUNC_ATTR_PURE
{
diff --git a/src/nvim/options.lua b/src/nvim/options.lua
index 0a807979ac..a5171ed58b 100644
--- a/src/nvim/options.lua
+++ b/src/nvim/options.lua
@@ -2286,7 +2286,10 @@ local options = {
difference.
word Use internal diff to perform a
|word|-wise diff and highlight the
- difference.
+ difference. Non-alphanumeric
+ multi-byte characters such as emoji
+ and CJK characters are considered
+ individual words.
internal Use the internal diff library. This is
ignored when 'diffexpr' is set. *E960*
diff --git a/test/functional/ui/diff_spec.lua b/test/functional/ui/diff_spec.lua
index 223622eb6b..21174eaa45 100644
--- a/test/functional/ui/diff_spec.lua
+++ b/test/functional/ui/diff_spec.lua
@@ -2485,6 +2485,22 @@ it('diff mode inline highlighting', function()
command('windo set iskeyword& | 1wincmd w')
+ screen:try_resize(75, 20)
+ command('wincmd =')
+ -- word diff: test handling of multi-byte characters. Only alphanumeric chars
+ -- (e.g. Greek alphabet, but not CJK/emoji) count as words.
+ WriteDiffFiles(
+ '🚀⛵️一二三ひらがなΔέλτα Δelta foobar',
+ '🚀🛸一二四ひらなδέλτα δelta foobar'
+ )
+ command('set diffopt=internal,filler diffopt+=inline:word')
+ screen:expect([[
+ {7: }{4:^🚀}{27:⛵️}{4:一二}{27:三}{4:ひら}{100:が}{4:な}{27:Δέλτα}{4: }{27:Δelta}{4: fooba}│{7: }{4:🚀}{27:🛸}{4:一二}{27:四}{4:ひらな}{27:δέλτα}{4: }{27:δelta}{4: foobar }|
+ {1:~ }│{1:~ }|*17
+ {3:Xdifile1 }{2:Xdifile2 }|
+ |
+ ]])
+
screen:try_resize(69, 20)
command('wincmd =')
-- char diff: should slide highlight to whitespace boundary if possible for
diff --git a/test/old/testdir/test_diffmode.vim b/test/old/testdir/test_diffmode.vim
index 6a5054b37b..2d3bcb8234 100644
--- a/test/old/testdir/test_diffmode.vim
+++ b/test/old/testdir/test_diffmode.vim
@@ -2193,6 +2193,11 @@ func Test_diff_inline()
call term_sendkeys(buf, ":windo set iskeyword&\<CR>:1wincmd w\<CR>")
+ " word diff: test handling of multi-byte characters. Only alphanumeric chars
+ " (e.g. Greek alphabet, but not CJK/emoji) count as words.
+ call WriteDiffFiles(buf, ["🚀⛵️一二三ひらがなΔέλτα Δelta foobar"], ["🚀🛸一二四ひらなδέλτα δelta foobar"])
+ call VerifyInternal(buf, "Test_diff_inline_word_03", " diffopt+=inline:word")
+
" char diff: should slide highlight to whitespace boundary if possible for
" better readability (by using forced indent-heuristics). A wrong result
" would be if the highlight is "Bar, prefix". It should be "prefixBar, "