about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/nvim/diff.c 9
-rw-r--r-- src/nvim/mbyte.c 10
-rw-r--r-- src/nvim/options.lua 5
3 files changed, 16 insertions, 8 deletions
diff --git a/src/nvim/diff.c b/src/nvim/diff.c
index 6309aa6c5e..ad3e093dde 100644
--- a/src/nvim/diff.c
+++ b/src/nvim/diff.c
@@ -2990,10 +2990,15 @@ static void diff_find_change_inline_diff(diff_T *dp)
char *s = curline;
while (*s != NUL) {
- // Always use the first buffer's 'iskeyword' to have a consistent diff
bool new_in_keyword = false;
if (diff_flags & DIFF_INLINE_WORD) {
- new_in_keyword = vim_iswordp_buf(s, curtab->tp_diffbuf[file1_idx]);
+ // Always use the first buffer's 'iskeyword' to have a
+ // consistent diff.
+ // For multibyte chars, only treat alphanumeric chars
+ // (class 2) as "word", as other classes such as emojis and
+ // CJK ideographs do not usually benefit from word diff as
+ // Vim doesn't have a good way to segment them.
+ new_in_keyword = (mb_get_class_tab(s, curtab->tp_diffbuf[file1_idx]->b_chartab) == 2);
}
if (in_keyword && !new_in_keyword) {
ga_append(curstr, NL);
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index add650e7a9..18df6b7e76 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -417,11 +417,11 @@ void remove_bom(char *s)
}
}
-// Get class of pointer:
-// 0 for blank or NUL
-// 1 for punctuation
-// 2 for an (ASCII) word character
-// >2 for other word characters
+/// Get class of pointer:
+/// 0 for blank or NUL
+/// 1 for punctuation
+/// 2 for an alphanumeric word character
+/// >2 for other word characters, including CJK and emoji
int mb_get_class(const char *p)
FUNC_ATTR_PURE
{
diff --git a/src/nvim/options.lua b/src/nvim/options.lua
index 0a807979ac..a5171ed58b 100644
--- a/src/nvim/options.lua
+++ b/src/nvim/options.lua
@@ -2286,7 +2286,10 @@ local options = {
difference.
word Use internal diff to perform a
|word|-wise diff and highlight the
- difference.
+ difference. Non-alphanumeric
+ multi-byte characters such as emoji
+ and CJK characters are considered
+ individual words.
internal Use the internal diff library. This is
ignored when 'diffexpr' is set. *E960*