vim-patch:9.1.1243: diff mode is lacking for changes within lines

Problem: Diff mode's inline highlighting is lackluster. It only performs a line-by-line comparison, and calculates a single shortest range within a line that could encompass all the changes. In lines with multiple changes, or those that span multiple lines, this approach tends to end up highlighting much more than necessary. Solution: Implement new inline highlighting modes by doing per-character or per-word diff within the diff block, and highlight only the relevant parts, add "inline:simple" to the defaults (which is the old behaviour). This change introduces a new diffopt option "inline:<type>". Setting to "none" will disable all inline highlighting, "simple" (the default) will use the old behavior, "char" / "word" will perform a character/word-wise diff of the texts within each diff block and only highlight the differences. The new char/word inline diff only uses the internal xdiff, and will respect diff options such as algorithm choice, icase, and misc iwhite options. indent-heuristics is always on to perform better sliding. For character highlight, a post-process of the diff results is first applied before we show the highlight. This is because a naive diff will create a result with a lot of small diff chunks and gaps, due to the repetitive nature of individual characters. The post-process is a heuristic-based refinement that attempts to merge adjacent diff blocks if they are separated by a short gap (1-3 characters), and can be further tuned in the future for better results. This process results in more characters than necessary being highlighted but overall less visual noise. For word highlight, always use first buffer's iskeyword definition. Otherwise if each buffer has different iskeyword settings we would not be able to group words properly. The char/word diffing is always per-diff block, not per line, meaning that changes that span multiple lines will show up correctly. 
Added/removed newlines are not shown by default, but if the user has 'list' set (with "eol" listchar defined), the eol character will be highlighted correctly for the specific newline characters. Also, add a new "DiffTextAdd" highlight group linked to "DiffText" by default. It allows color schemes to use different colors for texts that have been added within a line versus modified. This doesn't interact with linematch perfectly currently. The linematch feature splits up diff blocks into multiple smaller blocks for better visual matching, which makes inline highlight less useful especially for multi-line changes (e.g. a line is broken into two lines). This could be addressed in the future. As a side change, this also removes the bounds checking introduced to diff_read() as they were added to mask existing logic bugs that were properly fixed in vim/vim#16768. closes: vim/vim#16881 https://github.com/vim/vim/commit/9943d4790e42721a6777da9e12637aa595ba4965 Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>
author: zeertzjq <zeertzjq@outlook.com> 2025-03-27 09:24:26 +0800
committer: zeertzjq <zeertzjq@outlook.com> 2025-03-28 14:45:01 +0800
commit: 2331c52affe64070ad59c0ef63ddcc8f7ca41781 (patch)
tree: a2438a9e9cecad08f85890c959a02523a1861260 /src/nvim/diff.c
parent: ae98d0a560b08d901ee9aae85df634de0ae3fe0a (diff)
download: rneovim-2331c52affe64070ad59c0ef63ddcc8f7ca41781.tar.gz
rneovim-2331c52affe64070ad59c0ef63ddcc8f7ca41781.tar.bz2
rneovim-2331c52affe64070ad59c0ef63ddcc8f7ca41781.zip
1 files changed, 659 insertions, 60 deletions
diff --git a/src/nvim/diff.c b/src/nvim/diff.c
index 4c5b86adc4..585a937558 100644
--- a/src/nvim/diff.c
+++ b/src/nvim/diff.c
@@ -87,7 +87,13 @@ static bool diff_need_update = false;  // ex_diffupdate needs to be called
 #define DIFF_CLOSE_OFF  0x400   // diffoff when closing window
 #define DIFF_FOLLOWWRAP 0x800   // follow the wrap option
 #define DIFF_LINEMATCH  0x1000  // match most similar lines within diff
+#define DIFF_INLINE_NONE    0x2000  // no inline highlight
+#define DIFF_INLINE_SIMPLE  0x4000  // inline highlight with simple algorithm
+#define DIFF_INLINE_CHAR    0x8000  // inline highlight with character diff
+#define DIFF_INLINE_WORD    0x10000  // inline highlight with word diff
 #define ALL_WHITE_DIFF (DIFF_IWHITE | DIFF_IWHITEALL | DIFF_IWHITEEOL)
+#define ALL_INLINE (DIFF_INLINE_NONE | DIFF_INLINE_SIMPLE | DIFF_INLINE_CHAR | DIFF_INLINE_WORD)
+#define ALL_INLINE_DIFF (DIFF_INLINE_CHAR | DIFF_INLINE_WORD)
 static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF;
 
 static int diff_algorithm = 0;
@@ -137,6 +143,15 @@ typedef enum {
 # include "diff.c.generated.h"
 #endif
 
+#define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \
+  for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next)
+
+static void clear_diffblock(diff_T *dp)
+{
+  ga_clear(&dp->df_changes);
+  xfree(dp);
+}
+
 /// Called when deleting or unloading a buffer: No longer make a diff with it.
 ///
 /// @param buf
@@ -523,7 +538,7 @@ static void diff_mark_adjust_tp(tabpage_T *tp, int idx, linenr_T line1, linenr_T
 /// @return The new diff block.
 static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp)
 {
-  diff_T *dnew = xmalloc(sizeof(*dnew));
+  diff_T *dnew = xcalloc(1, sizeof(*dnew));
 
   dnew->is_linematched = false;
   dnew->df_next = dp;
@@ -533,13 +548,15 @@ static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp)
     dprev->df_next = dnew;
   }
 
+  dnew->has_changes = false;
+  ga_init(&dnew->df_changes, sizeof(diffline_change_T), 20);
   return dnew;
 }
 
 static diff_T *diff_free(tabpage_T *tp, diff_T *dprev, diff_T *dp)
 {
   diff_T *ret = dp->df_next;
-  xfree(dp);
+  clear_diffblock(dp);
 
   if (dprev == NULL) {
     tp->tp_first_diff = ret;
@@ -764,15 +781,32 @@ static int diff_write_buffer(buf_T *buf, mmfile_t *m, linenr_T start, linenr_T e
     char *s = ml_get_buf(buf, lnum);
     if (diff_flags & DIFF_ICASE) {
       while (*s != NUL) {
+        int c;
+        int c_len = 1;
         char cbuf[MB_MAXBYTES + 1];
 
-        // xdiff doesn't support ignoring case, fold-case the text.
-        int c = *s == NL ? NUL : utf_fold(utf_ptr2char(s));
+        if (*s == NL) {
+          c = NUL;
+        } else {
+          // xdiff doesn't support ignoring case, fold-case the text.
+          c = utf_ptr2char(s);
+          c_len = utf_char2len(c);
+          c = utf_fold(c);
+        }
         const int orig_len = utfc_ptr2len(s);
 
-        // TODO(Bram): handle byte length difference
-        char *s1 = (utf_char2bytes(c, cbuf) != orig_len) ? s : cbuf;
-        memmove(ptr + len, s1, (size_t)orig_len);
+        if (utf_char2bytes(c, cbuf) != c_len) {
+          // TODO(Bram): handle byte length difference
+          // One example is Å (3 bytes) and å (2 bytes).
+          memmove(ptr + len, s, (size_t)orig_len);
+        } else {
+          memmove(ptr + len, cbuf, (size_t)c_len);
+          if (orig_len > c_len) {
+            // Copy remaining composing characters
+            memmove(ptr + len + c_len, s + c_len, (size_t)(orig_len - c_len));
+          }
+        }
+
         s += orig_len;
         len += (size_t)orig_len;
       }
@@ -944,8 +978,7 @@ void ex_diffupdate(exarg_T *eap)
   }
 
   // Only use the internal method if it did not fail for one of the buffers.
-  diffio_T diffio;
-  CLEAR_FIELD(diffio);
+  diffio_T diffio = { 0 };
   diffio.dio_internal = diff_internal();
 
   diff_try_update(&diffio, idx_orig, eap);
@@ -1640,11 +1673,6 @@ static void process_hunk(diff_T **dpp, diff_T **dprevp, int idx_orig, int idx_ne
       if (off > 0) {
         dp->df_count[idx_new] += off;
       }
-      if ((dp->df_lnum[idx_new] + dp->df_count[idx_new] - 1)
-          > curtab->tp_diffbuf[idx_new]->b_ml.ml_line_count) {
-        dp->df_count[idx_new] = curtab->tp_diffbuf[idx_new]->b_ml.ml_line_count
-                                - dp->df_lnum[idx_new] + 1;
-      }
     }
 
     // Adjust the size of the block to include all the lines to the
@@ -1662,11 +1690,6 @@ static void process_hunk(diff_T **dpp, diff_T **dprevp, int idx_orig, int idx_ne
         // overlap later.
         dp->df_count[idx_new] += -off;
       }
-      if ((dp->df_lnum[idx_new] + dp->df_count[idx_new] - 1)
-          > curtab->tp_diffbuf[idx_new]->b_ml.ml_line_count) {
-        dp->df_count[idx_new] = curtab->tp_diffbuf[idx_new]->b_ml.ml_line_count
-                                - dp->df_lnum[idx_new] + 1;
-      }
       off = 0;
     }
 
@@ -1683,7 +1706,7 @@ static void process_hunk(diff_T **dpp, diff_T **dprevp, int idx_orig, int idx_ne
 
     while (dn != dp->df_next) {
       dpl = dn->df_next;
-      xfree(dn);
+      clear_diffblock(dn);
       dn = dpl;
     }
   } else {
@@ -1717,7 +1740,7 @@ static void process_hunk(diff_T **dpp, diff_T **dprevp, int idx_orig, int idx_ne
 static void diff_read(int idx_orig, int idx_new, diffio_T *dio)
 {
   FILE *fd = NULL;
-  int line_idx = 0;
+  int line_hunk_idx = 0;  // line or hunk index
   diff_T *dprev = NULL;
   diff_T *dp = curtab->tp_first_diff;
   diffout_T *dout = &dio->dio_diff;
@@ -1735,7 +1758,7 @@ static void diff_read(int idx_orig, int idx_new, diffio_T *dio)
   while (true) {
     diffhunk_T hunk = { 0 };
     bool eof = dio->dio_internal
-               ? extract_hunk_internal(dout, &hunk, &line_idx)
+               ? extract_hunk_internal(dout, &hunk, &line_hunk_idx)
                : extract_hunk(fd, &hunk, &diffstyle);
 
     if (eof) {
@@ -1789,7 +1812,7 @@ void diff_clear(tabpage_T *tp)
   diff_T *next_p;
   for (diff_T *p = tp->tp_first_diff; p != NULL; p = next_p) {
     next_p = p->df_next;
-    xfree(p);
+    clear_diffblock(p);
   }
   tp->tp_first_diff = NULL;
 }
@@ -2532,6 +2555,28 @@ int diffopt_changed(void)
       } else {
         return FAIL;
       }
+    } else if (strncmp(p, "inline:", 7) == 0) {
+      // Note: Keep this in sync with opt_dip_inline_values.
+      p += 7;
+      if (strncmp(p, "none", 4) == 0) {
+        p += 4;
+        diff_flags_new &= ~(ALL_INLINE);
+        diff_flags_new |= DIFF_INLINE_NONE;
+      } else if (strncmp(p, "simple", 6) == 0) {
+        p += 6;
+        diff_flags_new &= ~(ALL_INLINE);
+        diff_flags_new |= DIFF_INLINE_SIMPLE;
+      } else if (strncmp(p, "char", 4) == 0) {
+        p += 4;
+        diff_flags_new &= ~(ALL_INLINE);
+        diff_flags_new |= DIFF_INLINE_CHAR;
+      } else if (strncmp(p, "word", 4) == 0) {
+        p += 4;
+        diff_flags_new &= ~(ALL_INLINE);
+        diff_flags_new |= DIFF_INLINE_WORD;
+      } else {
+        return FAIL;
+      }
     } else if ((strncmp(p, "linematch:", 10) == 0) && ascii_isdigit(p[10])) {
       p += 10;
       linematch_lines_new = getdigits_int(&p, false, linematch_lines_new);
@@ -2604,48 +2649,101 @@ bool diffopt_filler(void)
   return (diff_flags & DIFF_FILLER) != 0;
 }
 
-/// Find the difference within a changed line.
-///
-/// @param  wp      window whose current buffer to check
-/// @param  lnum    line number to check within the buffer
-/// @param  startp  first char of the change
-/// @param  endp    last char of the change
-///
-/// @return true if the line was added, no other buffer has it.
-bool diff_find_change(win_T *wp, linenr_T lnum, int *startp, int *endp)
-  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
+/// Called when a line has been updated. Used for updating inline diff in Insert
+/// mode without waiting for global diff update later.
+void diff_update_line(linenr_T lnum)
 {
-  // Make a copy of the line, the next ml_get() will invalidate it.
-  char *line_org = xstrdup(ml_get_buf(wp->w_buffer, lnum));
+  if (!(diff_flags & ALL_INLINE_DIFF)) {
+    // We only care if we are doing inline-diff where we cache the diff results
+    return;
+  }
 
-  int idx = diff_buf_idx(wp->w_buffer, curtab);
+  int idx = diff_buf_idx(curbuf, curtab);
   if (idx == DB_COUNT) {
-    // cannot happen
-    xfree(line_org);
-    return false;
+    return;
   }
-
-  // search for a change that includes "lnum" in the list of diffblocks.
   diff_T *dp;
-  for (dp = curtab->tp_first_diff; dp != NULL; dp = dp->df_next) {
+  FOR_ALL_DIFFBLOCKS_IN_TAB(curtab, dp) {
     if (lnum <= dp->df_lnum[idx] + dp->df_count[idx]) {
       break;
     }
   }
-  if (dp != NULL && dp->is_linematched) {
-    while (dp && dp->df_next
-           && lnum == dp->df_count[idx] + dp->df_lnum[idx]
-           && dp->df_next->df_lnum[idx] == lnum) {
-      dp = dp->df_next;
-    }
+
+  // clear the inline change cache as it's invalid
+  if (dp != NULL) {
+    dp->has_changes = false;
+    dp->df_changes.ga_len = 0;
   }
+}
 
-  if ((dp == NULL) || (diff_check_sanity(curtab, dp) == FAIL)) {
-    xfree(line_org);
+/// used for simple inline diff algorithm
+static diffline_change_T simple_diffline_change;
+
+/// Parse a diffline struct and returns the [start,end] byte offsets
+///
+/// Returns true if this change was added, no other buffer has it.
+bool diff_change_parse(diffline_T *diffline, diffline_change_T *change, int *change_start,
+                       int *change_end)
+{
+  if (change->dc_start_lnum_off[diffline->bufidx] < diffline->lineoff) {
+    *change_start = 0;
+  } else {
+    *change_start = change->dc_start[diffline->bufidx];
+  }
+  if (change->dc_end_lnum_off[diffline->bufidx] > diffline->lineoff) {
+    *change_end = INT_MAX;
+  } else {
+    *change_end = change->dc_end[diffline->bufidx];
+  }
 
+  if (change == &simple_diffline_change) {
+    // This is what we returned from simple inline diff. We always consider
+    // the range to be changed, rather than added for now.
     return false;
   }
 
+  // Find out whether this is an addition. Note that for multi buffer diff,
+  // to tell whether lines are additions we check whether all the other diff
+  // lines are identical (in diff_check_with_linestatus). If so, we mark them
+  // as add. We don't do that for inline diff here for simplicity.
+  for (int i = 0; i < DB_COUNT; i++) {
+    if (i == diffline->bufidx) {
+      continue;
+    }
+    if (change->dc_start[i] != change->dc_end[i]
+        || change->dc_end_lnum_off[i] != change->dc_start_lnum_off[i]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Find the difference within a changed line and returns [startp,endp] byte
+/// positions.  Performs a simple algorithm by finding a single range in the
+/// middle.
+///
+/// If diffopt has DIFF_INLINE_NONE set, then this will only calculate the return
+/// value (added or changed), but startp/endp will not be calculated.
+///
+/// @param  wp      window whose current buffer to check
+/// @param  lnum    line number to check within the buffer
+/// @param  startp  first char of the change
+/// @param  endp    last char of the change
+///
+/// @return true if the line was added, no other buffer has it.
+static bool diff_find_change_simple(win_T *wp, linenr_T lnum, const diff_T *dp, int idx,
+                                    int *startp, int *endp)
+  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
+{
+  char *line_org;
+  if (diff_flags & DIFF_INLINE_NONE) {
+    // We only care about the return value, not the actual string comparisons.
+    line_org = NULL;
+  } else {
+    // Make a copy of the line, the next ml_get() will invalidate it.
+    line_org = xstrdup(ml_get_buf(wp->w_buffer, lnum));
+  }
+
   int si_org;
   int si_new;
   int ei_org;
@@ -2660,6 +2758,10 @@ bool diff_find_change(win_T *wp, linenr_T lnum, int *startp, int *endp)
         continue;
       }
       added = false;
+      if (diff_flags & DIFF_INLINE_NONE) {
+        break;  // early terminate as we only care about the return value
+      }
+
       char *line_new = ml_get_buf(curtab->tp_diffbuf[i], dp->df_lnum[i] + off);
 
       // Search for start of difference
@@ -2738,6 +2840,470 @@ bool diff_find_change(win_T *wp, linenr_T lnum, int *startp, int *endp)
   return added;
 }
 
+/// Mapping used for mapping from temporary mmfile created for inline diff back
+/// to original buffer's line/col.
+typedef struct {
+  colnr_T byte_start;
+  colnr_T num_bytes;
+  int lineoff;
+} linemap_entry_T;
+
+/// Refine inline character-wise diff blocks to create a more human readable
+/// highlight. Otherwise a naive diff under existing algorithms tends to create
+/// a messy output with lots of small gaps.
+/// It does this by merging adjacent long diff blocks if they are only separated
+/// by a couple characters.
+/// These are done by heuristics and can be further tuned.
+static void diff_refine_inline_char_highlight(diff_T *dp_orig, garray_T *linemap, int idx1)
+{
+  // Perform multiple passes so that newly merged blocks will now be long
+  // enough which may cause other previously unmerged gaps to be merged as
+  // well.
+  int pass = 1;
+  do {
+    bool has_unmerged_gaps = false;
+    bool has_merged_gaps = false;
+    diff_T *dp = dp_orig;
+    while (dp != NULL && dp->df_next != NULL) {
+      // Only use first buffer to calculate the gap because the gap is
+      // unchanged text, which would be the same in all buffers.
+      if (dp->df_lnum[idx1] + dp->df_count[idx1] - 1 >= linemap[idx1].ga_len
+          || dp->df_next->df_lnum[idx1] - 1 >= linemap[idx1].ga_len) {
+        dp = dp->df_next;
+        continue;
+      }
+
+      // If the gap occurs over different lines, don't consider it
+      linemap_entry_T *entry1 =
+        &((linemap_entry_T *)linemap[idx1].ga_data)[dp->df_lnum[idx1]
+                                                    + dp->df_count[idx1] - 1];
+      linemap_entry_T *entry2 =
+        &((linemap_entry_T *)linemap[idx1].ga_data)[dp->df_next->df_lnum[idx1] - 1];
+      if (entry1->lineoff != entry2->lineoff) {
+        dp = dp->df_next;
+        continue;
+      }
+
+      linenr_T gap = dp->df_next->df_lnum[idx1] - (dp->df_lnum[idx1] + dp->df_count[idx1]);
+      if (gap <= 3) {
+        linenr_T max_df_count = 0;
+        for (int i = 0; i < DB_COUNT; i++) {
+          max_df_count = MAX(max_df_count, dp->df_count[i] + dp->df_next->df_count[i]);
+        }
+
+        if (max_df_count >= gap * 4) {
+          // Merge current block with the next one. Don't advance the
+          // pointer so we try the same merged block against the next
+          // one.
+          for (int i = 0; i < DB_COUNT; i++) {
+            dp->df_count[i] = dp->df_next->df_lnum[i]
+                              + dp->df_next->df_count[i] - dp->df_lnum[i];
+          }
+          diff_T *dp_next = dp->df_next;
+          dp->df_next = dp_next->df_next;
+          clear_diffblock(dp_next);
+          has_merged_gaps = true;
+          continue;
+        } else {
+          has_unmerged_gaps = true;
+        }
+      }
+      dp = dp->df_next;
+    }
+    if (!has_unmerged_gaps || !has_merged_gaps) {
+      break;
+    }
+  } while (pass++ < 4);  // use limited number of passes to avoid excessive looping
+}
+
+/// Find the inline difference within a diff block among different buffers.  Do
+/// this by splitting each block's content into characters or words, and then
+/// use internal xdiff to calculate the per-character/word diff.  The result is
+/// stored in dp instead of returned by the function.
+static void diff_find_change_inline_diff(diff_T *dp)
+{
+  const int save_diff_algorithm = diff_algorithm;
+
+  diffio_T dio = { 0 };
+  ga_init(&dio.dio_diff.dout_ga, sizeof(char *), 1000);
+
+  // inline diff only supports internal algo
+  dio.dio_internal = true;
+
+  // always use indent-heuristics to slide diff splits along
+  // whitespace
+  diff_algorithm |= XDF_INDENT_HEURISTIC;
+
+  // diff_read() has an implicit dependency on curtab->tp_first_diff
+  diff_T *orig_diff = curtab->tp_first_diff;
+  curtab->tp_first_diff = NULL;
+
+  garray_T linemap[DB_COUNT];
+  garray_T file1_str;
+  garray_T file2_str;
+
+  // Buffers to populate mmfile 1/2 that would be passed to xdiff as memory
+  // files. Use a grow array as it is not obvious how much exact space we
+  // need.
+  ga_init(&file1_str, 1, 1024);
+  ga_init(&file2_str, 1, 1024);
+
+  // Line map to map from generated mmfiles' line numbers back to original
+  // diff blocks' locations. Need this even for char diff because not all
+  // characters are 1-byte long / ASCII.
+  for (int i = 0; i < DB_COUNT; i++) {
+    ga_init(&linemap[i], sizeof(linemap_entry_T), 128);
+  }
+
+  int file1_idx = -1;
+  for (int i = 0; i < DB_COUNT; i++) {
+    dio.dio_diff.dout_ga.ga_len = 0;
+
+    buf_T *buf = curtab->tp_diffbuf[i];
+    if (buf == NULL || buf->b_ml.ml_mfp == NULL) {
+      continue;  // skip buffer that isn't loaded
+    }
+    if (dp->df_count[i] == 0) {
+      continue;  // skip buffer that don't have any texts in this block
+    }
+    if (file1_idx == -1) {
+      file1_idx = i;
+    }
+
+    garray_T *curstr = (file1_idx != i) ? &file2_str : &file1_str;
+
+    linenr_T numlines = 0;
+    curstr->ga_len = 0;
+
+    // Split each line into chars/words and populate fake file buffer as
+    // newline-delimited tokens as that's what xdiff requires.
+    for (int off = 0; off < dp->df_count[i]; off++) {
+      char *curline = ml_get_buf(curtab->tp_diffbuf[i], dp->df_lnum[i] + off);
+
+      bool in_keyword = false;
+
+      // iwhiteeol support vars
+      bool last_white = false;
+      int eol_ga_len = -1;
+      int eol_linemap_len = -1;
+      int eol_numlines = -1;
+
+      char *s = curline;
+      while (*s != NUL) {
+        // Always use the first buffer's 'iskeyword' to have a consistent diff
+        bool new_in_keyword = false;
+        if (diff_flags & DIFF_INLINE_WORD) {
+          new_in_keyword = vim_iswordp_buf(s, curtab->tp_diffbuf[file1_idx]);
+        }
+        if (in_keyword && !new_in_keyword) {
+          ga_append(curstr, NL);
+          numlines++;
+        }
+
+        if (ascii_iswhite(*s)) {
+          if (diff_flags & DIFF_IWHITEALL) {
+            in_keyword = false;
+            s = skipwhite(s);
+            continue;
+          } else if ((diff_flags & DIFF_IWHITEEOL) || (diff_flags & DIFF_IWHITE)) {
+            if (!last_white) {
+              eol_ga_len = curstr->ga_len;
+              eol_linemap_len = linemap[i].ga_len;
+              eol_numlines = numlines;
+              last_white = true;
+            }
+          }
+        } else {
+          if ((diff_flags & DIFF_IWHITEEOL) || (diff_flags & DIFF_IWHITE)) {
+            last_white = false;
+            eol_ga_len = -1;
+            eol_linemap_len = -1;
+            eol_numlines = -1;
+          }
+        }
+
+        int char_len = 1;
+        if (*s == NL) {
+          // NL is internal substitute for NUL
+          ga_append(curstr, NUL);
+        } else {
+          char_len = utfc_ptr2len(s);
+
+          if (ascii_iswhite(*s) && (diff_flags & DIFF_IWHITE)) {
+            // Treat the entire white space span as a single char.
+            char_len = (int)(skipwhite(s) - s);
+          }
+
+          if (diff_flags & DIFF_ICASE) {
+            // xdiff doesn't support ignoring case, fold-case the text manually.
+            int c = utf_ptr2char(s);
+            int c_len = utf_char2len(c);
+            c = utf_fold(c);
+            char cbuf[MB_MAXBYTES + 1];
+            int c_fold_len = utf_char2bytes(c, cbuf);
+            ga_concat_len(curstr, cbuf, (size_t)c_fold_len);
+            if (char_len > c_len) {
+              // There may be remaining composing characters. Write those back in.
+              // Composing characters don't need case folding.
+              ga_concat_len(curstr, s + c_len, (size_t)(char_len - c_len));
+            }
+          } else {
+            ga_concat_len(curstr, s, (size_t)char_len);
+          }
+        }
+
+        if (!new_in_keyword) {
+          ga_append(curstr, NL);
+          numlines++;
+        }
+
+        if (!new_in_keyword || (new_in_keyword && !in_keyword)) {
+          // create a new mapping entry from the xdiff mmfile back to
+          // original line/col.
+          linemap_entry_T linemap_entry = {
+            .lineoff = off,
+            .byte_start = (colnr_T)(s - curline),
+            .num_bytes = char_len,
+          };
+          GA_APPEND(linemap_entry_T, &linemap[i], linemap_entry);
+        } else {
+          // Still inside a keyword. Just increment byte count but
+          // don't make a new entry.
+          // linemap always has at least one entry here
+          ((linemap_entry_T *)linemap[i].ga_data)[linemap[i].ga_len - 1].num_bytes += char_len;
+        }
+
+        in_keyword = new_in_keyword;
+        s += char_len;
+      }
+      if (in_keyword) {
+        ga_append(curstr, NL);
+        numlines++;
+      }
+
+      if ((diff_flags & DIFF_IWHITEEOL) || (diff_flags & DIFF_IWHITE)) {
+        // Need to trim trailing whitespace. Do this simply by
+        // resetting arrays back to before we encountered them.
+        if (eol_ga_len != -1) {
+          curstr->ga_len = eol_ga_len;
+          linemap[i].ga_len = eol_linemap_len;
+          numlines = eol_numlines;
+        }
+      }
+
+      if (!(diff_flags & DIFF_IWHITEALL)) {
+        // Add an empty line token mapped to the end-of-line in the
+        // original file. This helps diff newline differences among
+        // files, which will be visualized when using 'list' as the eol
+        // listchar will be highlighted.
+        ga_append(curstr, NL);
+        numlines++;
+
+        linemap_entry_T linemap_entry = {
+          .lineoff = off,
+          .byte_start = (colnr_T)(s - curline),
+          .num_bytes = sizeof(NL),
+        };
+        GA_APPEND(linemap_entry_T, &linemap[i], linemap_entry);
+      }
+    }
+
+    if (file1_idx != i) {
+      dio.dio_new.din_mmfile.ptr = (char *)curstr->ga_data;
+      dio.dio_new.din_mmfile.size = curstr->ga_len;
+    } else {
+      dio.dio_orig.din_mmfile.ptr = (char *)curstr->ga_data;
+      dio.dio_orig.din_mmfile.size = curstr->ga_len;
+    }
+    if (file1_idx != i) {
+      // Perform diff with first file and read the results
+      int diff_status = diff_file_internal(&dio);
+      if (diff_status == FAIL) {
+        goto done;
+      }
+
+      diff_read(0, i, &dio);
+      clear_diffout(&dio.dio_diff);
+    }
+  }
+  diff_T *new_diff = curtab->tp_first_diff;
+
+  if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1) {
+    diff_refine_inline_char_highlight(new_diff, linemap, file1_idx);
+  }
+
+  // After the diff, use the linemap to obtain the original line/col of the
+  // changes and cache them in dp.
+  dp->df_changes.ga_len = 0;  // this should already be zero
+  for (; new_diff != NULL; new_diff = new_diff->df_next) {
+    diffline_change_T change = { 0 };
+    for (int i = 0; i < DB_COUNT; i++) {
+      if (new_diff->df_lnum[i] == 0) {
+        continue;
+      }
+      linenr_T diff_lnum = new_diff->df_lnum[i] - 1;  // use zero-index
+      linenr_T diff_lnum_end = diff_lnum + new_diff->df_count[i];
+
+      if (diff_lnum >= linemap[i].ga_len) {
+        change.dc_start[i] = MAXCOL;
+        change.dc_start_lnum_off[i] = INT_MAX;
+      } else {
+        change.dc_start[i] = ((linemap_entry_T *)linemap[i].ga_data)[diff_lnum].byte_start;
+        change.dc_start_lnum_off[i] = ((linemap_entry_T *)linemap[i].ga_data)[diff_lnum].lineoff;
+      }
+
+      if (diff_lnum == diff_lnum_end) {
+        change.dc_end[i] = change.dc_start[i];
+        change.dc_end_lnum_off[i] = change.dc_start_lnum_off[i];
+      } else if (diff_lnum_end - 1 >= linemap[i].ga_len) {
+        change.dc_end[i] = MAXCOL;
+        change.dc_end_lnum_off[i] = INT_MAX;
+      } else {
+        change.dc_end[i] = ((linemap_entry_T *)linemap[i].ga_data)[diff_lnum_end - 1].byte_start +
+                           ((linemap_entry_T *)linemap[i].ga_data)[diff_lnum_end - 1].num_bytes;
+        change.dc_end_lnum_off[i] = ((linemap_entry_T *)linemap[i].ga_data)[diff_lnum_end -
+                                                                            1].lineoff;
+      }
+    }
+    GA_APPEND(diffline_change_T, &dp->df_changes, change);
+  }
+
+done:
+  diff_algorithm = save_diff_algorithm;
+
+  dp->has_changes = true;
+
+  diff_clear(curtab);
+  curtab->tp_first_diff = orig_diff;
+
+  ga_clear(&file1_str);
+  ga_clear(&file2_str);
+  // No need to clear dio.dio_orig/dio_new because they were referencing
+  // strings that are now cleared.
+  clear_diffout(&dio.dio_diff);
+  for (int i = 0; i < DB_COUNT; i++) {
+    ga_clear(&linemap[i]);
+  }
+}
+
+/// Find the difference within a changed line.
+/// Returns true if the line was added, no other buffer has it.
+bool diff_find_change(win_T *wp, linenr_T lnum, diffline_T *diffline)
+  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
+{
+  int idx = diff_buf_idx(wp->w_buffer, curtab);
+  if (idx == DB_COUNT) {  // cannot happen
+    return false;
+  }
+
+  // search for a change that includes "lnum" in the list of diffblocks.
+  diff_T *dp;
+  FOR_ALL_DIFFBLOCKS_IN_TAB(curtab, dp) {
+    if (lnum <= dp->df_lnum[idx] + dp->df_count[idx]) {
+      break;
+    }
+  }
+  if (dp && dp->is_linematched) {
+    while (dp && dp->df_next
+           && lnum == dp->df_count[idx] + dp->df_lnum[idx]
+           && dp->df_next->df_lnum[idx] == lnum) {
+      dp = dp->df_next;
+    }
+  }
+  if (dp == NULL || diff_check_sanity(curtab, dp) == FAIL) {
+    return false;
+  }
+
+  if (lnum - dp->df_lnum[idx] > INT_MAX) {
+    // Integer overflow protection
+    return false;
+  }
+  int off = lnum - dp->df_lnum[idx];
+
+  if (!(diff_flags & ALL_INLINE_DIFF)) {
+    // Use simple algorithm
+    int change_start = MAXCOL;  // first col of changed area
+    int change_end = -1;        // last col of changed area
+
+    int ret = diff_find_change_simple(wp, lnum, dp, idx, &change_start, &change_end);
+
+    // convert from inclusive end to exclusive end per diffline's contract
+    change_end += 1;
+
+    // Create a mock diffline struct. We always only have one so no need to
+    // allocate memory.
+    idx = diff_buf_idx(wp->w_buffer, curtab);
+    CLEAR_FIELD(simple_diffline_change);
+    diffline->changes = &simple_diffline_change;
+    diffline->num_changes = 1;
+    diffline->bufidx = idx;
+    diffline->lineoff = lnum - dp->df_lnum[idx];
+
+    simple_diffline_change.dc_start[idx] = change_start;
+    simple_diffline_change.dc_end[idx] = change_end;
+    simple_diffline_change.dc_start_lnum_off[idx] = off;
+    simple_diffline_change.dc_end_lnum_off[idx] = off;
+    return ret;
+  }
+
+  // Use inline diff algorithm.
+  // The diff changes are usually cached so we check that first.
+  if (!dp->has_changes) {
+    diff_find_change_inline_diff(dp);
+  }
+
+  garray_T *changes = &dp->df_changes;
+
+  // Use linear search to find the first change for this line. We could
+  // optimize this to use binary search, but there should usually be a
+  // limited number of inline changes per diff block, and limited number of
+  // diff blocks shown on screen, so it is not necessary.
+  int num_changes = 0;
+  int change_idx = 0;
+  diffline->changes = NULL;
+  for (change_idx = 0; change_idx < changes->ga_len; change_idx++) {
+    diffline_change_T *change =
+      &((diffline_change_T *)dp->df_changes.ga_data)[change_idx];
+    if (change->dc_end_lnum_off[idx] < off) {
+      continue;
+    }
+    if (change->dc_start_lnum_off[idx] > off) {
+      break;
+    }
+    if (diffline->changes == NULL) {
+      diffline->changes = change;
+    }
+    num_changes++;
+  }
+  diffline->num_changes = num_changes;
+  diffline->bufidx = idx;
+  diffline->lineoff = off;
+
+  // Detect simple cases of added lines in the end within a diff block. This
+  // has to be the last change of this diff block, and all other buffers are
+  // considering this to be an addition past their last line. Other scenarios
+  // will be considered a changed line instead.
+  bool added = false;
+  if (num_changes == 1 && change_idx == dp->df_changes.ga_len) {
+    added = true;
+    for (int i = 0; i < DB_COUNT; i++) {
+      if (idx == i) {
+        continue;
+      }
+      if (curtab->tp_diffbuf[i] == NULL) {
+        continue;
+      }
+      diffline_change_T *change =
+        &((diffline_change_T *)dp->df_changes.ga_data)[dp->df_changes.ga_len - 1];
+      if (change->dc_start_lnum_off[i] != INT_MAX) {
+        added = false;
+        break;
+      }
+    }
+  }
+  return added;
+}
+
 /// Check that line "lnum" is not close to a diff block, this line should
 /// be in a fold.
 ///
@@ -3499,20 +4065,29 @@ void f_diff_filler(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
 /// "diff_hlID()" function
 void f_diff_hlID(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
 {
-  linenr_T lnum = tv_get_lnum(argvars);
   static linenr_T prev_lnum = 0;
   static varnumber_T changedtick = 0;
   static int fnum = 0;
+  static int prev_diff_flags = 0;
   static int change_start = 0;
   static int change_end = 0;
   static hlf_T hlID = (hlf_T)0;
 
+  diffline_T diffline = { 0 };
+  // Remember the results if using simple since it's recalculated per
+  // call. Otherwise just call diff_find_change() every time since
+  // the result is cached internally.
+  const bool cache_results = !(diff_flags & ALL_INLINE_DIFF);
+
+  linenr_T lnum = tv_get_lnum(argvars);
   if (lnum < 0) {       // ignore type error in {lnum} arg
     lnum = 0;
   }
-  if (lnum != prev_lnum
+  if (!cache_results
+      || lnum != prev_lnum
       || changedtick != buf_get_changedtick(curbuf)
-      || fnum != curbuf->b_fnum) {
+      || fnum != curbuf->b_fnum
+      || diff_flags != prev_diff_flags) {
     // New line, buffer, change: need to get the values.
     int linestatus = 0;
     int filler_lines = diff_check_with_linestatus(curwin, lnum, &linestatus);
@@ -3520,10 +4095,14 @@ void f_diff_hlID(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
       if (filler_lines == -1 || linestatus == -1) {
         change_start = MAXCOL;
         change_end = -1;
-        if (diff_find_change(curwin, lnum, &change_start, &change_end)) {
+        if (diff_find_change(curwin, lnum, &diffline)) {
           hlID = HLF_ADD;               // added line
         } else {
           hlID = HLF_CHD;               // changed line
+          if (diffline.num_changes > 0 && cache_results) {
+            change_start = diffline.changes[0].dc_start[diffline.bufidx];
+            change_end = diffline.changes[0].dc_end[diffline.bufidx];
+          }
         }
       } else {
         hlID = HLF_ADD;         // added line
@@ -3531,17 +4110,37 @@ void f_diff_hlID(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
     } else {
       hlID = (hlf_T)0;
     }
-    prev_lnum = lnum;
-    changedtick = buf_get_changedtick(curbuf);
-    fnum = curbuf->b_fnum;
+
+    if (cache_results) {
+      prev_lnum = lnum;
+      changedtick = buf_get_changedtick(curbuf);
+      fnum = curbuf->b_fnum;
+      prev_diff_flags = diff_flags;
+    }
   }
 
   if (hlID == HLF_CHD || hlID == HLF_TXD) {
     int col = (int)tv_get_number(&argvars[1]) - 1;  // Ignore type error in {col}.
-    if (col >= change_start && col <= change_end) {
-      hlID = HLF_TXD;  // Changed text.
+    if (cache_results) {
+      if (col >= change_start && col < change_end) {
+        hlID = HLF_TXD;  // Changed text.
+      } else {
+        hlID = HLF_CHD;  // Changed line.
+      }
     } else {
-      hlID = HLF_CHD;  // Changed line.
+      hlID = HLF_CHD;
+      for (int i = 0; i < diffline.num_changes; i++) {
+        bool added = diff_change_parse(&diffline, &diffline.changes[i],
+                                       &change_start, &change_end);
+        if (col >= change_start && col < change_end) {
+          hlID = added ? HLF_TXA : HLF_TXD;
+          break;
+        }
+        if (col < change_start) {
+          // the remaining changes are past this column and not relevant
+          break;
+        }
+      }
     }
   }
   rettv->vval.v_number = hlID;
author	zeertzjq <zeertzjq@outlook.com>	2025-03-27 09:24:26 +0800
committer	zeertzjq <zeertzjq@outlook.com>	2025-03-28 14:45:01 +0800
commit	2331c52affe64070ad59c0ef63ddcc8f7ca41781 (patch)
tree	a2438a9e9cecad08f85890c959a02523a1861260 /src/nvim/diff.c
parent	ae98d0a560b08d901ee9aae85df634de0ae3fe0a (diff)
download	rneovim-2331c52affe64070ad59c0ef63ddcc8f7ca41781.tar.gz rneovim-2331c52affe64070ad59c0ef63ddcc8f7ca41781.tar.bz2 rneovim-2331c52affe64070ad59c0ef63ddcc8f7ca41781.zip