7 files changed, 879 insertions, 81 deletions
diff --git a/src/nvim/buffer_defs.h b/src/nvim/buffer_defs.h
index d8edd4b2d0..3629199f9a 100644
--- a/src/nvim/buffer_defs.h
+++ b/src/nvim/buffer_defs.h
@@ -894,6 +894,8 @@ struct diffblock_S {
   diff_T *df_next;
   linenr_T df_lnum[DB_COUNT];           // line number in buffer
   linenr_T df_count[DB_COUNT];          // nr of inserted/changed lines
+  bool is_linematched;  // has the linematch algorithm ran on this diff hunk to divide it into
+                        // smaller diff hunks?
 };
 
 #define SNAP_HELP_IDX   0
diff --git a/src/nvim/diff.c b/src/nvim/diff.c
index 964db599a3..503e546562 100644
--- a/src/nvim/diff.c
+++ b/src/nvim/diff.c
@@ -27,6 +27,7 @@
 #include "nvim/fileio.h"
 #include "nvim/fold.h"
 #include "nvim/garray.h"
+#include "nvim/linematch.h"
 #include "nvim/mark.h"
 #include "nvim/mbyte.h"
 #include "nvim/memline.h"
@@ -62,10 +63,12 @@ static bool diff_need_update = false;  // ex_diffupdate needs to be called
 #define DIFF_INTERNAL   0x200   // use internal xdiff algorithm
 #define DIFF_CLOSE_OFF  0x400   // diffoff when closing window
 #define DIFF_FOLLOWWRAP 0x800   // follow the wrap option
+#define DIFF_LINEMATCH  0x1000  // match most similar lines within diff
 #define ALL_WHITE_DIFF (DIFF_IWHITE | DIFF_IWHITEALL | DIFF_IWHITEEOL)
 static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF;
 
 static long diff_algorithm = 0;
+static int linematch_lines = 0;
 
 #define LBUFLEN 50               // length of line in diff file
 
@@ -362,7 +365,7 @@ static void diff_mark_adjust_tp(tabpage_T *tp, int idx, linenr_T line1, linenr_T
     if (last >= line1 - 1) {
       // 6. change below line2: only adjust for amount_after; also when
       // "deleted" became zero when deleted all lines between two diffs.
-      if (dp->df_lnum[idx] - (deleted + inserted != 0) > line2) {
+      if (dp->df_lnum[idx] - (deleted + inserted != 0) > line2 - dp->is_linematched) {
         if (amount_after == 0) {
           // nothing left to change
           break;
@@ -454,7 +457,7 @@ static void diff_mark_adjust_tp(tabpage_T *tp, int idx, linenr_T line1, linenr_T
     }
 
     // check if this block touches the previous one, may merge them.
-    if ((dprev != NULL)
+    if ((dprev != NULL) && !dp->is_linematched
         && (dprev->df_lnum[idx] + dprev->df_count[idx] == dp->df_lnum[idx])) {
       int i;
       for (i = 0; i < DB_COUNT; i++) {
@@ -513,6 +516,7 @@ static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp)
 {
   diff_T *dnew = xmalloc(sizeof(*dnew));
 
+  dnew->is_linematched = false;
   dnew->df_next = dp;
   if (dprev == NULL) {
     tp->tp_first_diff = dnew;
@@ -727,12 +731,16 @@ static void clear_diffout(diffout_T *dout)
 /// @param din
 ///
 /// @return FAIL for failure.
-static int diff_write_buffer(buf_T *buf, mmfile_t *m)
+static int diff_write_buffer(buf_T *buf, mmfile_t *m, linenr_T start, linenr_T end)
 {
   size_t len = 0;
 
+  if (end < 0) {
+    end = buf->b_ml.ml_line_count;
+  }
+
   // xdiff requires one big block of memory with all the text.
-  for (linenr_T lnum = 1; lnum <= buf->b_ml.ml_line_count; lnum++) {
+  for (linenr_T lnum = start; lnum <= end; lnum++) {
     len += strlen(ml_get_buf(buf, lnum, false)) + 1;
   }
   char *ptr = try_malloc(len);
@@ -753,7 +761,7 @@ static int diff_write_buffer(buf_T *buf, mmfile_t *m)
   m->size = (long)len;
 
   len = 0;
-  for (linenr_T lnum = 1; lnum <= buf->b_ml.ml_line_count; lnum++) {
+  for (linenr_T lnum = start; lnum <= end; lnum++) {
     char *s = ml_get_buf(buf, lnum, false);
     if (diff_flags & DIFF_ICASE) {
       while (*s != NUL) {
@@ -792,7 +800,7 @@ static int diff_write_buffer(buf_T *buf, mmfile_t *m)
 static int diff_write(buf_T *buf, diffin_T *din)
 {
   if (din->din_fname == NULL) {
-    return diff_write_buffer(buf, &din->din_mmfile);
+    return diff_write_buffer(buf, &din->din_mmfile, 1, -1);
   }
 
   // Always use 'fileformat' set to "unix".
@@ -1784,6 +1792,294 @@ void diff_clear(tabpage_T *tp)
   tp->tp_first_diff = NULL;
 }
 
+///
+/// return true if the options are set to use diff linematch
+///
+bool diff_linematch(diff_T *dp)
+{
+  if (!(diff_flags & DIFF_LINEMATCH)) {
+    return false;
+  }
+  // are there more than three diff buffers?
+  int tsize = 0;
+  for (int i = 0; i < DB_COUNT; i++) {
+    if (curtab->tp_diffbuf[i] != NULL) {
+      // for the rare case (bug?) that the count of a diff block is negative, do
+      // not run the algorithm because this will try to allocate a negative
+      // amount of space and crash
+      if (dp->df_count[i] < 0) {
+        return false;
+      }
+      tsize += dp->df_count[i];
+    }
+  }
+  // avoid allocating a huge array because it will lag
+  return tsize <= linematch_lines;
+}
+
+static int get_max_diff_length(const diff_T *dp)
+{
+  int maxlength = 0;
+  for (int k = 0; k < DB_COUNT; k++) {
+    if (curtab->tp_diffbuf[k] != NULL) {
+      if (dp->df_count[k] > maxlength) {
+        maxlength = dp->df_count[k];
+      }
+    }
+  }
+  return maxlength;
+}
+
+static void find_top_diff_block(diff_T **thistopdiff, diff_T **nextblockblock, int fromidx,
+                                int topline)
+{
+  diff_T *topdiff = NULL;
+  diff_T *localtopdiff = NULL;
+  int topdiffchange = 0;
+
+  for (topdiff = curtab->tp_first_diff; topdiff != NULL; topdiff = topdiff->df_next) {
+    // set the top of the current overlapping diff block set as we
+    // iterate through all of the sets of overlapping diff blocks
+    if (!localtopdiff || topdiffchange) {
+      localtopdiff = topdiff;
+      topdiffchange = 0;
+    }
+
+    // check if the fromwin topline is matched by the current diff. if so, set it to the top of the diff block
+    if (topline >= topdiff->df_lnum[fromidx] && topline <=
+        (topdiff->df_lnum[fromidx] + topdiff->df_count[fromidx])) {
+      // this line is inside the current diff block, so we will save the
+      // top block of the set of blocks to refer to later
+      if ((*thistopdiff) == NULL) {
+        (*thistopdiff) = localtopdiff;
+      }
+    }
+
+    // check if the next set of overlapping diff blocks is next
+    if (!(topdiff->df_next && (topdiff->df_next->df_lnum[fromidx] ==
+                               (topdiff->df_lnum[fromidx] + topdiff->df_count[fromidx])))) {
+      // mark that the next diff block is belongs to a different set of
+      // overlapping diff blocks
+      topdiffchange = 1;
+
+      // if we already have found that the line number is inside a diff block,
+      // set the marker of the next block and finish the iteration
+      if (*thistopdiff) {
+        (*nextblockblock) = topdiff->df_next;
+        break;
+      }
+    }
+  }
+}
+
+static void count_filler_lines_and_topline(int *curlinenum_to, int *linesfiller,
+                                           const diff_T *thistopdiff, const int toidx,
+                                           int virtual_lines_passed)
+{
+  const diff_T *curdif = thistopdiff;
+  int ch_virtual_lines = 0;
+  int isfiller = 0;
+  while (virtual_lines_passed) {
+    if (ch_virtual_lines) {
+      virtual_lines_passed--;
+      ch_virtual_lines--;
+      if (!isfiller) {
+        (*curlinenum_to)++;
+      } else {
+        (*linesfiller)++;
+      }
+    } else {
+      (*linesfiller) = 0;
+      ch_virtual_lines = get_max_diff_length(curdif);
+      isfiller = (curdif->df_count[toidx] ? 0 : 1);
+      if (isfiller) {
+        while (curdif && curdif->df_next && curdif->df_lnum[toidx] ==
+               curdif->df_next->df_lnum[toidx]
+               && curdif->df_next->df_count[toidx] == 0) {
+          curdif = curdif->df_next;
+          ch_virtual_lines += get_max_diff_length(curdif);
+        }
+      }
+      if (curdif) {
+        curdif = curdif->df_next;
+      }
+    }
+  }
+}
+
+static void calculate_topfill_and_topline(const int fromidx, const int toidx, const
+                                          int from_topline, const int from_topfill, int *topfill,
+                                          linenr_T *topline)
+{
+  // 1. find the position from the top of the diff block, and the start
+  // of the next diff block
+  diff_T *thistopdiff = NULL;
+  diff_T *nextblockblock = NULL;
+  int virtual_lines_passed = 0;
+
+  find_top_diff_block(&thistopdiff, &nextblockblock, fromidx, from_topline);
+
+  // count the virtual lines that have been passed
+
+  diff_T *curdif = thistopdiff;
+  while (curdif && (curdif->df_lnum[fromidx] + curdif->df_count[fromidx])
+         <= from_topline) {
+    virtual_lines_passed += get_max_diff_length(curdif);
+
+    curdif = curdif->df_next;
+  }
+
+  if (curdif != nextblockblock) {
+    virtual_lines_passed += from_topline - curdif->df_lnum[fromidx];
+  }
+  virtual_lines_passed -= from_topfill;
+
+  // count the same amount of virtual lines in the toidx buffer
+  curdif = thistopdiff;
+  int curlinenum_to = thistopdiff->df_lnum[toidx];
+  int linesfiller = 0;
+  count_filler_lines_and_topline(&curlinenum_to, &linesfiller,
+                                 thistopdiff, toidx, virtual_lines_passed);
+
+  // count the number of filler lines that would normally be above this line
+  int maxfiller = 0;
+  for (diff_T *dpfillertest = thistopdiff; dpfillertest != NULL;
+       dpfillertest = dpfillertest->df_next) {
+    if (dpfillertest->df_lnum[toidx] == curlinenum_to) {
+      while (dpfillertest && dpfillertest->df_lnum[toidx] == curlinenum_to) {
+        maxfiller += dpfillertest->df_count[toidx] ? 0 : get_max_diff_length(dpfillertest);
+        dpfillertest = dpfillertest->df_next;
+      }
+      break;
+    }
+  }
+  (*topfill) = maxfiller - linesfiller;
+  (*topline) = curlinenum_to;
+}
+
+static int linematched_filler_lines(diff_T *dp, int idx, linenr_T lnum, int *linestatus)
+{
+  int filler_lines_d1 = 0;
+  while (dp && dp->df_next
+         && lnum == (dp->df_lnum[idx] + dp->df_count[idx])
+         && dp->df_next->df_lnum[idx] == lnum) {
+    if (dp->df_count[idx] == 0) {
+      filler_lines_d1 += get_max_diff_length(dp);
+    }
+    dp = dp->df_next;
+  }
+
+  if (dp->df_count[idx] == 0) {
+    filler_lines_d1 += get_max_diff_length(dp);
+  }
+
+  if (lnum < dp->df_lnum[idx] + dp->df_count[idx]) {
+    int j = 0;
+    for (int i = 0; i < DB_COUNT; i++) {
+      if (curtab->tp_diffbuf[i] != NULL) {
+        if (dp->df_count[i]) {
+          j++;
+        }
+      }
+      // is this an added line or a changed line?
+      if (linestatus) {
+        (*linestatus) = (j == 1) ? -2 : -1;
+      }
+    }
+  }
+  return filler_lines_d1;
+}
+
+// Apply results from the linematch algorithm and apply to 'dp' by splitting it into multiple
+// adjacent diff blocks.
+static void apply_linematch_results(diff_T *dp, size_t decisions_length, const int *decisions)
+{
+  // get the start line number here in each diff buffer, and then increment
+  int line_numbers[DB_COUNT];
+  int outputmap[DB_COUNT];
+  size_t ndiffs = 0;
+  for (int i = 0; i < DB_COUNT; i++) {
+    if (curtab->tp_diffbuf[i] != NULL) {
+      line_numbers[i] = dp->df_lnum[i];
+      dp->df_count[i] = 0;
+
+      // Keep track of the index of the diff buffer we are using here.
+      // We will use this to write the output of the algorithm to
+      // diff_T structs at the correct indexes
+      outputmap[ndiffs] = i;
+      ndiffs++;
+    }
+  }
+
+  // write the diffs starting with the current diff block
+  diff_T *dp_s = dp;
+  for (size_t i = 0; i < decisions_length; i++) {
+    // Don't allocate on first iter since we can reuse the initial diffblock
+    if (i != 0 && (decisions[i - 1] != decisions[i])) {
+      // create new sub diff blocks to segment the original diff block which we
+      // further divided by running the linematch algorithm
+      dp_s = diff_alloc_new(curtab, dp_s, dp_s->df_next);
+      dp_s->is_linematched = true;
+      for (int j = 0; j < DB_COUNT; j++) {
+        if (curtab->tp_diffbuf[j] != NULL) {
+          dp_s->df_lnum[j] = line_numbers[j];
+          dp_s->df_count[j] = 0;
+        }
+      }
+    }
+    for (size_t j = 0; j < ndiffs; j++) {
+      if (decisions[i] & (1 << j)) {
+        // will need to use the map here
+        dp_s->df_count[outputmap[j]]++;
+        line_numbers[outputmap[j]]++;
+      }
+    }
+  }
+  dp->is_linematched = true;
+}
+
+static void run_linematch_algorithm(diff_T *dp)
+{
+  // define buffers for diff algorithm
+  mmfile_t diffbufs_mm[DB_COUNT];
+  const char *diffbufs[DB_COUNT];
+  int diff_length[DB_COUNT];
+  size_t ndiffs = 0;
+  for (int i = 0; i < DB_COUNT; i++) {
+    if (curtab->tp_diffbuf[i] != NULL) {
+      // write the contents of the entire buffer to
+      // diffbufs_mm[diffbuffers_count]
+      diff_write_buffer(curtab->tp_diffbuf[i], &diffbufs_mm[ndiffs],
+                        dp->df_lnum[i], dp->df_lnum[i] + dp->df_count[i] - 1);
+
+      // we want to get the char* to the diff buffer that was just written
+      // we add it to the array of char*, diffbufs
+      diffbufs[ndiffs] = diffbufs_mm[ndiffs].ptr;
+
+      // keep track of the length of this diff block to pass it to the linematch
+      // algorithm
+      diff_length[ndiffs] = dp->df_count[i];
+
+      // increment the amount of diff buffers we are passing to the algorithm
+      ndiffs++;
+    }
+  }
+
+  // we will get the output of the linematch algorithm in the format of an array
+  // of integers (*decisions) and the length of that array (decisions_length)
+  int *decisions = NULL;
+  const bool iwhite = (diff_flags & (DIFF_IWHITEALL | DIFF_IWHITE)) > 0;
+  size_t decisions_length = linematch_nbuffers(diffbufs, diff_length, ndiffs, &decisions, iwhite);
+
+  for (size_t i = 0; i < ndiffs; i++) {
+    XFREE_CLEAR(diffbufs_mm[i].ptr);
+  }
+
+  apply_linematch_results(dp, decisions_length, decisions);
+
+  xfree(decisions);
+}
+
 /// Check diff status for line "lnum" in buffer "buf":
 ///
 /// Returns 0 for nothing special
@@ -1795,9 +2091,10 @@ void diff_clear(tabpage_T *tp)
 ///
 /// @param wp
 /// @param lnum
+/// @param[out] linestatus
 ///
 /// @return diff status.
-int diff_check(win_T *wp, linenr_T lnum)
+int diff_check_with_linestatus(win_T *wp, linenr_T lnum, int *linestatus)
 {
   buf_T *buf = wp->w_buffer;
 
@@ -1840,6 +2137,14 @@ int diff_check(win_T *wp, linenr_T lnum)
     return 0;
   }
 
+  if (!dp->is_linematched && diff_linematch(dp)) {
+    run_linematch_algorithm(dp);
+  }
+
+  if (dp->is_linematched) {
+    return linematched_filler_lines(dp, idx, lnum, linestatus);
+  }
+
   if (lnum < dp->df_lnum[idx] + dp->df_count[idx]) {
     int zero = false;
 
@@ -1894,15 +2199,16 @@ int diff_check(win_T *wp, linenr_T lnum)
 
   // Insert filler lines above the line just below the change.  Will return
   // 0 when this buf had the max count.
-  linenr_T maxcount = 0;
-  for (int i = 0; i < DB_COUNT; i++) {
-    if ((curtab->tp_diffbuf[i] != NULL) && (dp->df_count[i] > maxcount)) {
-      maxcount = dp->df_count[i];
-    }
-  }
+  int maxcount = get_max_diff_length(dp);
   return maxcount - dp->df_count[idx];
 }
 
+/// See diff_check_with_linestatus
+int diff_check(win_T *wp, linenr_T lnum)
+{
+  return diff_check_with_linestatus(wp, lnum, NULL);
+}
+
 /// Compare two entries in diff "dp" and return true if they are equal.
 ///
 /// @param  dp    diff
@@ -2062,46 +2368,51 @@ void diff_set_topline(win_T *fromwin, win_T *towin)
     towin->w_topline = lnum + (dp->df_lnum[toidx] - dp->df_lnum[fromidx]);
 
     if (lnum >= dp->df_lnum[fromidx]) {
-      // Inside a change: compute filler lines. With three or more
-      // buffers we need to know the largest count.
-      linenr_T max_count = 0;
-
-      for (int i = 0; i < DB_COUNT; i++) {
-        if ((curtab->tp_diffbuf[i] != NULL) && (max_count < dp->df_count[i])) {
-          max_count = dp->df_count[i];
-        }
-      }
+      if (diff_flags & DIFF_LINEMATCH) {
+        calculate_topfill_and_topline(fromidx, toidx, fromwin->w_topline,
+                                      fromwin->w_topfill, &towin->w_topfill, &towin->w_topline);
+      } else {
+        // Inside a change: compute filler lines. With three or more
+        // buffers we need to know the largest count.
+        linenr_T max_count = 0;
 
-      if (dp->df_count[toidx] == dp->df_count[fromidx]) {
-        // same number of lines: use same filler count
-        towin->w_topfill = fromwin->w_topfill;
-      } else if (dp->df_count[toidx] > dp->df_count[fromidx]) {
-        if (lnum == dp->df_lnum[fromidx] + dp->df_count[fromidx]) {
-          // more lines in towin and fromwin doesn't show diff
-          // lines, only filler lines
-          if (max_count - fromwin->w_topfill >= dp->df_count[toidx]) {
-            // towin also only shows filler lines
-            towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx];
-            towin->w_topfill = fromwin->w_topfill;
-          } else {
-            // towin still has some diff lines to show
-            towin->w_topline = dp->df_lnum[toidx]
-                               + max_count - fromwin->w_topfill;
+        for (int i = 0; i < DB_COUNT; i++) {
+          if ((curtab->tp_diffbuf[i] != NULL) && (max_count < dp->df_count[i])) {
+            max_count = dp->df_count[i];
           }
         }
-      } else if (towin->w_topline >= dp->df_lnum[toidx]
-                 + dp->df_count[toidx]) {
-        // less lines in towin and no diff lines to show: compute
-        // filler lines
-        towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx];
 
-        if (diff_flags & DIFF_FILLER) {
+        if (dp->df_count[toidx] == dp->df_count[fromidx]) {
+          // same number of lines: use same filler count
+          towin->w_topfill = fromwin->w_topfill;
+        } else if (dp->df_count[toidx] > dp->df_count[fromidx]) {
           if (lnum == dp->df_lnum[fromidx] + dp->df_count[fromidx]) {
-            // fromwin is also out of diff lines
-            towin->w_topfill = fromwin->w_topfill;
-          } else {
-            // fromwin has some diff lines
-            towin->w_topfill = dp->df_lnum[fromidx] + max_count - lnum;
+            // more lines in towin and fromwin doesn't show diff
+            // lines, only filler lines
+            if (max_count - fromwin->w_topfill >= dp->df_count[toidx]) {
+              // towin also only shows filler lines
+              towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx];
+              towin->w_topfill = fromwin->w_topfill;
+            } else {
+              // towin still has some diff lines to show
+              towin->w_topline = dp->df_lnum[toidx]
+                                 + max_count - fromwin->w_topfill;
+            }
+          }
+        } else if (towin->w_topline >= dp->df_lnum[toidx]
+                   + dp->df_count[toidx]) {
+          // less lines in towin and no diff lines to show: compute
+          // filler lines
+          towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx];
+
+          if (diff_flags & DIFF_FILLER) {
+            if (lnum == dp->df_lnum[fromidx] + dp->df_count[fromidx]) {
+              // fromwin is also out of diff lines
+              towin->w_topfill = fromwin->w_topfill;
+            } else {
+              // fromwin has some diff lines
+              towin->w_topfill = dp->df_lnum[fromidx] + max_count - lnum;
+            }
           }
         }
       }
@@ -2136,6 +2447,7 @@ void diff_set_topline(win_T *fromwin, win_T *towin)
 int diffopt_changed(void)
 {
   int diff_context_new = 6;
+  int linematch_lines_new = 0;
   int diff_flags_new = 0;
   int diff_foldcolumn_new = 2;
   long diff_algorithm_new = 0;
@@ -2205,6 +2517,10 @@ int diffopt_changed(void)
       } else {
         return FAIL;
       }
+    } else if ((STRNCMP(p, "linematch:", 10) == 0) && ascii_isdigit(p[10])) {
+      p += 10;
+      linematch_lines_new = getdigits_int(&p, false, linematch_lines_new);
+      diff_flags_new |= DIFF_LINEMATCH;
     }
 
     if ((*p != ',') && (*p != NUL)) {
@@ -2233,6 +2549,7 @@ int diffopt_changed(void)
 
   diff_flags = diff_flags_new;
   diff_context = diff_context_new == 0 ? 1 : diff_context_new;
+  linematch_lines = linematch_lines_new;
   diff_foldcolumn = diff_foldcolumn_new;
   diff_algorithm = diff_algorithm_new;
 
@@ -2300,6 +2617,13 @@ bool diff_find_change(win_T *wp, linenr_T lnum, int *startp, int *endp)
       break;
     }
   }
+  if (dp->is_linematched) {
+    while (dp && dp->df_next
+           && lnum == dp->df_count[idx] + dp->df_lnum[idx]
+           && dp->df_next->df_lnum[idx] == lnum) {
+      dp = dp->df_next;
+    }
+  }
 
   if ((dp == NULL) || (diff_check_sanity(curtab, dp) == FAIL)) {
     xfree(line_org);
@@ -2674,6 +2998,17 @@ static void diffgetput(const int addr_count, const int idx_cur, const int idx_fr
   diff_T *dprev = NULL;
 
   for (diff_T *dp = curtab->tp_first_diff; dp != NULL;) {
+    if (!addr_count) {
+      // handle the case with adjacent diff blocks
+      while (dp->is_linematched
+             && dp->df_next
+             && dp->df_next->df_lnum[idx_cur] == dp->df_lnum[idx_cur] + dp->df_count[idx_cur]
+             && dp->df_next->df_lnum[idx_cur] == line1 + off + 1) {
+        dprev = dp;
+        dp = dp->df_next;
+      }
+    }
+
     if (dp->df_lnum[idx_cur] > line2 + off) {
       // past the range that was specified
       break;
diff --git a/src/nvim/drawline.c b/src/nvim/drawline.c
index 08b6fd89af..cb7d85a467 100644
--- a/src/nvim/drawline.c
+++ b/src/nvim/drawline.c
@@ -804,9 +804,10 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool nochange,
 
   int bg_attr = win_bg_attr(wp);
 
-  filler_lines = diff_check(wp, lnum);
-  if (filler_lines < 0) {
-    if (filler_lines == -1) {
+  int linestatus = 0;
+  filler_lines = diff_check_with_linestatus(wp, lnum, &linestatus);
+  if (filler_lines < 0 || linestatus < 0) {
+    if (filler_lines == -1 || linestatus == -1) {
       if (diff_find_change(wp, lnum, &change_start, &change_end)) {
         diff_hlf = HLF_ADD;             // added line
       } else if (change_start == 0) {
@@ -817,7 +818,9 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool nochange,
     } else {
       diff_hlf = HLF_ADD;               // added line
     }
-    filler_lines = 0;
+    if (linestatus == 0) {
+      filler_lines = 0;
+    }
     area_highlighting = true;
   }
   VirtLines virt_lines = KV_INITIAL_VALUE;
diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c
index e676e2e656..0e3de29cce 100644
--- a/src/nvim/eval/funcs.c
+++ b/src/nvim/eval/funcs.c
@@ -1573,9 +1573,10 @@ static void f_diff_hlID(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
       || changedtick != buf_get_changedtick(curbuf)
       || fnum != curbuf->b_fnum) {
     // New line, buffer, change: need to get the values.
-    int filler_lines = diff_check(curwin, lnum);
-    if (filler_lines < 0) {
-      if (filler_lines == -1) {
+    int linestatus = 0;
+    int filler_lines = diff_check_with_linestatus(curwin, lnum, &linestatus);
+    if (filler_lines < 0 || linestatus < 0) {
+      if (filler_lines == -1 || linestatus == -1) {
         change_start = MAXCOL;
         change_end = -1;
         if (diff_find_change(curwin, lnum, &change_start, &change_end)) {
diff --git a/src/nvim/linematch.c b/src/nvim/linematch.c
new file mode 100644
index 0000000000..ef63bd2321
--- /dev/null
+++ b/src/nvim/linematch.c
@@ -0,0 +1,377 @@
+#include "nvim/linematch.h"
+#include "nvim/memory.h"
+#include "nvim/vim.h"
+
+// struct for running the diff linematch algorithm
+typedef struct {
+  int *df_decision;  // to keep track of this path traveled
+  int df_lev_score;  // to keep track of the total score of this path
+  size_t df_path_idx;   // current index of this path
+} diffcmppath_T;
+
+#define LN_MAX_BUFS 8
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "linematch.c.generated.h"
+#endif
+
+static size_t line_len(const char *s)
+{
+  char *end = strchr(s, '\n');
+  if (end) {
+    return (size_t)(end - s);
+  }
+  return STRLEN(s);
+}
+
+/// Same as matching_chars but ignore whitespace
+///
+/// @param s1
+/// @param s2
+static int matching_chars_iwhite(const char *s1, const char *s2)
+{
+  // the newly processed strings that will be compared
+  // delete the white space characters, and/or replace all upper case with lower
+  char *strsproc[2];
+  const char *strsorig[2] = { s1, s2 };
+  for (int k = 0; k < 2; k++) {
+    size_t d = 0;
+    size_t i = 0;
+    size_t slen = line_len(strsorig[k]);
+    strsproc[k] = xmalloc((slen + 1) * sizeof(char));
+    while (d + i < slen) {
+      char e = strsorig[k][i + d];
+      if (e != ' ' && e != '\t') {
+        strsproc[k][i] = e;
+        i++;
+      } else {
+        d++;
+      }
+    }
+    strsproc[k][i] = '\0';
+  }
+  int matching = matching_chars(strsproc[0], strsproc[1]);
+  xfree(strsproc[0]);
+  xfree(strsproc[1]);
+  return matching;
+}
+
+/// update the path of a point in the diff linematch algorithm
+/// @param diffcmppath
+/// @param score
+/// @param to
+/// @param from
+/// @param choice
+static void update_path_flat(diffcmppath_T *diffcmppath, int score, size_t to, size_t from,
+                             const int choice)
+{
+  size_t path_idx = diffcmppath[from].df_path_idx;
+
+  for (size_t k = 0; k < path_idx; k++) {
+    diffcmppath[to].df_decision[k] = diffcmppath[from].df_decision[k];
+  }
+
+  diffcmppath[to].df_decision[path_idx] = choice;
+  diffcmppath[to].df_lev_score = score;
+  diffcmppath[to].df_path_idx = path_idx + 1;
+}
+
+#define MATCH_CHAR_MAX_LEN 500
+
+/// Return matching characters between "s1" and "s2" whilst respecting sequence order.
+/// Consider the case of two strings 'AAACCC' and 'CCCAAA', the
+/// return value from this function will be 3, either to match
+/// the 3 C's, or the 3 A's.
+///
+/// Examples:
+///   matching_chars("aabc", "acba")               -> 2  // 'a' and 'b' in common
+///   matching_chars("123hello567", "he123ll567o") -> 8  // '123', 'll' and '567' in common
+///   matching_chars("abcdefg", "gfedcba")         -> 1  // all characters in common,
+///                                                      // but only at most 1 in sequence
+///
+/// @param s1
+/// @param s2
+static int matching_chars(const char *s1, const char *s2)
+{
+  size_t s1len = MIN(MATCH_CHAR_MAX_LEN, line_len(s1));
+  size_t s2len = MIN(MATCH_CHAR_MAX_LEN, line_len(s2));
+  int matrix[2][MATCH_CHAR_MAX_LEN] = { 0 };
+  bool icur = 1;  // save space by storing only two rows for i axis
+  for (size_t i = 0; i < s1len; i++) {
+    icur = !icur;
+    int *e1 = matrix[icur];
+    int *e2 = matrix[!icur];
+    for (size_t j = 0; j < s2len; j++) {
+      // skip char in s1
+      if (e2[j + 1] > e1[j + 1]) {
+        e1[j + 1] = e2[j + 1];
+      }
+      // skip char in s2
+      if (e1[j] > e1[j + 1]) {
+        e1[j + 1] = e1[j];
+      }
+      // compare char in s1 and s2
+      if ((s1[i] == s2[j]) && (e2[j] + 1) > e1[j + 1]) {
+        e1[j + 1] = e2[j] + 1;
+      }
+    }
+  }
+  return matrix[icur][s2len];
+}
+
+/// count the matching characters between a variable number of strings "sp"
+/// mark the strings that have already been compared to extract them later
+/// without re-running the character match counting.
+/// @param sp
+/// @param fomvals
+/// @param n
+static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite)
+{
+  int matched_chars = 0;
+  int matched = 0;
+  for (size_t i = 0; i < n; i++) {
+    for (size_t j = i + 1; j < n; j++) {
+      if (sp[i] != NULL && sp[j] != NULL) {
+        matched++;
+        // TODO(lewis6991): handle whitespace ignoring higher up in the stack
+        matched_chars += iwhite ? matching_chars_iwhite(sp[i], sp[j])
+                                : matching_chars(sp[i], sp[j]);
+      }
+    }
+  }
+
+  // prioritize a match of 3 (or more lines) equally to a match of 2 lines
+  if (matched >= 2) {
+    matched_chars *= 2;
+    matched_chars /= matched;
+  }
+
+  return matched_chars;
+}
+
+void fastforward_buf_to_lnum(const char **s, size_t lnum)
+{
+  assert(lnum > 0);
+  for (size_t j = 0; j < lnum - 1; j++) {
+    *s = strchr(*s, '\n');
+    (*s)++;
+  }
+}
+
+/// try all the different ways to compare these lines and use the one that
+/// results in the most matching characters
+/// @param df_iters
+/// @param paths
+/// @param npaths
+/// @param path_idx
+/// @param choice
+/// @param diffcmppath
+/// @param diff_len
+/// @param ndiffs
+/// @param diff_blk
+static void try_possible_paths(const int *df_iters, const size_t *paths, const int npaths,
+                               const int path_idx, int *choice, diffcmppath_T *diffcmppath,
+                               const int *diff_len, const size_t ndiffs, const char **diff_blk,
+                               bool iwhite)
+{
+  if (path_idx == npaths) {
+    if ((*choice) > 0) {
+      int from_vals[LN_MAX_BUFS];
+      const int *to_vals = df_iters;
+      const char *current_lines[LN_MAX_BUFS];
+      for (size_t k = 0; k < ndiffs; k++) {
+        from_vals[k] = df_iters[k];
+        // get the index at all of the places
+        if ((*choice) & (1 << k)) {
+          from_vals[k]--;
+          const char *p = diff_blk[k];
+          fastforward_buf_to_lnum(&p, (size_t)df_iters[k]);
+          current_lines[k] = p;
+        } else {
+          current_lines[k] = NULL;
+        }
+      }
+      size_t unwrapped_idx_from = unwrap_indexes(from_vals, diff_len, ndiffs);
+      size_t unwrapped_idx_to = unwrap_indexes(to_vals, diff_len, ndiffs);
+      int matched_chars = count_n_matched_chars(current_lines, ndiffs, iwhite);
+      int score = diffcmppath[unwrapped_idx_from].df_lev_score + matched_chars;
+      if (score > diffcmppath[unwrapped_idx_to].df_lev_score) {
+        update_path_flat(diffcmppath, score, unwrapped_idx_to, unwrapped_idx_from, *choice);
+      }
+    } else {
+      // initialize the 0, 0, 0 ... choice
+      size_t i = 0;
+      while (i < ndiffs && df_iters[i] == 0) {
+        i++;
+        if (i == ndiffs) {
+          diffcmppath[0].df_lev_score = 0;
+          diffcmppath[0].df_path_idx = 0;
+        }
+      }
+    }
+    return;
+  }
+  size_t bit_place = paths[path_idx];
+  *(choice) |= (1 << bit_place);  // set it to 1
+  try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice,
+                     diffcmppath, diff_len, ndiffs, diff_blk, iwhite);
+  *(choice) &= ~(1 << bit_place);  // set it to 0
+  try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice,
+                     diffcmppath, diff_len, ndiffs, diff_blk, iwhite);
+}
+
+/// unwrap indexes to access n dimensional tensor
+/// @param values
+/// @param diff_len
+/// @param ndiffs
+static size_t unwrap_indexes(const int *values, const int *diff_len, const size_t ndiffs)
+{
+  size_t num_unwrap_scalar = 1;
+  for (size_t k = 0; k < ndiffs; k++) {
+    num_unwrap_scalar *= (size_t)diff_len[k] + 1;
+  }
+
+  size_t path_idx = 0;
+  for (size_t k = 0; k < ndiffs; k++) {
+    num_unwrap_scalar /= (size_t)diff_len[k] + 1;
+
+    // (k == 0) space optimization
+    int n = k == 0 ? values[k] % 2 : values[k];
+    path_idx += num_unwrap_scalar * (size_t)n;
+  }
+  return path_idx;
+}
+
+/// populate the values of the linematch algorithm tensor, and find the best
+/// decision for how to compare the relevant lines from each of the buffers at
+/// each point in the tensor
+/// @param df_iters
+/// @param ch_dim
+/// @param diffcmppath
+/// @param diff_len
+/// @param ndiffs
+/// @param diff_blk
+static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *diffcmppath,
+                            const int *diff_len, const size_t ndiffs, const char **diff_blk,
+                            bool iwhite)
+{
+  if (ch_dim == ndiffs) {
+    int npaths = 0;
+    size_t paths[LN_MAX_BUFS];
+
+    for (size_t j = 0; j < ndiffs; j++) {
+      if (df_iters[j] > 0) {
+        paths[npaths] = j;
+        npaths++;
+      }
+    }
+    int choice = 0;
+    size_t unwrapper_idx_to = unwrap_indexes(df_iters, diff_len, ndiffs);
+    diffcmppath[unwrapper_idx_to].df_lev_score = -1;
+    try_possible_paths(df_iters, paths, npaths, 0, &choice, diffcmppath,
+                       diff_len, ndiffs, diff_blk, iwhite);
+    return;
+  }
+
+  for (int i = 0; i <= diff_len[ch_dim]; i++) {
+    df_iters[ch_dim] = i;
+    populate_tensor(df_iters, ch_dim + 1, diffcmppath, diff_len,
+                    ndiffs, diff_blk, iwhite);
+  }
+}
+
+/// algorithm to find an optimal alignment of lines of a diff block with 2 or
+/// more files. The algorithm is generalized to work for any number of files
+/// which corresponds to another dimmension added to the tensor used in the
+/// algorithm
+///
+/// for questions and information about the linematch algorithm please contact
+/// Jonathon White (jonathonwhite@protonmail.com)
+///
+/// for explanation, a summary of the algorithm in 3 dimmensions (3 files
+///     compared) follows
+///
+/// The 3d case (for 3 buffers) of the algorithm implemented when diffopt
+/// 'linematch' is enabled. The algorithm constructs a 3d tensor to
+/// compare a diff between 3 buffers. The dimmensions of the tensor are
+/// the length of the diff in each buffer plus 1 A path is constructed by
+/// moving from one edge of the cube/3d tensor to the opposite edge.
+/// Motions from one cell of the cube to the next represent decisions. In
+/// a 3d cube, there are a total of 7 decisions that can be made,
+/// represented by the enum df_path3_choice which is defined in
+/// buffer_defs.h a comparison of buffer 0 and 1 represents a motion
+/// toward the opposite edge of the cube with components along the 0 and
+/// 1 axes.  a comparison of buffer 0, 1, and 2 represents a motion
+/// toward the opposite edge of the cube with components along the 0, 1,
+/// and 2 axes. A skip of buffer 0 represents a motion along only the 0
+/// axis. For each action, a point value is awarded, and the path is
+/// saved for reference later, if it is found to have been the optimal
+/// path. The optimal path has the highest score.  The score is
+/// calculated as the summation of the total characters matching between
+/// all of the lines which were compared. The structure of the algorithm
+/// is that of a dynamic programming problem.  We can calculate a point
+/// i,j,k in the cube as a function of i-1, j-1, and k-1. To find the
+/// score and path at point i,j,k, we must determine which path we want
+/// to use, this is done by looking at the possibilities and choosing
+/// the one which results in the local highest score.  The total highest
+/// scored path is, then in the end represented by the cell in the
+/// opposite corner from the start location.  The entire algorithm
+/// consits of populating the 3d cube with the optimal paths from which
+/// it may have came.
+///
+/// Optimizations:
+/// As the function to calculate the cell of a tensor at point i,j,k is a
+/// function of the cells at i-1, j-1, k-1, the whole tensor doesn't need
+/// to be stored in memory at once. In the case of the 3d cube, only two
+/// slices (along k and j axis) are stored in memory. For the 2d matrix
+/// (for 2 files), only two rows are stored at a time. The next/previous
+/// slice (or row) is always calculated from the other, and they alternate
+/// at each iteration.
+/// In the 3d case, 3 arrays are populated to memorize the score (matched
+/// characters) of the 3 buffers, so a redundant calculation of the
+/// scores does not occur
+/// @param diff_blk
+/// @param diff_len
+/// @param ndiffs
+/// @param [out] [allocated] decisions
+/// @return the length of decisions
+size_t linematch_nbuffers(const char **diff_blk, const int *diff_len, const size_t ndiffs,
+                          int **decisions, bool iwhite)
+{
+  assert(ndiffs <= LN_MAX_BUFS);
+
+  size_t memsize = 1;
+  size_t memsize_decisions = 0;
+  for (size_t i = 0; i < ndiffs; i++) {
+    assert(diff_len[i] >= 0);
+    memsize *= i == 0 ? 2 : (size_t)(diff_len[i] + 1);
+    memsize_decisions += (size_t)diff_len[i];
+  }
+
+  // create the flattened path matrix
+  diffcmppath_T *diffcmppath = xmalloc(sizeof(diffcmppath_T) * memsize);
+  // allocate memory here
+  for (size_t i = 0; i < memsize; i++) {
+    diffcmppath[i].df_decision = xmalloc(memsize_decisions * sizeof(int));
+  }
+
+  // memory for avoiding repetitive calculations of score
+  int df_iters[LN_MAX_BUFS];
+  populate_tensor(df_iters, 0, diffcmppath, diff_len, ndiffs, diff_blk, iwhite);
+
+  const size_t u = unwrap_indexes(diff_len, diff_len, ndiffs);
+  const size_t best_path_idx = diffcmppath[u].df_path_idx;
+  const int *best_path_decisions = diffcmppath[u].df_decision;
+
+  *decisions = xmalloc(sizeof(int) * best_path_idx);
+  for (size_t i = 0; i < best_path_idx; i++) {
+    (*decisions)[i] = best_path_decisions[i];
+  }
+
+  for (size_t i = 0; i < memsize; i++) {
+    xfree(diffcmppath[i].df_decision);
+  }
+  xfree(diffcmppath);
+
+  return best_path_idx;
+}
diff --git a/src/nvim/linematch.h b/src/nvim/linematch.h
new file mode 100644
index 0000000000..052d438617
--- /dev/null
+++ b/src/nvim/linematch.h
@@ -0,0 +1,10 @@
+#ifndef NVIM_LINEMATCH_H
+#define NVIM_LINEMATCH_H
+
+#include <stddef.h>
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "linematch.h.generated.h"
+#endif
+
+#endif  // NVIM_LINEMATCH_H
diff --git a/src/nvim/lua/xdiff.c b/src/nvim/lua/xdiff.c
index b2b5dfedee..428a33cdad 100644
--- a/src/nvim/lua/xdiff.c
+++ b/src/nvim/lua/xdiff.c
@@ -10,12 +10,16 @@
 #include <string.h>
 
 #include "nvim/api/private/helpers.h"
+#include "nvim/linematch.h"
 #include "nvim/lua/converter.h"
 #include "nvim/lua/executor.h"
 #include "nvim/lua/xdiff.h"
 #include "nvim/vim.h"
 #include "xdiff/xdiff.h"
 
+#define COMPARED_BUFFER0 (1 << 0)
+#define COMPARED_BUFFER1 (1 << 1)
+
 typedef enum {
   kNluaXdiffModeUnified = 0,
   kNluaXdiffModeOnHunkCB,
@@ -25,12 +29,72 @@ typedef enum {
 typedef struct {
   lua_State *lstate;
   Error *err;
+  mmfile_t *ma;
+  mmfile_t *mb;
+  bool linematch;
+  bool iwhite;
 } hunkpriv_t;
 
 #ifdef INCLUDE_GENERATED_DECLARATIONS
 # include "lua/xdiff.c.generated.h"
 #endif
 
+static void lua_pushhunk(lua_State *lstate, long start_a, long count_a, long start_b, long count_b)
+{
+  lua_createtable(lstate, 0, 0);
+  lua_pushinteger(lstate, start_a);
+  lua_rawseti(lstate, -2, 1);
+  lua_pushinteger(lstate, count_a);
+  lua_rawseti(lstate, -2, 2);
+  lua_pushinteger(lstate, start_b);
+  lua_rawseti(lstate, -2, 3);
+  lua_pushinteger(lstate, count_b);
+  lua_rawseti(lstate, -2, 4);
+  lua_rawseti(lstate, -2, (signed)lua_objlen(lstate, -2) + 1);
+}
+
+static void get_linematch_results(lua_State *lstate, mmfile_t *ma, mmfile_t *mb, long start_a,
+                                  long count_a, long start_b, long count_b, bool iwhite)
+{
+  // get the pointer to char of the start of the diff to pass it to linematch algorithm
+  const char *diff_begin[2] = { ma->ptr, mb->ptr };
+  int diff_length[2] = { (int)count_a, (int)count_b };
+
+  fastforward_buf_to_lnum(&diff_begin[0], (size_t)start_a);
+  fastforward_buf_to_lnum(&diff_begin[1], (size_t)start_b);
+
+  int *decisions = NULL;
+  size_t decisions_length = linematch_nbuffers(diff_begin, diff_length, 2, &decisions, iwhite);
+
+  long lnuma = start_a, lnumb = start_b;
+
+  long hunkstarta = lnuma;
+  long hunkstartb = lnumb;
+  long hunkcounta = 0;
+  long hunkcountb = 0;
+  for (size_t i = 0; i < decisions_length; i++) {
+    if (i && (decisions[i - 1] != decisions[i])) {
+      lua_pushhunk(lstate, hunkstarta, hunkcounta, hunkstartb, hunkcountb);
+
+      hunkstarta = lnuma;
+      hunkstartb = lnumb;
+      hunkcounta = 0;
+      hunkcountb = 0;
+      // create a new hunk
+    }
+    if (decisions[i] & COMPARED_BUFFER0) {
+      lnuma++;
+      hunkcounta++;
+    }
+    if (decisions[i] & COMPARED_BUFFER1) {
+      lnumb++;
+      hunkcountb++;
+    }
+  }
+  lua_pushhunk(lstate, hunkstarta, hunkcounta, hunkstartb, hunkcountb);
+  xfree(decisions);
+}
+
 static int write_string(void *priv, mmbuffer_t *mb, int nbuf)
 {
   luaL_Buffer *buf = (luaL_Buffer *)priv;
@@ -61,20 +125,14 @@ static int hunk_locations_cb(long start_a, long count_a, long start_b, long coun
   if (count_b > 0) {
     start_b += 1;
   }
-
-  lua_State *lstate = (lua_State *)cb_data;
-  lua_createtable(lstate, 0, 0);
-
-  lua_pushinteger(lstate, start_a);
-  lua_rawseti(lstate, -2, 1);
-  lua_pushinteger(lstate, count_a);
-  lua_rawseti(lstate, -2, 2);
-  lua_pushinteger(lstate, start_b);
-  lua_rawseti(lstate, -2, 3);
-  lua_pushinteger(lstate, count_b);
-  lua_rawseti(lstate, -2, 4);
-
-  lua_rawseti(lstate, -2, (signed)lua_objlen(lstate, -2) + 1);
+  hunkpriv_t *priv = (hunkpriv_t *)cb_data;
+  lua_State *lstate = priv->lstate;
+  if (priv->linematch) {
+    get_linematch_results(lstate, priv->ma, priv->mb, start_a, count_a, start_b, count_b,
+                          priv->iwhite);
+  } else {
+    lua_pushhunk(lstate, start_a, count_a, start_b, count_b);
+  }
 
   return 0;
 }
@@ -149,7 +207,7 @@ static bool check_xdiff_opt(ObjectType actType, ObjectType expType, const char *
 }
 
 static NluaXdiffMode process_xdl_diff_opts(lua_State *lstate, xdemitconf_t *cfg, xpparam_t *params,
-                                           Error *err)
+                                           bool *linematch, Error *err)
 {
   const DictionaryOf(LuaRef) opts = nlua_pop_Dictionary(lstate, true, err);
 
@@ -205,6 +263,11 @@ static NluaXdiffMode process_xdl_diff_opts(lua_State *lstate, xdemitconf_t *cfg,
         goto exit_1;
       }
       cfg->interhunkctxlen = v->data.integer;
+    } else if (strequal("linematch", k.data)) {
+      *linematch = api_object_to_bool(*v, "linematch", false, err);
+      if (ERROR_SET(err)) {
+        goto exit_1;
+      }
     } else {
       struct {
         const char *name;
@@ -244,10 +307,8 @@ static NluaXdiffMode process_xdl_diff_opts(lua_State *lstate, xdemitconf_t *cfg,
 
   if (had_on_hunk) {
     mode = kNluaXdiffModeOnHunkCB;
-    cfg->hunk_func = call_on_hunk_cb;
   } else if (had_result_type_indices) {
     mode = kNluaXdiffModeLocations;
-    cfg->hunk_func = hunk_locations_cb;
   }
 
 exit_1:
@@ -268,6 +329,7 @@ int nlua_xdl_diff(lua_State *lstate)
   xdemitconf_t cfg;
   xpparam_t params;
   xdemitcb_t ecb;
+  bool linematch = false;
 
   CLEAR_FIELD(cfg);
   CLEAR_FIELD(params);
@@ -280,7 +342,7 @@ int nlua_xdl_diff(lua_State *lstate)
       return luaL_argerror(lstate, 3, "expected table");
     }
 
-    mode = process_xdl_diff_opts(lstate, &cfg, &params, &err);
+    mode = process_xdl_diff_opts(lstate, &cfg, &params, &linematch, &err);
 
     if (ERROR_SET(&err)) {
       goto exit_0;
@@ -288,7 +350,7 @@ int nlua_xdl_diff(lua_State *lstate)
   }
 
   luaL_Buffer buf;
-  hunkpriv_t *priv = NULL;
+  hunkpriv_t priv;
   switch (mode) {
   case kNluaXdiffModeUnified:
     luaL_buffinit(lstate, &buf);
@@ -296,14 +358,24 @@ int nlua_xdl_diff(lua_State *lstate)
     ecb.out_line = write_string;
     break;
   case kNluaXdiffModeOnHunkCB:
-    priv = xmalloc(sizeof(*priv));
-    priv->lstate = lstate;
-    priv->err = &err;
-    ecb.priv = priv;
+    cfg.hunk_func = call_on_hunk_cb;
+    priv = (hunkpriv_t) {
+      .lstate = lstate,
+      .err    = &err,
+    };
+    ecb.priv = &priv;
     break;
   case kNluaXdiffModeLocations:
+    cfg.hunk_func = hunk_locations_cb;
+    priv = (hunkpriv_t) {
+      .lstate    = lstate,
+      .ma        = &ma,
+      .mb        = &mb,
+      .linematch = linematch,
+      .iwhite    = (params.flags & XDF_IGNORE_WHITESPACE) > 0
+    };
+    ecb.priv = &priv;
     lua_createtable(lstate, 0, 0);
-    ecb.priv = lstate;
     break;
   }
 
@@ -314,8 +386,6 @@ int nlua_xdl_diff(lua_State *lstate)
     }
   }
 
-  XFREE_CLEAR(priv);
-
 exit_0:
   if (ERROR_SET(&err)) {
     luaL_where(lstate, 1);