diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/buffer_defs.h | 2 | ||||
-rw-r--r-- | src/nvim/diff.c | 431 | ||||
-rw-r--r-- | src/nvim/drawline.c | 11 | ||||
-rw-r--r-- | src/nvim/eval/funcs.c | 7 | ||||
-rw-r--r-- | src/nvim/linematch.c | 377 | ||||
-rw-r--r-- | src/nvim/linematch.h | 10 | ||||
-rw-r--r-- | src/nvim/lua/xdiff.c | 122 |
7 files changed, 879 insertions, 81 deletions
diff --git a/src/nvim/buffer_defs.h b/src/nvim/buffer_defs.h index d8edd4b2d0..3629199f9a 100644 --- a/src/nvim/buffer_defs.h +++ b/src/nvim/buffer_defs.h @@ -894,6 +894,8 @@ struct diffblock_S { diff_T *df_next; linenr_T df_lnum[DB_COUNT]; // line number in buffer linenr_T df_count[DB_COUNT]; // nr of inserted/changed lines + bool is_linematched; // has the linematch algorithm ran on this diff hunk to divide it into + // smaller diff hunks? }; #define SNAP_HELP_IDX 0 diff --git a/src/nvim/diff.c b/src/nvim/diff.c index 964db599a3..503e546562 100644 --- a/src/nvim/diff.c +++ b/src/nvim/diff.c @@ -27,6 +27,7 @@ #include "nvim/fileio.h" #include "nvim/fold.h" #include "nvim/garray.h" +#include "nvim/linematch.h" #include "nvim/mark.h" #include "nvim/mbyte.h" #include "nvim/memline.h" @@ -62,10 +63,12 @@ static bool diff_need_update = false; // ex_diffupdate needs to be called #define DIFF_INTERNAL 0x200 // use internal xdiff algorithm #define DIFF_CLOSE_OFF 0x400 // diffoff when closing window #define DIFF_FOLLOWWRAP 0x800 // follow the wrap option +#define DIFF_LINEMATCH 0x1000 // match most similar lines within diff #define ALL_WHITE_DIFF (DIFF_IWHITE | DIFF_IWHITEALL | DIFF_IWHITEEOL) static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF; static long diff_algorithm = 0; +static int linematch_lines = 0; #define LBUFLEN 50 // length of line in diff file @@ -362,7 +365,7 @@ static void diff_mark_adjust_tp(tabpage_T *tp, int idx, linenr_T line1, linenr_T if (last >= line1 - 1) { // 6. change below line2: only adjust for amount_after; also when // "deleted" became zero when deleted all lines between two diffs. - if (dp->df_lnum[idx] - (deleted + inserted != 0) > line2) { + if (dp->df_lnum[idx] - (deleted + inserted != 0) > line2 - dp->is_linematched) { if (amount_after == 0) { // nothing left to change break; @@ -454,7 +457,7 @@ static void diff_mark_adjust_tp(tabpage_T *tp, int idx, linenr_T line1, linenr_T } // check if this block touches the previous one, may merge them. - if ((dprev != NULL) + if ((dprev != NULL) && !dp->is_linematched && (dprev->df_lnum[idx] + dprev->df_count[idx] == dp->df_lnum[idx])) { int i; for (i = 0; i < DB_COUNT; i++) { @@ -513,6 +516,7 @@ static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp) { diff_T *dnew = xmalloc(sizeof(*dnew)); + dnew->is_linematched = false; dnew->df_next = dp; if (dprev == NULL) { tp->tp_first_diff = dnew; @@ -727,12 +731,16 @@ static void clear_diffout(diffout_T *dout) /// @param din /// /// @return FAIL for failure. -static int diff_write_buffer(buf_T *buf, mmfile_t *m) +static int diff_write_buffer(buf_T *buf, mmfile_t *m, linenr_T start, linenr_T end) { size_t len = 0; + if (end < 0) { + end = buf->b_ml.ml_line_count; + } + // xdiff requires one big block of memory with all the text. - for (linenr_T lnum = 1; lnum <= buf->b_ml.ml_line_count; lnum++) { + for (linenr_T lnum = start; lnum <= end; lnum++) { len += strlen(ml_get_buf(buf, lnum, false)) + 1; } char *ptr = try_malloc(len); @@ -753,7 +761,7 @@ static int diff_write_buffer(buf_T *buf, mmfile_t *m) m->size = (long)len; len = 0; - for (linenr_T lnum = 1; lnum <= buf->b_ml.ml_line_count; lnum++) { + for (linenr_T lnum = start; lnum <= end; lnum++) { char *s = ml_get_buf(buf, lnum, false); if (diff_flags & DIFF_ICASE) { while (*s != NUL) { @@ -792,7 +800,7 @@ static int diff_write_buffer(buf_T *buf, mmfile_t *m) static int diff_write(buf_T *buf, diffin_T *din) { if (din->din_fname == NULL) { - return diff_write_buffer(buf, &din->din_mmfile); + return diff_write_buffer(buf, &din->din_mmfile, 1, -1); } // Always use 'fileformat' set to "unix". @@ -1784,6 +1792,294 @@ void diff_clear(tabpage_T *tp) tp->tp_first_diff = NULL; } +/// +/// return true if the options are set to use diff linematch +/// +bool diff_linematch(diff_T *dp) +{ + if (!(diff_flags & DIFF_LINEMATCH)) { + return false; + } + // are there more than three diff buffers? + int tsize = 0; + for (int i = 0; i < DB_COUNT; i++) { + if (curtab->tp_diffbuf[i] != NULL) { + // for the rare case (bug?) that the count of a diff block is negative, do + // not run the algorithm because this will try to allocate a negative + // amount of space and crash + if (dp->df_count[i] < 0) { + return false; + } + tsize += dp->df_count[i]; + } + } + // avoid allocating a huge array because it will lag + return tsize <= linematch_lines; +} + +static int get_max_diff_length(const diff_T *dp) +{ + int maxlength = 0; + for (int k = 0; k < DB_COUNT; k++) { + if (curtab->tp_diffbuf[k] != NULL) { + if (dp->df_count[k] > maxlength) { + maxlength = dp->df_count[k]; + } + } + } + return maxlength; +} + +static void find_top_diff_block(diff_T **thistopdiff, diff_T **nextblockblock, int fromidx, + int topline) +{ + diff_T *topdiff = NULL; + diff_T *localtopdiff = NULL; + int topdiffchange = 0; + + for (topdiff = curtab->tp_first_diff; topdiff != NULL; topdiff = topdiff->df_next) { + // set the top of the current overlapping diff block set as we + // iterate through all of the sets of overlapping diff blocks + if (!localtopdiff || topdiffchange) { + localtopdiff = topdiff; + topdiffchange = 0; + } + + // check if the fromwin topline is matched by the current diff. if so, set it to the top of the diff block + if (topline >= topdiff->df_lnum[fromidx] && topline <= + (topdiff->df_lnum[fromidx] + topdiff->df_count[fromidx])) { + // this line is inside the current diff block, so we will save the + // top block of the set of blocks to refer to later + if ((*thistopdiff) == NULL) { + (*thistopdiff) = localtopdiff; + } + } + + // check if the next set of overlapping diff blocks is next + if (!(topdiff->df_next && (topdiff->df_next->df_lnum[fromidx] == + (topdiff->df_lnum[fromidx] + topdiff->df_count[fromidx])))) { + // mark that the next diff block is belongs to a different set of + // overlapping diff blocks + topdiffchange = 1; + + // if we already have found that the line number is inside a diff block, + // set the marker of the next block and finish the iteration + if (*thistopdiff) { + (*nextblockblock) = topdiff->df_next; + break; + } + } + } +} + +static void count_filler_lines_and_topline(int *curlinenum_to, int *linesfiller, + const diff_T *thistopdiff, const int toidx, + int virtual_lines_passed) +{ + const diff_T *curdif = thistopdiff; + int ch_virtual_lines = 0; + int isfiller = 0; + while (virtual_lines_passed) { + if (ch_virtual_lines) { + virtual_lines_passed--; + ch_virtual_lines--; + if (!isfiller) { + (*curlinenum_to)++; + } else { + (*linesfiller)++; + } + } else { + (*linesfiller) = 0; + ch_virtual_lines = get_max_diff_length(curdif); + isfiller = (curdif->df_count[toidx] ? 0 : 1); + if (isfiller) { + while (curdif && curdif->df_next && curdif->df_lnum[toidx] == + curdif->df_next->df_lnum[toidx] + && curdif->df_next->df_count[toidx] == 0) { + curdif = curdif->df_next; + ch_virtual_lines += get_max_diff_length(curdif); + } + } + if (curdif) { + curdif = curdif->df_next; + } + } + } +} + +static void calculate_topfill_and_topline(const int fromidx, const int toidx, const + int from_topline, const int from_topfill, int *topfill, + linenr_T *topline) +{ + // 1. find the position from the top of the diff block, and the start + // of the next diff block + diff_T *thistopdiff = NULL; + diff_T *nextblockblock = NULL; + int virtual_lines_passed = 0; + + find_top_diff_block(&thistopdiff, &nextblockblock, fromidx, from_topline); + + // count the virtual lines that have been passed + + diff_T *curdif = thistopdiff; + while (curdif && (curdif->df_lnum[fromidx] + curdif->df_count[fromidx]) + <= from_topline) { + virtual_lines_passed += get_max_diff_length(curdif); + + curdif = curdif->df_next; + } + + if (curdif != nextblockblock) { + virtual_lines_passed += from_topline - curdif->df_lnum[fromidx]; + } + virtual_lines_passed -= from_topfill; + + // count the same amount of virtual lines in the toidx buffer + curdif = thistopdiff; + int curlinenum_to = thistopdiff->df_lnum[toidx]; + int linesfiller = 0; + count_filler_lines_and_topline(&curlinenum_to, &linesfiller, + thistopdiff, toidx, virtual_lines_passed); + + // count the number of filler lines that would normally be above this line + int maxfiller = 0; + for (diff_T *dpfillertest = thistopdiff; dpfillertest != NULL; + dpfillertest = dpfillertest->df_next) { + if (dpfillertest->df_lnum[toidx] == curlinenum_to) { + while (dpfillertest && dpfillertest->df_lnum[toidx] == curlinenum_to) { + maxfiller += dpfillertest->df_count[toidx] ? 0 : get_max_diff_length(dpfillertest); + dpfillertest = dpfillertest->df_next; + } + break; + } + } + (*topfill) = maxfiller - linesfiller; + (*topline) = curlinenum_to; +} + +static int linematched_filler_lines(diff_T *dp, int idx, linenr_T lnum, int *linestatus) +{ + int filler_lines_d1 = 0; + while (dp && dp->df_next + && lnum == (dp->df_lnum[idx] + dp->df_count[idx]) + && dp->df_next->df_lnum[idx] == lnum) { + if (dp->df_count[idx] == 0) { + filler_lines_d1 += get_max_diff_length(dp); + } + dp = dp->df_next; + } + + if (dp->df_count[idx] == 0) { + filler_lines_d1 += get_max_diff_length(dp); + } + + if (lnum < dp->df_lnum[idx] + dp->df_count[idx]) { + int j = 0; + for (int i = 0; i < DB_COUNT; i++) { + if (curtab->tp_diffbuf[i] != NULL) { + if (dp->df_count[i]) { + j++; + } + } + // is this an added line or a changed line? + if (linestatus) { + (*linestatus) = (j == 1) ? -2 : -1; + } + } + } + return filler_lines_d1; +} + +// Apply results from the linematch algorithm and apply to 'dp' by splitting it into multiple +// adjacent diff blocks. +static void apply_linematch_results(diff_T *dp, size_t decisions_length, const int *decisions) +{ + // get the start line number here in each diff buffer, and then increment + int line_numbers[DB_COUNT]; + int outputmap[DB_COUNT]; + size_t ndiffs = 0; + for (int i = 0; i < DB_COUNT; i++) { + if (curtab->tp_diffbuf[i] != NULL) { + line_numbers[i] = dp->df_lnum[i]; + dp->df_count[i] = 0; + + // Keep track of the index of the diff buffer we are using here. + // We will use this to write the output of the algorithm to + // diff_T structs at the correct indexes + outputmap[ndiffs] = i; + ndiffs++; + } + } + + // write the diffs starting with the current diff block + diff_T *dp_s = dp; + for (size_t i = 0; i < decisions_length; i++) { + // Don't allocate on first iter since we can reuse the initial diffblock + if (i != 0 && (decisions[i - 1] != decisions[i])) { + // create new sub diff blocks to segment the original diff block which we + // further divided by running the linematch algorithm + dp_s = diff_alloc_new(curtab, dp_s, dp_s->df_next); + dp_s->is_linematched = true; + for (int j = 0; j < DB_COUNT; j++) { + if (curtab->tp_diffbuf[j] != NULL) { + dp_s->df_lnum[j] = line_numbers[j]; + dp_s->df_count[j] = 0; + } + } + } + for (size_t j = 0; j < ndiffs; j++) { + if (decisions[i] & (1 << j)) { + // will need to use the map here + dp_s->df_count[outputmap[j]]++; + line_numbers[outputmap[j]]++; + } + } + } + dp->is_linematched = true; +} + +static void run_linematch_algorithm(diff_T *dp) +{ + // define buffers for diff algorithm + mmfile_t diffbufs_mm[DB_COUNT]; + const char *diffbufs[DB_COUNT]; + int diff_length[DB_COUNT]; + size_t ndiffs = 0; + for (int i = 0; i < DB_COUNT; i++) { + if (curtab->tp_diffbuf[i] != NULL) { + // write the contents of the entire buffer to + // diffbufs_mm[diffbuffers_count] + diff_write_buffer(curtab->tp_diffbuf[i], &diffbufs_mm[ndiffs], + dp->df_lnum[i], dp->df_lnum[i] + dp->df_count[i] - 1); + + // we want to get the char* to the diff buffer that was just written + // we add it to the array of char*, diffbufs + diffbufs[ndiffs] = diffbufs_mm[ndiffs].ptr; + + // keep track of the length of this diff block to pass it to the linematch + // algorithm + diff_length[ndiffs] = dp->df_count[i]; + + // increment the amount of diff buffers we are passing to the algorithm + ndiffs++; + } + } + + // we will get the output of the linematch algorithm in the format of an array + // of integers (*decisions) and the length of that array (decisions_length) + int *decisions = NULL; + const bool iwhite = (diff_flags & (DIFF_IWHITEALL | DIFF_IWHITE)) > 0; + size_t decisions_length = linematch_nbuffers(diffbufs, diff_length, ndiffs, &decisions, iwhite); + + for (size_t i = 0; i < ndiffs; i++) { + XFREE_CLEAR(diffbufs_mm[i].ptr); + } + + apply_linematch_results(dp, decisions_length, decisions); + + xfree(decisions); +} + /// Check diff status for line "lnum" in buffer "buf": /// /// Returns 0 for nothing special @@ -1795,9 +2091,10 @@ void diff_clear(tabpage_T *tp) /// /// @param wp /// @param lnum +/// @param[out] linestatus /// /// @return diff status. -int diff_check(win_T *wp, linenr_T lnum) +int diff_check_with_linestatus(win_T *wp, linenr_T lnum, int *linestatus) { buf_T *buf = wp->w_buffer; @@ -1840,6 +2137,14 @@ int diff_check(win_T *wp, linenr_T lnum) return 0; } + if (!dp->is_linematched && diff_linematch(dp)) { + run_linematch_algorithm(dp); + } + + if (dp->is_linematched) { + return linematched_filler_lines(dp, idx, lnum, linestatus); + } + if (lnum < dp->df_lnum[idx] + dp->df_count[idx]) { int zero = false; @@ -1894,15 +2199,16 @@ int diff_check(win_T *wp, linenr_T lnum) // Insert filler lines above the line just below the change. Will return // 0 when this buf had the max count. - linenr_T maxcount = 0; - for (int i = 0; i < DB_COUNT; i++) { - if ((curtab->tp_diffbuf[i] != NULL) && (dp->df_count[i] > maxcount)) { - maxcount = dp->df_count[i]; - } - } + int maxcount = get_max_diff_length(dp); return maxcount - dp->df_count[idx]; } +/// See diff_check_with_linestatus +int diff_check(win_T *wp, linenr_T lnum) +{ + return diff_check_with_linestatus(wp, lnum, NULL); +} + /// Compare two entries in diff "dp" and return true if they are equal. /// /// @param dp diff @@ -2062,46 +2368,51 @@ void diff_set_topline(win_T *fromwin, win_T *towin) towin->w_topline = lnum + (dp->df_lnum[toidx] - dp->df_lnum[fromidx]); if (lnum >= dp->df_lnum[fromidx]) { - // Inside a change: compute filler lines. With three or more - // buffers we need to know the largest count. - linenr_T max_count = 0; - - for (int i = 0; i < DB_COUNT; i++) { - if ((curtab->tp_diffbuf[i] != NULL) && (max_count < dp->df_count[i])) { - max_count = dp->df_count[i]; - } - } + if (diff_flags & DIFF_LINEMATCH) { + calculate_topfill_and_topline(fromidx, toidx, fromwin->w_topline, + fromwin->w_topfill, &towin->w_topfill, &towin->w_topline); + } else { + // Inside a change: compute filler lines. With three or more + // buffers we need to know the largest count. + linenr_T max_count = 0; - if (dp->df_count[toidx] == dp->df_count[fromidx]) { - // same number of lines: use same filler count - towin->w_topfill = fromwin->w_topfill; - } else if (dp->df_count[toidx] > dp->df_count[fromidx]) { - if (lnum == dp->df_lnum[fromidx] + dp->df_count[fromidx]) { - // more lines in towin and fromwin doesn't show diff - // lines, only filler lines - if (max_count - fromwin->w_topfill >= dp->df_count[toidx]) { - // towin also only shows filler lines - towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx]; - towin->w_topfill = fromwin->w_topfill; - } else { - // towin still has some diff lines to show - towin->w_topline = dp->df_lnum[toidx] - + max_count - fromwin->w_topfill; + for (int i = 0; i < DB_COUNT; i++) { + if ((curtab->tp_diffbuf[i] != NULL) && (max_count < dp->df_count[i])) { + max_count = dp->df_count[i]; } } - } else if (towin->w_topline >= dp->df_lnum[toidx] - + dp->df_count[toidx]) { - // less lines in towin and no diff lines to show: compute - // filler lines - towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx]; - if (diff_flags & DIFF_FILLER) { + if (dp->df_count[toidx] == dp->df_count[fromidx]) { + // same number of lines: use same filler count + towin->w_topfill = fromwin->w_topfill; + } else if (dp->df_count[toidx] > dp->df_count[fromidx]) { if (lnum == dp->df_lnum[fromidx] + dp->df_count[fromidx]) { - // fromwin is also out of diff lines - towin->w_topfill = fromwin->w_topfill; - } else { - // fromwin has some diff lines - towin->w_topfill = dp->df_lnum[fromidx] + max_count - lnum; + // more lines in towin and fromwin doesn't show diff + // lines, only filler lines + if (max_count - fromwin->w_topfill >= dp->df_count[toidx]) { + // towin also only shows filler lines + towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx]; + towin->w_topfill = fromwin->w_topfill; + } else { + // towin still has some diff lines to show + towin->w_topline = dp->df_lnum[toidx] + + max_count - fromwin->w_topfill; + } + } + } else if (towin->w_topline >= dp->df_lnum[toidx] + + dp->df_count[toidx]) { + // less lines in towin and no diff lines to show: compute + // filler lines + towin->w_topline = dp->df_lnum[toidx] + dp->df_count[toidx]; + + if (diff_flags & DIFF_FILLER) { + if (lnum == dp->df_lnum[fromidx] + dp->df_count[fromidx]) { + // fromwin is also out of diff lines + towin->w_topfill = fromwin->w_topfill; + } else { + // fromwin has some diff lines + towin->w_topfill = dp->df_lnum[fromidx] + max_count - lnum; + } } } } @@ -2136,6 +2447,7 @@ void diff_set_topline(win_T *fromwin, win_T *towin) int diffopt_changed(void) { int diff_context_new = 6; + int linematch_lines_new = 0; int diff_flags_new = 0; int diff_foldcolumn_new = 2; long diff_algorithm_new = 0; @@ -2205,6 +2517,10 @@ int diffopt_changed(void) } else { return FAIL; } + } else if ((STRNCMP(p, "linematch:", 10) == 0) && ascii_isdigit(p[10])) { + p += 10; + linematch_lines_new = getdigits_int(&p, false, linematch_lines_new); + diff_flags_new |= DIFF_LINEMATCH; } if ((*p != ',') && (*p != NUL)) { @@ -2233,6 +2549,7 @@ int diffopt_changed(void) diff_flags = diff_flags_new; diff_context = diff_context_new == 0 ? 1 : diff_context_new; + linematch_lines = linematch_lines_new; diff_foldcolumn = diff_foldcolumn_new; diff_algorithm = diff_algorithm_new; @@ -2300,6 +2617,13 @@ bool diff_find_change(win_T *wp, linenr_T lnum, int *startp, int *endp) break; } } + if (dp->is_linematched) { + while (dp && dp->df_next + && lnum == dp->df_count[idx] + dp->df_lnum[idx] + && dp->df_next->df_lnum[idx] == lnum) { + dp = dp->df_next; + } + } if ((dp == NULL) || (diff_check_sanity(curtab, dp) == FAIL)) { xfree(line_org); @@ -2674,6 +2998,17 @@ static void diffgetput(const int addr_count, const int idx_cur, const int idx_fr diff_T *dprev = NULL; for (diff_T *dp = curtab->tp_first_diff; dp != NULL;) { + if (!addr_count) { + // handle the case with adjacent diff blocks + while (dp->is_linematched + && dp->df_next + && dp->df_next->df_lnum[idx_cur] == dp->df_lnum[idx_cur] + dp->df_count[idx_cur] + && dp->df_next->df_lnum[idx_cur] == line1 + off + 1) { + dprev = dp; + dp = dp->df_next; + } + } + if (dp->df_lnum[idx_cur] > line2 + off) { // past the range that was specified break; diff --git a/src/nvim/drawline.c b/src/nvim/drawline.c index 08b6fd89af..cb7d85a467 100644 --- a/src/nvim/drawline.c +++ b/src/nvim/drawline.c @@ -804,9 +804,10 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool nochange, int bg_attr = win_bg_attr(wp); - filler_lines = diff_check(wp, lnum); - if (filler_lines < 0) { - if (filler_lines == -1) { + int linestatus = 0; + filler_lines = diff_check_with_linestatus(wp, lnum, &linestatus); + if (filler_lines < 0 || linestatus < 0) { + if (filler_lines == -1 || linestatus == -1) { if (diff_find_change(wp, lnum, &change_start, &change_end)) { diff_hlf = HLF_ADD; // added line } else if (change_start == 0) { @@ -817,7 +818,9 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool nochange, } else { diff_hlf = HLF_ADD; // added line } - filler_lines = 0; + if (linestatus == 0) { + filler_lines = 0; + } area_highlighting = true; } VirtLines virt_lines = KV_INITIAL_VALUE; diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index e676e2e656..0e3de29cce 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -1573,9 +1573,10 @@ static void f_diff_hlID(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) || changedtick != buf_get_changedtick(curbuf) || fnum != curbuf->b_fnum) { // New line, buffer, change: need to get the values. - int filler_lines = diff_check(curwin, lnum); - if (filler_lines < 0) { - if (filler_lines == -1) { + int linestatus = 0; + int filler_lines = diff_check_with_linestatus(curwin, lnum, &linestatus); + if (filler_lines < 0 || linestatus < 0) { + if (filler_lines == -1 || linestatus == -1) { change_start = MAXCOL; change_end = -1; if (diff_find_change(curwin, lnum, &change_start, &change_end)) { diff --git a/src/nvim/linematch.c b/src/nvim/linematch.c new file mode 100644 index 0000000000..ef63bd2321 --- /dev/null +++ b/src/nvim/linematch.c @@ -0,0 +1,377 @@ +#include "nvim/linematch.h" +#include "nvim/memory.h" +#include "nvim/vim.h" + +// struct for running the diff linematch algorithm +typedef struct { + int *df_decision; // to keep track of this path traveled + int df_lev_score; // to keep track of the total score of this path + size_t df_path_idx; // current index of this path +} diffcmppath_T; + +#define LN_MAX_BUFS 8 + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "linematch.c.generated.h" +#endif + +static size_t line_len(const char *s) +{ + char *end = strchr(s, '\n'); + if (end) { + return (size_t)(end - s); + } + return STRLEN(s); +} + +/// Same as matching_chars but ignore whitespace +/// +/// @param s1 +/// @param s2 +static int matching_chars_iwhite(const char *s1, const char *s2) +{ + // the newly processed strings that will be compared + // delete the white space characters, and/or replace all upper case with lower + char *strsproc[2]; + const char *strsorig[2] = { s1, s2 }; + for (int k = 0; k < 2; k++) { + size_t d = 0; + size_t i = 0; + size_t slen = line_len(strsorig[k]); + strsproc[k] = xmalloc((slen + 1) * sizeof(char)); + while (d + i < slen) { + char e = strsorig[k][i + d]; + if (e != ' ' && e != '\t') { + strsproc[k][i] = e; + i++; + } else { + d++; + } + } + strsproc[k][i] = '\0'; + } + int matching = matching_chars(strsproc[0], strsproc[1]); + xfree(strsproc[0]); + xfree(strsproc[1]); + return matching; +} + +/// update the path of a point in the diff linematch algorithm +/// @param diffcmppath +/// @param score +/// @param to +/// @param from +/// @param choice +static void update_path_flat(diffcmppath_T *diffcmppath, int score, size_t to, size_t from, + const int choice) +{ + size_t path_idx = diffcmppath[from].df_path_idx; + + for (size_t k = 0; k < path_idx; k++) { + diffcmppath[to].df_decision[k] = diffcmppath[from].df_decision[k]; + } + + diffcmppath[to].df_decision[path_idx] = choice; + diffcmppath[to].df_lev_score = score; + diffcmppath[to].df_path_idx = path_idx + 1; +} + +#define MATCH_CHAR_MAX_LEN 500 + +/// Return matching characters between "s1" and "s2" whilst respecting sequence order. +/// Consider the case of two strings 'AAACCC' and 'CCCAAA', the +/// return value from this function will be 3, either to match +/// the 3 C's, or the 3 A's. +/// +/// Examples: +/// matching_chars("aabc", "acba") -> 2 // 'a' and 'b' in common +/// matching_chars("123hello567", "he123ll567o") -> 8 // '123', 'll' and '567' in common +/// matching_chars("abcdefg", "gfedcba") -> 1 // all characters in common, +/// // but only at most 1 in sequence +/// +/// @param s1 +/// @param s2 +static int matching_chars(const char *s1, const char *s2) +{ + size_t s1len = MIN(MATCH_CHAR_MAX_LEN, line_len(s1)); + size_t s2len = MIN(MATCH_CHAR_MAX_LEN, line_len(s2)); + int matrix[2][MATCH_CHAR_MAX_LEN] = { 0 }; + bool icur = 1; // save space by storing only two rows for i axis + for (size_t i = 0; i < s1len; i++) { + icur = !icur; + int *e1 = matrix[icur]; + int *e2 = matrix[!icur]; + for (size_t j = 0; j < s2len; j++) { + // skip char in s1 + if (e2[j + 1] > e1[j + 1]) { + e1[j + 1] = e2[j + 1]; + } + // skip char in s2 + if (e1[j] > e1[j + 1]) { + e1[j + 1] = e1[j]; + } + // compare char in s1 and s2 + if ((s1[i] == s2[j]) && (e2[j] + 1) > e1[j + 1]) { + e1[j + 1] = e2[j] + 1; + } + } + } + return matrix[icur][s2len]; +} + +/// count the matching characters between a variable number of strings "sp" +/// mark the strings that have already been compared to extract them later +/// without re-running the character match counting. +/// @param sp +/// @param fomvals +/// @param n +static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite) +{ + int matched_chars = 0; + int matched = 0; + for (size_t i = 0; i < n; i++) { + for (size_t j = i + 1; j < n; j++) { + if (sp[i] != NULL && sp[j] != NULL) { + matched++; + // TODO(lewis6991): handle whitespace ignoring higher up in the stack + matched_chars += iwhite ? matching_chars_iwhite(sp[i], sp[j]) + : matching_chars(sp[i], sp[j]); + } + } + } + + // prioritize a match of 3 (or more lines) equally to a match of 2 lines + if (matched >= 2) { + matched_chars *= 2; + matched_chars /= matched; + } + + return matched_chars; +} + +void fastforward_buf_to_lnum(const char **s, size_t lnum) +{ + assert(lnum > 0); + for (size_t j = 0; j < lnum - 1; j++) { + *s = strchr(*s, '\n'); + (*s)++; + } +} + +/// try all the different ways to compare these lines and use the one that +/// results in the most matching characters +/// @param df_iters +/// @param paths +/// @param npaths +/// @param path_idx +/// @param choice +/// @param diffcmppath +/// @param diff_len +/// @param ndiffs +/// @param diff_blk +static void try_possible_paths(const int *df_iters, const size_t *paths, const int npaths, + const int path_idx, int *choice, diffcmppath_T *diffcmppath, + const int *diff_len, const size_t ndiffs, const char **diff_blk, + bool iwhite) +{ + if (path_idx == npaths) { + if ((*choice) > 0) { + int from_vals[LN_MAX_BUFS]; + const int *to_vals = df_iters; + const char *current_lines[LN_MAX_BUFS]; + for (size_t k = 0; k < ndiffs; k++) { + from_vals[k] = df_iters[k]; + // get the index at all of the places + if ((*choice) & (1 << k)) { + from_vals[k]--; + const char *p = diff_blk[k]; + fastforward_buf_to_lnum(&p, (size_t)df_iters[k]); + current_lines[k] = p; + } else { + current_lines[k] = NULL; + } + } + size_t unwrapped_idx_from = unwrap_indexes(from_vals, diff_len, ndiffs); + size_t unwrapped_idx_to = unwrap_indexes(to_vals, diff_len, ndiffs); + int matched_chars = count_n_matched_chars(current_lines, ndiffs, iwhite); + int score = diffcmppath[unwrapped_idx_from].df_lev_score + matched_chars; + if (score > diffcmppath[unwrapped_idx_to].df_lev_score) { + update_path_flat(diffcmppath, score, unwrapped_idx_to, unwrapped_idx_from, *choice); + } + } else { + // initialize the 0, 0, 0 ... choice + size_t i = 0; + while (i < ndiffs && df_iters[i] == 0) { + i++; + if (i == ndiffs) { + diffcmppath[0].df_lev_score = 0; + diffcmppath[0].df_path_idx = 0; + } + } + } + return; + } + size_t bit_place = paths[path_idx]; + *(choice) |= (1 << bit_place); // set it to 1 + try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice, + diffcmppath, diff_len, ndiffs, diff_blk, iwhite); + *(choice) &= ~(1 << bit_place); // set it to 0 + try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice, + diffcmppath, diff_len, ndiffs, diff_blk, iwhite); +} + +/// unwrap indexes to access n dimensional tensor +/// @param values +/// @param diff_len +/// @param ndiffs +static size_t unwrap_indexes(const int *values, const int *diff_len, const size_t ndiffs) +{ + size_t num_unwrap_scalar = 1; + for (size_t k = 0; k < ndiffs; k++) { + num_unwrap_scalar *= (size_t)diff_len[k] + 1; + } + + size_t path_idx = 0; + for (size_t k = 0; k < ndiffs; k++) { + num_unwrap_scalar /= (size_t)diff_len[k] + 1; + + // (k == 0) space optimization + int n = k == 0 ? values[k] % 2 : values[k]; + path_idx += num_unwrap_scalar * (size_t)n; + } + return path_idx; +} + +/// populate the values of the linematch algorithm tensor, and find the best +/// decision for how to compare the relevant lines from each of the buffers at +/// each point in the tensor +/// @param df_iters +/// @param ch_dim +/// @param diffcmppath +/// @param diff_len +/// @param ndiffs +/// @param diff_blk +static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *diffcmppath, + const int *diff_len, const size_t ndiffs, const char **diff_blk, + bool iwhite) +{ + if (ch_dim == ndiffs) { + int npaths = 0; + size_t paths[LN_MAX_BUFS]; + + for (size_t j = 0; j < ndiffs; j++) { + if (df_iters[j] > 0) { + paths[npaths] = j; + npaths++; + } + } + int choice = 0; + size_t unwrapper_idx_to = unwrap_indexes(df_iters, diff_len, ndiffs); + diffcmppath[unwrapper_idx_to].df_lev_score = -1; + try_possible_paths(df_iters, paths, npaths, 0, &choice, diffcmppath, + diff_len, ndiffs, diff_blk, iwhite); + return; + } + + for (int i = 0; i <= diff_len[ch_dim]; i++) { + df_iters[ch_dim] = i; + populate_tensor(df_iters, ch_dim + 1, diffcmppath, diff_len, + ndiffs, diff_blk, iwhite); + } +} + +/// algorithm to find an optimal alignment of lines of a diff block with 2 or +/// more files. The algorithm is generalized to work for any number of files +/// which corresponds to another dimmension added to the tensor used in the +/// algorithm +/// +/// for questions and information about the linematch algorithm please contact +/// Jonathon White (jonathonwhite@protonmail.com) +/// +/// for explanation, a summary of the algorithm in 3 dimmensions (3 files +/// compared) follows +/// +/// The 3d case (for 3 buffers) of the algorithm implemented when diffopt +/// 'linematch' is enabled. The algorithm constructs a 3d tensor to +/// compare a diff between 3 buffers. The dimmensions of the tensor are +/// the length of the diff in each buffer plus 1 A path is constructed by +/// moving from one edge of the cube/3d tensor to the opposite edge. +/// Motions from one cell of the cube to the next represent decisions. In +/// a 3d cube, there are a total of 7 decisions that can be made, +/// represented by the enum df_path3_choice which is defined in +/// buffer_defs.h a comparison of buffer 0 and 1 represents a motion +/// toward the opposite edge of the cube with components along the 0 and +/// 1 axes. a comparison of buffer 0, 1, and 2 represents a motion +/// toward the opposite edge of the cube with components along the 0, 1, +/// and 2 axes. A skip of buffer 0 represents a motion along only the 0 +/// axis. For each action, a point value is awarded, and the path is +/// saved for reference later, if it is found to have been the optimal +/// path. The optimal path has the highest score. The score is +/// calculated as the summation of the total characters matching between +/// all of the lines which were compared. The structure of the algorithm +/// is that of a dynamic programming problem. We can calculate a point +/// i,j,k in the cube as a function of i-1, j-1, and k-1. To find the +/// score and path at point i,j,k, we must determine which path we want +/// to use, this is done by looking at the possibilities and choosing +/// the one which results in the local highest score. The total highest +/// scored path is, then in the end represented by the cell in the +/// opposite corner from the start location. The entire algorithm +/// consits of populating the 3d cube with the optimal paths from which +/// it may have came. +/// +/// Optimizations: +/// As the function to calculate the cell of a tensor at point i,j,k is a +/// function of the cells at i-1, j-1, k-1, the whole tensor doesn't need +/// to be stored in memory at once. In the case of the 3d cube, only two +/// slices (along k and j axis) are stored in memory. For the 2d matrix +/// (for 2 files), only two rows are stored at a time. The next/previous +/// slice (or row) is always calculated from the other, and they alternate +/// at each iteration. +/// In the 3d case, 3 arrays are populated to memorize the score (matched +/// characters) of the 3 buffers, so a redundant calculation of the +/// scores does not occur +/// @param diff_blk +/// @param diff_len +/// @param ndiffs +/// @param [out] [allocated] decisions +/// @return the length of decisions +size_t linematch_nbuffers(const char **diff_blk, const int *diff_len, const size_t ndiffs, + int **decisions, bool iwhite) +{ + assert(ndiffs <= LN_MAX_BUFS); + + size_t memsize = 1; + size_t memsize_decisions = 0; + for (size_t i = 0; i < ndiffs; i++) { + assert(diff_len[i] >= 0); + memsize *= i == 0 ? 2 : (size_t)(diff_len[i] + 1); + memsize_decisions += (size_t)diff_len[i]; + } + + // create the flattened path matrix + diffcmppath_T *diffcmppath = xmalloc(sizeof(diffcmppath_T) * memsize); + // allocate memory here + for (size_t i = 0; i < memsize; i++) { + diffcmppath[i].df_decision = xmalloc(memsize_decisions * sizeof(int)); + } + + // memory for avoiding repetitive calculations of score + int df_iters[LN_MAX_BUFS]; + populate_tensor(df_iters, 0, diffcmppath, diff_len, ndiffs, diff_blk, iwhite); + + const size_t u = unwrap_indexes(diff_len, diff_len, ndiffs); + const size_t best_path_idx = diffcmppath[u].df_path_idx; + const int *best_path_decisions = diffcmppath[u].df_decision; + + *decisions = xmalloc(sizeof(int) * best_path_idx); + for (size_t i = 0; i < best_path_idx; i++) { + (*decisions)[i] = best_path_decisions[i]; + } + + for (size_t i = 0; i < memsize; i++) { + xfree(diffcmppath[i].df_decision); + } + xfree(diffcmppath); + + return best_path_idx; +} diff --git a/src/nvim/linematch.h b/src/nvim/linematch.h new file mode 100644 index 0000000000..052d438617 --- /dev/null +++ b/src/nvim/linematch.h @@ -0,0 +1,10 @@ +#ifndef NVIM_LINEMATCH_H +#define NVIM_LINEMATCH_H + +#include <stddef.h> + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "linematch.h.generated.h" +#endif + +#endif // NVIM_LINEMATCH_H diff --git a/src/nvim/lua/xdiff.c b/src/nvim/lua/xdiff.c index b2b5dfedee..428a33cdad 100644 --- a/src/nvim/lua/xdiff.c +++ b/src/nvim/lua/xdiff.c @@ -10,12 +10,16 @@ #include <string.h> #include "nvim/api/private/helpers.h" +#include "nvim/linematch.h" #include "nvim/lua/converter.h" #include "nvim/lua/executor.h" #include "nvim/lua/xdiff.h" #include "nvim/vim.h" #include "xdiff/xdiff.h" +#define COMPARED_BUFFER0 (1 << 0) +#define COMPARED_BUFFER1 (1 << 1) + typedef enum { kNluaXdiffModeUnified = 0, kNluaXdiffModeOnHunkCB, @@ -25,12 +29,72 @@ typedef enum { typedef struct { lua_State *lstate; Error *err; + mmfile_t *ma; + mmfile_t *mb; + bool linematch; + bool iwhite; } hunkpriv_t; #ifdef INCLUDE_GENERATED_DECLARATIONS # include "lua/xdiff.c.generated.h" #endif +static void lua_pushhunk(lua_State *lstate, long start_a, long count_a, long start_b, long count_b) +{ + lua_createtable(lstate, 0, 0); + lua_pushinteger(lstate, start_a); + lua_rawseti(lstate, -2, 1); + lua_pushinteger(lstate, count_a); + lua_rawseti(lstate, -2, 2); + lua_pushinteger(lstate, start_b); + lua_rawseti(lstate, -2, 3); + lua_pushinteger(lstate, count_b); + lua_rawseti(lstate, -2, 4); + lua_rawseti(lstate, -2, (signed)lua_objlen(lstate, -2) + 1); +} + +static void get_linematch_results(lua_State *lstate, mmfile_t *ma, mmfile_t *mb, long start_a, + long count_a, long start_b, long count_b, bool iwhite) +{ + // get the pointer to char of the start of the diff to pass it to linematch algorithm + const char *diff_begin[2] = { ma->ptr, mb->ptr }; + int diff_length[2] = { (int)count_a, (int)count_b }; + + fastforward_buf_to_lnum(&diff_begin[0], (size_t)start_a); + fastforward_buf_to_lnum(&diff_begin[1], (size_t)start_b); + + int *decisions = NULL; + size_t decisions_length = linematch_nbuffers(diff_begin, diff_length, 2, &decisions, iwhite); + + long lnuma = start_a, lnumb = start_b; + + long hunkstarta = lnuma; + long hunkstartb = lnumb; + long hunkcounta = 0; + long hunkcountb = 0; + for (size_t i = 0; i < decisions_length; i++) { + if (i && (decisions[i - 1] != decisions[i])) { + lua_pushhunk(lstate, hunkstarta, hunkcounta, hunkstartb, hunkcountb); + + hunkstarta = lnuma; + hunkstartb = lnumb; + hunkcounta = 0; + hunkcountb = 0; + // create a new hunk + } + if (decisions[i] & COMPARED_BUFFER0) { + lnuma++; + hunkcounta++; + } + if (decisions[i] & COMPARED_BUFFER1) { + lnumb++; + hunkcountb++; + } + } + lua_pushhunk(lstate, hunkstarta, hunkcounta, hunkstartb, hunkcountb); + xfree(decisions); +} + static int write_string(void *priv, mmbuffer_t *mb, int nbuf) { luaL_Buffer *buf = (luaL_Buffer *)priv; @@ -61,20 +125,14 @@ static int hunk_locations_cb(long start_a, long count_a, long start_b, long coun if (count_b > 0) { start_b += 1; } - - lua_State *lstate = (lua_State *)cb_data; - lua_createtable(lstate, 0, 0); - - lua_pushinteger(lstate, start_a); - lua_rawseti(lstate, -2, 1); - lua_pushinteger(lstate, count_a); - lua_rawseti(lstate, -2, 2); - lua_pushinteger(lstate, start_b); - lua_rawseti(lstate, -2, 3); - lua_pushinteger(lstate, count_b); - lua_rawseti(lstate, -2, 4); - - lua_rawseti(lstate, -2, (signed)lua_objlen(lstate, -2) + 1); + hunkpriv_t *priv = (hunkpriv_t *)cb_data; + lua_State *lstate = priv->lstate; + if (priv->linematch) { + get_linematch_results(lstate, priv->ma, priv->mb, start_a, count_a, start_b, count_b, + priv->iwhite); + } else { + lua_pushhunk(lstate, start_a, count_a, start_b, count_b); + } return 0; } @@ -149,7 +207,7 @@ static bool check_xdiff_opt(ObjectType actType, ObjectType expType, const char * } static NluaXdiffMode process_xdl_diff_opts(lua_State *lstate, xdemitconf_t *cfg, xpparam_t *params, - Error *err) + bool *linematch, Error *err) { const DictionaryOf(LuaRef) opts = nlua_pop_Dictionary(lstate, true, err); @@ -205,6 +263,11 @@ static NluaXdiffMode process_xdl_diff_opts(lua_State *lstate, xdemitconf_t *cfg, goto exit_1; } cfg->interhunkctxlen = v->data.integer; + } else if (strequal("linematch", k.data)) { + *linematch = api_object_to_bool(*v, "linematch", false, err); + if (ERROR_SET(err)) { + goto exit_1; + } } else { struct { const char *name; @@ -244,10 +307,8 @@ static NluaXdiffMode process_xdl_diff_opts(lua_State *lstate, xdemitconf_t *cfg, if (had_on_hunk) { mode = kNluaXdiffModeOnHunkCB; - cfg->hunk_func = call_on_hunk_cb; } else if (had_result_type_indices) { mode = kNluaXdiffModeLocations; - cfg->hunk_func = hunk_locations_cb; } exit_1: @@ -268,6 +329,7 @@ int nlua_xdl_diff(lua_State *lstate) xdemitconf_t cfg; xpparam_t params; xdemitcb_t ecb; + bool linematch = false; CLEAR_FIELD(cfg); CLEAR_FIELD(params); @@ -280,7 +342,7 @@ int nlua_xdl_diff(lua_State *lstate) return luaL_argerror(lstate, 3, "expected table"); } - mode = process_xdl_diff_opts(lstate, &cfg, ¶ms, &err); + mode = process_xdl_diff_opts(lstate, &cfg, ¶ms, &linematch, &err); if (ERROR_SET(&err)) { goto exit_0; @@ -288,7 +350,7 @@ int nlua_xdl_diff(lua_State *lstate) } luaL_Buffer buf; - hunkpriv_t *priv = NULL; + hunkpriv_t priv; switch (mode) { case kNluaXdiffModeUnified: luaL_buffinit(lstate, &buf); @@ -296,14 +358,24 @@ int nlua_xdl_diff(lua_State *lstate) ecb.out_line = write_string; break; case kNluaXdiffModeOnHunkCB: - priv = xmalloc(sizeof(*priv)); - priv->lstate = lstate; - priv->err = &err; - ecb.priv = priv; + cfg.hunk_func = call_on_hunk_cb; + priv = (hunkpriv_t) { + .lstate = lstate, + .err = &err, + }; + ecb.priv = &priv; break; case kNluaXdiffModeLocations: + cfg.hunk_func = hunk_locations_cb; + priv = (hunkpriv_t) { + .lstate = lstate, + .ma = &ma, + .mb = &mb, + .linematch = linematch, + .iwhite = (params.flags & XDF_IGNORE_WHITESPACE) > 0 + }; + ecb.priv = &priv; lua_createtable(lstate, 0, 0); - ecb.priv = lstate; break; } @@ -314,8 +386,6 @@ int nlua_xdl_diff(lua_State *lstate) } } - XFREE_CLEAR(priv); - exit_0: if (ERROR_SET(&err)) { luaL_where(lstate, 1); |