aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLewis Russell <lewis6991@gmail.com>2024-09-26 16:10:11 +0100
committerLewis Russell <lewis6991@gmail.com>2024-09-30 11:51:33 +0100
commitc65646c2474d22948c604168a68f6626a645d1d2 (patch)
treefcf8e49ae06878638e0dacc34e08d7892ccc8524
parent20251be15a4ad3f6e7016450ca3338d52b2f0951 (diff)
downloadrneovim-c65646c2474d22948c604168a68f6626a645d1d2.tar.gz
rneovim-c65646c2474d22948c604168a68f6626a645d1d2.tar.bz2
rneovim-c65646c2474d22948c604168a68f6626a645d1d2.zip
fix(diff): use mmfile_t in linematch
Problem: Linematch used to use strchr to navigate a string, however strchr does not support embedded NULs. Solution: Use `mmfile_t` instead of `char *` in linematch and introduce `strnchr()`. Also remove heap allocations from `matching_chars_iwhite()`. Fixes: #30505
-rw-r--r--runtime/doc/lua.txt26
-rw-r--r--runtime/lua/vim/_meta/diff.lua26
-rwxr-xr-xsrc/clint.py1
-rw-r--r--src/nvim/diff.c6
-rw-r--r--src/nvim/linematch.c94
-rw-r--r--src/nvim/linematch.h1
-rw-r--r--src/nvim/lua/xdiff.c8
-rw-r--r--src/nvim/strings.c14
-rw-r--r--test/functional/lua/xdiff_spec.lua9
9 files changed, 107 insertions, 78 deletions
diff --git a/runtime/doc/lua.txt b/runtime/doc/lua.txt
index 29b35d69a0..cbe283065d 100644
--- a/runtime/doc/lua.txt
+++ b/runtime/doc/lua.txt
@@ -713,8 +713,8 @@ vim.diff({a}, {b}, {opts}) *vim.diff()*
Parameters: ~
• {a} (`string`) First string to compare
• {b} (`string`) Second string to compare
- • {opts} (`table`) Optional parameters:
- • {on_hunk}
+ • {opts} (`table?`) Optional parameters:
+ • {on_hunk}?
(`fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer`)
Invoked for each hunk in the diff. Return a negative number
to cancel the callback for any remaining hunks. Arguments:
@@ -722,33 +722,33 @@ vim.diff({a}, {b}, {opts}) *vim.diff()*
• `count_a` (`integer`): Hunk size in {a}.
• `start_b` (`integer`): Start line of hunk in {b}.
• `count_b` (`integer`): Hunk size in {b}.
- • {result_type} (`'unified'|'indices'`, default: `'unified'`)
+ • {result_type}? (`'unified'|'indices'`, default: `'unified'`)
Form of the returned diff:
• `unified`: String in unified format.
• `indices`: Array of hunk locations. Note: This option is
ignored if `on_hunk` is used.
- • {linematch} (`boolean|integer`) Run linematch on the
+ • {linematch}? (`boolean|integer`) Run linematch on the
resulting hunks from xdiff. When integer, only hunks upto
this size in lines are run through linematch. Requires
`result_type = indices`, ignored otherwise.
- • {algorithm} (`'myers'|'minimal'|'patience'|'histogram'`,
+ • {algorithm}? (`'myers'|'minimal'|'patience'|'histogram'`,
default: `'myers'`) Diff algorithm to use. Values:
• `myers`: the default algorithm
• `minimal`: spend extra time to generate the smallest
possible diff
• `patience`: patience diff algorithm
• `histogram`: histogram diff algorithm
- • {ctxlen} (`integer`) Context length
- • {interhunkctxlen} (`integer`) Inter hunk context length
- • {ignore_whitespace} (`boolean`) Ignore whitespace
- • {ignore_whitespace_change} (`boolean`) Ignore whitespace
+ • {ctxlen}? (`integer`) Context length
+ • {interhunkctxlen}? (`integer`) Inter hunk context length
+ • {ignore_whitespace}? (`boolean`) Ignore whitespace
+ • {ignore_whitespace_change}? (`boolean`) Ignore whitespace
change
- • {ignore_whitespace_change_at_eol} (`boolean`) Ignore
+ • {ignore_whitespace_change_at_eol}? (`boolean`) Ignore
whitespace change at end-of-line.
- • {ignore_cr_at_eol} (`boolean`) Ignore carriage return at
+ • {ignore_cr_at_eol}? (`boolean`) Ignore carriage return at
end-of-line
- • {ignore_blank_lines} (`boolean`) Ignore blank lines
- • {indent_heuristic} (`boolean`) Use the indent heuristic for
+ • {ignore_blank_lines}? (`boolean`) Ignore blank lines
+ • {indent_heuristic}? (`boolean`) Use the indent heuristic for
the internal diff library.
Return: ~
diff --git a/runtime/lua/vim/_meta/diff.lua b/runtime/lua/vim/_meta/diff.lua
index 617bc87f59..5a0de68119 100644
--- a/runtime/lua/vim/_meta/diff.lua
+++ b/runtime/lua/vim/_meta/diff.lua
@@ -11,19 +11,19 @@
--- - `count_a` (`integer`): Hunk size in {a}.
--- - `start_b` (`integer`): Start line of hunk in {b}.
--- - `count_b` (`integer`): Hunk size in {b}.
---- @field on_hunk fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer
+--- @field on_hunk? fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer
---
--- Form of the returned diff:
--- - `unified`: String in unified format.
--- - `indices`: Array of hunk locations.
--- Note: This option is ignored if `on_hunk` is used.
--- (default: `'unified'`)
---- @field result_type 'unified'|'indices'
+--- @field result_type? 'unified'|'indices'
---
--- Run linematch on the resulting hunks from xdiff. When integer, only hunks
--- upto this size in lines are run through linematch.
--- Requires `result_type = indices`, ignored otherwise.
---- @field linematch boolean|integer
+--- @field linematch? boolean|integer
---
--- Diff algorithm to use. Values:
--- - `myers`: the default algorithm
@@ -31,15 +31,15 @@
--- - `patience`: patience diff algorithm
--- - `histogram`: histogram diff algorithm
--- (default: `'myers'`)
---- @field algorithm 'myers'|'minimal'|'patience'|'histogram'
---- @field ctxlen integer Context length
---- @field interhunkctxlen integer Inter hunk context length
---- @field ignore_whitespace boolean Ignore whitespace
---- @field ignore_whitespace_change boolean Ignore whitespace change
---- @field ignore_whitespace_change_at_eol boolean Ignore whitespace change at end-of-line.
---- @field ignore_cr_at_eol boolean Ignore carriage return at end-of-line
---- @field ignore_blank_lines boolean Ignore blank lines
---- @field indent_heuristic boolean Use the indent heuristic for the internal diff library.
+--- @field algorithm? 'myers'|'minimal'|'patience'|'histogram'
+--- @field ctxlen? integer Context length
+--- @field interhunkctxlen? integer Inter hunk context length
+--- @field ignore_whitespace? boolean Ignore whitespace
+--- @field ignore_whitespace_change? boolean Ignore whitespace change
+--- @field ignore_whitespace_change_at_eol? boolean Ignore whitespace change at end-of-line.
+--- @field ignore_cr_at_eol? boolean Ignore carriage return at end-of-line
+--- @field ignore_blank_lines? boolean Ignore blank lines
+--- @field indent_heuristic? boolean Use the indent heuristic for the internal diff library.
-- luacheck: no unused args
@@ -65,7 +65,7 @@
---
---@param a string First string to compare
---@param b string Second string to compare
----@param opts vim.diff.Opts
+---@param opts? vim.diff.Opts
---@return string|integer[][]?
--- See {opts.result_type}. `nil` if {opts.on_hunk} is given.
function vim.diff(a, b, opts) end
diff --git a/src/clint.py b/src/clint.py
index 1ed01382c8..b57bbe354b 100755
--- a/src/clint.py
+++ b/src/clint.py
@@ -881,6 +881,7 @@ def CheckIncludes(filename, lines, error):
"nvim/func_attr.h",
"termkey/termkey.h",
"vterm/vterm.h",
+ "xdiff/xdiff.h",
]
for i in check_includes_ignore:
diff --git a/src/nvim/diff.c b/src/nvim/diff.c
index 05e5bed50c..d22fb65827 100644
--- a/src/nvim/diff.c
+++ b/src/nvim/diff.c
@@ -2005,7 +2005,7 @@ static void run_linematch_algorithm(diff_T *dp)
{
// define buffers for diff algorithm
mmfile_t diffbufs_mm[DB_COUNT];
- const char *diffbufs[DB_COUNT];
+ const mmfile_t *diffbufs[DB_COUNT];
int diff_length[DB_COUNT];
size_t ndiffs = 0;
for (int i = 0; i < DB_COUNT; i++) {
@@ -2015,9 +2015,7 @@ static void run_linematch_algorithm(diff_T *dp)
diff_write_buffer(curtab->tp_diffbuf[i], &diffbufs_mm[ndiffs],
dp->df_lnum[i], dp->df_lnum[i] + dp->df_count[i] - 1);
- // we want to get the char* to the diff buffer that was just written
- // we add it to the array of char*, diffbufs
- diffbufs[ndiffs] = diffbufs_mm[ndiffs].ptr;
+ diffbufs[ndiffs] = &diffbufs_mm[ndiffs];
// keep track of the length of this diff block to pass it to the linematch
// algorithm
diff --git a/src/nvim/linematch.c b/src/nvim/linematch.c
index 24b2c82381..8943e6e8a6 100644
--- a/src/nvim/linematch.c
+++ b/src/nvim/linematch.c
@@ -10,6 +10,8 @@
#include "nvim/macros_defs.h"
#include "nvim/memory.h"
#include "nvim/pos_defs.h"
+#include "nvim/strings.h"
+#include "xdiff/xdiff.h"
#define LN_MAX_BUFS 8
#define LN_DECISION_MAX 255 // pow(2, LN_MAX_BUFS(8)) - 1 = 255
@@ -29,49 +31,49 @@ struct diffcmppath_S {
# include "linematch.c.generated.h"
#endif
-static size_t line_len(const char *s)
+static size_t line_len(const mmfile_t *m)
{
- char *end = strchr(s, '\n');
+ char *s = m->ptr;
+ size_t n = (size_t)m->size;
+ char *end = strnchr(s, &n, '\n');
if (end) {
return (size_t)(end - s);
}
- return strlen(s);
+ return (size_t)m->size;
}
+#define MATCH_CHAR_MAX_LEN 800
+
/// Same as matching_chars but ignore whitespace
///
/// @param s1
/// @param s2
-static int matching_chars_iwhite(const char *s1, const char *s2)
+static int matching_chars_iwhite(const mmfile_t *s1, const mmfile_t *s2)
{
// the newly processed strings that will be compared
- // delete the white space characters, and/or replace all upper case with lower
- char *strsproc[2];
- const char *strsorig[2] = { s1, s2 };
+ // delete the white space characters
+ mmfile_t sp[2];
+ char p[2][MATCH_CHAR_MAX_LEN];
for (int k = 0; k < 2; k++) {
- size_t d = 0;
- size_t i = 0;
- size_t slen = line_len(strsorig[k]);
- strsproc[k] = xmalloc((slen + 1) * sizeof(char));
- while (d + i < slen) {
- char e = strsorig[k][i + d];
+ const mmfile_t *s = k == 0 ? s1 : s2;
+ size_t pi = 0;
+ size_t slen = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s));
+ for (size_t i = 0; i <= slen; i++) {
+ char e = s->ptr[i];
if (e != ' ' && e != '\t') {
- strsproc[k][i] = e;
- i++;
- } else {
- d++;
+ p[k][pi] = e;
+ pi++;
}
}
- strsproc[k][i] = NUL;
+
+ sp[k] = (mmfile_t){
+ .ptr = p[k],
+ .size = (int)pi,
+ };
}
- int matching = matching_chars(strsproc[0], strsproc[1]);
- xfree(strsproc[0]);
- xfree(strsproc[1]);
- return matching;
+ return matching_chars(&sp[0], &sp[1]);
}
-#define MATCH_CHAR_MAX_LEN 800
-
/// Return matching characters between "s1" and "s2" whilst respecting sequence order.
/// Consider the case of two strings 'AAACCC' and 'CCCAAA', the
/// return value from this function will be 3, either to match
@@ -83,12 +85,14 @@ static int matching_chars_iwhite(const char *s1, const char *s2)
/// matching_chars("abcdefg", "gfedcba") -> 1 // all characters in common,
/// // but only at most 1 in sequence
///
-/// @param s1
-/// @param s2
-static int matching_chars(const char *s1, const char *s2)
+/// @param m1
+/// @param m2
+static int matching_chars(const mmfile_t *m1, const mmfile_t *m2)
{
- size_t s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s1));
- size_t s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s2));
+ size_t s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m1));
+ size_t s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m2));
+ char *s1 = m1->ptr;
+ char *s2 = m2->ptr;
int matrix[2][MATCH_CHAR_MAX_LEN] = { 0 };
bool icur = 1; // save space by storing only two rows for i axis
for (size_t i = 0; i < s1len; i++) {
@@ -119,13 +123,13 @@ static int matching_chars(const char *s1, const char *s2)
/// @param sp
/// @param fomvals
/// @param n
-static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite)
+static int count_n_matched_chars(mmfile_t **sp, const size_t n, bool iwhite)
{
int matched_chars = 0;
int matched = 0;
for (size_t i = 0; i < n; i++) {
for (size_t j = i + 1; j < n; j++) {
- if (sp[i] != NULL && sp[j] != NULL) {
+ if (sp[i]->ptr != NULL && sp[j]->ptr != NULL) {
matched++;
// TODO(lewis6991): handle whitespace ignoring higher up in the stack
matched_chars += iwhite ? matching_chars_iwhite(sp[i], sp[j])
@@ -143,15 +147,17 @@ static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite)
return matched_chars;
}
-void fastforward_buf_to_lnum(const char **s, linenr_T lnum)
+mmfile_t fastforward_buf_to_lnum(mmfile_t s, linenr_T lnum)
{
for (int i = 0; i < lnum - 1; i++) {
- *s = strchr(*s, '\n');
- if (!*s) {
- return;
+ s.ptr = strnchr(s.ptr, (size_t *)&s.size, '\n');
+ if (!s.ptr) {
+ break;
}
- (*s)++;
+ s.ptr++;
+ s.size--;
}
+ return s;
}
/// try all the different ways to compare these lines and use the one that
@@ -167,25 +173,25 @@ void fastforward_buf_to_lnum(const char **s, linenr_T lnum)
/// @param diff_blk
static void try_possible_paths(const int *df_iters, const size_t *paths, const int npaths,
const int path_idx, int *choice, diffcmppath_T *diffcmppath,
- const int *diff_len, const size_t ndiffs, const char **diff_blk,
+ const int *diff_len, const size_t ndiffs, const mmfile_t **diff_blk,
bool iwhite)
{
if (path_idx == npaths) {
if ((*choice) > 0) {
int from_vals[LN_MAX_BUFS] = { 0 };
const int *to_vals = df_iters;
- const char *current_lines[LN_MAX_BUFS];
+ mmfile_t mm[LN_MAX_BUFS]; // stack memory for current_lines
+ mmfile_t *current_lines[LN_MAX_BUFS];
for (size_t k = 0; k < ndiffs; k++) {
from_vals[k] = df_iters[k];
// get the index at all of the places
if ((*choice) & (1 << k)) {
from_vals[k]--;
- const char *p = diff_blk[k];
- fastforward_buf_to_lnum(&p, df_iters[k]);
- current_lines[k] = p;
+ mm[k] = fastforward_buf_to_lnum(*diff_blk[k], df_iters[k]);
} else {
- current_lines[k] = NULL;
+ mm[k] = (mmfile_t){ 0 };
}
+ current_lines[k] = &mm[k];
}
size_t unwrapped_idx_from = unwrap_indexes(from_vals, diff_len, ndiffs);
size_t unwrapped_idx_to = unwrap_indexes(to_vals, diff_len, ndiffs);
@@ -244,7 +250,7 @@ static size_t unwrap_indexes(const int *values, const int *diff_len, const size_
/// @param ndiffs
/// @param diff_blk
static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *diffcmppath,
- const int *diff_len, const size_t ndiffs, const char **diff_blk,
+ const int *diff_len, const size_t ndiffs, const mmfile_t **diff_blk,
bool iwhite)
{
if (ch_dim == ndiffs) {
@@ -327,7 +333,7 @@ static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *d
/// @param ndiffs
/// @param [out] [allocated] decisions
/// @return the length of decisions
-size_t linematch_nbuffers(const char **diff_blk, const int *diff_len, const size_t ndiffs,
+size_t linematch_nbuffers(const mmfile_t **diff_blk, const int *diff_len, const size_t ndiffs,
int **decisions, bool iwhite)
{
assert(ndiffs <= LN_MAX_BUFS);
diff --git a/src/nvim/linematch.h b/src/nvim/linematch.h
index eaf0d54bec..5f6667a7df 100644
--- a/src/nvim/linematch.h
+++ b/src/nvim/linematch.h
@@ -3,6 +3,7 @@
#include <stddef.h> // IWYU pragma: keep
#include "nvim/pos_defs.h" // IWYU pragma: keep
+#include "xdiff/xdiff.h"
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "linematch.h.generated.h"
diff --git a/src/nvim/lua/xdiff.c b/src/nvim/lua/xdiff.c
index 8d791a7e74..b9f96abf73 100644
--- a/src/nvim/lua/xdiff.c
+++ b/src/nvim/lua/xdiff.c
@@ -67,11 +67,11 @@ static void get_linematch_results(lua_State *lstate, mmfile_t *ma, mmfile_t *mb,
int count_a, int start_b, int count_b, bool iwhite)
{
// get the pointer to char of the start of the diff to pass it to linematch algorithm
- const char *diff_begin[2] = { ma->ptr, mb->ptr };
- int diff_length[2] = { count_a, count_b };
+ mmfile_t ma0 = fastforward_buf_to_lnum(*ma, (linenr_T)start_a + 1);
+ mmfile_t mb0 = fastforward_buf_to_lnum(*mb, (linenr_T)start_b + 1);
- fastforward_buf_to_lnum(&diff_begin[0], (linenr_T)start_a + 1);
- fastforward_buf_to_lnum(&diff_begin[1], (linenr_T)start_b + 1);
+ const mmfile_t *diff_begin[2] = { &ma0, &mb0 };
+ int diff_length[2] = { count_a, count_b };
int *decisions = NULL;
size_t decisions_length = linematch_nbuffers(diff_begin, diff_length, 2, &decisions, iwhite);
diff --git a/src/nvim/strings.c b/src/nvim/strings.c
index b7a87ae030..118abbae6d 100644
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@@ -496,6 +496,20 @@ char *vim_strchr(const char *const string, const int c)
}
}
+// Sized version of strchr that can handle embedded NULs.
+// Adjusts n to the new size.
+char *strnchr(const char *p, size_t *n, int c)
+{
+ while (*n > 0) {
+ if (*p == c) {
+ return (char *)p;
+ }
+ p++;
+ (*n)--;
+ }
+ return NULL;
+}
+
// Sort an array of strings.
static int sort_compare(const void *s1, const void *s2)
diff --git a/test/functional/lua/xdiff_spec.lua b/test/functional/lua/xdiff_spec.lua
index d5589c1f13..ed65193244 100644
--- a/test/functional/lua/xdiff_spec.lua
+++ b/test/functional/lua/xdiff_spec.lua
@@ -174,4 +174,13 @@ describe('xdiff bindings', function()
pcall_err(exec_lua, [[vim.diff('a', 'b', { on_hunk = true })]])
)
end)
+
+ it('can handle strings with embedded NUL characters (GitHub #30305)', function()
+ eq(
+ { { 0, 0, 1, 1 }, { 1, 0, 3, 2 } },
+ exec_lua(function()
+ return vim.diff('\n', '\0\n\n\nb', { linematch = true, result_type = 'indices' })
+ end)
+ )
+ end)
end)