aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--runtime/doc/builtin.txt22
-rw-r--r--runtime/doc/usr_41.txt1
-rw-r--r--src/nvim/eval.lua1
-rw-r--r--src/nvim/search.c298
-rw-r--r--src/nvim/testdir/test_functions.vim26
5 files changed, 348 insertions, 0 deletions
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt
index 56bc8bfb3e..198ef25ed9 100644
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -296,6 +296,7 @@ matcharg({nr}) List arguments of |:match|
matchdelete({id} [, {win}]) Number delete match identified by {id}
matchend({expr}, {pat}[, {start}[, {count}]])
Number position where {pat} ends in {expr}
+matchfuzzy({list}, {str}) List fuzzy match {str} in {list}
matchlist({expr}, {pat}[, {start}[, {count}]])
List match and submatches of {pat} in {expr}
matchstr({expr}, {pat}[, {start}[, {count}]])
@@ -4857,6 +4858,27 @@ matchend({expr}, {pat} [, {start} [, {count}]]) *matchend()*
Can also be used as a |method|: >
GetText()->matchend('word')
+matchfuzzy({list}, {str}) *matchfuzzy()*
+ Returns a list with all the strings in {list} that fuzzy
+ match {str}. The strings in the returned list are sorted
+ based on the matching score. {str} is treated as a literal
+ string and regular expression matching is NOT supported.
+ The maximum supported {str} length is 256.
+
+ If there are no matching strings or there is an error, then an
+ empty list is returned. If length of {str} is greater than
+ 256, then returns an empty list.
+
+ Example: >
+ :echo matchfuzzy(["clay", "crow"], "cay")
+< results in ["clay"]. >
+ :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl")
+< results in a list of buffer names fuzzy matching "ndl". >
+ :echo v:oldfiles->matchfuzzy("test")
+< results in a list of file names fuzzy matching "test". >
+ :let l = readfile("buffer.c")->matchfuzzy("str")
+< results in a list of lines in "buffer.c" fuzzy matching "str".
+
matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()*
Same as |match()|, but return a |List|. The first item in the
list is the matched string, same as what matchstr() would
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index bf29c94d51..19f58cddd6 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -608,6 +608,7 @@ String manipulation: *string-functions*
toupper() turn a string to uppercase
match() position where a pattern matches in a string
matchend() position where a pattern match ends in a string
+ matchfuzzy() fuzzy matches a string in a list of strings
matchstr() match of a pattern in a string
matchstrpos() match and positions of a pattern in a string
matchlist() like matchstr() and also return submatches
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index eedc8ac45d..37b051222e 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -249,6 +249,7 @@ return {
matcharg={args=1, base=1},
matchdelete={args={1, 2}, base=1},
matchend={args={2, 4}, base=1},
+ matchfuzzy={args=2, base=1},
matchlist={args={2, 4}, base=1},
matchstr={args={2, 4}, base=1},
matchstrpos={args={2,4}, base=1},
diff --git a/src/nvim/search.c b/src/nvim/search.c
index 93180f97fe..7bc994fad3 100644
--- a/src/nvim/search.c
+++ b/src/nvim/search.c
@@ -4764,6 +4764,304 @@ the_end:
restore_last_search_pattern();
}
+/// Fuzzy string matching
+///
+/// Ported from the lib_fts library authored by Forrest Smith.
+/// https://github.com/forrestthewoods/lib_fts/tree/master/code
+///
+/// Blog describing the algorithm:
+/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
+///
+/// Each matching string is assigned a score. The following factors are checked:
+/// Matched letter
+/// Unmatched letter
+/// Consecutively matched letters
+/// Proximity to start
+/// Letter following a separator (space, underscore)
+/// Uppercase letter following lowercase (aka CamelCase)
+///
+/// Matched letters are good. Unmatched letters are bad. Matching near the start
+/// is good. Matching the first letter in the middle of a phrase is good.
+/// Matching the uppercase letters in camel case entries is good.
+///
+/// The score assigned for each factor is explained below.
+/// File paths are different from file names. File extensions may be ignorable.
+/// Single words care about consecutive matches but not separators or camel
+/// case.
+/// Score starts at 0
+/// Matched letter: +0 points
+/// Unmatched letter: -1 point
+/// Consecutive match bonus: +5 points
+/// Separator bonus: +10 points
+/// Camel case bonus: +10 points
+/// Unmatched leading letter: -3 points (max: -9)
+///
+/// There is some nuance to this. Scores don’t have an intrinsic meaning. The
+/// score range isn’t 0 to 100. It’s roughly [-50, 50]. Longer words have a
+/// lower minimum score due to unmatched letter penalty. Longer search patterns
+/// have a higher maximum score due to match bonuses.
+///
+/// Separator and camel case bonus is worth a LOT. Consecutive matches are worth
+/// quite a bit.
+///
+/// There is a penalty if you DON’T match the first three letters. Which
+/// effectively rewards matching near the start. However there’s no difference
+/// in matching between the middle and end.
+///
+/// There is not an explicit bonus for an exact match. Unmatched letters receive
+/// a penalty. So shorter strings and closer matches are worth more.
+typedef struct {
+ const listitem_T *item;
+ int score;
+} fuzzyItem_T;
+
+static bool fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, int *const outScore,
+ const char_u *const strBegin, const char_u *const srcMatches,
+ char_u *const matches, const int maxMatches, int nextMatch,
+ int *const recursionCount, const int recursionLimit)
+ FUNC_ATTR_NONNULL_ARG(1, 2, 3, 4, 6, 9) FUNC_ATTR_WARN_UNUSED_RESULT
+{
+ // Recursion params
+ bool recursiveMatch = false;
+ char_u bestRecursiveMatches[256];
+ int bestRecursiveScore = 0;
+
+ // Count recursions
+ (*recursionCount)++;
+ if (*recursionCount >= recursionLimit) {
+ return false;
+ }
+
+ // Detect end of strings
+ if (*fuzpat == '\0' || *str == '\0') {
+ return false;
+ }
+
+ // Loop through fuzpat and str looking for a match
+ bool first_match = true;
+ while (*fuzpat != '\0' && *str != '\0') {
+ // Found match
+ if (mb_tolower(*fuzpat) == mb_tolower(*str)) {
+ // Supplied matches buffer was too short
+ if (nextMatch >= maxMatches) {
+ return false;
+ }
+
+ // "Copy-on-Write" srcMatches into matches
+ if (first_match && srcMatches) {
+ memcpy(matches, srcMatches, nextMatch);
+ first_match = false;
+ }
+
+ // Recursive call that "skips" this match
+ char_u recursiveMatches[256];
+ int recursiveScore = 0;
+ if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore, strBegin, matches,
+ recursiveMatches, sizeof(recursiveMatches), nextMatch,
+ recursionCount, recursionLimit)) {
+ // Pick best recursive score
+ if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
+ memcpy(bestRecursiveMatches, recursiveMatches, 256);
+ bestRecursiveScore = recursiveScore;
+ }
+ recursiveMatch = true;
+ }
+
+ // Advance
+ matches[nextMatch++] = (char_u)(str - strBegin);
+ fuzpat++;
+ }
+ str++;
+ }
+
+ // Determine if full fuzpat was matched
+ const bool matched = *fuzpat == '\0';
+
+ // Calculate score
+ if (matched) {
+ // bonus for adjacent matches
+ const int sequential_bonus = 15;
+ // bonus if match occurs after a separator
+ const int separator_bonus = 30;
+ // bonus if match is uppercase and prev is lower
+ const int camel_bonus = 30;
+ // bonus if the first letter is matched
+ const int first_letter_bonus = 15;
+ // penalty applied for every letter in str before the first match
+ const int leading_letter_penalty = -5;
+ // maximum penalty for leading letters
+ const int max_leading_letter_penalty = -15;
+ // penalty for every letter that doesn't matter
+ const int unmatched_letter_penalty = -1;
+
+ // Iterate str to end
+ while (*str != '\0') {
+ str++;
+ }
+
+ // Initialize score
+ *outScore = 100;
+
+ // Apply leading letter penalty
+ int penalty = leading_letter_penalty * matches[0];
+ if (penalty < max_leading_letter_penalty) {
+ penalty = max_leading_letter_penalty;
+ }
+ *outScore += penalty;
+
+ // Apply unmatched penalty
+ const int unmatched = (int)(str - strBegin) - nextMatch;
+ *outScore += unmatched_letter_penalty * unmatched;
+
+ // Apply ordering bonuses
+ for (int i = 0; i < nextMatch; i++) {
+ const char_u currIdx = matches[i];
+
+ if (i > 0) {
+ const char_u prevIdx = matches[i - 1];
+
+ // Sequential
+ if (currIdx == (prevIdx + 1)) {
+ *outScore += sequential_bonus;
+ }
+ }
+
+ // Check for bonuses based on neighbor character value
+ if (currIdx > 0) {
+ // Camel case
+ const char_u neighbor = strBegin[currIdx - 1];
+ const char_u curr = strBegin[currIdx];
+
+ if (islower(neighbor) && isupper(curr)) {
+ *outScore += camel_bonus;
+ }
+
+ // Separator
+ const bool neighborSeparator = neighbor == '_' || neighbor == ' ';
+ if (neighborSeparator) {
+ *outScore += separator_bonus;
+ }
+ } else {
+ // First letter
+ *outScore += first_letter_bonus;
+ }
+ }
+ }
+
+ // Return best result
+ if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) {
+ // Recursive score is better than "this"
+ memcpy(matches, bestRecursiveMatches, maxMatches);
+ *outScore = bestRecursiveScore;
+ return true;
+ } else if (matched) {
+ return true; // "this" score is better than recursive
+ }
+
+ return false; // no match
+}
+
+/// fuzzy_match()
+///
+/// Performs exhaustive search via recursion to find all possible matches and
+/// match with highest score.
+/// Scores values have no intrinsic meaning. Possible score range is not
+/// normalized and varies with pattern.
+/// Recursion is limited internally (default=10) to prevent degenerate cases
+/// (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
+/// Uses char_u for match indices. Therefore patterns are limited to 256
+/// characters.
+///
+/// Returns true if fuzpat is found AND calculates a score.
+static bool fuzzy_match(const char_u *const str, const char_u *const fuzpat, int *const outScore)
+ FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
+{
+ char_u matches[256];
+ int recursionCount = 0;
+ int recursionLimit = 10;
+
+ *outScore = 0;
+
+ return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches, sizeof(matches), 0,
+ &recursionCount, recursionLimit);
+}
+
+/// Sort the fuzzy matches in the descending order of the match score.
+static int fuzzy_item_compare(const void *const s1, const void *const s2)
+ FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
+{
+ const int v1 = ((const fuzzyItem_T *)s1)->score;
+ const int v2 = ((const fuzzyItem_T *)s2)->score;
+
+ return v1 == v2 ? 0 : v1 > v2 ? -1 : 1;
+}
+
+/// Fuzzy search the string 'str' in 'strlist' and return the matching strings
+/// in 'fmatchlist'.
+static void match_fuzzy(const list_T *const strlist, const char_u *const str,
+ list_T *const fmatchlist)
+ FUNC_ATTR_NONNULL_ARG(2, 3)
+{
+ const long len = tv_list_len(strlist);
+ if (len == 0) {
+ return;
+ }
+
+ fuzzyItem_T *const ptrs = xmalloc(sizeof(fuzzyItem_T) * len);
+ long i = 0;
+ bool found_match = false;
+
+ // For all the string items in strlist, get the fuzzy matching score
+ TV_LIST_ITER_CONST(strlist, li, {
+ ptrs[i].item = li;
+ ptrs[i].score = -9999;
+ // ignore non-string items in the list
+ const typval_T *const tv = TV_LIST_ITEM_TV(li);
+ if (tv->v_type == VAR_STRING && tv->vval.v_string != NULL) {
+ int score;
+ if (fuzzy_match(tv->vval.v_string, str, &score)) {
+ ptrs[i].score = score;
+ found_match = true;
+ }
+ }
+ i++;
+ });
+
+ if (found_match) {
+ // Sort the list by the descending order of the match score
+ qsort(ptrs, (size_t)len, sizeof(fuzzyItem_T), fuzzy_item_compare);
+
+ // Copy the matching strings with 'score != -9999' to the return list
+ for (i = 0; i < len; i++) {
+ if (ptrs[i].score == -9999) {
+ break;
+ }
+ const typval_T *const tv = TV_LIST_ITEM_TV(ptrs[i].item);
+ tv_list_append_string(fmatchlist, (const char *)tv->vval.v_string, -1);
+ }
+ }
+
+ xfree(ptrs);
+}
+
+/// "matchfuzzy()" function
+void f_matchfuzzy(typval_T *argvars, typval_T *rettv, FunPtr fptr)
+{
+ if (argvars[0].v_type != VAR_LIST) {
+ emsg(_(e_listreq));
+ return;
+ }
+ if (argvars[0].vval.v_list == NULL) {
+ return;
+ }
+ if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) {
+ semsg(_(e_invarg2), tv_get_string(&argvars[1]));
+ return;
+ }
+ match_fuzzy(argvars[0].vval.v_list, (const char_u *)tv_get_string(&argvars[1]),
+ tv_list_alloc_ret(rettv, kListLenUnknown));
+}
+
/// Find identifiers or defines in included files.
/// If p_ic && (compl_cont_status & CONT_SOL) then ptr must be in lowercase.
///
diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim
index 438bed51c6..57ae0d3020 100644
--- a/src/nvim/testdir/test_functions.vim
+++ b/src/nvim/testdir/test_functions.vim
@@ -1731,6 +1731,32 @@ func Test_nr2char()
call assert_equal("\x80\xfc\b\xfd\x80\xfeX\x80\xfeX\x80\xfeX\x80\xfeX\x80\xfeX", eval('"\<M-' .. nr2char(0x40000000) .. '>"'))
endfunc
+" Test for matchfuzzy()
+func Test_matchfuzzy()
+ call assert_fails('call matchfuzzy(10, "abc")', 'E714:')
+ " Needs v8.2.1183.
+ " call assert_fails('call matchfuzzy(["abc"], [])', 'E730:')
+ call assert_fails('call matchfuzzy(["abc"], [])', 'E475:')
+ call assert_equal([], matchfuzzy([], 'abc'))
+ call assert_equal([], matchfuzzy(['abc'], ''))
+ call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac'))
+ call assert_equal([], matchfuzzy([10, 20], 'ac'))
+ call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
+ call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra'))
+ call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
+ call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
+ call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
+ call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
+ call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
+ call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
+
+ %bw!
+ eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)})
+ let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl')
+ call assert_equal(1, len(l))
+ call assert_match('needle', l[0])
+endfunc
+
" Test for getcurpos() and setpos()
func Test_getcurpos_setpos()
new