diff options
author | Sean Dewar <seandewar@users.noreply.github.com> | 2022-02-07 22:57:25 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-07 22:57:25 +0000 |
commit | 9259bc62150e28875d19ea1449d9ea649d842c62 (patch) | |
tree | 8aa408f641cf572b11b97020a516c4ed2039e9ea | |
parent | f02a5a7bdaafc1c5ff61aee133eb2b6ba5f57586 (diff) | |
parent | 02e74314459621bfd18b3b3a1e3ac261658dc096 (diff) | |
download | rneovim-9259bc62150e28875d19ea1449d9ea649d842c62.tar.gz rneovim-9259bc62150e28875d19ea1449d9ea649d842c62.tar.bz2 rneovim-9259bc62150e28875d19ea1449d9ea649d842c62.zip |
Merge pull request #16873 from seandewar/vim-8.2.1665
vim-patch:8.2.{1665,1726,1872,1893,1921,2280,2813}: `matchfuzzy` and friends
-rw-r--r-- | runtime/doc/builtin.txt | 85 | ||||
-rw-r--r-- | runtime/doc/pattern.txt | 33 | ||||
-rw-r--r-- | runtime/doc/quickfix.txt | 15 | ||||
-rw-r--r-- | runtime/doc/usr_41.txt | 2 | ||||
-rw-r--r-- | src/nvim/eval.lua | 2 | ||||
-rw-r--r-- | src/nvim/eval/funcs.c | 1 | ||||
-rw-r--r-- | src/nvim/ex_cmds.c | 6 | ||||
-rw-r--r-- | src/nvim/globals.h | 1 | ||||
-rw-r--r-- | src/nvim/quickfix.c | 121 | ||||
-rw-r--r-- | src/nvim/quickfix.h | 1 | ||||
-rw-r--r-- | src/nvim/search.c | 529 | ||||
-rw-r--r-- | src/nvim/search.h | 3 | ||||
-rw-r--r-- | src/nvim/testdir/test_matchfuzzy.vim | 248 | ||||
-rw-r--r-- | src/nvim/testdir/test_quickfix.vim | 50 |
14 files changed, 1049 insertions, 48 deletions
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 56bc8bfb3e..b7a9a06b3c 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -296,6 +296,10 @@ matcharg({nr}) List arguments of |:match| matchdelete({id} [, {win}]) Number delete match identified by {id} matchend({expr}, {pat}[, {start}[, {count}]]) Number position where {pat} ends in {expr} +matchfuzzy({list}, {str} [, {dict}]) + List fuzzy match {str} in {list} +matchfuzzypos({list}, {str} [, {dict}]) + List fuzzy match {str} in {list} matchlist({expr}, {pat}[, {start}[, {count}]]) List match and submatches of {pat} in {expr} matchstr({expr}, {pat}[, {start}[, {count}]]) @@ -4857,6 +4861,87 @@ matchend({expr}, {pat} [, {start} [, {count}]]) *matchend()* Can also be used as a |method|: > GetText()->matchend('word') +matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* + If {list} is a list of strings, then returns a |List| with all + the strings in {list} that fuzzy match {str}. The strings in + the returned list are sorted based on the matching score. + + The optional {dict} argument always supports the following + items: + matchseq When this item is present and {str} contains + multiple words separated by white space, then + returns only matches that contain the words in + the given sequence. + + If {list} is a list of dictionaries, then the optional {dict} + argument supports the following additional items: + key key of the item which is fuzzy matched against + {str}. The value of this item should be a + string. + text_cb |Funcref| that will be called for every item + in {list} to get the text for fuzzy matching. + This should accept a dictionary item as the + argument and return the text for that item to + use for fuzzy matching. + + {str} is treated as a literal string and regular expression + matching is NOT supported. The maximum supported {str} length + is 256. + + When {str} has multiple words each separated by white space, + then the list of strings that have all the words is returned. + + If there are no matching strings or there is an error, then an + empty list is returned. If length of {str} is greater than + 256, then returns an empty list. + + Refer to |fuzzy-match| for more information about fuzzy + matching strings. + + Example: > + :echo matchfuzzy(["clay", "crow"], "cay") +< results in ["clay"]. > + :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl") +< results in a list of buffer names fuzzy matching "ndl". > + :echo getbufinfo()->matchfuzzy("ndl", {'key' : 'name'}) +< results in a list of buffer information dicts with buffer + names fuzzy matching "ndl". > + :echo getbufinfo()->matchfuzzy("spl", + \ {'text_cb' : {v -> v.name}}) +< results in a list of buffer information dicts with buffer + names fuzzy matching "spl". > + :echo v:oldfiles->matchfuzzy("test") +< results in a list of file names fuzzy matching "test". > + :let l = readfile("buffer.c")->matchfuzzy("str") +< results in a list of lines in "buffer.c" fuzzy matching "str". > + :echo ['one two', 'two one']->matchfuzzy('two one') +< results in ['two one', 'one two']. > + :echo ['one two', 'two one']->matchfuzzy('two one', + \ {'matchseq': 1}) +< results in ['two one']. + +matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()* + Same as |matchfuzzy()|, but returns the list of matched + strings, the list of character positions where characters + in {str} matches and a list of matching scores. You can + use |byteidx()| to convert a character position to a byte + position. + + If {str} matches multiple times in a string, then only the + positions for the best match is returned. + + If there are no matching strings or there is an error, then a + list with three empty list items is returned. + + Example: > + :echo matchfuzzypos(['testing'], 'tsg') +< results in [['testing'], [[0, 2, 6]], [99]] > + :echo matchfuzzypos(['clay', 'lacy'], 'la') +< results in [['lacy', 'clay'], [[0, 1], [1, 2]], [153, 133]] > + :echo [{'text': 'hello', 'id' : 10}] + \ ->matchfuzzypos('ll', {'key' : 'text'}) +< results in [[{'id': 10, 'text': 'hello'}], [[2, 3]], [127]] + matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()* Same as |match()|, but return a |List|. The first item in the list is the matched string, same as what matchstr() would diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index 634145da3e..42005b0d78 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1421,5 +1421,38 @@ Finally, these constructs are unique to Perl: are suggested to use ":match" for manual matching and ":2match" for another plugin. +============================================================================== +11. Fuzzy matching *fuzzy-match* + +Fuzzy matching refers to matching strings using a non-exact search string. +Fuzzy matching will match a string, if all the characters in the search string +are present anywhere in the string in the same order. Case is ignored. In a +matched string, other characters can be present between two consecutive +characters in the search string. If the search string has multiple words, then +each word is matched separately. So the words in the search string can be +present in any order in a string. + +Fuzzy matching assigns a score for each matched string based on the following +criteria: + - The number of sequentially matching characters. + - The number of characters (distance) between two consecutive matching + characters. + - Matches at the beginning of a word + - Matches at a camel case character (e.g. Case in CamelCase) + - Matches after a path separator or a hyphen. + - The number of unmatched characters in a string. +The matching string with the highest score is returned first. + +For example, when you search for the "get pat" string using fuzzy matching, it +will match the strings "GetPattern", "PatternGet", "getPattern", "patGetter", +"getSomePattern", "MatchpatternGet" etc. + +The functions |matchfuzzy()| and |matchfuzzypos()| can be used to fuzzy search +a string in a List of strings. The matchfuzzy() function returns a List of +matching strings. The matchfuzzypos() functions returns the List of matches, +the matching positions and the fuzzy match scores. + +The "f" flag of `:vimgrep` enables fuzzy matching. + vim:tw=78:ts=8:noet:ft=help:norl: diff --git a/runtime/doc/quickfix.txt b/runtime/doc/quickfix.txt index bb4d807413..ed736ad4eb 100644 --- a/runtime/doc/quickfix.txt +++ b/runtime/doc/quickfix.txt @@ -989,7 +989,7 @@ commands can be combined to create a NewGrep command: > 5.1 using Vim's internal grep *:vim* *:vimgrep* *E682* *E683* -:vim[grep][!] /{pattern}/[g][j] {file} ... +:vim[grep][!] /{pattern}/[g][j][f] {file} ... Search for {pattern} in the files {file} ... and set the error list to the matches. Files matching 'wildignore' are ignored; files in 'suffixes' are @@ -1014,6 +1014,13 @@ commands can be combined to create a NewGrep command: > updated. With the [!] any changes in the current buffer are abandoned. + 'f' When the 'f' flag is specified, fuzzy string + matching is used to find matching lines. In this + case, {pattern} is treated as a literal string + instead of a regular expression. See + |fuzzy-match| for more information about fuzzy + matching strings. + |QuickFixCmdPre| and |QuickFixCmdPost| are triggered. A file that is opened for matching may use a buffer number, but it is reused if possible to avoid @@ -1042,20 +1049,20 @@ commands can be combined to create a NewGrep command: > :vimgrep Error *.c < *:lv* *:lvimgrep* -:lv[imgrep][!] /{pattern}/[g][j] {file} ... +:lv[imgrep][!] /{pattern}/[g][j][f] {file} ... :lv[imgrep][!] {pattern} {file} ... Same as ":vimgrep", except the location list for the current window is used instead of the quickfix list. *:vimgrepa* *:vimgrepadd* -:vimgrepa[dd][!] /{pattern}/[g][j] {file} ... +:vimgrepa[dd][!] /{pattern}/[g][j][f] {file} ... :vimgrepa[dd][!] {pattern} {file} ... Just like ":vimgrep", but instead of making a new list of errors the matches are appended to the current list. *:lvimgrepa* *:lvimgrepadd* -:lvimgrepa[dd][!] /{pattern}/[g][j] {file} ... +:lvimgrepa[dd][!] /{pattern}/[g][j][f] {file} ... :lvimgrepa[dd][!] {pattern} {file} ... Same as ":vimgrepadd", except the location list for the current window is used instead of the quickfix diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index bf29c94d51..bf024315f6 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -608,6 +608,8 @@ String manipulation: *string-functions* toupper() turn a string to uppercase match() position where a pattern matches in a string matchend() position where a pattern match ends in a string + matchfuzzy() fuzzy matches a string in a list of strings + matchfuzzypos() fuzzy matches a string in a list of strings matchstr() match of a pattern in a string matchstrpos() match and positions of a pattern in a string matchlist() like matchstr() and also return submatches diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua index eedc8ac45d..6dedd0f745 100644 --- a/src/nvim/eval.lua +++ b/src/nvim/eval.lua @@ -249,6 +249,8 @@ return { matcharg={args=1, base=1}, matchdelete={args={1, 2}, base=1}, matchend={args={2, 4}, base=1}, + matchfuzzy={args={2, 3}, base=1}, + matchfuzzypos={args={2, 3}, base=1}, matchlist={args={2, 4}, base=1}, matchstr={args={2, 4}, base=1}, matchstrpos={args={2,4}, base=1}, diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index 138745094c..111fae0928 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -98,7 +98,6 @@ PRAGMA_DIAG_POP #endif -static char *e_listarg = N_("E686: Argument of %s must be a List"); static char *e_listblobarg = N_("E899: Argument of %s must be a List or Blob"); static char *e_invalwindow = N_("E957: Invalid window number"); diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 3b3d4e50cc..81fce3565a 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -6141,12 +6141,14 @@ char_u *skip_vimgrep_pat(char_u *p, char_u **s, int *flags) p++; // Find the flags - while (*p == 'g' || *p == 'j') { + while (*p == 'g' || *p == 'j' || *p == 'f') { if (flags != NULL) { if (*p == 'g') { *flags |= VGR_GLOBAL; - } else { + } else if (*p == 'j') { *flags |= VGR_NOJUMP; + } else { + *flags |= VGR_FUZZY; } } p++; diff --git a/src/nvim/globals.h b/src/nvim/globals.h index 041b60d838..f6fbe98ff0 100644 --- a/src/nvim/globals.h +++ b/src/nvim/globals.h @@ -979,6 +979,7 @@ EXTERN char e_invalidreg[] INIT(= N_("E850: Invalid register name")); EXTERN char e_dirnotf[] INIT(= N_("E919: Directory not found in '%s': \"%s\"")); EXTERN char e_au_recursive[] INIT(= N_("E952: Autocommand caused recursive behavior")); EXTERN char e_autocmd_close[] INIT(= N_("E813: Cannot close autocmd window")); +EXTERN char e_listarg[] INIT(= N_("E686: Argument of %s must be a List")); EXTERN char e_unsupportedoption[] INIT(= N_("E519: Option not supported")); EXTERN char e_fnametoolong[] INIT(= N_("E856: Filename too long")); EXTERN char e_float_as_string[] INIT(= N_("E806: using Float as a String")); diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c index 0196e05455..c609daa55c 100644 --- a/src/nvim/quickfix.c +++ b/src/nvim/quickfix.c @@ -5194,49 +5194,93 @@ static bool vgr_qflist_valid(win_T *wp, qf_info_T *qi, unsigned qfid, char_u *ti /// Search for a pattern in all the lines in a buffer and add the matching lines /// to a quickfix list. -static bool vgr_match_buflines(qf_list_T *qfl, char_u *fname, buf_T *buf, regmmatch_T *regmatch, - long *tomatch, int duplicate_name, int flags) - FUNC_ATTR_NONNULL_ARG(1, 3, 4, 5) +static bool vgr_match_buflines(qf_list_T *qfl, char_u *fname, buf_T *buf, char_u *spat, + regmmatch_T *regmatch, long *tomatch, int duplicate_name, int flags) + FUNC_ATTR_NONNULL_ARG(1, 3, 4, 5, 6) { bool found_match = false; for (long lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; lnum++) { colnr_T col = 0; - while (vim_regexec_multi(regmatch, curwin, buf, lnum, col, NULL, - NULL) > 0) { - // Pass the buffer number so that it gets used even for a - // dummy buffer, unless duplicate_name is set, then the - // buffer will be wiped out below. - if (qf_add_entry(qfl, - NULL, // dir - fname, - NULL, - duplicate_name ? 0 : buf->b_fnum, - ml_get_buf(buf, regmatch->startpos[0].lnum + lnum, - false), - regmatch->startpos[0].lnum + lnum, - regmatch->endpos[0].lnum + lnum, - regmatch->startpos[0].col + 1, - regmatch->endpos[0].col + 1, - false, // vis_col - NULL, // search pattern - 0, // nr - 0, // type - true) // valid - == QF_FAIL) { - got_int = true; - break; - } - found_match = true; - if (--*tomatch == 0) { - break; - } - if ((flags & VGR_GLOBAL) == 0 || regmatch->endpos[0].lnum > 0) { - break; + if (!(flags & VGR_FUZZY)) { + // Regular expression match + while (vim_regexec_multi(regmatch, curwin, buf, lnum, col, NULL, NULL) > 0) { + // Pass the buffer number so that it gets used even for a + // dummy buffer, unless duplicate_name is set, then the + // buffer will be wiped out below. + if (qf_add_entry(qfl, + NULL, // dir + fname, + NULL, + duplicate_name ? 0 : buf->b_fnum, + ml_get_buf(buf, regmatch->startpos[0].lnum + lnum, false), + regmatch->startpos[0].lnum + lnum, + regmatch->endpos[0].lnum + lnum, + regmatch->startpos[0].col + 1, + regmatch->endpos[0].col + 1, + false, // vis_col + NULL, // search pattern + 0, // nr + 0, // type + true) // valid + == QF_FAIL) { + got_int = true; + break; + } + found_match = true; + if (--*tomatch == 0) { + break; + } + if ((flags & VGR_GLOBAL) == 0 || regmatch->endpos[0].lnum > 0) { + break; + } + col = regmatch->endpos[0].col + (col == regmatch->endpos[0].col); + if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, false))) { + break; + } } - col = regmatch->endpos[0].col + (col == regmatch->endpos[0].col); - if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, false))) { - break; + } else { + const size_t pat_len = STRLEN(spat); + char_u *const str = ml_get_buf(buf, lnum, false); + int score; + uint32_t matches[MAX_FUZZY_MATCHES]; + const size_t sz = sizeof(matches) / sizeof(matches[0]); + + // Fuzzy string match + while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz) > 0) { + // Pass the buffer number so that it gets used even for a + // dummy buffer, unless duplicate_name is set, then the + // buffer will be wiped out below. + if (qf_add_entry(qfl, + NULL, // dir + fname, + NULL, + duplicate_name ? 0 : buf->b_fnum, + str, + lnum, + 0, + (colnr_T)matches[0] + col + 1, + 0, + false, // vis_col + NULL, // search pattern + 0, // nr + 0, // type + true) // valid + == QF_FAIL) { + got_int = true; + break; + } + found_match = true; + if (--*tomatch == 0) { + break; + } + if ((flags & VGR_GLOBAL) == 0) { + break; + } + col = (colnr_T)matches[pat_len - 1] + col + 1; + if (col > (colnr_T)STRLEN(str)) { + break; + } } } line_breakcheck(); @@ -5418,8 +5462,7 @@ void ex_vimgrep(exarg_T *eap) } else { // Try for a match in all lines of the buffer. // For ":1vimgrep" look for first match only. - found_match = vgr_match_buflines(qf_get_curlist(qi), - fname, buf, ®match, &tomatch, + found_match = vgr_match_buflines(qf_get_curlist(qi), fname, buf, s, ®match, &tomatch, duplicate_name, flags); if (using_dummy) { diff --git a/src/nvim/quickfix.h b/src/nvim/quickfix.h index f5178e332a..0da43e436c 100644 --- a/src/nvim/quickfix.h +++ b/src/nvim/quickfix.h @@ -7,6 +7,7 @@ // flags for skip_vimgrep_pat() #define VGR_GLOBAL 1 #define VGR_NOJUMP 2 +#define VGR_FUZZY 4 #ifdef INCLUDE_GENERATED_DECLARATIONS # include "quickfix.h.generated.h" diff --git a/src/nvim/search.c b/src/nvim/search.c index 93180f97fe..3a2435e07a 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -4764,6 +4764,535 @@ the_end: restore_last_search_pattern(); } +/// Fuzzy string matching +/// +/// Ported from the lib_fts library authored by Forrest Smith. +/// https://github.com/forrestthewoods/lib_fts/tree/master/code +/// +/// The following blog describes the fuzzy matching algorithm: +/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ +/// +/// Each matching string is assigned a score. The following factors are checked: +/// - Matched letter +/// - Unmatched letter +/// - Consecutively matched letters +/// - Proximity to start +/// - Letter following a separator (space, underscore) +/// - Uppercase letter following lowercase (aka CamelCase) +/// +/// Matched letters are good. Unmatched letters are bad. Matching near the start +/// is good. Matching the first letter in the middle of a phrase is good. +/// Matching the uppercase letters in camel case entries is good. +/// +/// The score assigned for each factor is explained below. +/// File paths are different from file names. File extensions may be ignorable. +/// Single words care about consecutive matches but not separators or camel +/// case. +/// Score starts at 100 +/// Matched letter: +0 points +/// Unmatched letter: -1 point +/// Consecutive match bonus: +15 points +/// First letter bonus: +15 points +/// Separator bonus: +30 points +/// Camel case bonus: +30 points +/// Unmatched leading letter: -5 points (max: -15) +/// +/// There is some nuance to this. Scores don’t have an intrinsic meaning. The +/// score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a +/// lower minimum score due to unmatched letter penalty. Longer search patterns +/// have a higher maximum score due to match bonuses. +/// +/// Separator and camel case bonus is worth a LOT. Consecutive matches are worth +/// quite a bit. +/// +/// There is a penalty if you DON’T match the first three letters. Which +/// effectively rewards matching near the start. However there’s no difference +/// in matching between the middle and end. +/// +/// There is not an explicit bonus for an exact match. Unmatched letters receive +/// a penalty. So shorter strings and closer matches are worth more. +typedef struct { + int idx; ///< used for stable sort + listitem_T *item; + int score; + list_T *lmatchpos; +} fuzzyItem_T; + +/// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that +/// matching a whole word is preferred. +#define SEQUENTIAL_BONUS 40 +/// bonus if match occurs after a path separator +#define PATH_SEPARATOR_BONUS 30 +/// bonus if match occurs after a word separator +#define WORD_SEPARATOR_BONUS 25 +/// bonus if match is uppercase and prev is lower +#define CAMEL_BONUS 30 +/// bonus if the first letter is matched +#define FIRST_LETTER_BONUS 15 +/// penalty applied for every letter in str before the first match +#define LEADING_LETTER_PENALTY -5 +/// maximum penalty for leading letters +#define MAX_LEADING_LETTER_PENALTY -15 +/// penalty for every letter that doesn't match +#define UNMATCHED_LETTER_PENALTY -1 +/// penalty for gap in matching positions (-2 * k) +#define GAP_PENALTY -2 +/// Score for a string that doesn't fuzzy match the pattern +#define SCORE_NONE -9999 + +#define FUZZY_MATCH_RECURSION_LIMIT 10 + +/// Compute a score for a fuzzy matched string. The matching character locations +/// are in 'matches'. +static int fuzzy_match_compute_score(const char_u *const str, const int strSz, + const uint32_t *const matches, const int numMatches) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE +{ + // Initialize score + int score = 100; + + // Apply leading letter penalty + int penalty = LEADING_LETTER_PENALTY * matches[0]; + if (penalty < MAX_LEADING_LETTER_PENALTY) { + penalty = MAX_LEADING_LETTER_PENALTY; + } + score += penalty; + + // Apply unmatched penalty + const int unmatched = strSz - numMatches; + score += UNMATCHED_LETTER_PENALTY * unmatched; + + // Apply ordering bonuses + for (int i = 0; i < numMatches; i++) { + const uint32_t currIdx = matches[i]; + + if (i > 0) { + const uint32_t prevIdx = matches[i - 1]; + + // Sequential + if (currIdx == prevIdx + 1) { + score += SEQUENTIAL_BONUS; + } else { + score += GAP_PENALTY * (currIdx - prevIdx); + } + } + + // Check for bonuses based on neighbor character value + if (currIdx > 0) { + // Camel case + const char_u *p = str; + int neighbor; + + for (uint32_t sidx = 0; sidx < currIdx; sidx++) { + neighbor = utf_ptr2char(p); + MB_PTR_ADV(p); + } + const int curr = utf_ptr2char(p); + + if (mb_islower(neighbor) && mb_isupper(curr)) { + score += CAMEL_BONUS; + } + + // Bonus if the match follows a separator character + if (neighbor == '/' || neighbor == '\\') { + score += PATH_SEPARATOR_BONUS; + } else if (neighbor == ' ' || neighbor == '_') { + score += WORD_SEPARATOR_BONUS; + } + } else { + // First letter + score += FIRST_LETTER_BONUS; + } + } + return score; +} + +/// Perform a recursive search for fuzzy matching 'fuzpat' in 'str'. +/// @return the number of matching characters. +static int fuzzy_match_recursive(const char_u *fuzpat, const char_u *str, uint32_t strIdx, + int *const outScore, const char_u *const strBegin, + const int strLen, const uint32_t *const srcMatches, + uint32_t *const matches, const int maxMatches, int nextMatch, + int *const recursionCount) + FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT +{ + // Recursion params + bool recursiveMatch = false; + uint32_t bestRecursiveMatches[MAX_FUZZY_MATCHES]; + int bestRecursiveScore = 0; + + // Count recursions + (*recursionCount)++; + if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) { + return 0; + } + + // Detect end of strings + if (*fuzpat == NUL || *str == NUL) { + return 0; + } + + // Loop through fuzpat and str looking for a match + bool first_match = true; + while (*fuzpat != NUL && *str != NUL) { + const int c1 = utf_ptr2char(fuzpat); + const int c2 = utf_ptr2char(str); + + // Found match + if (mb_tolower(c1) == mb_tolower(c2)) { + // Supplied matches buffer was too short + if (nextMatch >= maxMatches) { + return 0; + } + + // "Copy-on-Write" srcMatches into matches + if (first_match && srcMatches != NULL) { + memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0])); + first_match = false; + } + + // Recursive call that "skips" this match + uint32_t recursiveMatches[MAX_FUZZY_MATCHES]; + int recursiveScore = 0; + const char_u *const next_char = str + utfc_ptr2len(str); + if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen, + matches, recursiveMatches, + sizeof(recursiveMatches) / sizeof(recursiveMatches[0]), nextMatch, + recursionCount)) { + // Pick best recursive score + if (!recursiveMatch || recursiveScore > bestRecursiveScore) { + memcpy(bestRecursiveMatches, recursiveMatches, + MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0])); + bestRecursiveScore = recursiveScore; + } + recursiveMatch = true; + } + + // Advance + matches[nextMatch++] = strIdx; + MB_PTR_ADV(fuzpat); + } + MB_PTR_ADV(str); + strIdx++; + } + + // Determine if full fuzpat was matched + const bool matched = *fuzpat == NUL; + + // Calculate score + if (matched) { + *outScore = fuzzy_match_compute_score(strBegin, strLen, matches, nextMatch); + } + + // Return best result + if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) { + // Recursive score is better than "this" + memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); + *outScore = bestRecursiveScore; + return nextMatch; + } else if (matched) { + return nextMatch; // "this" score is better than recursive + } + + return 0; // no match +} + +/// fuzzy_match() +/// +/// Performs exhaustive search via recursion to find all possible matches and +/// match with highest score. +/// Scores values have no intrinsic meaning. Possible score range is not +/// normalized and varies with pattern. +/// Recursion is limited internally (default=10) to prevent degenerate cases +/// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). +/// Uses char_u for match indices. Therefore patterns are limited to +/// MAX_FUZZY_MATCHES characters. +/// +/// @return true if 'pat_arg' matches 'str'. Also returns the match score in +/// 'outScore' and the matching character positions in 'matches'. +bool fuzzy_match(char_u *const str, const char_u *const pat_arg, const bool matchseq, + int *const outScore, uint32_t *const matches, const int maxMatches) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT +{ + const int len = mb_charlen(str); + bool complete = false; + int numMatches = 0; + + *outScore = 0; + + char_u *const save_pat = vim_strsave(pat_arg); + char_u *pat = save_pat; + char_u *p = pat; + + // Try matching each word in 'pat_arg' in 'str' + while (true) { + if (matchseq) { + complete = true; + } else { + // Extract one word from the pattern (separated by space) + p = skipwhite(p); + if (*p == NUL) { + break; + } + pat = p; + while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) { + MB_PTR_ADV(p); + } + if (*p == NUL) { // processed all the words + complete = true; + } + *p = NUL; + } + + int score = 0; + int recursionCount = 0; + const int matchCount + = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL, matches + numMatches, + maxMatches - numMatches, 0, &recursionCount); + if (matchCount == 0) { + numMatches = 0; + break; + } + + // Accumulate the match score and the number of matches + *outScore += score; + numMatches += matchCount; + + if (complete) { + break; + } + + // try matching the next word + p++; + } + + xfree(save_pat); + return numMatches != 0; +} + +/// Sort the fuzzy matches in the descending order of the match score. +/// For items with same score, retain the order using the index (stable sort) +static int fuzzy_match_item_compare(const void *const s1, const void *const s2) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE +{ + const int v1 = ((const fuzzyItem_T *)s1)->score; + const int v2 = ((const fuzzyItem_T *)s2)->score; + const int idx1 = ((const fuzzyItem_T *)s1)->idx; + const int idx2 = ((const fuzzyItem_T *)s2)->idx; + + return v1 == v2 ? (idx1 - idx2) : v1 > v2 ? -1 : 1; +} + +/// Fuzzy search the string 'str' in a list of 'items' and return the matching +/// strings in 'fmatchlist'. +/// If 'matchseq' is true, then for multi-word search strings, match all the +/// words in sequence. +/// If 'items' is a list of strings, then search for 'str' in the list. +/// If 'items' is a list of dicts, then either use 'key' to lookup the string +/// for each item or use 'item_cb' Funcref function to get the string. +/// If 'retmatchpos' is true, then return a list of positions where 'str' +/// matches for each item. +static void fuzzy_match_in_list(list_T *const items, char_u *const str, const bool matchseq, + const char_u *const key, Callback *const item_cb, + const bool retmatchpos, list_T *const fmatchlist) + FUNC_ATTR_NONNULL_ARG(2, 5, 7) +{ + const long len = tv_list_len(items); + if (len == 0) { + return; + } + + fuzzyItem_T *const ptrs = xcalloc(len, sizeof(fuzzyItem_T)); + long i = 0; + bool found_match = false; + uint32_t matches[MAX_FUZZY_MATCHES]; + + // For all the string items in items, get the fuzzy matching score + TV_LIST_ITER(items, li, { + ptrs[i].idx = i; + ptrs[i].item = li; + ptrs[i].score = SCORE_NONE; + char_u *itemstr = NULL; + typval_T rettv; + rettv.v_type = VAR_UNKNOWN; + const typval_T *const tv = TV_LIST_ITEM_TV(li); + if (tv->v_type == VAR_STRING) { // list of strings + itemstr = tv->vval.v_string; + } else if (tv->v_type == VAR_DICT && (key != NULL || item_cb->type != kCallbackNone)) { + // For a dict, either use the specified key to lookup the string or + // use the specified callback function to get the string. + if (key != NULL) { + itemstr = (char_u *)tv_dict_get_string(tv->vval.v_dict, (const char *)key, false); + } else { + typval_T argv[2]; + + // Invoke the supplied callback (if any) to get the dict item + tv->vval.v_dict->dv_refcount++; + argv[0].v_type = VAR_DICT; + argv[0].vval.v_dict = tv->vval.v_dict; + argv[1].v_type = VAR_UNKNOWN; + if (callback_call(item_cb, 1, argv, &rettv)) { + if (rettv.v_type == VAR_STRING) { + itemstr = rettv.vval.v_string; + } + } + tv_dict_unref(tv->vval.v_dict); + } + } + + int score; + if (itemstr != NULL && fuzzy_match(itemstr, str, matchseq, &score, matches, + sizeof(matches) / sizeof(matches[0]))) { + // Copy the list of matching positions in itemstr to a list, if + // 'retmatchpos' is set. + if (retmatchpos) { + ptrs[i].lmatchpos = tv_list_alloc(kListLenMayKnow); + int j = 0; + const char_u *p = str; + while (*p != NUL) { + if (!ascii_iswhite(utf_ptr2char(p))) { + tv_list_append_number(ptrs[i].lmatchpos, matches[j]); + j++; + } + MB_PTR_ADV(p); + } + } + ptrs[i].score = score; + found_match = true; + } + i++; + tv_clear(&rettv); + }); + + if (found_match) { + // Sort the list by the descending order of the match score + qsort(ptrs, len, sizeof(fuzzyItem_T), fuzzy_match_item_compare); + + // For matchfuzzy(), return a list of matched strings. + // ['str1', 'str2', 'str3'] + // For matchfuzzypos(), return a list with three items. + // The first item is a list of matched strings. The second item + // is a list of lists where each list item is a list of matched + // character positions. The third item is a list of matching scores. + // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] + list_T *l; + if (retmatchpos) { + const listitem_T *const li = tv_list_find(fmatchlist, 0); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + l = TV_LIST_ITEM_TV(li)->vval.v_list; + } else { + l = fmatchlist; + } + + // Copy the matching strings with a valid score to the return list + for (i = 0; i < len; i++) { + if (ptrs[i].score == SCORE_NONE) { + break; + } + tv_list_append_tv(l, TV_LIST_ITEM_TV(ptrs[i].item)); + } + + // next copy the list of matching positions + if (retmatchpos) { + const listitem_T *li = tv_list_find(fmatchlist, -2); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + l = TV_LIST_ITEM_TV(li)->vval.v_list; + for (i = 0; i < len; i++) { + if (ptrs[i].score == SCORE_NONE) { + break; + } + tv_list_append_list(l, ptrs[i].lmatchpos); + } + + // copy the matching scores + li = tv_list_find(fmatchlist, -1); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + l = TV_LIST_ITEM_TV(li)->vval.v_list; + for (i = 0; i < len; i++) { + if (ptrs[i].score == SCORE_NONE) { + break; + } + tv_list_append_number(l, ptrs[i].score); + } + } + } + xfree(ptrs); +} + +/// Do fuzzy matching. Returns the list of matched strings in 'rettv'. +/// If 'retmatchpos' is true, also returns the matching character positions. +static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, + const bool retmatchpos) + FUNC_ATTR_NONNULL_ALL +{ + // validate and get the arguments + if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) { + semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); + return; + } + if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) { + semsg(_(e_invarg2), tv_get_string(&argvars[1])); + return; + } + + Callback cb = CALLBACK_NONE; + const char_u *key = NULL; + bool matchseq = false; + if (argvars[2].v_type != VAR_UNKNOWN) { + if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL) { + emsg(_(e_dictreq)); + return; + } + + // To search a dict, either a callback function or a key can be + // specified. + dict_T *const d = argvars[2].vval.v_dict; + const dictitem_T *const di = tv_dict_find(d, "key", -1); + if (di != NULL) { + if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL + || *di->di_tv.vval.v_string == NUL) { + semsg(_(e_invarg2), tv_get_string(&di->di_tv)); + return; + } + key = (const char_u *)tv_get_string(&di->di_tv); + } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) { + semsg(_(e_invargval), "text_cb"); + return; + } + if (tv_dict_find(d, "matchseq", -1) != NULL) { + matchseq = true; + } + } + + // get the fuzzy matches + tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown); + if (retmatchpos) { + // For matchfuzzypos(), a list with three items are returned. First + // item is a list of matching strings, the second item is a list of + // lists with matching positions within each string and the third item + // is the list of scores of the matches. + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + } + + fuzzy_match_in_list(argvars[0].vval.v_list, (char_u *)tv_get_string(&argvars[1]), matchseq, key, + &cb, retmatchpos, rettv->vval.v_list); + callback_free(&cb); +} + +/// "matchfuzzy()" function +void f_matchfuzzy(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + do_fuzzymatch(argvars, rettv, false); +} + +/// "matchfuzzypos()" function +void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + do_fuzzymatch(argvars, rettv, true); +} + /// Find identifiers or defines in included files. /// If p_ic && (compl_cont_status & CONT_SOL) then ptr must be in lowercase. /// diff --git a/src/nvim/search.h b/src/nvim/search.h index 15b8d41f39..53059cc1ea 100644 --- a/src/nvim/search.h +++ b/src/nvim/search.h @@ -55,6 +55,9 @@ #define SEARCH_STAT_DEF_MAX_COUNT 99 #define SEARCH_STAT_BUF_LEN 12 +/// Maximum number of characters that can be fuzzy matched +#define MAX_FUZZY_MATCHES 256 + /// Structure containing offset definition for the last search pattern /// /// @note Only offset for the last search pattern is used, not for the last diff --git a/src/nvim/testdir/test_matchfuzzy.vim b/src/nvim/testdir/test_matchfuzzy.vim new file mode 100644 index 0000000000..abcc9b40c1 --- /dev/null +++ b/src/nvim/testdir/test_matchfuzzy.vim @@ -0,0 +1,248 @@ +" Tests for fuzzy matching + +source shared.vim +source check.vim + +" Test for matchfuzzy() +func Test_matchfuzzy() + call assert_fails('call matchfuzzy(10, "abc")', 'E686:') + " Needs v8.2.1183; match the final error that's thrown for now + " call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') + call assert_fails('call matchfuzzy(["abc"], [])', 'E475:') + call assert_fails("let x = matchfuzzy(v:_null_list, 'foo')", 'E686:') + call assert_fails('call matchfuzzy(["abc"], v:_null_string)', 'E475:') + call assert_equal([], matchfuzzy([], 'abc')) + call assert_equal([], matchfuzzy(['abc'], '')) + call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) + call assert_equal([], matchfuzzy([10, 20], 'ac')) + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) + call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) + call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) + call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) + call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) + call assert_equal(['onetwo', 'one_two'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) + call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) + call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + " matches with same score should not be reordered + let l = ['abc1', 'abc2', 'abc3'] + call assert_equal(l, l->matchfuzzy('abc')) + + " Tests for match preferences + " preference for camel case match + call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) + " preference for match after a separator (_ or space) + call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) + " preference for leading letter match + call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo')) + " preference for sequential match + call assert_equal(['onetwo', 'oanbectdweo'], ['oanbectdweo', 'onetwo']->matchfuzzy('onetwo')) + " non-matching leading letter(s) penalty + call assert_equal(['xonetwo', 'xxonetwo'], ['xxonetwo', 'xonetwo']->matchfuzzy('onetwo')) + " total non-matching letter(s) penalty + call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one')) + " prefer complete matches over separator matches + call assert_equal(['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c'], ['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c']->matchfuzzy('vimrc')) + " gap penalty + call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab')) + " path separator vs word separator + call assert_equal(['color/setup.vim', 'color\\setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim')) + + " match multiple words (separated by space) + call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo')) + call assert_equal([], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('one two')) + call assert_equal([], ['foo bar']->matchfuzzy(" \t ")) + + " test for matching a sequence of words + call assert_equal(['bar foo'], ['foo bar', 'bar foo', 'foobar', 'barfoo']->matchfuzzy('bar foo', {'matchseq' : 1})) + call assert_equal([#{text: 'two one'}], [#{text: 'one two'}, #{text: 'two one'}]->matchfuzzy('two one', #{key: 'text', matchseq: v:true})) + + %bw! + eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) + let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') + call assert_equal(1, len(l)) + call assert_match('needle', l[0]) + + " Test for fuzzy matching dicts + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}})) + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'})) + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> v.val}})) + call assert_equal([], matchfuzzy(l, 'day', {'key' : 'val'})) + call assert_fails("let x = matchfuzzy(l, 'cam', 'random')", 'E715:') + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> []}})) + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> 1}})) + call assert_fails("let x = matchfuzzy(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') + call assert_equal([], matchfuzzy(l, 'cam')) + " Nvim's callback implementation is different, so E6000 is expected instead, + " but we need v8.2.1183 to assert it + " call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E921:') + " call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E6000:') + call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E475:') + " call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E730:') + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E475:') + call assert_fails("let x = matchfuzzy(l, 'cam', v:_null_dict)", 'E715:') + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : v:_null_string})", 'E475:') + " Nvim doesn't have null functions + " call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + " matches with same score should not be reordered + let l = [#{text: 'abc', id: 1}, #{text: 'abc', id: 2}, #{text: 'abc', id: 3}] + call assert_equal(l, l->matchfuzzy('abc', #{key: 'text'})) + + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') + + " Test in latin1 encoding + let save_enc = &encoding + " Nvim supports utf-8 encoding only + " set encoding=latin1 + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) + let &encoding = save_enc +endfunc + +" Test for the matchfuzzypos() function +func Test_matchfuzzypos() + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [128, 127]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['hello', 'hello world hello world'], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [275, 257]], + \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]], [191]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0, 3]], [219]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0]], [112]], matchfuzzypos(['a b'], ' a ')) + call assert_equal([[], [], []], matchfuzzypos(['a b'], ' ')) + call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab')) + let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) + call assert_equal(range(256), x[1][0]) + call assert_equal([[], [], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) + call assert_equal([[], [], []], matchfuzzypos([], 'abc')) + + " match in a long string + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [-135]], + \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) + + " preference for camel case match + call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [189]], matchfuzzypos(['xabcxxaBc'], 'abc')) + " preference for match after a separator (_ or space) + call assert_equal([['xabx_ab'], [[5, 6]], [145]], matchfuzzypos(['xabx_ab'], 'ab')) + " preference for leading letter match + call assert_equal([['abcxabc'], [[0, 1]], [150]], matchfuzzypos(['abcxabc'], 'ab')) + " preference for sequential match + call assert_equal([['aobncedone'], [[7, 8, 9]], [158]], matchfuzzypos(['aobncedone'], 'one')) + " best recursive match + call assert_equal([['xoone'], [[2, 3, 4]], [168]], matchfuzzypos(['xoone'], 'one')) + + " match multiple words (separated by space) + call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [369]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) + call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) + call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" \t ")) + call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [657]], ['grace']->matchfuzzypos('race ace grace')) + + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]], + \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [192]], + \ matchfuzzypos(l, 'cam', {'key' : 'val'})) + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'})) + call assert_fails("let x = matchfuzzypos(l, 'cam', 'random')", 'E715:') + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}})) + call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}})) + call assert_fails("let x = matchfuzzypos(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') + call assert_equal([[], [], []], matchfuzzypos(l, 'cam')) + " Nvim's callback implementation is different, so E6000 is expected instead, + " but we need v8.2.1183 to assert it + " call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E921:') + " call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E6000:') + call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E475:') + " call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E730:') + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E475:') + call assert_fails("let x = matchfuzzypos(l, 'cam', v:_null_dict)", 'E715:') + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : v:_null_string})", 'E475:') + " Nvim doesn't have null functions + " call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') +endfunc + +" Test for matchfuzzy() with multibyte characters +func Test_matchfuzzy_mbyte() + CheckFeature multi_lang + call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ')) + " reverse the order of characters + call assert_equal([], matchfuzzy(['ンヹㄇヺヴ'], 'ヺヹ')) + call assert_equal(['αβΩxxx', 'xαxβxΩx'], + \ matchfuzzy(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) + call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], + \ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) + + " match multiple words (separated by space) + call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의')) + call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘')) + + " preference for camel case match + call assert_equal(['oneĄwo', 'oneąwo'], + \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) + " preference for complete match then match after separator (_ or space) + call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']), + \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) + " preference for match after a separator (_ or space) + call assert_equal(['ㄓㄔabㄟㄠ', 'ㄓㄔa_bㄟㄠ', 'ㄓㄔa bㄟㄠ'], + \ ['ㄓㄔa_bㄟㄠ', 'ㄓㄔa bㄟㄠ', 'ㄓㄔabㄟㄠ']->matchfuzzy('ㄓㄔabㄟㄠ')) + " preference for leading letter match + call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'], + \ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż')) + " preference for sequential match + call assert_equal(['ㄞㄡㄤfffifl', 'ㄞaㄡbㄤcffdfiefl'], + \ ['ㄞaㄡbㄤcffdfiefl', 'ㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) + " non-matching leading letter(s) penalty + call assert_equal(['xㄞㄡㄤfffifl', 'xxㄞㄡㄤfffifl'], + \ ['xxㄞㄡㄤfffifl', 'xㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) + " total non-matching letter(s) penalty + call assert_equal(['ŗŝţ', 'ŗŝţx', 'ŗŝţxx'], + \ ['ŗŝţxx', 'ŗŝţ', 'ŗŝţx']->matchfuzzy('ŗŝţ')) +endfunc + +" Test for matchfuzzypos() with multibyte characters +func Test_matchfuzzypos_mbyte() + CheckFeature multi_lang + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [273]], + \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) + " reverse the order of characters + call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [222, 113]], + \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) + call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], + \ [[0, 1], [0, 1], [0, 1], [0, 2]], [151, 148, 145, 110]], + \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) + call assert_equal([['ααααααα'], [[0, 1, 2]], [191]], + \ matchfuzzypos(['ααααααα'], 'ααα')) + + call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) + let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) + call assert_equal(range(256), x[1][0]) + call assert_equal([[], [], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + + " match multiple words (separated by space) + call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [328]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) + call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) + + " match in a long string + call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [-135]], + \ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ')) + " preference for camel case match + call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [189]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) + " preference for match after a separator (_ or space) + call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) + " preference for leading letter match + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [150]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) + " preference for sequential match + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [158]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) + " best recursive match + call assert_equal([['xффйд'], [[2, 3, 4]], [168]], matchfuzzypos(['xффйд'], 'фйд')) +endfunc + +" vim: shiftwidth=2 sts=2 expandtab diff --git a/src/nvim/testdir/test_quickfix.vim b/src/nvim/testdir/test_quickfix.vim index f137ed5346..00679e1958 100644 --- a/src/nvim/testdir/test_quickfix.vim +++ b/src/nvim/testdir/test_quickfix.vim @@ -32,7 +32,7 @@ func s:setup_commands(cchar) command! -count -nargs=* -bang Xnfile <mods><count>cnfile<bang> <args> command! -nargs=* -bang Xpfile <mods>cpfile<bang> <args> command! -nargs=* Xexpr <mods>cexpr <args> - command! -count -nargs=* Xvimgrep <mods> <count>vimgrep <args> + command! -count=999 -nargs=* Xvimgrep <mods> <count>vimgrep <args> command! -nargs=* Xvimgrepadd <mods> vimgrepadd <args> command! -nargs=* Xgrep <mods> grep <args> command! -nargs=* Xgrepadd <mods> grepadd <args> @@ -69,7 +69,7 @@ func s:setup_commands(cchar) command! -count -nargs=* -bang Xnfile <mods><count>lnfile<bang> <args> command! -nargs=* -bang Xpfile <mods>lpfile<bang> <args> command! -nargs=* Xexpr <mods>lexpr <args> - command! -count -nargs=* Xvimgrep <mods> <count>lvimgrep <args> + command! -count=999 -nargs=* Xvimgrep <mods> <count>lvimgrep <args> command! -nargs=* Xvimgrepadd <mods> lvimgrepadd <args> command! -nargs=* Xgrep <mods> lgrep <args> command! -nargs=* Xgrepadd <mods> lgrepadd <args> @@ -5028,6 +5028,52 @@ func Test_qfbuf_update() call Xqfbuf_update('l') endfunc +" Test for the :vimgrep 'f' flag (fuzzy match) +func Xvimgrep_fuzzy_match(cchar) + call s:setup_commands(a:cchar) + + Xvimgrep /three one/f Xfile* + let l = g:Xgetlist() + call assert_equal(2, len(l)) + call assert_equal(['Xfile1', 1, 9, 'one two three'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile2', 2, 1, 'three one two'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + + Xvimgrep /the/f Xfile* + let l = g:Xgetlist() + call assert_equal(3, len(l)) + call assert_equal(['Xfile1', 1, 9, 'one two three'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile2', 2, 1, 'three one two'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + call assert_equal(['Xfile2', 4, 4, 'aaathreeaaa'], + \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text]) + + Xvimgrep /aaa/fg Xfile* + let l = g:Xgetlist() + call assert_equal(4, len(l)) + call assert_equal(['Xfile1', 2, 1, 'aaaaaa'], + \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text]) + call assert_equal(['Xfile1', 2, 4, 'aaaaaa'], + \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text]) + call assert_equal(['Xfile2', 4, 1, 'aaathreeaaa'], + \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text]) + call assert_equal(['Xfile2', 4, 9, 'aaathreeaaa'], + \ [bufname(l[3].bufnr), l[3].lnum, l[3].col, l[3].text]) + + call assert_fails('Xvimgrep /xyz/fg Xfile*', 'E480:') +endfunc + +func Test_vimgrep_fuzzy_match() + call writefile(['one two three', 'aaaaaa'], 'Xfile1') + call writefile(['one', 'three one two', 'two', 'aaathreeaaa'], 'Xfile2') + call Xvimgrep_fuzzy_match('c') + call Xvimgrep_fuzzy_match('l') + call delete('Xfile1') + call delete('Xfile2') +endfunc + " Test for getting a specific item from a quickfix list func Xtest_getqflist_by_idx(cchar) call s:setup_commands(a:cchar) |