diff options
author | Jan Edmund Lazo <jan.lazo@mail.utoronto.ca> | 2021-05-26 09:13:30 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-26 09:13:30 -0400 |
commit | fd91e73c57b2dd95e9395fef04c82b80b9c779bc (patch) | |
tree | 1d8663a2bdae23fc5a86c6a104a15743456ea44d /src | |
parent | a282a177d3320db25fa8f854cbcdbe0bc6abde7f (diff) | |
parent | a7061359b741b5716bddbefdc024f38405f758b0 (diff) | |
download | rneovim-fd91e73c57b2dd95e9395fef04c82b80b9c779bc.tar.gz rneovim-fd91e73c57b2dd95e9395fef04c82b80b9c779bc.tar.bz2 rneovim-fd91e73c57b2dd95e9395fef04c82b80b9c779bc.zip |
Merge pull request #14640 from janlazo/vim-3ec3217f0491
vim-patch:3ec3217f0491,8.2.{2278,2885}
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/regexp.c | 32 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 59 | ||||
-rw-r--r-- | src/nvim/testdir/test_regexp_utf8.vim | 46 | ||||
-rw-r--r-- | src/nvim/testdir/test_search.vim | 19 |
4 files changed, 124 insertions, 32 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index e0cc25421a..accf9b0bb5 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -692,6 +692,7 @@ static char_u *regparse; ///< Input-scan pointer. static int prevchr_len; ///< byte length of previous char static int num_complex_braces; ///< Complex \{...} count static int regnpar; ///< () count. +static bool wants_nfa; ///< regex should use NFA engine static int regnzpar; ///< \z() count. static int re_has_z; ///< \z item detected static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE @@ -3974,17 +3975,25 @@ static bool regmatch( pos = getmark_buf(rex.reg_buf, mark, false); if (pos == NULL // mark doesn't exist - || pos->lnum <= 0 // mark isn't set in reg_buf - || (pos->lnum == rex.lnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(rex.input - rex.line) - ? (cmp == '<' || cmp == '>') - : (pos->col < (colnr_T)(rex.input - rex.line) - ? cmp != '>' - : cmp != '<')) - : (pos->lnum < rex.lnum + rex.reg_firstlnum - ? cmp != '>' - : cmp != '<'))) { + || pos->lnum <= 0) { // mark isn't set in reg_buf status = RA_NOMATCH; + } else { + const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum + && pos->col == MAXCOL + ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) + : pos->col; + + if (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos_col == (colnr_T)(rex.input - rex.line) + ? (cmp == '<' || cmp == '>') + : (pos_col < (colnr_T)(rex.input - rex.line) + ? cmp != '>' + : cmp != '<')) + : (pos->lnum < rex.lnum + rex.reg_firstlnum + ? cmp != '>' + : cmp != '<')) { + status = RA_NOMATCH; + } } } break; @@ -7240,7 +7249,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) // Check for error compiling regexp with initial engine. if (prog == NULL) { #ifdef BT_REGEXP_DEBUG_LOG - // Debugging log for NFA. + // Debugging log for BT engine. 
if (regexp_engine != BACKTRACKING_ENGINE) { FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); if (f) { @@ -7257,6 +7266,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) // But don't try if an error message was given. if (regexp_engine == AUTOMATIC_ENGINE && !called_emsg) { regexp_engine = BACKTRACKING_ENGINE; + report_re_switch(expr); prog = bt_regengine.regcomp(expr, re_flags); } } diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 923db6422e..5047e0db03 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -328,6 +328,11 @@ static int *post_start; ///< holds the postfix form of r.e. static int *post_end; static int *post_ptr; +// Set when the pattern should use the NFA engine. +// E.g. [[:upper:]] only allows 8bit characters for BT engine, +// while NFA engine handles multibyte characters correctly. +static bool wants_nfa; + static int nstate; ///< Number of states in the NFA. Also used when executing. static int istate; ///< Index in the state vector, used in alloc_state() @@ -377,6 +382,7 @@ nfa_regcomp_start ( post_start = (int *)xmalloc(postfix_size); post_ptr = post_start; post_end = post_start + nstate_max; + wants_nfa = false; rex.nfa_has_zend = false; rex.nfa_has_backref = false; @@ -1618,6 +1624,7 @@ collection: EMIT(NFA_CLASS_GRAPH); break; case CLASS_LOWER: + wants_nfa = true; EMIT(NFA_CLASS_LOWER); break; case CLASS_PRINT: @@ -1630,6 +1637,7 @@ collection: EMIT(NFA_CLASS_SPACE); break; case CLASS_UPPER: + wants_nfa = true; EMIT(NFA_CLASS_UPPER); break; case CLASS_XDIGIT: @@ -1998,10 +2006,17 @@ static int nfa_regpiece(void) return OK; } - // The engine is very inefficient (uses too many states) when the maximum - // is much larger than the minimum and when the maximum is large. Bail out - // if we can use the other engine. 
- if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) { + // The engine is very inefficient (uses too many states) when the + // maximum is much larger than the minimum and when the maximum is + // large. However, when maxval is MAX_LIMIT, it is okay, as this + // will emit NFA_STAR. + // Bail out if we can use the other engine, but only, when the + // pattern does not need the NFA engine like (e.g. [[:upper:]]\{2,\} + // does not work with with characters > 8 bit with the BT engine) + if ((nfa_re_flags & RE_AUTO) + && (maxval > 500 || maxval > minval + 200) + && (maxval != MAX_LIMIT && minval < 200) + && !wants_nfa) { return FAIL; } @@ -6055,21 +6070,27 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, { pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false); - // Compare the mark position to the match position. - result = (pos != NULL // mark doesn't exist - && pos->lnum > 0 // mark isn't set in reg_buf - && (pos->lnum == rex.lnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(rex.input - rex.line) - ? t->state->c == NFA_MARK - : (pos->col < (colnr_T)(rex.input - rex.line) - ? t->state->c == NFA_MARK_GT - : t->state->c == NFA_MARK_LT)) - : (pos->lnum < rex.lnum + rex.reg_firstlnum - ? t->state->c == NFA_MARK_GT - : t->state->c == NFA_MARK_LT))); - if (result) { - add_here = true; - add_state = t->state->out; + // Compare the mark position to the match position, if the mark + // exists and mark is set in reg_buf. + if (pos != NULL && pos->lnum > 0) { + const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum + && pos->col == MAXCOL + ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) + : pos->col; + + result = pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos_col == (colnr_T)(rex.input - rex.line) + ? t->state->c == NFA_MARK + : (pos_col < (colnr_T)(rex.input - rex.line) + ? t->state->c == NFA_MARK_GT + : t->state->c == NFA_MARK_LT)) + : (pos->lnum < rex.lnum + rex.reg_firstlnum + ? 
t->state->c == NFA_MARK_GT + : t->state->c == NFA_MARK_LT); + if (result) { + add_here = true; + add_state = t->state->out; + } } break; } diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim index 513780938e..d8d5797dcf 100644 --- a/src/nvim/testdir/test_regexp_utf8.vim +++ b/src/nvim/testdir/test_regexp_utf8.vim @@ -542,6 +542,52 @@ func Test_match_start_of_line_combining() bwipe! endfunc +" Check that [[:upper:]] matches for automatic engine +func Test_match_char_class_upper() + new + let _engine=&regexpengine + + " Test 1: [[:upper:]]\{2,\} + set regexpengine=0 + call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...']) + call cursor(1,1) + let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>" + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 1') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + + " Test 2: [[:upper:]].\+ + let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + set regexpengine=1 + exe search_cmd + call assert_equal(1, searchcount().total, 'TEST 2') + set regexpengine=2 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + + " Test 3: [[:lower:]]\+ + let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 3 lower') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + " clean up + let &regexpengine=_engine + bwipe! 
+endfunc " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/nvim/testdir/test_search.vim b/src/nvim/testdir/test_search.vim index 75d42b986b..b391663e0f 100644 --- a/src/nvim/testdir/test_search.vim +++ b/src/nvim/testdir/test_search.vim @@ -1177,13 +1177,28 @@ func Test_look_behind() bwipe! endfunc +func Test_search_visual_area_linewise() + new + call setline(1, ['aa', 'bb', 'cc']) + exe "normal 2GV\<Esc>" + for engine in [1, 2] + exe 'set regexpengine=' .. engine + exe "normal gg/\\%'<\<CR>>" + call assert_equal([0, 2, 1, 0, 1], getcurpos(), 'engine ' .. engine) + exe "normal gg/\\%'>\<CR>" + call assert_equal([0, 2, 2, 0, 2], getcurpos(), 'engine ' .. engine) + endfor + + bwipe! + set regexpengine& +endfunc + func Test_search_sentence() new " this used to cause a crash - call assert_fails("/\\%')", 'E486') - call assert_fails("/", 'E486') /\%'( / + bwipe endfunc " Test that there is no crash when there is a last search pattern but no last |