diff options
-rw-r--r-- | src/nvim/regexp.c | 4 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 23 | ||||
-rw-r--r-- | src/nvim/testdir/test_regexp_utf8.vim | 46 |
3 files changed, 68 insertions, 5 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 1f025b24a4..accf9b0bb5 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -692,6 +692,7 @@ static char_u *regparse; ///< Input-scan pointer. static int prevchr_len; ///< byte length of previous char static int num_complex_braces; ///< Complex \{...} count static int regnpar; ///< () count. +static bool wants_nfa; ///< regex should use NFA engine static int regnzpar; ///< \z() count. static int re_has_z; ///< \z item detected static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE @@ -7248,7 +7249,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) // Check for error compiling regexp with initial engine. if (prog == NULL) { #ifdef BT_REGEXP_DEBUG_LOG - // Debugging log for NFA. + // Debugging log for BT engine. if (regexp_engine != BACKTRACKING_ENGINE) { FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); if (f) { @@ -7265,6 +7266,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) // But don't try if an error message was given. if (regexp_engine == AUTOMATIC_ENGINE && !called_emsg) { regexp_engine = BACKTRACKING_ENGINE; + report_re_switch(expr); prog = bt_regengine.regcomp(expr, re_flags); } } diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 73308c6870..5047e0db03 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -328,6 +328,11 @@ static int *post_start; ///< holds the postfix form of r.e. static int *post_end; static int *post_ptr; +// Set when the pattern should use the NFA engine. +// E.g. [[:upper:]] only allows 8bit characters for BT engine, +// while NFA engine handles multibyte characters correctly. +static bool wants_nfa; + static int nstate; ///< Number of states in the NFA. Also used when executing. static int istate; ///< Index in the state vector, used in alloc_state() @@ -377,6 +382,7 @@ nfa_regcomp_start ( post_start = (int *)xmalloc(postfix_size); post_ptr = post_start; post_end = post_start + nstate_max; + wants_nfa = false; rex.nfa_has_zend = false; rex.nfa_has_backref = false; @@ -1618,6 +1624,7 @@ collection: EMIT(NFA_CLASS_GRAPH); break; case CLASS_LOWER: + wants_nfa = true; EMIT(NFA_CLASS_LOWER); break; case CLASS_PRINT: @@ -1630,6 +1637,7 @@ collection: EMIT(NFA_CLASS_SPACE); break; case CLASS_UPPER: + wants_nfa = true; EMIT(NFA_CLASS_UPPER); break; case CLASS_XDIGIT: @@ -1998,10 +2006,17 @@ static int nfa_regpiece(void) return OK; } - // The engine is very inefficient (uses too many states) when the maximum - // is much larger than the minimum and when the maximum is large. Bail out - // if we can use the other engine. - if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) { + // The engine is very inefficient (uses too many states) when the + // maximum is much larger than the minimum and when the maximum is + // large. However, when maxval is MAX_LIMIT, it is okay, as this + // will emit NFA_STAR. + // Bail out if we can use the other engine, but only, when the + // pattern does not need the NFA engine like (e.g. [[:upper:]]\{2,\} + // does not work with with characters > 8 bit with the BT engine) + if ((nfa_re_flags & RE_AUTO) + && (maxval > 500 || maxval > minval + 200) + && (maxval != MAX_LIMIT && minval < 200) + && !wants_nfa) { return FAIL; } diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim index 513780938e..d8d5797dcf 100644 --- a/src/nvim/testdir/test_regexp_utf8.vim +++ b/src/nvim/testdir/test_regexp_utf8.vim @@ -542,6 +542,52 @@ func Test_match_start_of_line_combining() bwipe! endfunc +" Check that [[:upper:]] matches for automatic engine +func Test_match_char_class_upper() + new + let _engine=®expengine + + " Test 1: [[:upper:]]\{2,\} + set regexpengine=0 + call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...']) + call cursor(1,1) + let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>" + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 1') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + + " Test 2: [[:upper:]].\+ + let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + set regexpengine=1 + exe search_cmd + call assert_equal(1, searchcount().total, 'TEST 2') + set regexpengine=2 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + + " Test 3: [[:lower:]]\+ + let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 3 lower') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + " clean up + let ®expengine=_engine + bwipe! +endfunc " vim: shiftwidth=2 sts=2 expandtab |