aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/nvim/regexp.c32
-rw-r--r--src/nvim/regexp_nfa.c59
-rw-r--r--src/nvim/testdir/test_regexp_utf8.vim46
-rw-r--r--src/nvim/testdir/test_search.vim19
4 files changed, 124 insertions, 32 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index e0cc25421a..accf9b0bb5 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -692,6 +692,7 @@ static char_u *regparse; ///< Input-scan pointer.
static int prevchr_len; ///< byte length of previous char
static int num_complex_braces; ///< Complex \{...} count
static int regnpar; ///< () count.
+static bool wants_nfa; ///< regex should use NFA engine
static int regnzpar; ///< \z() count.
static int re_has_z; ///< \z item detected
static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE
@@ -3974,17 +3975,25 @@ static bool regmatch(
pos = getmark_buf(rex.reg_buf, mark, false);
if (pos == NULL // mark doesn't exist
- || pos->lnum <= 0 // mark isn't set in reg_buf
- || (pos->lnum == rex.lnum + rex.reg_firstlnum
- ? (pos->col == (colnr_T)(rex.input - rex.line)
- ? (cmp == '<' || cmp == '>')
- : (pos->col < (colnr_T)(rex.input - rex.line)
- ? cmp != '>'
- : cmp != '<'))
- : (pos->lnum < rex.lnum + rex.reg_firstlnum
- ? cmp != '>'
- : cmp != '<'))) {
+ || pos->lnum <= 0) { // mark isn't set in reg_buf
status = RA_NOMATCH;
+ } else {
+ const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum
+ && pos->col == MAXCOL
+ ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum))
+ : pos->col;
+
+ if (pos->lnum == rex.lnum + rex.reg_firstlnum
+ ? (pos_col == (colnr_T)(rex.input - rex.line)
+ ? (cmp == '<' || cmp == '>')
+ : (pos_col < (colnr_T)(rex.input - rex.line)
+ ? cmp != '>'
+ : cmp != '<'))
+ : (pos->lnum < rex.lnum + rex.reg_firstlnum
+ ? cmp != '>'
+ : cmp != '<')) {
+ status = RA_NOMATCH;
+ }
}
}
break;
@@ -7240,7 +7249,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
// Check for error compiling regexp with initial engine.
if (prog == NULL) {
#ifdef BT_REGEXP_DEBUG_LOG
- // Debugging log for NFA.
+ // Debugging log for BT engine.
if (regexp_engine != BACKTRACKING_ENGINE) {
FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
if (f) {
@@ -7257,6 +7266,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
// But don't try if an error message was given.
if (regexp_engine == AUTOMATIC_ENGINE && !called_emsg) {
regexp_engine = BACKTRACKING_ENGINE;
+ report_re_switch(expr);
prog = bt_regengine.regcomp(expr, re_flags);
}
}
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 923db6422e..5047e0db03 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -328,6 +328,11 @@ static int *post_start; ///< holds the postfix form of r.e.
static int *post_end;
static int *post_ptr;
+// Set when the pattern should use the NFA engine.
+// E.g. [[:upper:]] only allows 8bit characters for BT engine,
+// while NFA engine handles multibyte characters correctly.
+static bool wants_nfa;
+
static int nstate; ///< Number of states in the NFA. Also used when executing.
static int istate; ///< Index in the state vector, used in alloc_state()
@@ -377,6 +382,7 @@ nfa_regcomp_start (
post_start = (int *)xmalloc(postfix_size);
post_ptr = post_start;
post_end = post_start + nstate_max;
+ wants_nfa = false;
rex.nfa_has_zend = false;
rex.nfa_has_backref = false;
@@ -1618,6 +1624,7 @@ collection:
EMIT(NFA_CLASS_GRAPH);
break;
case CLASS_LOWER:
+ wants_nfa = true;
EMIT(NFA_CLASS_LOWER);
break;
case CLASS_PRINT:
@@ -1630,6 +1637,7 @@ collection:
EMIT(NFA_CLASS_SPACE);
break;
case CLASS_UPPER:
+ wants_nfa = true;
EMIT(NFA_CLASS_UPPER);
break;
case CLASS_XDIGIT:
@@ -1998,10 +2006,17 @@ static int nfa_regpiece(void)
return OK;
}
- // The engine is very inefficient (uses too many states) when the maximum
- // is much larger than the minimum and when the maximum is large. Bail out
- // if we can use the other engine.
- if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) {
+ // The engine is very inefficient (uses too many states) when the
+ // maximum is much larger than the minimum and when the maximum is
+ // large. However, when maxval is MAX_LIMIT, it is okay, as this
+ // will emit NFA_STAR.
+ // Bail out if we can use the other engine, but only, when the
+ // pattern does not need the NFA engine like (e.g. [[:upper:]]\{2,\}
+ // does not work with with characters > 8 bit with the BT engine)
+ if ((nfa_re_flags & RE_AUTO)
+ && (maxval > 500 || maxval > minval + 200)
+ && (maxval != MAX_LIMIT && minval < 200)
+ && !wants_nfa) {
return FAIL;
}
@@ -6055,21 +6070,27 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
{
pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false);
- // Compare the mark position to the match position.
- result = (pos != NULL // mark doesn't exist
- && pos->lnum > 0 // mark isn't set in reg_buf
- && (pos->lnum == rex.lnum + rex.reg_firstlnum
- ? (pos->col == (colnr_T)(rex.input - rex.line)
- ? t->state->c == NFA_MARK
- : (pos->col < (colnr_T)(rex.input - rex.line)
- ? t->state->c == NFA_MARK_GT
- : t->state->c == NFA_MARK_LT))
- : (pos->lnum < rex.lnum + rex.reg_firstlnum
- ? t->state->c == NFA_MARK_GT
- : t->state->c == NFA_MARK_LT)));
- if (result) {
- add_here = true;
- add_state = t->state->out;
+ // Compare the mark position to the match position, if the mark
+ // exists and mark is set in reg_buf.
+ if (pos != NULL && pos->lnum > 0) {
+ const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum
+ && pos->col == MAXCOL
+ ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum))
+ : pos->col;
+
+ result = pos->lnum == rex.lnum + rex.reg_firstlnum
+ ? (pos_col == (colnr_T)(rex.input - rex.line)
+ ? t->state->c == NFA_MARK
+ : (pos_col < (colnr_T)(rex.input - rex.line)
+ ? t->state->c == NFA_MARK_GT
+ : t->state->c == NFA_MARK_LT))
+ : (pos->lnum < rex.lnum + rex.reg_firstlnum
+ ? t->state->c == NFA_MARK_GT
+ : t->state->c == NFA_MARK_LT);
+ if (result) {
+ add_here = true;
+ add_state = t->state->out;
+ }
}
break;
}
diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim
index 513780938e..d8d5797dcf 100644
--- a/src/nvim/testdir/test_regexp_utf8.vim
+++ b/src/nvim/testdir/test_regexp_utf8.vim
@@ -542,6 +542,52 @@ func Test_match_start_of_line_combining()
bwipe!
endfunc
+" Check that [[:upper:]] matches for automatic engine
+func Test_match_char_class_upper()
+ new
+ let _engine=&regexpengine
+
+ " Test 1: [[:upper:]]\{2,\}
+ set regexpengine=0
+ call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
+ call cursor(1,1)
+ let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 1')
+ set regexpengine=1
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 1')
+ set regexpengine=2
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 1')
+
+ " Test 2: [[:upper:]].\+
+ let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
+ set regexpengine=0
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 2')
+ set regexpengine=1
+ exe search_cmd
+ call assert_equal(1, searchcount().total, 'TEST 2')
+ set regexpengine=2
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 2')
+
+ " Test 3: [[:lower:]]\+
+ let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
+ set regexpengine=0
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 3 lower')
+ set regexpengine=1
+ exe search_cmd
+ call assert_equal(2, searchcount().total, 'TEST 3 lower')
+ set regexpengine=2
+ exe search_cmd
+ call assert_equal(4, searchcount().total, 'TEST 3 lower')
+ " clean up
+ let &regexpengine=_engine
+ bwipe!
+endfunc
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/nvim/testdir/test_search.vim b/src/nvim/testdir/test_search.vim
index 75d42b986b..b391663e0f 100644
--- a/src/nvim/testdir/test_search.vim
+++ b/src/nvim/testdir/test_search.vim
@@ -1177,13 +1177,28 @@ func Test_look_behind()
bwipe!
endfunc
+func Test_search_visual_area_linewise()
+ new
+ call setline(1, ['aa', 'bb', 'cc'])
+ exe "normal 2GV\<Esc>"
+ for engine in [1, 2]
+ exe 'set regexpengine=' .. engine
+ exe "normal gg/\\%'<\<CR>>"
+ call assert_equal([0, 2, 1, 0, 1], getcurpos(), 'engine ' .. engine)
+ exe "normal gg/\\%'>\<CR>"
+ call assert_equal([0, 2, 2, 0, 2], getcurpos(), 'engine ' .. engine)
+ endfor
+
+ bwipe!
+ set regexpengine&
+endfunc
+
func Test_search_sentence()
new
" this used to cause a crash
- call assert_fails("/\\%')", 'E486')
- call assert_fails("/", 'E486')
/\%'(
/
+ bwipe
endfunc
" Test that there is no crash when there is a last search pattern but no last