From 7e0d50b16e5b8d5c8ac5048fa49486171aabd359 Mon Sep 17 00:00:00 2001 From: Jan Edmund Lazo Date: Tue, 25 May 2021 22:15:14 -0400 Subject: vim-patch:8.2.2885: searching for \%'> does not match linewise end of line Problem: searching for \%'> does not match linewise end of line. (Tim Chase) Solution: Match end of line if column is MAXCOL. (closes vim/vim#8238) https://github.com/vim/vim/commit/872bee557e5f8ab0e4a523a6a845868a2801b17e --- src/nvim/regexp_nfa.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'src/nvim/regexp_nfa.c') diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 923db6422e..73308c6870 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -6055,21 +6055,27 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, { pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false); - // Compare the mark position to the match position. - result = (pos != NULL // mark doesn't exist - && pos->lnum > 0 // mark isn't set in reg_buf - && (pos->lnum == rex.lnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(rex.input - rex.line) - ? t->state->c == NFA_MARK - : (pos->col < (colnr_T)(rex.input - rex.line) - ? t->state->c == NFA_MARK_GT - : t->state->c == NFA_MARK_LT)) - : (pos->lnum < rex.lnum + rex.reg_firstlnum - ? t->state->c == NFA_MARK_GT - : t->state->c == NFA_MARK_LT))); - if (result) { - add_here = true; - add_state = t->state->out; + // Compare the mark position to the match position, if the mark + // exists and mark is set in reg_buf. + if (pos != NULL && pos->lnum > 0) { + const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum + && pos->col == MAXCOL + ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) + : pos->col; + + result = pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos_col == (colnr_T)(rex.input - rex.line) + ? t->state->c == NFA_MARK + : (pos_col < (colnr_T)(rex.input - rex.line) + ? t->state->c == NFA_MARK_GT + : t->state->c == NFA_MARK_LT)) + : (pos->lnum < rex.lnum + rex.reg_firstlnum + ? t->state->c == NFA_MARK_GT + : t->state->c == NFA_MARK_LT); + if (result) { + add_here = true; + add_state = t->state->out; + } } break; } -- cgit From a7061359b741b5716bddbefdc024f38405f758b0 Mon Sep 17 00:00:00 2001 From: Jan Edmund Lazo Date: Wed, 26 May 2021 00:21:09 -0400 Subject: vim-patch:8.2.2278: falling back to old regexp engine can some patterns Problem: Falling back to old regexp engine can some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes vim/vim#7572) https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644 --- src/nvim/regexp_nfa.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'src/nvim/regexp_nfa.c') diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 73308c6870..5047e0db03 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -328,6 +328,11 @@ static int *post_start; ///< holds the postfix form of r.e. static int *post_end; static int *post_ptr; +// Set when the pattern should use the NFA engine. +// E.g. [[:upper:]] only allows 8bit characters for BT engine, +// while NFA engine handles multibyte characters correctly. +static bool wants_nfa; + static int nstate; ///< Number of states in the NFA. Also used when executing. static int istate; ///< Index in the state vector, used in alloc_state() @@ -377,6 +382,7 @@ nfa_regcomp_start ( post_start = (int *)xmalloc(postfix_size); post_ptr = post_start; post_end = post_start + nstate_max; + wants_nfa = false; rex.nfa_has_zend = false; rex.nfa_has_backref = false; @@ -1618,6 +1624,7 @@ collection: EMIT(NFA_CLASS_GRAPH); break; case CLASS_LOWER: + wants_nfa = true; EMIT(NFA_CLASS_LOWER); break; case CLASS_PRINT: @@ -1630,6 +1637,7 @@ collection: EMIT(NFA_CLASS_SPACE); break; case CLASS_UPPER: + wants_nfa = true; EMIT(NFA_CLASS_UPPER); break; case CLASS_XDIGIT: @@ -1998,10 +2006,17 @@ static int nfa_regpiece(void) return OK; } - // The engine is very inefficient (uses too many states) when the maximum - // is much larger than the minimum and when the maximum is large. Bail out - // if we can use the other engine. - if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) { + // The engine is very inefficient (uses too many states) when the + // maximum is much larger than the minimum and when the maximum is + // large. However, when maxval is MAX_LIMIT, it is okay, as this + // will emit NFA_STAR. + // Bail out if we can use the other engine, but only, when the + // pattern does not need the NFA engine like (e.g. [[:upper:]]\{2,\} + // does not work with with characters > 8 bit with the BT engine) + if ((nfa_re_flags & RE_AUTO) + && (maxval > 500 || maxval > minval + 200) + && (maxval != MAX_LIMIT && minval < 200) + && !wants_nfa) { return FAIL; } -- cgit