diff options
author | Justin M. Keyes <justinkz@gmail.com> | 2019-03-08 13:21:11 +0100 |
---|---|---|
committer | Justin M. Keyes <justinkz@gmail.com> | 2019-03-08 13:21:11 +0100 |
commit | 4352d41db0d93ab9266c64be48eda872cb5ea589 (patch) | |
tree | a10aa3db77c6db9c7c809200569a9bdd15119f92 /src/nvim/regexp.c | |
parent | 0355c1ed9c481d4ad3bf24887e0af869834e1b40 (diff) | |
parent | 96e2c3945f13453070894c70c74e4da29d421dab (diff) | |
download | rneovim-4352d41db0d93ab9266c64be48eda872cb5ea589.tar.gz rneovim-4352d41db0d93ab9266c64be48eda872cb5ea589.tar.bz2 rneovim-4352d41db0d93ab9266c64be48eda872cb5ea589.zip |
Merge #9662 'vim-patch:8.0.{0643-0646}'
Diffstat (limited to 'src/nvim/regexp.c')
-rw-r--r-- | src/nvim/regexp.c | 173 |
1 files changed, 101 insertions, 72 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index c043d4a173..ab1a7c7b3e 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -1210,6 +1210,31 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char_u **newp) return p; } +/// Return TRUE if the back reference is legal. We must have seen the close +/// brace. +/// TODO(vim): Should also check that we don't refer to something repeated +/// (+*=): what instance of the repetition should we match? +static int seen_endbrace(int refnum) +{ + if (!had_endbrace[refnum]) { + char_u *p; + + // Trick: check if "@<=" or "@<!" follows, in which case + // the \1 can appear before the referenced match. + for (p = regparse; *p != NUL; p++) { + if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '=')) { + break; + } + } + + if (*p == NUL) { + EMSG(_("E65: Illegal back reference")); + rc_did_emsg = true; + return false; + } + } + return TRUE; +} /* * bt_regcomp() - compile a regular expression into internal code for the @@ -1928,22 +1953,8 @@ static char_u *regatom(int *flagp) int refnum; refnum = c - Magic('0'); - /* - * Check if the back reference is legal. We must have seen the - * close brace. - * TODO: Should also check that we don't refer to something - * that is repeated (+*=): what instance of the repetition - * should we match? - */ - if (!had_endbrace[refnum]) { - /* Trick: check if "@<=" or "@<!" follows, in which case - * the \1 can appear before the referenced match. */ - for (p = regparse; *p != NUL; ++p) - if (p[0] == '@' && p[1] == '<' - && (p[2] == '!' || p[2] == '=')) - break; - if (*p == NUL) - EMSG_RET_NULL(_("E65: Illegal back reference")); + if (!seen_endbrace(refnum)) { + return NULL; } ret = regnode(BACKREF + refnum); } @@ -3297,7 +3308,7 @@ bt_regexec_nl ( rex.reg_icombine = false; rex.reg_maxcol = 0; - long r = bt_regexec_both(line, col, NULL); + long r = bt_regexec_both(line, col, NULL, NULL); assert(r <= INT_MAX); return (int)r; } @@ -3357,7 +3368,8 @@ static inline char_u *cstrchr(const char_u *const s, const int c) /// @return zero if there is no match and number of lines contained in the match /// otherwise. static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, - linenr_T lnum, colnr_T col, proftime_T *tm) + linenr_T lnum, colnr_T col, + proftime_T *tm, int *timed_out) { rex.reg_match = NULL; rex.reg_mmatch = rmp; @@ -3370,18 +3382,16 @@ static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, rex.reg_icombine = false; rex.reg_maxcol = rmp->rmm_maxcol; - return bt_regexec_both(NULL, col, tm); + return bt_regexec_both(NULL, col, tm, timed_out); } -/* - * Match a regexp against a string ("line" points to the string) or multiple - * lines ("line" is NULL, use reg_getline()). - * Returns 0 for failure, number of lines contained in the match otherwise. - */ +/// Match a regexp against a string ("line" points to the string) or multiple +/// lines ("line" is NULL, use reg_getline()). +/// @return 0 for failure, or number of lines contained in the match. static long bt_regexec_both(char_u *line, - colnr_T col, /* column to start looking for match */ - proftime_T *tm /* timeout limit or NULL */ - ) + colnr_T col, // column to start search + proftime_T *tm, // timeout limit or NULL + int *timed_out) // flag set on timeout or NULL { bt_regprog_T *prog; char_u *s; @@ -3483,7 +3493,7 @@ static long bt_regexec_both(char_u *line, && (utf_fold(prog->regstart) == utf_fold(c) || (c < 255 && prog->regstart < 255 && mb_tolower(prog->regstart) == mb_tolower(c))))) { - retval = regtry(prog, col); + retval = regtry(prog, col, tm, timed_out); } else { retval = 0; } @@ -3507,9 +3517,10 @@ static long bt_regexec_both(char_u *line, break; } - retval = regtry(prog, col); - if (retval > 0) + retval = regtry(prog, col, tm, timed_out); + if (retval > 0) { break; + } /* if not currently on the first line, get it again */ if (reglnum != 0) { @@ -3525,8 +3536,12 @@ static long bt_regexec_both(char_u *line, /* Check for timeout once in a twenty times to avoid overhead. */ if (tm != NULL && ++tm_count == 20) { tm_count = 0; - if (profile_passed_limit(*tm)) + if (profile_passed_limit(*tm)) { + if (timed_out != NULL) { + *timed_out = true; + } break; + } } } } @@ -3582,11 +3597,12 @@ void unref_extmatch(reg_extmatch_T *em) } } -/* - * regtry - try match of "prog" with at regline["col"]. - * Returns 0 for failure, number of lines contained in the match otherwise. - */ -static long regtry(bt_regprog_T *prog, colnr_T col) +/// Try match of "prog" with at regline["col"]. +/// @returns 0 for failure, or number of lines contained in the match. +static long regtry(bt_regprog_T *prog, + colnr_T col, + proftime_T *tm, // timeout limit or NULL + int *timed_out) // flag set on timeout or NULL { reginput = regline + col; need_clear_subexpr = TRUE; @@ -3594,8 +3610,9 @@ static long regtry(bt_regprog_T *prog, colnr_T col) if (prog->reghasz == REX_SET) need_clear_zsubexpr = TRUE; - if (regmatch(prog->program + 1) == 0) + if (regmatch(prog->program + 1, tm, timed_out) == 0) { return 0; + } cleanup_subexpr(); if (REG_MULTI) { @@ -3736,24 +3753,23 @@ static int reg_match_visual(void) static long bl_minval; static long bl_maxval; -/* - * regmatch - main matching routine - * - * Conceptually the strategy is simple: Check to see whether the current node - * matches, push an item onto the regstack and loop to see whether the rest - * matches, and then act accordingly. In practice we make some effort to - * avoid using the regstack, in particular by going through "ordinary" nodes - * (that don't need to know whether the rest of the match failed) by a nested - * loop. - * - * Returns TRUE when there is a match. Leaves reginput and reglnum just after - * the last matched character. - * Returns FALSE when there is no match. Leaves reginput and reglnum in an - * undefined state! - */ -static int -regmatch ( - char_u *scan /* Current node. */ +/// Main matching routine +/// +/// Conceptually the strategy is simple: Check to see whether the current node +/// matches, push an item onto the regstack and loop to see whether the rest +/// matches, and then act accordingly. In practice we make some effort to +/// avoid using the regstack, in particular by going through "ordinary" nodes +/// (that don't need to know whether the rest of the match failed) by a nested +/// loop. +/// +/// Returns TRUE when there is a match. Leaves reginput and reglnum just after +/// the last matched character. +/// Returns FALSE when there is no match. Leaves reginput and reglnum in an +/// undefined state! +static int regmatch( + char_u *scan, // Current node. + proftime_T *tm, // timeout limit or NULL + int *timed_out // flag set on timeout or NULL ) { char_u *next; /* Next node. */ @@ -3761,15 +3777,16 @@ regmatch ( int c; regitem_T *rp; int no; - int status; /* one of the RA_ values: */ -#define RA_FAIL 1 /* something failed, abort */ -#define RA_CONT 2 /* continue in inner loop */ -#define RA_BREAK 3 /* break inner loop */ -#define RA_MATCH 4 /* successful match */ -#define RA_NOMATCH 5 /* didn't match */ - - /* Make "regstack" and "backpos" empty. They are allocated and freed in - * bt_regexec_both() to reduce malloc()/free() calls. */ + int status; // one of the RA_ values: + int tm_count = 0; +#define RA_FAIL 1 // something failed, abort +#define RA_CONT 2 // continue in inner loop +#define RA_BREAK 3 // break inner loop +#define RA_MATCH 4 // successful match +#define RA_NOMATCH 5 // didn't match + + // Make "regstack" and "backpos" empty. They are allocated and freed in + // bt_regexec_both() to reduce malloc()/free() calls. regstack.ga_len = 0; backpos.ga_len = 0; @@ -3797,6 +3814,17 @@ regmatch ( status = RA_FAIL; break; } + // Check for timeout once in a 100 times to avoid overhead. + if (tm != NULL && ++tm_count == 100) { + tm_count = 0; + if (profile_passed_limit(*tm)) { + if (timed_out != NULL) { + *timed_out = true; + } + status = RA_FAIL; + break; + } + } status = RA_CONT; #ifdef REGEXP_DEBUG @@ -7275,12 +7303,13 @@ int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) /// Return zero if there is no match. Return number of lines contained in the /// match otherwise. long vim_regexec_multi( - regmmatch_T *rmp, - win_T *win, /* window in which to search or NULL */ - buf_T *buf, /* buffer in which to search */ - linenr_T lnum, /* nr of line to start looking for match */ - colnr_T col, /* column to start looking for match */ - proftime_T *tm /* timeout limit or NULL */ + regmmatch_T *rmp, + win_T *win, // window in which to search or NULL + buf_T *buf, // buffer in which to search + linenr_T lnum, // nr of line to start looking for match + colnr_T col, // column to start looking for match + proftime_T *tm, // timeout limit or NULL + int *timed_out // flag is set when timeout limit reached ) { regexec_T rex_save; @@ -7293,7 +7322,7 @@ long vim_regexec_multi( rex_in_use = true; int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, - tm); + tm, timed_out); // NFA engine aborted because it's very slow, use backtracking engine instead. if (rmp->regprog->re_engine == AUTOMATIC_ENGINE @@ -7313,7 +7342,7 @@ long vim_regexec_multi( if (rmp->regprog != NULL) { result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, - tm); + tm, timed_out); } xfree(pat); |