aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/nvim/regexp.c149
-rw-r--r--src/nvim/regexp_defs.h30
-rw-r--r--src/nvim/regexp_nfa.c35
-rw-r--r--src/nvim/version.c2
4 files changed, 143 insertions, 73 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index 8b7033b64b..d5e963c2f8 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -6914,13 +6914,8 @@ static regengine_T bt_regengine =
bt_regcomp,
bt_regfree,
bt_regexec_nl,
- bt_regexec_multi
-#ifdef REGEXP_DEBUG
- ,(char_u *)""
-#endif
-#ifdef DEBUG
- ,NULL
-#endif
+ bt_regexec_multi,
+ (char_u *)""
};
@@ -6934,21 +6929,14 @@ static regengine_T nfa_regengine =
nfa_regcomp,
nfa_regfree,
nfa_regexec_nl,
- nfa_regexec_multi
-#ifdef REGEXP_DEBUG
- ,(char_u *)""
-#endif
-#ifdef DEBUG
- , NULL
-#endif
+ nfa_regexec_multi,
+ (char_u *)""
};
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
static int regexp_engine = 0;
-#define AUTOMATIC_ENGINE 0
-#define BACKTRACKING_ENGINE 1
-#define NFA_ENGINE 2
+
#ifdef REGEXP_DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
@@ -6990,10 +6978,8 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
regexp_engine = AUTOMATIC_ENGINE;
}
}
-#ifdef REGEXP_DEBUG
bt_regengine.expr = expr;
nfa_regengine.expr = expr;
-#endif
/*
* First try the NFA engine, unless backtracking was requested.
@@ -7003,11 +6989,12 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
else
prog = bt_regengine.regcomp(expr, re_flags);
- if (prog == NULL) { /* error compiling regexp with initial engine */
+ // Check for error compiling regexp with initial engine.
+ if (prog == NULL) {
#ifdef BT_REGEXP_DEBUG_LOG
- if (regexp_engine != BACKTRACKING_ENGINE) { /* debugging log for NFA */
- FILE *f;
- f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
+ // Debugging log for NFA.
+ if (regexp_engine != BACKTRACKING_ENGINE) {
+ FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
if (f) {
fprintf(f, "Syntax error in \"%s\"\n", expr);
fclose(f);
@@ -7016,12 +7003,22 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
BT_REGEXP_DEBUG_LOG_NAME);
}
#endif
- /*
- * If the NFA engine failed, the backtracking engine won't work either.
- *
- if (regexp_engine == AUTOMATIC_ENGINE)
- prog = bt_regengine.regcomp(expr, re_flags);
- */
+ // If the NFA engine failed, try the backtracking engine.
+ // Disabled for now, both engines fail on the same patterns.
+ // Re-enable when regcomp() fails when the pattern would work better
+ // with the other engine.
+ //
+ // if (regexp_engine == AUTOMATIC_ENGINE) {
+ // prog = bt_regengine.regcomp(expr, re_flags);
+ // regexp_engine = BACKTRACKING_ENGINE;
+ // }
+ }
+
+ if (prog != NULL) {
+ // Store the info needed to call regcomp() again when the engine turns out
+ // to be very slow when executing it.
+ prog->re_engine = regexp_engine;
+ prog->re_flags = re_flags;
}
return prog;
@@ -7036,29 +7033,62 @@ void vim_regfree(regprog_T *prog)
prog->engine->regfree(prog);
}
-/*
- * Match a regexp against a string.
- * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
- * Uses curbuf for line count and 'iskeyword'.
- *
- * Return TRUE if there is a match, FALSE if not.
- */
-int
-vim_regexec (
- regmatch_T *rmp,
- char_u *line, /* string to match against */
- colnr_T col /* column to start looking for match */
-)
+static void report_re_switch(char_u *pat)
{
- return rmp->regprog->engine->regexec_nl(rmp, line, col, false);
+ if (p_verbose > 0) {
+ verbose_enter();
+ MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
+ MSG_PUTS(pat);
+ verbose_leave();
+ }
}
-/*
- * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
- */
+/// Matches a regexp against a string.
+/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
+/// Uses curbuf for line count and 'iskeyword'.
+/// When "nl" is true consider a "\n" in "line" to be a line break.
+///
+/// @param rmp
+/// @param line the string to match against
+/// @param col the column to start looking for match
+/// @param nl
+///
+/// @return TRUE if there is a match, FALSE if not.
+static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)
+{
+ int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
+
+ // NFA engine aborted because it's very slow, use backtracking engine instead.
+ if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
+ && result == NFA_TOO_EXPENSIVE) {
+ int save_p_re = p_re;
+ int re_flags = rmp->regprog->re_flags;
+ char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
+
+ p_re = BACKTRACKING_ENGINE;
+ vim_regfree(rmp->regprog);
+ report_re_switch(pat);
+ rmp->regprog = vim_regcomp(pat, re_flags);
+ if (rmp->regprog != NULL) {
+ result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
+ }
+
+ free(pat);
+ p_re = save_p_re;
+ }
+
+ return result;
+}
+
+int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
+{
+ return vim_regexec_both(rmp, line, col, false);
+}
+
+// Like vim_regexec(), but consider a "\n" in "line" to be a line break.
int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
- return rmp->regprog->engine->regexec_nl(rmp, line, col, true);
+ return vim_regexec_both(rmp, line, col, true);
}
/*
@@ -7078,5 +7108,28 @@ long vim_regexec_multi(
proftime_T *tm /* timeout limit or NULL */
)
{
- return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
+ int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
+ tm);
+
+ // NFA engine aborted because it's very slow, use backtracking engine instead.
+ if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
+ && result == NFA_TOO_EXPENSIVE) {
+ int save_p_re = p_re;
+ int re_flags = rmp->regprog->re_flags;
+ char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
+
+ p_re = BACKTRACKING_ENGINE;
+ vim_regfree(rmp->regprog);
+ report_re_switch(pat);
+ rmp->regprog = vim_regcomp(pat, re_flags);
+ if (rmp->regprog != NULL) {
+ result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
+ tm);
+ }
+
+ free(pat);
+ p_re = save_p_re;
+ }
+
+ return result;
}
diff --git a/src/nvim/regexp_defs.h b/src/nvim/regexp_defs.h
index 1e00f14ac6..6426ee441b 100644
--- a/src/nvim/regexp_defs.h
+++ b/src/nvim/regexp_defs.h
@@ -30,6 +30,16 @@
*/
#define NFA_MAX_BRACES 20
+// In the NFA engine: how many states are allowed.
+#define NFA_MAX_STATES 100000
+#define NFA_TOO_EXPENSIVE -1
+
+// Which regexp engine to use? Needed for vim_regcomp().
+// Must match with 'regexpengine'.
+#define AUTOMATIC_ENGINE 0
+#define BACKTRACKING_ENGINE 1
+#define NFA_ENGINE 2
+
typedef struct regengine regengine_T;
/*
@@ -38,8 +48,10 @@ typedef struct regengine regengine_T;
* structures are used. See code below.
*/
typedef struct regprog {
- regengine_T *engine;
+ regengine_T *engine;
unsigned regflags;
+ unsigned re_engine; ///< Automatic, backtracking or NFA engine.
+ unsigned re_flags; ///< Second argument for vim_regcomp().
} regprog_T;
/*
@@ -48,9 +60,11 @@ typedef struct regprog {
* See regexp.c for an explanation.
*/
typedef struct {
- /* These two members implement regprog_T */
- regengine_T *engine;
+ // These four members implement regprog_T.
+ regengine_T *engine;
unsigned regflags;
+ unsigned re_engine;
+ unsigned re_flags; ///< Second argument for vim_regcomp().
int regstart;
char_u reganch;
@@ -78,9 +92,11 @@ struct nfa_state {
* Structure used by the NFA matcher.
*/
typedef struct {
- /* These two members implement regprog_T */
- regengine_T *engine;
+ // These four members implement regprog_T.
+ regengine_T *engine;
unsigned regflags;
+ unsigned re_engine;
+ unsigned re_flags; ///< Second argument for vim_regcomp().
nfa_state_T *start; /* points into state[] */
@@ -91,9 +107,7 @@ typedef struct {
int has_zend; /* pattern contains \ze */
int has_backref; /* pattern contains \1 .. \9 */
int reghasz;
-#ifdef DEBUG
char_u *pattern;
-#endif
int nsubexp; /* number of () */
int nstate;
nfa_state_T state[1]; /* actually longer.. */
@@ -143,9 +157,7 @@ struct regengine {
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, bool);
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T,
proftime_T*);
-#ifdef DEBUG
char_u *expr;
-#endif
};
#endif // NVIM_REGEXP_DEFS_H
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 6ed42303ef..1de167c40f 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -2347,7 +2347,6 @@ static void nfa_set_code(int c)
}
-#ifdef REGEXP_DEBUG
static FILE *log_fd;
/*
@@ -2468,7 +2467,6 @@ static void nfa_dump(nfa_regprog_T *prog)
}
}
#endif /* REGEXP_DEBUG */
-#endif /* REGEXP_DEBUG */
/*
* Parse r.e. @expr and convert it into postfix form.
@@ -4908,6 +4906,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
nextlist->n = 0; /* clear nextlist */
nextlist->has_pim = FALSE;
++nfa_listid;
+ if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) {
+ // Too many states, retry with old engine.
+ nfa_match = NFA_TOO_EXPENSIVE;
+ goto theend;
+ }
+
thislist->id = nfa_listid;
nextlist->id = nfa_listid + 1;
@@ -5082,6 +5086,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
*/
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
+ if (result == NFA_TOO_EXPENSIVE) {
+ nfa_match = result;
+ goto theend;
+ }
/* for \@! and \@<! it is a match when the result is
* FALSE */
@@ -5180,6 +5188,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
/* First try matching the pattern. */
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
+ if (result == NFA_TOO_EXPENSIVE) {
+ nfa_match = result;
+ goto theend;
+ }
if (result) {
int bytelen;
@@ -6019,6 +6031,7 @@ nextchar:
log_fd = NULL;
#endif
+theend:
/* Free memory */
free(list[0].t);
free(list[1].t);
@@ -6068,8 +6081,12 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col)
clear_sub(&subs.synt);
clear_sub(&m.synt);
- if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
+ int result = nfa_regmatch(prog, start, &subs, &m);
+ if (result == FALSE) {
return 0;
+ } else if (result == NFA_TOO_EXPENSIVE) {
+ return result;
+ }
cleanup_subexpr();
if (REG_MULTI) {
@@ -6186,9 +6203,7 @@ nfa_regexec_both (
nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1;
nfa_alt_listid = 2;
-#ifdef REGEXP_DEBUG
nfa_regengine.expr = prog->pattern;
-#endif
if (prog->reganch && col > 0)
return 0L;
@@ -6228,9 +6243,7 @@ nfa_regexec_both (
retval = nfa_regtry(prog, col);
-#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL;
-#endif
theend:
return retval;
@@ -6248,9 +6261,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
if (expr == NULL)
return NULL;
-#ifdef REGEXP_DEBUG
nfa_regengine.expr = expr;
-#endif
init_class_tab();
@@ -6325,10 +6336,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
#endif
/* Remember whether this pattern has any \z specials in it. */
prog->reghasz = re_has_z;
-#ifdef REGEXP_DEBUG
prog->pattern = vim_strsave(expr);
nfa_regengine.expr = NULL;
-#endif
out:
free(post_start);
@@ -6342,9 +6351,7 @@ fail:
#ifdef REGEXP_DEBUG
nfa_postfix_dump(expr, FAIL);
#endif
-#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL;
-#endif
goto out;
}
@@ -6355,9 +6362,7 @@ static void nfa_regfree(regprog_T *prog)
{
if (prog != NULL) {
free(((nfa_regprog_T *)prog)->match_text);
-#ifdef REGEXP_DEBUG
free(((nfa_regprog_T *)prog)->pattern);
-#endif
free(prog);
}
}
diff --git a/src/nvim/version.c b/src/nvim/version.c
index 8cdc06dba5..bbe5500d3d 100644
--- a/src/nvim/version.c
+++ b/src/nvim/version.c
@@ -243,7 +243,7 @@ static int included_patches[] = {
500,
499,
//498 NA
- //497,
+ 497,
//496 NA
//495 NA
494,