aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/regexp_nfa.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvim/regexp_nfa.c')
-rw-r--r--src/nvim/regexp_nfa.c383
1 files changed, 205 insertions, 178 deletions
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 474f3df32a..93ba9ce097 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -1,3 +1,6 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check
+// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
/*
* NFA regular expression implementation.
*
@@ -53,13 +56,13 @@ enum {
NFA_RANGE_MIN, /* low end of a range */
NFA_RANGE_MAX, /* high end of a range */
- NFA_CONCAT, /* concatenate two previous items (postfix
- * only) */
- NFA_OR, /* \| (postfix only) */
- NFA_STAR, /* greedy * (posfix only) */
- NFA_STAR_NONGREEDY, /* non-greedy * (postfix only) */
- NFA_QUEST, /* greedy \? (postfix only) */
- NFA_QUEST_NONGREEDY, /* non-greedy \? (postfix only) */
+ NFA_CONCAT, // concatenate two previous items (postfix
+ // only)
+ NFA_OR, // \| (postfix only)
+ NFA_STAR, // greedy * (postfix only)
+ NFA_STAR_NONGREEDY, // non-greedy * (postfix only)
+ NFA_QUEST, // greedy \? (postfix only)
+ NFA_QUEST_NONGREEDY, // non-greedy \? (postfix only)
NFA_BOL, /* ^ Begin line */
NFA_EOL, /* $ End line */
@@ -631,6 +634,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
config |= CLASS_o7;
break;
}
+ return FAIL;
case 'a':
if (*(p + 2) == 'z') {
config |= CLASS_az;
@@ -639,6 +643,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
config |= CLASS_af;
break;
}
+ return FAIL;
case 'A':
if (*(p + 2) == 'Z') {
config |= CLASS_AZ;
@@ -647,7 +652,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)
config |= CLASS_AF;
break;
}
- /* FALLTHROUGH */
+ return FAIL;
default:
return FAIL;
}
@@ -1983,7 +1988,7 @@ static int nfa_regpiece(void)
// The engine is very inefficient (uses too many states) when the maximum
// is much larger than the minimum and when the maximum is large. Bail out
// if we can use the other engine.
- if ((nfa_re_flags & RE_AUTO) && (maxval > minval + 200 || maxval > 500)) {
+ if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) {
return FAIL;
}
@@ -2772,15 +2777,10 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
case NFA_ANY:
case NFA_START_COLL:
case NFA_START_NEG_COLL:
- /* matches some character, including composing chars */
- if (enc_utf8)
- len += MB_MAXBYTES;
- else if (has_mbyte)
- len += 2;
- else
- ++len;
+ // Matches some character, including composing chars.
+ len += MB_MAXBYTES;
if (state->c != NFA_ANY) {
- /* skip over the characters */
+ // Skip over the characters.
state = state->out1->out;
continue;
}
@@ -3893,23 +3893,27 @@ state_in_list (
return FALSE;
}
-/*
- * Add "state" and possibly what follows to state list ".".
- * Returns "subs_arg", possibly copied into temp_subs.
- */
+// Offset used for "off" by addstate_here().
+#define ADDSTATE_HERE_OFFSET 10
+// Add "state" and possibly what follows to state list ".".
+// Returns "subs_arg", possibly copied into temp_subs.
static regsubs_T *
addstate (
nfa_list_T *l, /* runtime state list */
nfa_state_T *state, /* state to update */
regsubs_T *subs_arg, /* pointers to subexpressions */
nfa_pim_T *pim, /* postponed look-behind match */
- int off /* byte offset, when -1 go to next line */
-)
+ int off_arg) /* byte offset, when -1 go to next line */
{
int subidx;
+ int off = off_arg;
+ int add_here = FALSE;
+ int listindex = 0;
+ int k;
+ int found = FALSE;
nfa_thread_T *thread;
- lpos_T save_lpos;
+ struct multipos save_multipos;
int save_in_use;
char_u *save_ptr;
int i;
@@ -3920,6 +3924,12 @@ addstate (
int did_print = FALSE;
#endif
+ if (off_arg <= -ADDSTATE_HERE_OFFSET) {
+ add_here = true;
+ off = 0;
+ listindex = -(off_arg + ADDSTATE_HERE_OFFSET);
+ }
+
switch (state->c) {
case NFA_NCLOSE:
case NFA_MCLOSE:
@@ -3996,13 +4006,28 @@ addstate (
* lower position is preferred. */
if (!nfa_has_backref && pim == NULL && !l->has_pim
&& state->c != NFA_MATCH) {
+
+ /* When called from addstate_here() do insert before
+ * existing states. */
+ if (add_here) {
+ for (k = 0; k < l->n && k < listindex; ++k) {
+ if (l->t[k].state->id == state->id) {
+ found = TRUE;
+ break;
+ }
+ }
+ }
+
+ if (!add_here || found) {
skip_add:
#ifdef REGEXP_DEBUG
- nfa_set_code(state->c);
- fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s\n",
- abs(state->id), l->id, state->c, code);
+ nfa_set_code(state->c);
+ fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s pim: %s has_pim: %d found: %d\n",
+ abs(state->id), l->id, state->c, code,
+ pim == NULL ? "NULL" : "yes", l->has_pim, found);
#endif
return subs;
+ }
}
/* Do not add the state again when it exists with the same
@@ -4058,14 +4083,14 @@ skip_add:
case NFA_SPLIT:
/* order matters here */
- subs = addstate(l, state->out, subs, pim, off);
- subs = addstate(l, state->out1, subs, pim, off);
+ subs = addstate(l, state->out, subs, pim, off_arg);
+ subs = addstate(l, state->out1, subs, pim, off_arg);
break;
case NFA_EMPTY:
case NFA_NOPEN:
case NFA_NCLOSE:
- subs = addstate(l, state->out, subs, pim, off);
+ subs = addstate(l, state->out, subs, pim, off_arg);
break;
case NFA_MOPEN:
@@ -4102,15 +4127,13 @@ skip_add:
/* avoid compiler warnings */
save_ptr = NULL;
- save_lpos.lnum = 0;
- save_lpos.col = 0;
+ memset(&save_multipos, 0, sizeof(save_multipos));
/* Set the position (with "off" added) in the subexpression. Save
* and restore it when it was in use. Otherwise fill any gap. */
if (REG_MULTI) {
if (subidx < sub->in_use) {
- save_lpos.lnum = sub->list.multi[subidx].start_lnum;
- save_lpos.col = sub->list.multi[subidx].start_col;
+ save_multipos = sub->list.multi[subidx];
save_in_use = -1;
} else {
save_in_use = sub->in_use;
@@ -4145,7 +4168,7 @@ skip_add:
sub->list.line[subidx].start = reginput + off;
}
- subs = addstate(l, state->out, subs, pim, off);
+ subs = addstate(l, state->out, subs, pim, off_arg);
/* "subs" may have changed, need to set "sub" again */
if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9)
sub = &subs->synt;
@@ -4153,9 +4176,8 @@ skip_add:
sub = &subs->norm;
if (save_in_use == -1) {
- if (REG_MULTI){
- sub->list.multi[subidx].start_lnum = save_lpos.lnum;
- sub->list.multi[subidx].start_col = save_lpos.col;
+ if (REG_MULTI) {
+ sub->list.multi[subidx] = save_multipos;
}
else
sub->list.line[subidx].start = save_ptr;
@@ -4168,9 +4190,10 @@ skip_add:
? subs->norm.list.multi[0].end_lnum >= 0
: subs->norm.list.line[0].end != NULL)) {
/* Do not overwrite the position set by \ze. */
- subs = addstate(l, state->out, subs, pim, off);
+ subs = addstate(l, state->out, subs, pim, off_arg);
break;
}
+ // fallthrough
case NFA_MCLOSE1:
case NFA_MCLOSE2:
case NFA_MCLOSE3:
@@ -4208,8 +4231,7 @@ skip_add:
if (sub->in_use <= subidx)
sub->in_use = subidx + 1;
if (REG_MULTI) {
- save_lpos.lnum = sub->list.multi[subidx].end_lnum;
- save_lpos.col = sub->list.multi[subidx].end_col;
+ save_multipos = sub->list.multi[subidx];
if (off == -1) {
sub->list.multi[subidx].end_lnum = reglnum + 1;
sub->list.multi[subidx].end_col = 0;
@@ -4223,21 +4245,19 @@ skip_add:
} else {
save_ptr = sub->list.line[subidx].end;
sub->list.line[subidx].end = reginput + off;
- /* avoid compiler warnings */
- save_lpos.lnum = 0;
- save_lpos.col = 0;
+ // avoid compiler warnings
+ memset(&save_multipos, 0, sizeof(save_multipos));
}
- subs = addstate(l, state->out, subs, pim, off);
+ subs = addstate(l, state->out, subs, pim, off_arg);
/* "subs" may have changed, need to set "sub" again */
if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9)
sub = &subs->synt;
else
sub = &subs->norm;
- if (REG_MULTI){
- sub->list.multi[subidx].end_lnum = save_lpos.lnum;
- sub->list.multi[subidx].end_col = save_lpos.col;
+ if (REG_MULTI) {
+ sub->list.multi[subidx] = save_multipos;
}
else
sub->list.line[subidx].end = save_ptr;
@@ -4266,8 +4286,10 @@ addstate_here (
int count;
int listidx = *ip;
- /* first add the state(s) at the end, so that we know how many there are */
- addstate(l, state, subs, pim, 0);
+ /* First add the state(s) at the end, so that we know how many there are.
+ * Pass the listidx as offset (avoids adding another argument to
+ * addstate(). */
+ addstate(l, state, subs, pim, -listidx - ADDSTATE_HERE_OFFSET);
/* when "*ip" was at the end of the list, nothing to do */
if (listidx + 1 == tlen)
@@ -4346,7 +4368,7 @@ static int check_char_class(int class, int c)
return OK;
break;
case NFA_CLASS_LOWER:
- if (vim_islower(c) && c != 170 && c != 186) {
+ if (mb_islower(c) && c != 170 && c != 186) {
return OK;
}
break;
@@ -4364,8 +4386,9 @@ static int check_char_class(int class, int c)
return OK;
break;
case NFA_CLASS_UPPER:
- if (vim_isupper(c))
+ if (mb_isupper(c)) {
return OK;
+ }
break;
case NFA_CLASS_XDIGIT:
if (ascii_isxdigit(c))
@@ -4384,8 +4407,9 @@ static int check_char_class(int class, int c)
return OK;
break;
case NFA_CLASS_ESCAPE:
- if (c == '\033')
+ if (c == ESC) {
return OK;
+ }
break;
default:
@@ -4828,17 +4852,10 @@ static int failure_chance(nfa_state_T *state, int depth)
*/
static int skip_to_start(int c, colnr_T *colp)
{
- char_u *s;
-
- /* Used often, do some work to avoid call overhead. */
- if (!ireg_ic
- && !has_mbyte
- )
- s = vim_strbyte(regline + *colp, c);
- else
- s = cstrchr(regline + *colp, c);
- if (s == NULL)
+ const char_u *const s = cstrchr(regline + *colp, c);
+ if (s == NULL) {
return FAIL;
+ }
*colp = (int)(s - regline);
return OK;
}
@@ -4865,7 +4882,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
int c2_len = PTR2LEN(s2);
int c2 = PTR2CHAR(s2);
- if ((c1 != c2 && (!ireg_ic || vim_tolower(c1) != vim_tolower(c2)))
+ if ((c1 != c2 && (!rex.reg_ic || mb_tolower(c1) != mb_tolower(c2)))
|| c1_len != c2_len) {
match = false;
break;
@@ -4878,13 +4895,13 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
&& !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) {
cleanup_subexpr();
if (REG_MULTI) {
- reg_startpos[0].lnum = reglnum;
- reg_startpos[0].col = col;
- reg_endpos[0].lnum = reglnum;
- reg_endpos[0].col = s2 - regline;
+ rex.reg_startpos[0].lnum = reglnum;
+ rex.reg_startpos[0].col = col;
+ rex.reg_endpos[0].lnum = reglnum;
+ rex.reg_endpos[0].col = s2 - regline;
} else {
- reg_startp[0] = regline + col;
- reg_endp[0] = s2;
+ rex.reg_startp[0] = regline + col;
+ rex.reg_endp[0] = s2;
}
return 1L;
}
@@ -5099,8 +5116,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_MATCH:
{
// If the match ends before a composing characters and
- // ireg_icombine is not set, that is not really a match.
- if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) {
+ // rex.reg_icombine is not set, that is not really a match.
+ if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) {
break;
}
nfa_match = true;
@@ -5383,15 +5400,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
int this_class;
// Get class of current and previous char (if it exists).
- this_class = mb_get_class_buf(reginput, reg_buf);
+ this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);
if (this_class <= 1) {
result = false;
} else if (reg_prev_class() == this_class) {
result = false;
}
- } else if (!vim_iswordc_buf(curc, reg_buf)
+ } else if (!vim_iswordc_buf(curc, rex.reg_buf)
|| (reginput > regline
- && vim_iswordc_buf(reginput[-1], reg_buf))) {
+ && vim_iswordc_buf(reginput[-1], rex.reg_buf))) {
result = false;
}
if (result) {
@@ -5408,15 +5425,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
int this_class, prev_class;
// Get class of current and previous char (if it exists).
- this_class = mb_get_class_buf(reginput, reg_buf);
+ this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);
prev_class = reg_prev_class();
if (this_class == prev_class
|| prev_class == 0 || prev_class == 1) {
result = false;
}
- } else if (!vim_iswordc_buf(reginput[-1], reg_buf)
+ } else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf)
|| (reginput[0] != NUL
- && vim_iswordc_buf(curc, reg_buf))) {
+ && vim_iswordc_buf(curc, rex.reg_buf))) {
result = false;
}
if (result) {
@@ -5427,14 +5444,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_BOF:
if (reglnum == 0 && reginput == regline
- && (!REG_MULTI || reg_firstlnum == 1)) {
+ && (!REG_MULTI || rex.reg_firstlnum == 1)) {
add_here = true;
add_state = t->state->out;
}
break;
case NFA_EOF:
- if (reglnum == reg_maxline && curc == NUL) {
+ if (reglnum == rex.reg_maxline && curc == NUL) {
add_here = true;
add_state = t->state->out;
}
@@ -5458,7 +5475,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// (no preceding character).
len += mb_char2len(mc);
}
- if (ireg_icombine && len == 0) {
+ if (rex.reg_icombine && len == 0) {
// If \Z was present, then ignore composing characters.
// When ignoring the base character this always matches.
if (sta->c != curc) {
@@ -5509,14 +5526,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
case NFA_NEWL:
- if (curc == NUL && !reg_line_lbr && REG_MULTI
- && reglnum <= reg_maxline) {
+ if (curc == NUL && !rex.reg_line_lbr && REG_MULTI
+ && reglnum <= rex.reg_maxline) {
go_to_nextline = true;
// Pass -1 for the offset, which means taking the position
// at the start of the next line.
add_state = t->state->out;
add_off = -1;
- } else if (curc == '\n' && reg_line_lbr) {
+ } else if (curc == '\n' && rex.reg_line_lbr) {
// match \n as if it is an ordinary character
add_state = t->state->out;
add_off = 1;
@@ -5557,23 +5574,25 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
result = result_if_matched;
break;
}
- if (ireg_ic) {
- int curc_low = vim_tolower(curc);
- int done = FALSE;
+ if (rex.reg_ic) {
+ int curc_low = mb_tolower(curc);
+ int done = false;
- for (; c1 <= c2; ++c1)
- if (vim_tolower(c1) == curc_low) {
+ for (; c1 <= c2; c1++) {
+ if (mb_tolower(c1) == curc_low) {
result = result_if_matched;
done = TRUE;
break;
}
- if (done)
+ }
+ if (done) {
break;
+ }
}
} else if (state->c < 0 ? check_char_class(state->c, curc)
: (curc == state->c
- || (ireg_ic && vim_tolower(curc)
- == vim_tolower(state->c)))) {
+ || (rex.reg_ic && mb_tolower(curc)
+ == mb_tolower(state->c)))) {
result = result_if_matched;
break;
}
@@ -5620,13 +5639,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
break;
case NFA_KWORD: // \k
- result = vim_iswordp_buf(reginput, reg_buf);
+ result = vim_iswordp_buf(reginput, rex.reg_buf);
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_SKWORD: // \K
result = !ascii_isdigit(curc)
- && vim_iswordp_buf(reginput, reg_buf);
+ && vim_iswordp_buf(reginput, rex.reg_buf);
ADD_STATE_IF_MATCH(t->state);
break;
@@ -5741,24 +5760,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
break;
case NFA_LOWER_IC: // [a-z]
- result = ri_lower(curc) || (ireg_ic && ri_upper(curc));
+ result = ri_lower(curc) || (rex.reg_ic && ri_upper(curc));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_NLOWER_IC: // [^a-z]
result = curc != NUL
- && !(ri_lower(curc) || (ireg_ic && ri_upper(curc)));
+ && !(ri_lower(curc) || (rex.reg_ic && ri_upper(curc)));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_UPPER_IC: // [A-Z]
- result = ri_upper(curc) || (ireg_ic && ri_lower(curc));
+ result = ri_upper(curc) || (rex.reg_ic && ri_lower(curc));
ADD_STATE_IF_MATCH(t->state);
break;
case NFA_NUPPER_IC: // [^A-Z]
result = curc != NUL
- && !(ri_upper(curc) || (ireg_ic && ri_lower(curc)));
+ && !(ri_upper(curc) || (rex.reg_ic && ri_lower(curc)));
ADD_STATE_IF_MATCH(t->state);
break;
@@ -5832,13 +5851,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_LNUM_GT:
case NFA_LNUM_LT:
assert(t->state->val >= 0
- && !((reg_firstlnum > 0 && reglnum > LONG_MAX - reg_firstlnum)
- || (reg_firstlnum <0 && reglnum < LONG_MIN + reg_firstlnum))
- && reglnum + reg_firstlnum >= 0);
+ && !((rex.reg_firstlnum > 0
+ && reglnum > LONG_MAX - rex.reg_firstlnum)
+ || (rex.reg_firstlnum < 0
+ && reglnum < LONG_MIN + rex.reg_firstlnum))
+ && reglnum + rex.reg_firstlnum >= 0);
result = (REG_MULTI
&& nfa_re_num_cmp((uintmax_t)t->state->val,
t->state->c - NFA_LNUM,
- (uintmax_t)(reglnum + reg_firstlnum)));
+ (uintmax_t)(reglnum + rex.reg_firstlnum)));
if (result) {
add_here = true;
add_state = t->state->out;
@@ -5874,7 +5895,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
result = false;
- win_T *wp = reg_win == NULL ? curwin : reg_win;
+ win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;
if (op == 1 && col - 1 > t->state->val && col > 100) {
long ts = wp->w_buffer->b_p_ts;
@@ -5901,18 +5922,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_MARK_GT:
case NFA_MARK_LT:
{
- pos_T *pos = getmark_buf(reg_buf, t->state->val, FALSE);
+ pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false);
// Compare the mark position to the match position.
result = (pos != NULL // mark doesn't exist
&& pos->lnum > 0 // mark isn't set in reg_buf
- && (pos->lnum == reglnum + reg_firstlnum
+ && (pos->lnum == reglnum + rex.reg_firstlnum
? (pos->col == (colnr_T)(reginput - regline)
? t->state->c == NFA_MARK
: (pos->col < (colnr_T)(reginput - regline)
? t->state->c == NFA_MARK_GT
: t->state->c == NFA_MARK_LT))
- : (pos->lnum < reglnum + reg_firstlnum
+ : (pos->lnum < reglnum + rex.reg_firstlnum
? t->state->c == NFA_MARK_GT
: t->state->c == NFA_MARK_LT)));
if (result) {
@@ -5923,10 +5944,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
}
case NFA_CURSOR:
- result = (reg_win != NULL
- && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum)
+ result = (rex.reg_win != NULL
+ && (reglnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum)
&& ((colnr_T)(reginput - regline)
- == reg_win->w_cursor.col));
+ == rex.reg_win->w_cursor.col));
if (result) {
add_here = true;
add_state = t->state->out;
@@ -5976,12 +5997,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
#endif
result = (c == curc);
- if (!result && ireg_ic)
- result = vim_tolower(c) == vim_tolower(curc);
+ if (!result && rex.reg_ic) {
+ result = mb_tolower(c) == mb_tolower(curc);
+ }
- // If ireg_icombine is not set only skip over the character
+ // If rex.reg_icombine is not set only skip over the character
// itself. When it is set skip over composing characters.
- if (result && enc_utf8 && !ireg_icombine) {
+ if (result && enc_utf8 && !rex.reg_icombine) {
clen = utf_ptr2len(reginput);
}
@@ -6089,8 +6111,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
&& ((toplevel
&& reglnum == 0
&& clen != 0
- && (ireg_maxcol == 0
- || (colnr_T)(reginput - regline) < ireg_maxcol))
+ && (rex.reg_maxcol == 0
+ || (colnr_T)(reginput - regline) < rex.reg_maxcol))
|| (nfa_endp != NULL
&& (REG_MULTI
? (reglnum < nfa_endp->se_u.pos.lnum
@@ -6125,8 +6147,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// Checking if the required start character matches is
// cheaper than adding a state that won't match.
c = PTR2CHAR(reginput + clen);
- if (c != prog->regstart && (!ireg_ic || vim_tolower(c)
- != vim_tolower(prog->regstart))) {
+ if (c != prog->regstart && (!rex.reg_ic || mb_tolower(c)
+ != mb_tolower(prog->regstart))) {
#ifdef REGEXP_DEBUG
fprintf(log_fd,
" Skipping start state, regstart does not match\n");
@@ -6251,34 +6273,37 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm)
cleanup_subexpr();
if (REG_MULTI) {
for (i = 0; i < subs.norm.in_use; i++) {
- reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
- reg_startpos[i].col = subs.norm.list.multi[i].start_col;
+ rex.reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
+ rex.reg_startpos[i].col = subs.norm.list.multi[i].start_col;
- reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
- reg_endpos[i].col = subs.norm.list.multi[i].end_col;
+ rex.reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
+ rex.reg_endpos[i].col = subs.norm.list.multi[i].end_col;
}
- if (reg_startpos[0].lnum < 0) {
- reg_startpos[0].lnum = 0;
- reg_startpos[0].col = col;
+ if (rex.reg_startpos[0].lnum < 0) {
+ rex.reg_startpos[0].lnum = 0;
+ rex.reg_startpos[0].col = col;
+ }
+ if (rex.reg_endpos[0].lnum < 0) {
+ // pattern has a \ze but it didn't match, use current end
+ rex.reg_endpos[0].lnum = reglnum;
+ rex.reg_endpos[0].col = (int)(reginput - regline);
+ } else {
+ // Use line number of "\ze".
+ reglnum = rex.reg_endpos[0].lnum;
}
- if (reg_endpos[0].lnum < 0) {
- /* pattern has a \ze but it didn't match, use current end */
- reg_endpos[0].lnum = reglnum;
- reg_endpos[0].col = (int)(reginput - regline);
- } else
- /* Use line number of "\ze". */
- reglnum = reg_endpos[0].lnum;
} else {
for (i = 0; i < subs.norm.in_use; i++) {
- reg_startp[i] = subs.norm.list.line[i].start;
- reg_endp[i] = subs.norm.list.line[i].end;
+ rex.reg_startp[i] = subs.norm.list.line[i].start;
+ rex.reg_endp[i] = subs.norm.list.line[i].end;
}
- if (reg_startp[0] == NULL)
- reg_startp[0] = regline + col;
- if (reg_endp[0] == NULL)
- reg_endp[0] = reginput;
+ if (rex.reg_startp[0] == NULL) {
+ rex.reg_startp[0] = regline + col;
+ }
+ if (rex.reg_endp[0] == NULL) {
+ rex.reg_endp[0] = reginput;
+ }
}
/* Package any found \z(...\) matches for export. Default is none. */
@@ -6332,14 +6357,14 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
colnr_T col = startcol;
if (REG_MULTI) {
- prog = (nfa_regprog_T *)reg_mmatch->regprog;
- line = reg_getline((linenr_T)0); /* relative to the cursor */
- reg_startpos = reg_mmatch->startpos;
- reg_endpos = reg_mmatch->endpos;
+ prog = (nfa_regprog_T *)rex.reg_mmatch->regprog;
+ line = reg_getline((linenr_T)0); // relative to the cursor
+ rex.reg_startpos = rex.reg_mmatch->startpos;
+ rex.reg_endpos = rex.reg_mmatch->endpos;
} else {
- prog = (nfa_regprog_T *)reg_match->regprog;
- reg_startp = reg_match->startp;
- reg_endp = reg_match->endp;
+ prog = (nfa_regprog_T *)rex.reg_match->regprog;
+ rex.reg_startp = rex.reg_match->startp;
+ rex.reg_endp = rex.reg_match->endp;
}
/* Be paranoid... */
@@ -6348,15 +6373,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
goto theend;
}
- /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
- if (prog->regflags & RF_ICASE)
- ireg_ic = TRUE;
- else if (prog->regflags & RF_NOICASE)
- ireg_ic = FALSE;
+ // If pattern contains "\c" or "\C": overrule value of rex.reg_ic
+ if (prog->regflags & RF_ICASE) {
+ rex.reg_ic = true;
+ } else if (prog->regflags & RF_NOICASE) {
+ rex.reg_ic = false;
+ }
- /* If pattern contains "\Z" overrule value of ireg_icombine */
- if (prog->regflags & RF_ICOMBINE)
- ireg_icombine = TRUE;
+ // If pattern contains "\Z" overrule value of rex.reg_icombine
+ if (prog->regflags & RF_ICOMBINE) {
+ rex.reg_icombine = true;
+ }
regline = line;
reglnum = 0; /* relative to line */
@@ -6385,17 +6412,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)
if (skip_to_start(prog->regstart, &col) == FAIL)
return 0L;
- /* If match_text is set it contains the full text that must match.
- * Nothing else to try. Doesn't handle combining chars well. */
- if (prog->match_text != NULL
- && !ireg_icombine
- )
+ // If match_text is set it contains the full text that must match.
+ // Nothing else to try. Doesn't handle combining chars well.
+ if (prog->match_text != NULL && !rex.reg_icombine) {
return find_match_text(col, prog->regstart, prog->match_text);
+ }
}
- /* If the start column is past the maximum column: no need to try. */
- if (ireg_maxcol > 0 && col >= ireg_maxcol)
+ // If the start column is past the maximum column: no need to try.
+ if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) {
goto theend;
+ }
nstate = prog->nstate;
for (i = 0; i < nstate; ++i) {
@@ -6547,15 +6574,15 @@ nfa_regexec_nl (
bool line_lbr
)
{
- reg_match = rmp;
- reg_mmatch = NULL;
- reg_maxline = 0;
- reg_line_lbr = line_lbr;
- reg_buf = curbuf;
- reg_win = NULL;
- ireg_ic = rmp->rm_ic;
- ireg_icombine = FALSE;
- ireg_maxcol = 0;
+ rex.reg_match = rmp;
+ rex.reg_mmatch = NULL;
+ rex.reg_maxline = 0;
+ rex.reg_line_lbr = line_lbr;
+ rex.reg_buf = curbuf;
+ rex.reg_win = NULL;
+ rex.reg_ic = rmp->rm_ic;
+ rex.reg_icombine = false;
+ rex.reg_maxcol = 0;
return nfa_regexec_both(line, col, NULL);
}
@@ -6596,16 +6623,16 @@ nfa_regexec_nl (
static long nfa_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf,
linenr_T lnum, colnr_T col, proftime_T *tm)
{
- reg_match = NULL;
- reg_mmatch = rmp;
- reg_buf = buf;
- reg_win = win;
- reg_firstlnum = lnum;
- reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
- reg_line_lbr = FALSE;
- ireg_ic = rmp->rmm_ic;
- ireg_icombine = FALSE;
- ireg_maxcol = rmp->rmm_maxcol;
+ rex.reg_match = NULL;
+ rex.reg_mmatch = rmp;
+ rex.reg_buf = buf;
+ rex.reg_win = win;
+ rex.reg_firstlnum = lnum;
+ rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
+ rex.reg_line_lbr = false;
+ rex.reg_ic = rmp->rmm_ic;
+ rex.reg_icombine = false;
+ rex.reg_maxcol = rmp->rmm_maxcol;
return nfa_regexec_both(NULL, col, tm);
}