diff options
-rw-r--r-- | src/nvim/regexp.c | 799 | ||||
-rw-r--r-- | src/nvim/regexp_defs.h | 7 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 1107 |
3 files changed, 973 insertions, 940 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 34553fcec4..5fdd18733e 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -301,8 +301,8 @@ typedef struct { */ typedef struct { union { - char_u *ptr; /* reginput pointer, for single-line regexp */ - lpos_T pos; /* reginput pos, for multi-line regexp */ + char_u *ptr; ///< rex.input pointer, for single-line regexp + lpos_T pos; ///< rex.input pos, for multi-line regexp } rs_u; int rs_len; } regsave_T; @@ -355,7 +355,7 @@ typedef struct regitem_S { union { save_se_T sesave; regsave_T regsave; - } rs_un; // room for saving reginput + } rs_un; ///< room for saving rex.input } regitem_T; @@ -490,6 +490,8 @@ static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here"); static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); +static char_u e_recursive[] = N_("E956: Cannot use pattern recursively"); + #define NOT_MULTI 0 #define MULTI_ONE 1 #define MULTI_MULT 2 @@ -633,7 +635,7 @@ static short class_tab[256]; static void init_class_tab(void) { int i; - static int done = FALSE; + static int done = false; if (done) return; @@ -658,7 +660,7 @@ static void init_class_tab(void) } class_tab[' '] |= RI_WHITE; class_tab['\t'] |= RI_WHITE; - done = TRUE; + done = true; } # define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT)) @@ -678,26 +680,24 @@ static void init_class_tab(void) #define RF_ICOMBINE 8 /* ignore combining characters */ #define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */ -/* - * Global work variables for vim_regcomp(). - */ - -static char_u *regparse; /* Input-scan pointer. */ -static int prevchr_len; /* byte length of previous char */ -static int num_complex_braces; /* Complex \{...} count */ -static int regnpar; /* () count. */ -static int regnzpar; /* \z() count. 
*/ -static int re_has_z; /* \z item detected */ -static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */ -static long regsize; /* Code size. */ -static int reg_toolong; /* TRUE when offset out of range */ -static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */ -static unsigned regflags; /* RF_ flags for prog */ -static long brace_min[10]; /* Minimums for complex brace repeats */ -static long brace_max[10]; /* Maximums for complex brace repeats */ -static int brace_count[10]; /* Current counts for complex brace repeats */ -static int had_eol; /* TRUE when EOL found by vim_regcomp() */ -static int one_exactly = FALSE; /* only do one char for EXACTLY */ +// Global work variables for vim_regcomp(). + +static char_u *regparse; ///< Input-scan pointer. +static int prevchr_len; ///< byte length of previous char +static int num_complex_braces; ///< Complex \{...} count +static int regnpar; ///< () count. +static int regnzpar; ///< \z() count. +static int re_has_z; ///< \z item detected +static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE +static long regsize; ///< Code size. +static int reg_toolong; ///< true when offset out of range +static char_u had_endbrace[NSUBEXP]; ///< flags, true if end of () found +static unsigned regflags; ///< RF_ flags for prog +static long brace_min[10]; ///< Minimums for complex brace repeats +static long brace_max[10]; ///< Maximums for complex brace repeats +static int brace_count[10]; ///< Current counts for complex brace repeats +static int had_eol; ///< true when EOL found by vim_regcomp() +static int one_exactly = false; ///< only do one char for EXACTLY static int reg_magic; /* magicness of the pattern: */ #define MAGIC_NONE 1 /* "\V" very unmagic */ @@ -754,10 +754,9 @@ static int nextchr; /* used for ungetchr() */ static regengine_T bt_regengine; static regengine_T nfa_regengine; -/* - * Return TRUE if compiled regular expression "prog" can match a line break. 
- */ -int re_multiline(regprog_T *prog) +// Return true if compiled regular expression "prog" can match a line break. +int re_multiline(const regprog_T *prog) + FUNC_ATTR_NONNULL_ALL { return prog->regflags & RF_HASNL; } @@ -1211,7 +1210,7 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char_u **newp) return p; } -/// Return TRUE if the back reference is legal. We must have seen the close +/// Return true if the back reference is legal. We must have seen the close /// brace. /// TODO(vim): Should also check that we don't refer to something repeated /// (+*=): what instance of the repetition should we match? @@ -1234,7 +1233,7 @@ static int seen_endbrace(int refnum) return false; } } - return TRUE; + return true; } /* @@ -1281,6 +1280,7 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) /* Allocate space. */ bt_regprog_T *r = xmalloc(sizeof(bt_regprog_T) + regsize); + r->re_in_use = false; /* * Second pass: emit code. @@ -1394,9 +1394,9 @@ regcomp_start ( regnzpar = 1; re_has_z = 0; regsize = 0L; - reg_toolong = FALSE; + reg_toolong = false; regflags = 0; - had_eol = FALSE; + had_eol = false; } /* @@ -1408,7 +1408,7 @@ int vim_regcomp_had_eol(void) return had_eol; } -// variables for parsing reginput +// variables used for parsing static int at_start; // True when on the first character static int prev_at_start; // True when on the second character @@ -1506,12 +1506,11 @@ reg ( EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ /* NOTREACHED */ } - /* - * Here we set the flag allowing back references to this set of - * parentheses. - */ - if (paren == REG_PAREN) - had_endbrace[parno] = TRUE; /* have seen the close paren */ + // Here we set the flag allowing back references to this set of + // parentheses. 
+ if (paren == REG_PAREN) { + had_endbrace[parno] = true; // have seen the close paren + } return ret; } @@ -1565,7 +1564,7 @@ static char_u *regconcat(int *flagp) char_u *chain = NULL; char_u *latest; int flags; - int cont = TRUE; + int cont = true; *flagp = WORST; /* Tentatively. */ @@ -1575,7 +1574,7 @@ static char_u *regconcat(int *flagp) case Magic('|'): case Magic('&'): case Magic(')'): - cont = FALSE; + cont = false; break; case Magic('Z'): regflags |= RF_ICOMBINE; @@ -1802,7 +1801,7 @@ static char_u *regatom(int *flagp) case Magic('$'): ret = regnode(EOL); - had_eol = TRUE; + had_eol = true; break; case Magic('<'): @@ -1821,7 +1820,7 @@ static char_u *regatom(int *flagp) } if (c == '$') { /* "\_$" is end-of-line */ ret = regnode(EOL); - had_eol = TRUE; + had_eol = true; break; } @@ -2069,11 +2068,12 @@ static char_u *regatom(int *flagp) } ungetchr(); - one_exactly = TRUE; + one_exactly = true; lastnode = regatom(flagp); - one_exactly = FALSE; - if (lastnode == NULL) + one_exactly = false; + if (lastnode == NULL) { return NULL; + } } if (ret == NULL) EMSG2_RET_NULL(_(e_empty_sb), @@ -2514,15 +2514,13 @@ static bool re_mult_next(char *what) return true; } -/* - * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for - * character "c". - */ -static int use_multibytecode(int c) +// Return true if MULTIBYTECODE should be used instead of EXACTLY for +// character "c". +static bool use_multibytecode(int c) { - return has_mbyte && (*mb_char2len)(c) > 1 + return utf_char2len(c) > 1 && (re_multi_type(peekchr()) != NOT_MULTI - || (enc_utf8 && utf_iscomposing(c))); + || utf_iscomposing(c)); } /* @@ -2667,39 +2665,38 @@ static char_u *re_put_uint32(char_u *p, uint32_t val) return p; } -/* - * Set the next-pointer at the end of a node chain. - */ +// Set the next-pointer at the end of a node chain. 
static void regtail(char_u *p, char_u *val) { - char_u *scan; - char_u *temp; int offset; - if (p == JUST_CALC_SIZE) + if (p == JUST_CALC_SIZE) { return; + } - /* Find last node. */ - scan = p; + // Find last node. + char_u *scan = p; for (;; ) { - temp = regnext(scan); - if (temp == NULL) + char_u *temp = regnext(scan); + if (temp == NULL) { break; + } scan = temp; } - if (OP(scan) == BACK) + if (OP(scan) == BACK) { offset = (int)(scan - val); - else + } else { offset = (int)(val - scan); - /* When the offset uses more than 16 bits it can no longer fit in the two - * bytes available. Use a global flag to avoid having to check return - * values in too many places. */ - if (offset > 0xffff) - reg_toolong = TRUE; - else { - *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377); - *(scan + 2) = (char_u) (offset & 0377); + } + // When the offset uses more than 16 bits it can no longer fit in the two + // bytes available. Use a global flag to avoid having to check return + // values in too many places. 
+ if (offset > 0xffff) { + reg_toolong = true; + } else { + *(scan + 1) = (char_u)(((unsigned)offset >> 8) & 0377); + *(scan + 2) = (char_u)(offset & 0377); } } @@ -2728,8 +2725,8 @@ static void initchr(char_u *str) regparse = str; prevchr_len = 0; curchr = prevprevchr = prevchr = nextchr = -1; - at_start = TRUE; - prev_at_start = FALSE; + at_start = true; + prev_at_start = false; } /* @@ -2771,7 +2768,7 @@ static void restore_parse_state(parse_state_T *ps) */ static int peekchr(void) { - static int after_slash = FALSE; + static int after_slash = false; if (curchr != -1) { return curchr; @@ -2837,8 +2834,8 @@ static int peekchr(void) || (no_Magic(prevchr) == '(' && prevprevchr == Magic('%')))) { curchr = Magic('^'); - at_start = TRUE; - prev_at_start = FALSE; + at_start = true; + prev_at_start = false; } break; case '$': @@ -2889,12 +2886,12 @@ static int peekchr(void) */ curchr = -1; prev_at_start = at_start; - at_start = FALSE; /* be able to say "/\*ptr" */ - ++regparse; - ++after_slash; + at_start = false; // be able to say "/\*ptr" + regparse++; + after_slash++; peekchr(); - --regparse; - --after_slash; + regparse--; + after_slash--; curchr = toggle_Magic(curchr); } else if (vim_strchr(REGEXP_ABBR, c)) { /* @@ -2936,7 +2933,7 @@ static void skipchr(void) } regparse += prevchr_len; prev_at_start = at_start; - at_start = FALSE; + at_start = false; prevprevchr = prevchr; prevchr = curchr; curchr = nextchr; /* use previously unget char, or -1 */ @@ -2980,7 +2977,7 @@ static void ungetchr(void) curchr = prevchr; prevchr = prevprevchr; at_start = prev_at_start; - prev_at_start = FALSE; + prev_at_start = false; /* Backup regparse, so that it's at the same position as before the * getchr(). */ @@ -3101,14 +3098,14 @@ static int coll_get_char(void) */ static int read_limits(long *minval, long *maxval) { - int reverse = FALSE; + int reverse = false; char_u *first_char; long tmp; if (*regparse == '-') { // Starts with '-', so reverse the range later. 
regparse++; - reverse = TRUE; + reverse = true; } first_char = regparse; *minval = getdigits_long(&regparse, false, 0); @@ -3153,17 +3150,6 @@ static int read_limits(long *minval, long *maxval) * Global work variables for vim_regexec(). */ -/* The current match-position is remembered with these variables: */ -static linenr_T reglnum; /* line number, relative to first line */ -static char_u *regline; /* start of current line */ -static char_u *reginput; /* current input, points into "regline" */ - -static int need_clear_subexpr; /* subexpressions still need to be - * cleared */ -static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions - * still need to be cleared */ - - /* Save the sub-expressions before attempting a match. */ #define save_se(savep, posp, pp) \ REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)) @@ -3214,18 +3200,42 @@ typedef struct { linenr_T reg_maxline; bool reg_line_lbr; // "\n" in string is line break + // The current match-position is remembered with these variables: + linenr_T lnum; ///< line number, relative to first line + char_u *line; ///< start of current line + char_u *input; ///< current input, points into "regline" + + int need_clear_subexpr; ///< subexpressions still need to be cleared + int need_clear_zsubexpr; ///< extmatch subexpressions still need to be + ///< cleared + + // Internal copy of 'ignorecase'. It is set at each call to vim_regexec(). // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern // contains '\c' or '\C' the value is overruled. bool reg_ic; - // Similar to rex.reg_ic, but only for 'combining' characters. Set with \Z + // Similar to "reg_ic", but only for 'combining' characters. Set with \Z // flag in the regexp. Defaults to false, always. bool reg_icombine; // Copy of "rmm_maxcol": maximum column to search for a match. Zero when // there is no maximum. colnr_T reg_maxcol; + + // State for the NFA engine regexec. 
+ int nfa_has_zend; ///< NFA regexp \ze operator encountered. + int nfa_has_backref; ///< NFA regexp \1 .. \9 encountered. + int nfa_nsubexpr; ///< Number of sub expressions actually being used + ///< during execution. 1 if only the whole match + ///< (subexpr 0) is used. + // listid is global, so that it increases on recursive calls to + // nfa_regmatch(), which means we don't have to clear the lastlist field of + // all the states. + int nfa_listid; + int nfa_alt_listid; + + int nfa_has_zsubexpr; ///< NFA regexp has \z( ), set zsubexpr. } regexec_T; static regexec_T rex; @@ -3290,7 +3300,7 @@ static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */ static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ -// TRUE if using multi-line regexp. +// true if using multi-line regexp. #define REG_MULTI (rex.reg_match == NULL) /* @@ -3491,13 +3501,13 @@ static long bt_regexec_both(char_u *line, } } - regline = line; - reglnum = 0; - reg_toolong = FALSE; + rex.line = line; + rex.lnum = 0; + reg_toolong = false; /* Simplest case: Anchored match need be tried only once. */ if (prog->reganch) { - int c = utf_ptr2char(regline + col); + int c = utf_ptr2char(rex.line + col); if (prog->regstart == NUL || prog->regstart == c || (rex.reg_ic @@ -3514,12 +3524,12 @@ static long bt_regexec_both(char_u *line, while (!got_int) { if (prog->regstart != NUL) { // Skip until the char we know it must start with. - s = cstrchr(regline + col, prog->regstart); + s = cstrchr(rex.line + col, prog->regstart); if (s == NULL) { retval = 0; break; } - col = (int)(s - regline); + col = (int)(s - rex.line); } // Check for maximum column to try. 
@@ -3533,18 +3543,16 @@ static long bt_regexec_both(char_u *line, break; } - /* if not currently on the first line, get it again */ - if (reglnum != 0) { - reglnum = 0; - regline = reg_getline((linenr_T)0); + // if not currently on the first line, get it again + if (rex.lnum != 0) { + rex.lnum = 0; + rex.line = reg_getline((linenr_T)0); } - if (regline[col] == NUL) + if (rex.line[col] == NUL) { break; - if (has_mbyte) - col += (*mb_ptr2len)(regline + col); - else - ++col; - /* Check for timeout once in a twenty times to avoid overhead. */ + } + col += (*mb_ptr2len)(rex.line + col); + // Check for timeout once in a twenty times to avoid overhead. if (tm != NULL && ++tm_count == 20) { tm_count = 0; if (profile_passed_limit(*tm)) { @@ -3608,18 +3616,17 @@ void unref_extmatch(reg_extmatch_T *em) } } -/// Try match of "prog" with at regline["col"]. +/// Try match of "prog" with at rex.line["col"]. /// @returns 0 for failure, or number of lines contained in the match. static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, // timeout limit or NULL int *timed_out) // flag set on timeout or NULL { - reginput = regline + col; - need_clear_subexpr = TRUE; - /* Clear the external match subpointers if necessary. */ - if (prog->reghasz == REX_SET) - need_clear_zsubexpr = TRUE; + rex.input = rex.line + col; + rex.need_clear_subexpr = true; + // Clear the external match subpointers if necessary. + rex.need_clear_zsubexpr = (prog->reghasz == REX_SET); if (regmatch(prog->program + 1, tm, timed_out) == 0) { return 0; @@ -3632,18 +3639,18 @@ static long regtry(bt_regprog_T *prog, rex.reg_startpos[0].col = col; } if (rex.reg_endpos[0].lnum < 0) { - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = (int)(reginput - regline); + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = (int)(rex.input - rex.line); } else { // Use line number of "\ze". 
- reglnum = rex.reg_endpos[0].lnum; + rex.lnum = rex.reg_endpos[0].lnum; } } else { if (rex.reg_startp[0] == NULL) { - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; } if (rex.reg_endp[0] == NULL) { - rex.reg_endp[0] = reginput; + rex.reg_endp[0] = rex.input; } } /* Package any found \z(...\) matches for export. Default is none. */ @@ -3675,23 +3682,24 @@ static long regtry(bt_regprog_T *prog, } } } - return 1 + reglnum; + return 1 + rex.lnum; } // Get class of previous character. static int reg_prev_class(void) { - if (reginput > regline) { - return mb_get_class_tab(reginput - 1 - utf_head_off(regline, reginput - 1), - rex.reg_buf->b_chartab); + if (rex.input > rex.line) { + return mb_get_class_tab( + rex.input - 1 - utf_head_off(rex.line, rex.input - 1), + rex.reg_buf->b_chartab); } return -1; } -// Return TRUE if the current reginput position matches the Visual area. -static int reg_match_visual(void) +// Return true if the current rex.input position matches the Visual area. 
+static bool reg_match_visual(void) { pos_T top, bot; linenr_T lnum; @@ -3725,16 +3733,17 @@ static int reg_match_visual(void) } mode = curbuf->b_visual.vi_mode; } - lnum = reglnum + rex.reg_firstlnum; + lnum = rex.lnum + rex.reg_firstlnum; if (lnum < top.lnum || lnum > bot.lnum) { return false; } if (mode == 'v') { - col = (colnr_T)(reginput - regline); + col = (colnr_T)(rex.input - rex.line); if ((lnum == top.lnum && col < top.col) - || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) - return FALSE; + || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) { + return false; + } } else if (mode == Ctrl_V) { getvvcol(wp, &top, &start, NULL, &end); getvvcol(wp, &bot, &start2, NULL, &end2); @@ -3744,17 +3753,18 @@ static int reg_match_visual(void) end = end2; if (top.col == MAXCOL || bot.col == MAXCOL) end = MAXCOL; - unsigned int cols_u = win_linetabsize(wp, regline, - (colnr_T)(reginput - regline)); + unsigned int cols_u = win_linetabsize(wp, rex.line, + (colnr_T)(rex.input - rex.line)); assert(cols_u <= MAXCOL); colnr_T cols = (colnr_T)cols_u; - if (cols < start || cols > end - (*p_sel == 'e')) - return FALSE; + if (cols < start || cols > end - (*p_sel == 'e')) { + return false; + } } - return TRUE; + return true; } -#define ADVANCE_REGINPUT() MB_PTR_ADV(reginput) +#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) /* * The arguments from BRACE_LIMITS are stored here. They are actually local @@ -3773,11 +3783,11 @@ static long bl_maxval; /// (that don't need to know whether the rest of the match failed) by a nested /// loop. /// -/// Returns TRUE when there is a match. Leaves reginput and reglnum just after -/// the last matched character. -/// Returns FALSE when there is no match. Leaves reginput and reglnum in an +/// Returns true when there is a match. Leaves rex.input and rex.lnum +/// just after the last matched character. +/// Returns false when there is no match. Leaves rex.input and rex.lnum in an /// undefined state! 
-static int regmatch( +static bool regmatch( char_u *scan, // Current node. proftime_T *tm, // timeout limit or NULL int *timed_out // flag set on timeout or NULL @@ -3860,38 +3870,40 @@ static int regmatch( op = OP(scan); // Check for character class with NL added. if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI - && *reginput == NUL && reglnum <= rex.reg_maxline) { + && *rex.input == NUL && rex.lnum <= rex.reg_maxline) { reg_nextline(); - } else if (rex.reg_line_lbr && WITH_NL(op) && *reginput == '\n') { + } else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n') { ADVANCE_REGINPUT(); } else { if (WITH_NL(op)) { op -= ADD_NL; } - c = utf_ptr2char(reginput); + c = utf_ptr2char(rex.input); switch (op) { case BOL: - if (reginput != regline) + if (rex.input != rex.line) { status = RA_NOMATCH; + } break; case EOL: - if (c != NUL) + if (c != NUL) { status = RA_NOMATCH; + } break; case RE_BOF: // We're not at the beginning of the file when below the first // line where we started, not at the start of the line or we // didn't start at the first line of the buffer. - if (reglnum != 0 || reginput != regline + if (rex.lnum != 0 || rex.input != rex.line || (REG_MULTI && rex.reg_firstlnum > 1)) { status = RA_NOMATCH; } break; case RE_EOF: - if (reglnum != rex.reg_maxline || c != NUL) { + if (rex.lnum != rex.reg_maxline || c != NUL) { status = RA_NOMATCH; } break; @@ -3900,8 +3912,9 @@ static int regmatch( // Check if the buffer is in a window and compare the // rex.reg_win->w_cursor position to the match position. 
if (rex.reg_win == NULL - || (reglnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) - || ((colnr_T)(reginput - regline) != rex.reg_win->w_cursor.col)) { + || (rex.lnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) + || ((colnr_T)(rex.input - rex.line) != + rex.reg_win->w_cursor.col)) { status = RA_NOMATCH; } break; @@ -3916,13 +3929,13 @@ static int regmatch( pos = getmark_buf(rex.reg_buf, mark, false); if (pos == NULL // mark doesn't exist || pos->lnum <= 0 // mark isn't set in reg_buf - || (pos->lnum == reglnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(reginput - regline) + || (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos->col == (colnr_T)(rex.input - rex.line) ? (cmp == '<' || cmp == '>') - : (pos->col < (colnr_T)(reginput - regline) + : (pos->col < (colnr_T)(rex.input - rex.line) ? cmp != '>' : cmp != '<')) - : (pos->lnum < reglnum + rex.reg_firstlnum + : (pos->lnum < rex.lnum + rex.reg_firstlnum ? cmp != '>' : cmp != '<'))) { status = RA_NOMATCH; @@ -3936,79 +3949,70 @@ static int regmatch( break; case RE_LNUM: - assert(reglnum + rex.reg_firstlnum >= 0 - && (uintmax_t)(reglnum + rex.reg_firstlnum) <= UINT32_MAX); + assert(rex.lnum + rex.reg_firstlnum >= 0 + && (uintmax_t)(rex.lnum + rex.reg_firstlnum) <= UINT32_MAX); if (!REG_MULTI - || !re_num_cmp((uint32_t)(reglnum + rex.reg_firstlnum), scan)) { + || !re_num_cmp((uint32_t)(rex.lnum + rex.reg_firstlnum), scan)) { status = RA_NOMATCH; } break; case RE_COL: - assert(reginput - regline + 1 >= 0 - && (uintmax_t)(reginput - regline + 1) <= UINT32_MAX); - if (!re_num_cmp((uint32_t)(reginput - regline + 1), scan)) + assert(rex.input - rex.line + 1 >= 0 + && (uintmax_t)(rex.input - rex.line + 1) <= UINT32_MAX); + if (!re_num_cmp((uint32_t)(rex.input - rex.line + 1), scan)) { status = RA_NOMATCH; + } break; case RE_VCOL: if (!re_num_cmp(win_linetabsize(rex.reg_win == NULL ? 
curwin : rex.reg_win, - regline, - (colnr_T)(reginput - regline)) + 1, + rex.line, + (colnr_T)(rex.input - rex.line)) + 1, scan)) { status = RA_NOMATCH; } break; - case BOW: /* \<word; reginput points to w */ - if (c == NUL) /* Can't match at end of line */ + case BOW: // \<word; rex.input points to w + if (c == NUL) { // Can't match at end of line status = RA_NOMATCH; - else if (has_mbyte) { - int this_class; - + } else { // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + const int this_class = + mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); if (this_class <= 1) { status = RA_NOMATCH; // Not on a word at all. } else if (reg_prev_class() == this_class) { status = RA_NOMATCH; // Previous char is in same word. } - } else { - if (!vim_iswordc_buf(c, rex.reg_buf) - || (reginput > regline - && vim_iswordc_buf(reginput[-1], rex.reg_buf))) { - status = RA_NOMATCH; - } } break; - case EOW: /* word\>; reginput points after d */ - if (reginput == regline) /* Can't match at start of line */ + case EOW: // word\>; rex.input points after d + if (rex.input == rex.line) { // Can't match at start of line status = RA_NOMATCH; - else if (has_mbyte) { + } else { int this_class, prev_class; // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); prev_class = reg_prev_class(); if (this_class == prev_class - || prev_class == 0 || prev_class == 1) - status = RA_NOMATCH; - } else { - if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) - || (reginput[0] != NUL && vim_iswordc_buf(c, rex.reg_buf))) { + || prev_class == 0 || prev_class == 1) { status = RA_NOMATCH; } } - break; /* Matched with EOW */ + break; // Matched with EOW case ANY: - /* ANY does not match new lines. */ - if (c == NUL) + // ANY does not match new lines. 
+ if (c == NUL) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case IDENT: @@ -4019,14 +4023,15 @@ static int regmatch( break; case SIDENT: - if (ascii_isdigit(*reginput) || !vim_isIDc(c)) + if (ascii_isdigit(*rex.input) || !vim_isIDc(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case KWORD: - if (!vim_iswordp_buf(reginput, rex.reg_buf)) { + if (!vim_iswordp_buf(rex.input, rex.reg_buf)) { status = RA_NOMATCH; } else { ADVANCE_REGINPUT(); @@ -4034,8 +4039,8 @@ static int regmatch( break; case SKWORD: - if (ascii_isdigit(*reginput) - || !vim_iswordp_buf(reginput, rex.reg_buf)) { + if (ascii_isdigit(*rex.input) + || !vim_iswordp_buf(rex.input, rex.reg_buf)) { status = RA_NOMATCH; } else { ADVANCE_REGINPUT(); @@ -4043,31 +4048,35 @@ static int regmatch( break; case FNAME: - if (!vim_isfilec(c)) + if (!vim_isfilec(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case SFNAME: - if (ascii_isdigit(*reginput) || !vim_isfilec(c)) + if (ascii_isdigit(*rex.input) || !vim_isfilec(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case PRINT: - if (!vim_isprintc(PTR2CHAR(reginput))) + if (!vim_isprintc(PTR2CHAR(rex.input))) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case SPRINT: - if (ascii_isdigit(*reginput) || !vim_isprintc(PTR2CHAR(reginput))) + if (ascii_isdigit(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input))) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case WHITE: @@ -4203,10 +4212,10 @@ static int regmatch( opnd = OPERAND(scan); // Inline the first byte, for speed. 
- if (*opnd != *reginput + if (*opnd != *rex.input && (!rex.reg_ic || (!enc_utf8 - && mb_tolower(*opnd) != mb_tolower(*reginput)))) { + && mb_tolower(*opnd) != mb_tolower(*rex.input)))) { status = RA_NOMATCH; } else if (*opnd == NUL) { // match empty string always works; happens when "~" is @@ -4217,14 +4226,14 @@ static int regmatch( } else { // Need to match first byte again for multi-byte. len = (int)STRLEN(opnd); - if (cstrncmp(opnd, reginput, &len) != 0) { + if (cstrncmp(opnd, rex.input, &len) != 0) { status = RA_NOMATCH; } } // Check for following composing character, unless %C // follows (skips over all composing chars). if (status != RA_NOMATCH && enc_utf8 - && UTF_COMPOSINGLIKE(reginput, reginput + len) + && UTF_COMPOSINGLIKE(rex.input, rex.input + len) && !rex.reg_icombine && OP(next) != RE_COMPOSING) { // raaron: This code makes a composing character get @@ -4233,7 +4242,7 @@ static int regmatch( status = RA_NOMATCH; } if (status != RA_NOMATCH) { - reginput += len; + rex.input += len; } } } @@ -4250,54 +4259,52 @@ static int regmatch( break; case MULTIBYTECODE: - if (has_mbyte) { + { int i, len; - char_u *opnd; - int opndc = 0, inpc; - opnd = OPERAND(scan); + const char_u *opnd = OPERAND(scan); // Safety check (just in case 'encoding' was changed since // compiling the program). if ((len = (*mb_ptr2len)(opnd)) < 2) { status = RA_NOMATCH; break; } - if (enc_utf8) { - opndc = utf_ptr2char(opnd); - } - if (enc_utf8 && utf_iscomposing(opndc)) { - /* When only a composing char is given match at any - * position where that composing char appears. */ + const int opndc = utf_ptr2char(opnd); + if (utf_iscomposing(opndc)) { + // When only a composing char is given match at any + // position where that composing char appears. 
status = RA_NOMATCH; - for (i = 0; reginput[i] != NUL; i += utf_ptr2len(reginput + i)) { - inpc = utf_ptr2char(reginput + i); + for (i = 0; rex.input[i] != NUL; + i += utf_ptr2len(rex.input + i)) { + const int inpc = utf_ptr2char(rex.input + i); if (!utf_iscomposing(inpc)) { if (i > 0) { break; } } else if (opndc == inpc) { // Include all following composing chars. - len = i + utfc_ptr2len(reginput + i); + len = i + utfc_ptr2len(rex.input + i); status = RA_MATCH; break; } } - } else - for (i = 0; i < len; ++i) - if (opnd[i] != reginput[i]) { + } else { + for (i = 0; i < len; i++) { + if (opnd[i] != rex.input[i]) { status = RA_NOMATCH; break; } - reginput += len; - } else - status = RA_NOMATCH; + } + } + rex.input += len; + } break; case RE_COMPOSING: if (enc_utf8) { // Skip composing characters. - while (utf_iscomposing(utf_ptr2char(reginput))) { - MB_CPTR_ADV(reginput); + while (utf_iscomposing(utf_ptr2char(rex.input))) { + MB_CPTR_ADV(rex.input); } } break; @@ -4460,7 +4467,7 @@ static int regmatch( } else { // Compare current input with back-ref in the same line. len = (int)(rex.reg_endp[no] - rex.reg_startp[no]); - if (cstrncmp(rex.reg_startp[no], reginput, &len) != 0) { + if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0) { status = RA_NOMATCH; } } @@ -4469,12 +4476,12 @@ static int regmatch( // Backref was not set: Match an empty string. len = 0; } else { - if (rex.reg_startpos[no].lnum == reglnum - && rex.reg_endpos[no].lnum == reglnum) { + if (rex.reg_startpos[no].lnum == rex.lnum + && rex.reg_endpos[no].lnum == rex.lnum) { // Compare back-ref within the current line. len = rex.reg_endpos[no].col - rex.reg_startpos[no].col; - if (cstrncmp(regline + rex.reg_startpos[no].col, - reginput, &len) != 0) { + if (cstrncmp(rex.line + rex.reg_startpos[no].col, + rex.input, &len) != 0) { status = RA_NOMATCH; } } else { @@ -4491,8 +4498,8 @@ static int regmatch( } } - /* Matched the backref, skip over it. 
*/ - reginput += len; + // Matched the backref, skip over it. + rex.input += len; } break; @@ -4506,20 +4513,18 @@ static int regmatch( case ZREF + 8: case ZREF + 9: { - int len; - cleanup_zsubexpr(); no = op - ZREF; if (re_extmatch_in != NULL && re_extmatch_in->matches[no] != NULL) { - len = (int)STRLEN(re_extmatch_in->matches[no]); - if (cstrncmp(re_extmatch_in->matches[no], - reginput, &len) != 0) + int len = (int)STRLEN(re_extmatch_in->matches[no]); + if (cstrncmp(re_extmatch_in->matches[no], rex.input, &len) != 0) { status = RA_NOMATCH; - else - reginput += len; + } else { + rex.input += len; + } } else { - /* Backref was not set: Match an empty string. */ + // Backref was not set: Match an empty string. } } break; @@ -4725,15 +4730,17 @@ static int regmatch( case BHPOS: if (REG_MULTI) { - if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline) - || behind_pos.rs_u.pos.lnum != reglnum) + if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line) + || behind_pos.rs_u.pos.lnum != rex.lnum) { status = RA_NOMATCH; - } else if (behind_pos.rs_u.ptr != reginput) + } + } else if (behind_pos.rs_u.ptr != rex.input) { status = RA_NOMATCH; + } break; case NEWL: - if ((c != NUL || !REG_MULTI || reglnum > rex.reg_maxline + if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) && (c != '\n' || !rex.reg_line_lbr)) { status = RA_NOMATCH; } else if (rex.reg_line_lbr) { @@ -4946,7 +4953,7 @@ static int regmatch( if (limit > 0 && ((rp->rs_un.regsave.rs_u.pos.lnum < behind_pos.rs_u.pos.lnum - ? (colnr_T)STRLEN(regline) + ? 
(colnr_T)STRLEN(rex.line) : behind_pos.rs_u.pos.col) - rp->rs_un.regsave.rs_u.pos.col >= limit)) no = FAIL; @@ -4960,7 +4967,7 @@ static int regmatch( else { reg_restore(&rp->rs_un.regsave, &backpos); rp->rs_un.regsave.rs_u.pos.col = - (colnr_T)STRLEN(regline); + (colnr_T)STRLEN(rex.line); } } else { const char_u *const line = @@ -4972,10 +4979,10 @@ static int regmatch( + 1; } } else { - if (rp->rs_un.regsave.rs_u.ptr == regline) { + if (rp->rs_un.regsave.rs_u.ptr == rex.line) { no = FAIL; } else { - MB_PTR_BACK(regline, rp->rs_un.regsave.rs_u.ptr); + MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr); if (limit > 0 && (long)(behind_pos.rs_u.ptr - rp->rs_un.regsave.rs_u.ptr) > limit) { @@ -5039,18 +5046,18 @@ static int regmatch( * didn't match -- back up one char. */ if (--rst->count < rst->minval) break; - if (reginput == regline) { + if (rex.input == rex.line) { // backup to last char of previous line - reglnum--; - regline = reg_getline(reglnum); + rex.lnum--; + rex.line = reg_getline(rex.lnum); // Just in case regrepeat() didn't count right. - if (regline == NULL) { + if (rex.line == NULL) { break; } - reginput = regline + STRLEN(regline); + rex.input = rex.line + STRLEN(rex.line); fast_breakcheck(); } else { - MB_PTR_BACK(regline, reginput); + MB_PTR_BACK(rex.line, rex.input); } } else { /* Range is backwards, use shortest match first. @@ -5067,9 +5074,9 @@ static int regmatch( } else status = RA_NOMATCH; - /* If it could match, try it. */ - if (rst->nextb == NUL || *reginput == rst->nextb - || *reginput == rst->nextb_ic) { + // If it could match, try it. + if (rst->nextb == NUL || *rex.input == rst->nextb + || *rex.input == rst->nextb_ic) { reg_save(&rp->rs_un.regsave, &backpos); scan = regnext(rp->rs_scan); status = RA_CONT; @@ -5156,7 +5163,7 @@ static void regstack_pop(char_u **scan) /* * regrepeat - repeatedly match something simple, return how many. - * Advances reginput (and reglnum) to just after the matched chars. 
+ * Advances rex.input (and rex.lnum) to just after the matched chars. */ static int regrepeat ( @@ -5165,12 +5172,11 @@ regrepeat ( ) { long count = 0; - char_u *scan; char_u *opnd; int mask; int testval = 0; - scan = reginput; /* Make local copy of reginput for speed. */ + char_u *scan = rex.input; // Make local copy of rex.input for speed. opnd = OPERAND(p); switch (OP(p)) { case ANY: @@ -5182,15 +5188,16 @@ regrepeat ( count++; MB_PTR_ADV(scan); } - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr || count == maxcount) { break; } count++; // count the line-break reg_nextline(); - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } break; @@ -5204,14 +5211,15 @@ regrepeat ( if (vim_isIDc(PTR2CHAR(scan)) && (testval || !ascii_isdigit(*scan))) { MB_PTR_ADV(scan); } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { scan++; } else { @@ -5232,12 +5240,12 @@ regrepeat ( && (testval || !ascii_isdigit(*scan))) { MB_PTR_ADV(scan); } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) { break; } @@ -5260,12 +5268,12 @@ regrepeat ( if (vim_isfilec(PTR2CHAR(scan)) && (testval || !ascii_isdigit(*scan))) { MB_PTR_ADV(scan); } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan 
= rex.input; if (got_int) { break; } @@ -5286,12 +5294,12 @@ regrepeat ( case SPRINT + ADD_NL: while (count < maxcount) { if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) { break; } @@ -5314,14 +5322,15 @@ do_class: while (count < maxcount) { int l; if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) { if (testval != 0) break; @@ -5467,12 +5476,12 @@ do_class: while (count < maxcount) { int len; if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) { break; } @@ -5494,7 +5503,7 @@ do_class: case NEWL: while (count < maxcount - && ((*scan == NUL && reglnum <= rex.reg_maxline && !rex.reg_line_lbr + && ((*scan == NUL && rex.lnum <= rex.reg_maxline && !rex.reg_line_lbr && REG_MULTI) || (*scan == '\n' && rex.reg_line_lbr))) { count++; if (rex.reg_line_lbr) { @@ -5502,9 +5511,10 @@ do_class: } else { reg_nextline(); } - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } break; @@ -5516,7 +5526,7 @@ do_class: break; } - reginput = scan; + rex.input = scan; return (int)count; } @@ -5546,7 +5556,7 @@ static char_u *regnext(char_u *p) /* * Check the regexp program for its magic number. - * Return TRUE if it's wrong. + * Return true if it's wrong. 
*/ static int prog_magic_wrong(void) { @@ -5560,9 +5570,9 @@ static int prog_magic_wrong(void) if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC) { EMSG(_(e_re_corr)); - return TRUE; + return true; } - return FALSE; + return false; } /* @@ -5572,7 +5582,7 @@ static int prog_magic_wrong(void) */ static void cleanup_subexpr(void) { - if (need_clear_subexpr) { + if (rex.need_clear_subexpr) { if (REG_MULTI) { // Use 0xff to set lnum to -1 memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP); @@ -5581,13 +5591,13 @@ static void cleanup_subexpr(void) memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP); memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP); } - need_clear_subexpr = FALSE; + rex.need_clear_subexpr = false; } } static void cleanup_zsubexpr(void) { - if (need_clear_zsubexpr) { + if (rex.need_clear_zsubexpr) { if (REG_MULTI) { /* Use 0xff to set lnum to -1 */ memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP); @@ -5596,23 +5606,20 @@ static void cleanup_zsubexpr(void) memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP); memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP); } - need_clear_zsubexpr = FALSE; + rex.need_clear_zsubexpr = false; } } -/* - * Save the current subexpr to "bp", so that they can be restored - * later by restore_subexpr(). - */ +// Save the current subexpr to "bp", so that they can be restored +// later by restore_subexpr(). static void save_subexpr(regbehind_T *bp) + FUNC_ATTR_NONNULL_ALL { - int i; - - // When "need_clear_subexpr" is set we don't need to save the values, only + // When "rex.need_clear_subexpr" is set we don't need to save the values, only // remember that this flag needs to be set again when restoring. 
- bp->save_need_clear_subexpr = need_clear_subexpr; - if (!need_clear_subexpr) { - for (i = 0; i < NSUBEXP; ++i) { + bp->save_need_clear_subexpr = rex.need_clear_subexpr; + if (!rex.need_clear_subexpr) { + for (int i = 0; i < NSUBEXP; i++) { if (REG_MULTI) { bp->save_start[i].se_u.pos = rex.reg_startpos[i]; bp->save_end[i].se_u.pos = rex.reg_endpos[i]; @@ -5624,17 +5631,14 @@ static void save_subexpr(regbehind_T *bp) } } -/* - * Restore the subexpr from "bp". - */ +// Restore the subexpr from "bp". static void restore_subexpr(regbehind_T *bp) + FUNC_ATTR_NONNULL_ALL { - int i; - - /* Only need to restore saved values when they are not to be cleared. */ - need_clear_subexpr = bp->save_need_clear_subexpr; - if (!need_clear_subexpr) { - for (i = 0; i < NSUBEXP; ++i) { + // Only need to restore saved values when they are not to be cleared. + rex.need_clear_subexpr = bp->save_need_clear_subexpr; + if (!rex.need_clear_subexpr) { + for (int i = 0; i < NSUBEXP; i++) { if (REG_MULTI) { rex.reg_startpos[i] = bp->save_start[i].se_u.pos; rex.reg_endpos[i] = bp->save_end[i].se_u.pos; @@ -5646,56 +5650,54 @@ static void restore_subexpr(regbehind_T *bp) } } -/* - * Advance reglnum, regline and reginput to the next line. - */ +// Advance rex.lnum, rex.line and rex.input to the next line. static void reg_nextline(void) { - regline = reg_getline(++reglnum); - reginput = regline; + rex.line = reg_getline(++rex.lnum); + rex.input = rex.line; fast_breakcheck(); } -/* - * Save the input line and position in a regsave_T. - */ +// Save the input line and position in a regsave_T. 
static void reg_save(regsave_T *save, garray_T *gap) + FUNC_ATTR_NONNULL_ALL { if (REG_MULTI) { - save->rs_u.pos.col = (colnr_T)(reginput - regline); - save->rs_u.pos.lnum = reglnum; - } else - save->rs_u.ptr = reginput; + save->rs_u.pos.col = (colnr_T)(rex.input - rex.line); + save->rs_u.pos.lnum = rex.lnum; + } else { + save->rs_u.ptr = rex.input; + } save->rs_len = gap->ga_len; } -/* - * Restore the input line and position from a regsave_T. - */ +// Restore the input line and position from a regsave_T. static void reg_restore(regsave_T *save, garray_T *gap) + FUNC_ATTR_NONNULL_ALL { if (REG_MULTI) { - if (reglnum != save->rs_u.pos.lnum) { - /* only call reg_getline() when the line number changed to save - * a bit of time */ - reglnum = save->rs_u.pos.lnum; - regline = reg_getline(reglnum); + if (rex.lnum != save->rs_u.pos.lnum) { + // only call reg_getline() when the line number changed to save + // a bit of time + rex.lnum = save->rs_u.pos.lnum; + rex.line = reg_getline(rex.lnum); } - reginput = regline + save->rs_u.pos.col; - } else - reginput = save->rs_u.ptr; + rex.input = rex.line + save->rs_u.pos.col; + } else { + rex.input = save->rs_u.ptr; + } gap->ga_len = save->rs_len; } -/* - * Return TRUE if current position is equal to saved position. - */ -static int reg_save_equal(regsave_T *save) +// Return true if current position is equal to saved position. 
+static bool reg_save_equal(const regsave_T *save) + FUNC_ATTR_NONNULL_ALL { - if (REG_MULTI) - return reglnum == save->rs_u.pos.lnum - && reginput == regline + save->rs_u.pos.col; - return reginput == save->rs_u.ptr; + if (REG_MULTI) { + return rex.lnum == save->rs_u.pos.lnum + && rex.input == rex.line + save->rs_u.pos.col; + } + return rex.input == save->rs_u.ptr; } /* @@ -5708,14 +5710,14 @@ static int reg_save_equal(regsave_T *save) static void save_se_multi(save_se_T *savep, lpos_T *posp) { savep->se_u.pos = *posp; - posp->lnum = reglnum; - posp->col = (colnr_T)(reginput - regline); + posp->lnum = rex.lnum; + posp->col = (colnr_T)(rex.input - rex.line); } static void save_se_one(save_se_T *savep, char_u **pp) { savep->se_u.ptr = *pp; - *pp = reginput; + *pp = rex.input; } /* @@ -5750,17 +5752,17 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e for (;; ) { /* Since getting one line may invalidate the other, need to make copy. * Slow! */ - if (regline != reg_tofree) { - len = (int)STRLEN(regline); + if (rex.line != reg_tofree) { + len = (int)STRLEN(rex.line); if (reg_tofree == NULL || len >= (int)reg_tofreelen) { len += 50; /* get some extra */ xfree(reg_tofree); reg_tofree = xmalloc(len); reg_tofreelen = len; } - STRCPY(reg_tofree, regline); - reginput = reg_tofree + (reginput - regline); - regline = reg_tofree; + STRCPY(reg_tofree, rex.line); + rex.input = reg_tofree + (rex.input - rex.line); + rex.line = reg_tofree; } /* Get the line to compare with. */ @@ -5772,14 +5774,16 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e else len = (int)STRLEN(p + ccol); - if (cstrncmp(p + ccol, reginput, &len) != 0) - return RA_NOMATCH; /* doesn't match */ - if (bytelen != NULL) + if (cstrncmp(p + ccol, rex.input, &len) != 0) { + return RA_NOMATCH; // doesn't match + } + if (bytelen != NULL) { *bytelen += len; + } if (clnum == end_lnum) { break; // match and at end! 
} - if (reglnum >= rex.reg_maxline) { + if (rex.lnum >= rex.reg_maxline) { return RA_NOMATCH; // text too short } @@ -5793,8 +5797,8 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e return RA_FAIL; } - /* found a match! Note that regline may now point to a copy of the line, - * that should not matter. */ + // found a match! Note that rex.line may now point to a copy of the line, + // that should not matter. return RA_MATCH; } @@ -6477,7 +6481,7 @@ char_u *regtilde(char_u *source, int magic) return newsub; } -static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */ +static bool can_f_submatch = false; // true when submatch() can be used // These pointers are used for reg_submatch(). Needed for when the // substitution string is an expression that contains a call to substitute() @@ -6534,11 +6538,11 @@ static void clear_submatch_list(staticList10_T *sl) /// vim_regsub() - perform substitutions after a vim_regexec() or /// vim_regexec_multi() match. /// -/// If "copy" is TRUE really copy into "dest". -/// If "copy" is FALSE nothing is copied, this is just to find out the length +/// If "copy" is true really copy into "dest". +/// If "copy" is false nothing is copied, this is just to find out the length /// of the result. /// -/// If "backslash" is TRUE, a backslash will be removed later, need to double +/// If "backslash" is true, a backslash will be removed later, need to double /// them to keep them, and insert a backslash before a CR to avoid it being /// replaced with a line break later. /// @@ -6630,8 +6634,8 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, if (expr != NULL || (source[0] == '\\' && source[1] == '=')) { // To make sure that the length doesn't change between checking the // length and copying the string, and to speed up things, the - // resulting string is saved from the call with "copy" == FALSE to the - // call with "copy" == TRUE. 
+ // resulting string is saved from the call with "copy" == false to the + // call with "copy" == true. if (copy) { if (eval_result != NULL) { STRCPY(dest, eval_result); @@ -6639,7 +6643,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, XFREE_CLEAR(eval_result); } } else { - int prev_can_f_submatch = can_f_submatch; + const bool prev_can_f_submatch = can_f_submatch; regsubmatch_T rsm_save; xfree(eval_result); @@ -6700,7 +6704,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } if (eval_result != NULL) { - int had_backslash = FALSE; + int had_backslash = false; for (s = eval_result; *s != NUL; MB_PTR_ADV(s)) { // Change NL to CR, so that it becomes a line break, @@ -6778,22 +6782,24 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } if (c == '\\' && *src != NUL) { - /* Check for abbreviations -- webb */ + // Check for abbreviations -- webb switch (*src) { case 'r': c = CAR; ++src; break; case 'n': c = NL; ++src; break; case 't': c = TAB; ++src; break; - /* Oh no! \e already has meaning in subst pat :-( */ - /* case 'e': c = ESC; ++src; break; */ + // Oh no! \e already has meaning in subst pat :-( + // case 'e': c = ESC; ++src; break; case 'b': c = Ctrl_H; ++src; break; - /* If "backslash" is TRUE the backslash will be removed - * later. Used to insert a literal CR. */ - default: if (backslash) { - if (copy) + // If "backslash" is true the backslash will be removed + // later. Used to insert a literal CR. 
+ default: + if (backslash) { + if (copy) { *dst = '\\'; - ++dst; - } + } + dst++; + } c = *src++; } } else { @@ -7163,8 +7169,10 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) regexp_engine = AUTOMATIC_ENGINE; } } +#ifdef REGEXP_DEBUG bt_regengine.expr = expr; nfa_regengine.expr = expr; +#endif // reg_iswordc() uses rex.reg_buf rex.reg_buf = curbuf; @@ -7245,18 +7253,26 @@ static void report_re_switch(char_u *pat) /// @param col the column to start looking for match /// @param nl /// -/// @return TRUE if there is a match, FALSE if not. -static int vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, - bool nl) +/// @return true if there is a match, false if not. +static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, + bool nl) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; + // Cannot use the same prog recursively, it contains state. + if (rmp->regprog->re_in_use) { + EMSG(_(e_recursive)); + return false; + } + rmp->regprog->re_in_use = true; + if (rex_in_use) { // Being called recursively, save the state. rex_save = rex; } rex_in_use = true; + rex.reg_startp = NULL; rex.reg_endp = NULL; rex.reg_startpos = NULL; @@ -7287,32 +7303,33 @@ static int vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, if (rex_in_use) { rex = rex_save; } + rmp->regprog->re_in_use = false; return result > 0; } // Note: "*prog" may be freed and changed. -// Return TRUE if there is a match, FALSE if not. -int vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, +// Return true if there is a match, false if not. +bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T col) { regmatch_T regmatch = { .regprog = *prog, .rm_ic = ignore_case }; - int r = vim_regexec_string(&regmatch, line, col, false); + bool r = vim_regexec_string(&regmatch, line, col, false); *prog = regmatch.regprog; return r; } // Note: "rmp->regprog" may be freed and changed. -// Return TRUE if there is a match, FALSE if not. 
-int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col) +// Return true if there is a match, false if not. +bool vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col) { return vim_regexec_string(rmp, line, col, false); } // Like vim_regexec(), but consider a "\n" in "line" to be a line break. // Note: "rmp->regprog" may be freed and changed. -// Return TRUE if there is a match, FALSE if not. -int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) +// Return true if there is a match, false if not. +bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) { return vim_regexec_string(rmp, line, col, true); } @@ -7337,6 +7354,13 @@ long vim_regexec_multi( regexec_T rex_save; bool rex_in_use_save = rex_in_use; + // Cannot use the same prog recursively, it contains state. + if (rmp->regprog->re_in_use) { + EMSG(_(e_recursive)); + return false; + } + rmp->regprog->re_in_use = true; + if (rex_in_use) { // Being called recursively, save the state. rex_save = rex; @@ -7375,6 +7399,7 @@ long vim_regexec_multi( if (rex_in_use) { rex = rex_save; } + rmp->regprog->re_in_use = false; return result <= 0 ? 0 : result; } diff --git a/src/nvim/regexp_defs.h b/src/nvim/regexp_defs.h index 116bfee91e..a729a91555 100644 --- a/src/nvim/regexp_defs.h +++ b/src/nvim/regexp_defs.h @@ -72,6 +72,7 @@ struct regprog { unsigned regflags; unsigned re_engine; ///< Automatic, backtracking or NFA engine. unsigned re_flags; ///< Second argument for vim_regcomp(). + bool re_in_use; ///< prog is being executed }; /* @@ -84,7 +85,8 @@ typedef struct { regengine_T *engine; unsigned regflags; unsigned re_engine; - unsigned re_flags; ///< Second argument for vim_regcomp(). + unsigned re_flags; + bool re_in_use; int regstart; char_u reganch; @@ -114,7 +116,8 @@ typedef struct { regengine_T *engine; unsigned regflags; unsigned re_engine; - unsigned re_flags; ///< Second argument for vim_regcomp(). 
+ unsigned re_flags; + bool re_in_use; nfa_state_T *start; // points into state[] diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 387732fdee..a744071a6a 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -267,9 +267,9 @@ struct Frag { typedef struct Frag Frag_T; typedef struct { - int in_use; /* number of subexpr with useful info */ + int in_use; ///< number of subexpr with useful info - /* When REG_MULTI is TRUE list.multi is used, otherwise list.line. */ + // When REG_MULTI is true list.multi is used, otherwise list.line. union { struct multipos { linenr_T start_lnum; @@ -310,48 +310,27 @@ typedef struct { regsubs_T subs; /* submatch info, only party used */ } nfa_thread_T; -/* nfa_list_T contains the alternative NFA execution states. */ +// nfa_list_T contains the alternative NFA execution states. typedef struct { - nfa_thread_T *t; /* allocated array of states */ - int n; /* nr of states currently in "t" */ - int len; /* max nr of states in "t" */ - int id; /* ID of the list */ - int has_pim; /* TRUE when any state has a PIM */ + nfa_thread_T *t; ///< allocated array of states + int n; ///< nr of states currently in "t" + int len; ///< max nr of states in "t" + int id; ///< ID of the list + int has_pim; ///< true when any state has a PIM } nfa_list_T; -/// re_flags passed to nfa_regcomp(). -static int nfa_re_flags; - -/* NFA regexp \ze operator encountered. */ -static int nfa_has_zend; - -/* NFA regexp \1 .. \9 encountered. */ -static int nfa_has_backref; - -/* NFA regexp has \z( ), set zsubexpr. */ -static int nfa_has_zsubexpr; - -/* Number of sub expressions actually being used during execution. 1 if only - * the whole match (subexpr 0) is used. */ -static int nfa_nsubexpr; - -static int *post_start; /* holds the postfix form of r.e. */ +// Variables only used in nfa_regcomp() and descendants. +static int nfa_re_flags; ///< re_flags passed to nfa_regcomp(). +static int *post_start; ///< holds the postfix form of r.e. 
static int *post_end; static int *post_ptr; -static int nstate; /* Number of states in the NFA. Also used when - * executing. */ -static int istate; /* Index in the state vector, used in alloc_state() */ +static int nstate; ///< Number of states in the NFA. Also used when executing. +static int istate; ///< Index in the state vector, used in alloc_state() /* If not NULL match must end at this position */ static save_se_T *nfa_endp = NULL; -/* listid is global, so that it increases on recursive calls to - * nfa_regmatch(), which means we don't have to clear the lastlist field of - * all the states. */ -static int nfa_listid; -static int nfa_alt_listid; - /* 0 for first call to nfa_regmatch(), 1 for recursive call. */ static int nfa_ll_index = 0; @@ -395,8 +374,8 @@ nfa_regcomp_start ( post_start = (int *)xmalloc(postfix_size); post_ptr = post_start; post_end = post_start + nstate_max; - nfa_has_zend = FALSE; - nfa_has_backref = FALSE; + rex.nfa_has_zend = false; + rex.nfa_has_backref = false; /* shared with BT engine */ regcomp_start(expr, re_flags); @@ -605,12 +584,10 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) # define CLASS_o9 0x02 # define CLASS_underscore 0x01 - int newl = FALSE; char_u *p; int config = 0; - if (extra_newl == TRUE) - newl = TRUE; + bool newl = extra_newl == true; if (*end != ']') return FAIL; @@ -655,13 +632,13 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) } p += 3; } else if (p + 1 < end && *p == '\\' && *(p + 1) == 'n') { - newl = TRUE; + newl = true; p += 2; } else if (*p == '_') { config |= CLASS_underscore; p++; } else if (*p == '\n') { - newl = TRUE; + newl = true; p++; } else return FAIL; @@ -670,8 +647,9 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) if (p != end) return FAIL; - if (newl == TRUE) + if (newl == true) { extra_newl = NFA_ADD_NL; + } switch (config) { case CLASS_o9: @@ -1188,7 +1166,7 @@ static int 
nfa_regatom(void) case Magic('$'): EMIT(NFA_EOL); - had_eol = TRUE; + had_eol = true; break; case Magic('<'): @@ -1210,7 +1188,7 @@ static int nfa_regatom(void) } if (c == '$') { /* "\_$" is end-of-line */ EMIT(NFA_EOL); - had_eol = TRUE; + had_eol = true; break; } @@ -1257,7 +1235,7 @@ static int nfa_regatom(void) if (p == NULL) { if (extra == NFA_ADD_NL) { EMSGN(_(e_ill_char_class), c); - rc_did_emsg = TRUE; + rc_did_emsg = true; return FAIL; } IEMSGN("INTERNAL: Unknown character class char: %" PRId64, c); @@ -1346,7 +1324,7 @@ static int nfa_regatom(void) return FAIL; } EMIT(NFA_BACKREF1 + refnum); - nfa_has_backref = true; + rex.nfa_has_backref = true; } break; @@ -1361,7 +1339,7 @@ static int nfa_regatom(void) break; case 'e': EMIT(NFA_ZEND); - nfa_has_zend = true; + rex.nfa_has_zend = true; if (!re_mult_next("\\zs")) { return false; } @@ -1380,8 +1358,8 @@ static int nfa_regatom(void) EMSG_RET_FAIL(_(e_z1_not_allowed)); } EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); - /* No need to set nfa_has_backref, the sub-matches don't - * change when \z1 .. \z9 matches or not. */ + // No need to set rex.nfa_has_backref, the sub-matches don't + // change when \z1 .. \z9 matches or not. re_has_z = REX_USE; break; case '(': @@ -1598,12 +1576,12 @@ collection: EMIT(NFA_CONCAT); MB_PTR_ADV(regparse); } - /* Emit the OR branches for each character in the [] */ - emit_range = FALSE; + // Emit the OR branches for each character in the [] + emit_range = false; while (regparse < endp) { oldstartc = startc; startc = -1; - got_coll_char = FALSE; + got_coll_char = false; if (*regparse == '[') { /* Check for [: :], [= =], [. .] */ equiclass = collclass = 0; @@ -1684,7 +1662,7 @@ collection: /* Try a range like 'a-x' or '\t-z'. Also allows '-' as a * start character. 
*/ if (*regparse == '-' && oldstartc != -1) { - emit_range = TRUE; + emit_range = true; startc = oldstartc; MB_PTR_ADV(regparse); continue; // reading the end of the range @@ -1764,7 +1742,7 @@ collection: EMIT(NFA_CONCAT); } } - emit_range = FALSE; + emit_range = false; startc = -1; } else { /* This char (startc) is not part of a range. Just @@ -1781,10 +1759,11 @@ collection: if (!negated) extra = NFA_ADD_NL; } else { - if (got_coll_char == TRUE && startc == 0) + if (got_coll_char == true && startc == 0) { EMIT(0x0a); - else + } else { EMIT(startc); + } EMIT(NFA_CONCAT); } } @@ -1802,13 +1781,14 @@ collection: regparse = endp; MB_PTR_ADV(regparse); - /* Mark end of the collection. */ - if (negated == TRUE) + // Mark end of the collection. + if (negated == true) { EMIT(NFA_END_NEG_COLL); - else + } else { EMIT(NFA_END_COLL); + } - /* \_[] also matches \n but it's not negated */ + // \_[] also matches \n but it's not negated if (extra == NFA_ADD_NL) { EMIT(reg_string ? NL : NFA_NEWL); EMIT(NFA_OR); @@ -1877,7 +1857,7 @@ static int nfa_regpiece(void) int op; int ret; long minval, maxval; - int greedy = TRUE; /* Braces are prefixed with '-' ? */ + bool greedy = true; // Braces are prefixed with '-' ? parse_state_T old_state; parse_state_T new_state; int64_t c2; @@ -1977,11 +1957,11 @@ static int nfa_regpiece(void) * parenthesis have the same id */ - greedy = TRUE; + greedy = true; c2 = peekchr(); if (c2 == '-' || c2 == Magic('-')) { skipchr(); - greedy = FALSE; + greedy = false; } if (!read_limits(&minval, &maxval)) EMSG_RET_FAIL(_("E870: (NFA regexp) Error reading repetition limits")); @@ -2019,7 +1999,7 @@ static int nfa_regpiece(void) /* Save parse state after the repeated atom and the \{} */ save_parse_state(&new_state); - quest = (greedy == TRUE ? NFA_QUEST : NFA_QUEST_NONGREEDY); + quest = (greedy == true ? 
NFA_QUEST : NFA_QUEST_NONGREEDY); for (i = 0; i < maxval; i++) { /* Goto beginning of the repeated atom */ restore_parse_state(&old_state); @@ -2073,8 +2053,8 @@ static int nfa_regpiece(void) */ static int nfa_regconcat(void) { - int cont = TRUE; - int first = TRUE; + bool cont = true; + bool first = true; while (cont) { switch (peekchr()) { @@ -2082,7 +2062,7 @@ static int nfa_regconcat(void) case Magic('|'): case Magic('&'): case Magic(')'): - cont = FALSE; + cont = false; break; case Magic('Z'): @@ -2119,12 +2099,14 @@ static int nfa_regconcat(void) break; default: - if (nfa_regpiece() == FAIL) + if (nfa_regpiece() == FAIL) { return FAIL; - if (first == FALSE) + } + if (first == false) { EMIT(NFA_CONCAT); - else - first = FALSE; + } else { + first = false; + } break; } } @@ -2230,15 +2212,14 @@ nfa_reg ( else EMSG_RET_FAIL(_("E873: (NFA regexp) proper termination error")); } - /* - * Here we set the flag allowing back references to this set of - * parentheses. - */ + // Here we set the flag allowing back references to this set of + // parentheses. 
if (paren == REG_PAREN) { - had_endbrace[parno] = TRUE; /* have seen the close paren */ + had_endbrace[parno] = true; // have seen the close paren EMIT(NFA_MOPEN + parno); - } else if (paren == REG_ZPAREN) + } else if (paren == REG_ZPAREN) { EMIT(NFA_ZOPEN + parno); + } return OK; } @@ -2248,10 +2229,10 @@ static char_u code[50]; static void nfa_set_code(int c) { - int addnl = FALSE; + int addnl = false; if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL) { - addnl = TRUE; + addnl = true; c -= NFA_ADD_NL; } @@ -2464,9 +2445,9 @@ static void nfa_set_code(int c) code[5] = c; } - if (addnl == TRUE) + if (addnl == true) { STRCAT(code, " + NEWLINE "); - + } } static FILE *log_fd; @@ -2848,11 +2829,8 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_UPPER_IC: case NFA_NUPPER_IC: case NFA_ANY_COMPOSING: - /* possibly non-ascii */ - if (has_mbyte) - len += 3; - else - ++len; + // possibly non-ascii + len += 3; break; case NFA_START_INVISIBLE: @@ -3019,12 +2997,12 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) for (p = postfix; p < end; ++p) { switch (*p) { case NFA_CONCAT: - /* Concatenation. - * Pay attention: this operator does not exist in the r.e. itself - * (it is implicit, really). It is added when r.e. is translated - * to postfix form in re2post(). */ - if (nfa_calc_size == TRUE) { - /* nstate += 0; */ + // Concatenation. + // Pay attention: this operator does not exist in the r.e. itself + // (it is implicit, really). It is added when r.e. is translated + // to postfix form in re2post(). 
+ if (nfa_calc_size == true) { + // nstate += 0; break; } e2 = POP(); @@ -3034,8 +3012,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_OR: - /* Alternation */ - if (nfa_calc_size == TRUE) { + // Alternation + if (nfa_calc_size == true) { nstate++; break; } @@ -3048,8 +3026,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_STAR: - /* Zero or more, prefer more */ - if (nfa_calc_size == TRUE) { + // Zero or more, prefer more + if (nfa_calc_size == true) { nstate++; break; } @@ -3062,8 +3040,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_STAR_NONGREEDY: - /* Zero or more, prefer zero */ - if (nfa_calc_size == TRUE) { + // Zero or more, prefer zero + if (nfa_calc_size == true) { nstate++; break; } @@ -3076,8 +3054,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_QUEST: - /* one or zero atoms=> greedy match */ - if (nfa_calc_size == TRUE) { + // one or zero atoms=> greedy match + if (nfa_calc_size == true) { nstate++; break; } @@ -3089,8 +3067,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_QUEST_NONGREEDY: - /* zero or one atoms => non-greedy match */ - if (nfa_calc_size == TRUE) { + // zero or one atoms => non-greedy match + if (nfa_calc_size == true) { nstate++; break; } @@ -3106,7 +3084,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) /* On the stack is the sequence starting with NFA_START_COLL or * NFA_START_NEG_COLL and all possible characters. Patch it to * add the output to the start. */ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate++; break; } @@ -3120,10 +3098,10 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_RANGE: - /* Before this are two characters, the low and high end of a - * range. Turn them into two states with MIN and MAX. 
*/ - if (nfa_calc_size == TRUE) { - /* nstate += 0; */ + // Before this are two characters, the low and high end of a + // range. Turn them into two states with MIN and MAX. + if (nfa_calc_size == true) { + // nstate += 0; break; } e2 = POP(); @@ -3137,8 +3115,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_EMPTY: - /* 0-length, used in a repetition with max/min count of 0 */ - if (nfa_calc_size == TRUE) { + // 0-length, used in a repetition with max/min count of 0 + if (nfa_calc_size == true) { nstate++; break; } @@ -3152,20 +3130,19 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) { int n; - /* \%[abc] implemented as: - * NFA_SPLIT - * +-CHAR(a) - * | +-NFA_SPLIT - * | +-CHAR(b) - * | | +-NFA_SPLIT - * | | +-CHAR(c) - * | | | +-next - * | | +- next - * | +- next - * +- next - */ - n = *++p; /* get number of characters */ - if (nfa_calc_size == TRUE) { + // \%[abc] implemented as: + // NFA_SPLIT + // +-CHAR(a) + // | +-NFA_SPLIT + // | +-CHAR(b) + // | | +-NFA_SPLIT + // | | +-CHAR(c) + // | | | +-next + // | | +- next + // | +- next + // +- next + n = *++p; // get number of characters + if (nfa_calc_size == true) { nstate += n; break; } @@ -3235,7 +3212,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) * Surrounds the preceding atom with START_INVISIBLE and * END_INVISIBLE, similarly to MOPEN. */ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate += pattern ? 
4 : 2; break; } @@ -3297,8 +3274,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_ZOPEN7: case NFA_ZOPEN8: case NFA_ZOPEN9: - case NFA_NOPEN: /* \%( \) "Invisible Submatch" */ - if (nfa_calc_size == TRUE) { + case NFA_NOPEN: // \%( \) "Invisible Submatch" + if (nfa_calc_size == true) { nstate += 2; break; } @@ -3376,7 +3353,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_ZREF7: case NFA_ZREF8: case NFA_ZREF9: - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate += 2; break; } @@ -3405,7 +3382,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) { int n = *++p; /* lnum, col or mark name */ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate += 1; break; } @@ -3420,8 +3397,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_ZSTART: case NFA_ZEND: default: - /* Operands */ - if (nfa_calc_size == TRUE) { + // Operands + if (nfa_calc_size == true) { nstate++; break; } @@ -3435,7 +3412,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } /* for(p = postfix; *p; ++p) */ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate++; goto theend; /* Return value when counting size is ignored anyway */ } @@ -3489,11 +3466,11 @@ static void nfa_postprocess(nfa_regprog_T *prog) || c == NFA_START_INVISIBLE_BEFORE_NEG) { int directly; - /* Do it directly when what follows is possibly the end of the - * match. */ - if (match_follows(prog->state[i].out1->out, 0)) - directly = TRUE; - else { + // Do it directly when what follows is possibly the end of the + // match. 
+ if (match_follows(prog->state[i].out1->out, 0)) { + directly = true; + } else { int ch_invisible = failure_chance(prog->state[i].out, 0); int ch_follows = failure_chance(prog->state[i].out1->out, 0); @@ -3505,10 +3482,11 @@ static void nfa_postprocess(nfa_regprog_T *prog) * unbounded, always prefer what follows then, * unless what follows will always match. * Otherwise strongly prefer what follows. */ - if (prog->state[i].val <= 0 && ch_follows > 0) - directly = FALSE; - else + if (prog->state[i].val <= 0 && ch_follows > 0) { + directly = false; + } else { directly = ch_follows * 10 < ch_invisible; + } } else { /* normal invisible, first do the one with the * highest failure chance */ @@ -3537,8 +3515,9 @@ static void nfa_postprocess(nfa_regprog_T *prog) static void log_subsexpr(regsubs_T *subs) { log_subexpr(&subs->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { log_subexpr(&subs->synt); + } } static void log_subexpr(regsub_T *sub) @@ -3564,15 +3543,17 @@ static void log_subexpr(regsub_T *sub) } } -static char *pim_info(nfa_pim_T *pim) +static char *pim_info(const nfa_pim_T *pim) { static char buf[30]; - if (pim == NULL || pim->result == NFA_PIM_UNUSED) + if (pim == NULL || pim->result == NFA_PIM_UNUSED) { buf[0] = NUL; - else { - sprintf(buf, " PIM col %d", REG_MULTI ? (int)pim->end.pos.col - : (int)(pim->end.ptr - reginput)); + } else { + snprintf(buf, sizeof(buf), " PIM col %d", + REG_MULTI + ? 
(int)pim->end.pos.col + : (int)(pim->end.ptr - rex.input)); } return buf; } @@ -3591,19 +3572,21 @@ static void copy_pim(nfa_pim_T *to, nfa_pim_T *from) to->result = from->result; to->state = from->state; copy_sub(&to->subs.norm, &from->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&to->subs.synt, &from->subs.synt); + } to->end = from->end; } static void clear_sub(regsub_T *sub) { - if (REG_MULTI) - /* Use 0xff to set lnum to -1 */ + if (REG_MULTI) { + // Use 0xff to set lnum to -1 memset(sub->list.multi, 0xff, - sizeof(struct multipos) * nfa_nsubexpr); - else - memset(sub->list.line, 0, sizeof(struct linepos) * nfa_nsubexpr); + sizeof(struct multipos) * rex.nfa_nsubexpr); + } else { + memset(sub->list.line, 0, sizeof(struct linepos) * rex.nfa_nsubexpr); + } sub->in_use = 0; } @@ -3651,7 +3634,7 @@ static void copy_sub_off(regsub_T *to, regsub_T *from) */ static void copy_ze_off(regsub_T *to, regsub_T *from) { - if (nfa_has_zend) { + if (rex.nfa_has_zend) { if (REG_MULTI) { if (from->list.multi[0].end_lnum >= 0){ to->list.multi[0].end_lnum = from->list.multi[0].end_lnum; @@ -3664,9 +3647,9 @@ static void copy_ze_off(regsub_T *to, regsub_T *from) } } -// Return TRUE if "sub1" and "sub2" have the same start positions. +// Return true if "sub1" and "sub2" have the same start positions. // When using back-references also check the end position. -static int sub_equal(regsub_T *sub1, regsub_T *sub2) +static bool sub_equal(regsub_T *sub1, regsub_T *sub2) { int i; int todo; @@ -3677,22 +3660,25 @@ static int sub_equal(regsub_T *sub1, regsub_T *sub2) todo = sub1->in_use > sub2->in_use ? 
sub1->in_use : sub2->in_use; if (REG_MULTI) { - for (i = 0; i < todo; ++i) { - if (i < sub1->in_use) + for (i = 0; i < todo; i++) { + if (i < sub1->in_use) { s1 = sub1->list.multi[i].start_lnum; - else + } else { s1 = -1; - if (i < sub2->in_use) + } + if (i < sub2->in_use) { s2 = sub2->list.multi[i].start_lnum; - else + } else { s2 = -1; - if (s1 != s2) - return FALSE; + } + if (s1 != s2) { + return false; + } if (s1 != -1 && sub1->list.multi[i].start_col - != sub2->list.multi[i].start_col) - return FALSE; - - if (nfa_has_backref) { + != sub2->list.multi[i].start_col) { + return false; + } + if (rex.nfa_has_backref) { if (i < sub1->in_use) { s1 = sub1->list.multi[i].end_lnum; } else { @@ -3704,28 +3690,30 @@ static int sub_equal(regsub_T *sub1, regsub_T *sub2) s2 = -1; } if (s1 != s2) { - return FALSE; + return false; } if (s1 != -1 && sub1->list.multi[i].end_col != sub2->list.multi[i].end_col) { - return FALSE; + return false; } } } } else { - for (i = 0; i < todo; ++i) { - if (i < sub1->in_use) + for (i = 0; i < todo; i++) { + if (i < sub1->in_use) { sp1 = sub1->list.line[i].start; - else + } else { sp1 = NULL; - if (i < sub2->in_use) + } + if (i < sub2->in_use) { sp2 = sub2->list.line[i].start; - else + } else { sp2 = NULL; - if (sp1 != sp2) - return FALSE; - - if (nfa_has_backref) { + } + if (sp1 != sp2) { + return false; + } + if (rex.nfa_has_backref) { if (i < sub1->in_use) { sp1 = sub1->list.line[i].end; } else { @@ -3737,13 +3725,13 @@ static int sub_equal(regsub_T *sub1, regsub_T *sub2) sp2 = NULL; } if (sp1 != sp2) { - return FALSE; + return false; } } } } - return TRUE; + return true; } #ifdef REGEXP_DEBUG @@ -3754,83 +3742,81 @@ static void report_state(char *action, nfa_pim_T *pim) { int col; - if (sub->in_use <= 0) + if (sub->in_use <= 0) { col = -1; - else if (REG_MULTI) + } else if (REG_MULTI) { col = sub->list.multi[0].start_col; - else - col = (int)(sub->list.line[0].start - regline); + } else { + col = (int)(sub->list.line[0].start - rex.line); + 
} nfa_set_code(state->c); fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)%s\n", - action, abs(state->id), lid, state->c, code, col, - pim_info(pim)); + action, abs(state->id), lid, state->c, code, col, + pim_info(pim)); } #endif -/* - * Return TRUE if the same state is already in list "l" with the same - * positions as "subs". - */ -static int -has_state_with_pos ( - nfa_list_T *l, /* runtime state list */ - nfa_state_T *state, /* state to update */ - regsubs_T *subs, /* pointers to subexpressions */ - nfa_pim_T *pim /* postponed match or NULL */ +// Return true if the same state is already in list "l" with the same +// positions as "subs". +static bool has_state_with_pos( + nfa_list_T *l, // runtime state list + nfa_state_T *state, // state to update + regsubs_T *subs, // pointers to subexpressions + nfa_pim_T *pim // postponed match or NULL ) + FUNC_ATTR_NONNULL_ARG(1, 2, 3) { - nfa_thread_T *thread; - int i; - - for (i = 0; i < l->n; ++i) { - thread = &l->t[i]; + for (int i = 0; i < l->n; i++) { + nfa_thread_T *thread = &l->t[i]; if (thread->state->id == state->id && sub_equal(&thread->subs.norm, &subs->norm) - && (!nfa_has_zsubexpr + && (!rex.nfa_has_zsubexpr || sub_equal(&thread->subs.synt, &subs->synt)) - && pim_equal(&thread->pim, pim)) - return TRUE; + && pim_equal(&thread->pim, pim)) { + return true; + } } - return FALSE; + return false; } -/* - * Return TRUE if "one" and "two" are equal. That includes when both are not - * set. - */ -static int pim_equal(nfa_pim_T *one, nfa_pim_T *two) +// Return true if "one" and "two" are equal. That includes when both are not +// set. 
+static bool pim_equal(const nfa_pim_T *one, const nfa_pim_T *two) { - int one_unused = (one == NULL || one->result == NFA_PIM_UNUSED); - int two_unused = (two == NULL || two->result == NFA_PIM_UNUSED); + const bool one_unused = (one == NULL || one->result == NFA_PIM_UNUSED); + const bool two_unused = (two == NULL || two->result == NFA_PIM_UNUSED); - if (one_unused) - /* one is unused: equal when two is also unused */ + if (one_unused) { + // one is unused: equal when two is also unused return two_unused; - if (two_unused) - /* one is used and two is not: not equal */ - return FALSE; - /* compare the state id */ - if (one->state->id != two->state->id) - return FALSE; - /* compare the position */ - if (REG_MULTI) + } + if (two_unused) { + // one is used and two is not: not equal + return false; + } + // compare the state id + if (one->state->id != two->state->id) { + return false; + } + // compare the position + if (REG_MULTI) { return one->end.pos.lnum == two->end.pos.lnum && one->end.pos.col == two->end.pos.col; + } return one->end.ptr == two->end.ptr; } -/* - * Return TRUE if "state" leads to a NFA_MATCH without advancing the input. - */ -static int match_follows(nfa_state_T *startstate, int depth) +// Return true if "state" leads to a NFA_MATCH without advancing the input. 
+static bool match_follows(const nfa_state_T *startstate, int depth) + FUNC_ATTR_NONNULL_ALL { - nfa_state_T *state = startstate; - - /* avoid too much recursion */ - if (depth > 10) - return FALSE; + const nfa_state_T *state = startstate; + // avoid too much recursion + if (depth > 10) { + return false; + } while (state != NULL) { switch (state->c) { case NFA_MATCH: @@ -3838,7 +3824,7 @@ static int match_follows(nfa_state_T *startstate, int depth) case NFA_END_INVISIBLE: case NFA_END_INVISIBLE_NEG: case NFA_END_PATTERN: - return TRUE; + return true; case NFA_SPLIT: return match_follows(state->out, depth + 1) @@ -3892,39 +3878,38 @@ static int match_follows(nfa_state_T *startstate, int depth) case NFA_START_COLL: case NFA_START_NEG_COLL: case NFA_NEWL: - /* state will advance input */ - return FALSE; + // state will advance input + return false; default: - if (state->c > 0) - /* state will advance input */ - return FALSE; - - /* Others: zero-width or possibly zero-width, might still find - * a match at the same position, keep looking. */ + if (state->c > 0) { + // state will advance input + return false; + } + // Others: zero-width or possibly zero-width, might still find + // a match at the same position, keep looking. break; } state = state->out; } - return FALSE; + return false; } -/* - * Return TRUE if "state" is already in list "l". - */ -static int -state_in_list ( - nfa_list_T *l, /* runtime state list */ - nfa_state_T *state, /* state to update */ - regsubs_T *subs /* pointers to subexpressions */ +// Return true if "state" is already in list "l". 
+static bool state_in_list( + nfa_list_T *l, // runtime state list + nfa_state_T *state, // state to update + regsubs_T *subs // pointers to subexpressions ) + FUNC_ATTR_NONNULL_ALL { if (state->lastlist[nfa_ll_index] == l->id) { - if (!nfa_has_backref || has_state_with_pos(l, state, subs, NULL)) - return TRUE; + if (!rex.nfa_has_backref || has_state_with_pos(l, state, subs, NULL)) { + return true; + } } - return FALSE; + return false; } // Offset used for "off" by addstate_here(). @@ -3943,10 +3928,10 @@ static regsubs_T *addstate( { int subidx; int off = off_arg; - int add_here = FALSE; + int add_here = false; int listindex = 0; int k; - int found = FALSE; + int found = false; nfa_thread_T *thread; struct multipos save_multipos; int save_in_use; @@ -3956,7 +3941,7 @@ static regsubs_T *addstate( regsubs_T *subs = subs_arg; static regsubs_T temp_subs; #ifdef REGEXP_DEBUG - int did_print = FALSE; + int did_print = false; #endif static int depth = 0; @@ -4005,15 +3990,16 @@ static regsubs_T *addstate( case NFA_BOL: case NFA_BOF: - /* "^" won't match past end-of-line, don't bother trying. - * Except when at the end of the line, or when we are going to the - * next line for a look-behind match. */ - if (reginput > regline - && *reginput != NUL + // "^" won't match past end-of-line, don't bother trying. + // Except when at the end of the line, or when we are going to the + // next line for a look-behind match. + if (rex.input > rex.line + && *rex.input != NUL && (nfa_endp == NULL || !REG_MULTI - || reglnum == nfa_endp->se_u.pos.lnum)) + || rex.lnum == nfa_endp->se_u.pos.lnum)) { goto skip_add; + } FALLTHROUGH; case NFA_MOPEN1: @@ -4047,7 +4033,7 @@ static regsubs_T *addstate( * unless it is an MOPEN that is used for a backreference or * when there is a PIM. For NFA_MATCH check the position, * lower position is preferred. 
*/ - if (!nfa_has_backref && pim == NULL && !l->has_pim + if (!rex.nfa_has_backref && pim == NULL && !l->has_pim && state->c != NFA_MATCH) { /* When called from addstate_here() do insert before @@ -4055,7 +4041,7 @@ static regsubs_T *addstate( if (add_here) { for (k = 0; k < l->n && k < listindex; ++k) { if (l->t[k].state->id == state->id) { - found = TRUE; + found = true; break; } } @@ -4092,11 +4078,12 @@ skip_add: return NULL; } if (subs != &temp_subs) { - /* "subs" may point into the current array, need to make a - * copy before it becomes invalid. */ + // "subs" may point into the current array, need to make a + // copy before it becomes invalid. copy_sub(&temp_subs.norm, &subs->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&temp_subs.synt, &subs->synt); + } subs = &temp_subs; } @@ -4113,14 +4100,15 @@ skip_add: thread->pim.result = NFA_PIM_UNUSED; else { copy_pim(&thread->pim, pim); - l->has_pim = TRUE; + l->has_pim = true; } copy_sub(&thread->subs.norm, &subs->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&thread->subs.synt, &subs->synt); + } #ifdef REGEXP_DEBUG report_state("Adding", &thread->subs.norm, state, l->id, pim); - did_print = TRUE; + did_print = true; #endif } @@ -4195,13 +4183,12 @@ skip_add: sub->in_use = subidx + 1; } if (off == -1) { - sub->list.multi[subidx].start_lnum = reglnum + 1; + sub->list.multi[subidx].start_lnum = rex.lnum + 1; sub->list.multi[subidx].start_col = 0; } else { - - sub->list.multi[subidx].start_lnum = reglnum; + sub->list.multi[subidx].start_lnum = rex.lnum; sub->list.multi[subidx].start_col = - (colnr_T)(reginput - regline + off); + (colnr_T)(rex.input - rex.line + off); } sub->list.multi[subidx].end_lnum = -1; } else { @@ -4216,7 +4203,7 @@ skip_add: } sub->in_use = subidx + 1; } - sub->list.line[subidx].start = reginput + off; + sub->list.line[subidx].start = rex.input + off; } subs = addstate(l, state->out, subs, pim, off_arg); @@ -4241,9 +4228,10 @@ skip_add: break; 
case NFA_MCLOSE: - if (nfa_has_zend && (REG_MULTI - ? subs->norm.list.multi[0].end_lnum >= 0 - : subs->norm.list.line[0].end != NULL)) { + if (rex.nfa_has_zend + && (REG_MULTI + ? subs->norm.list.multi[0].end_lnum >= 0 + : subs->norm.list.line[0].end != NULL)) { // Do not overwrite the position set by \ze. subs = addstate(l, state->out, subs, pim, off_arg); break; @@ -4288,18 +4276,18 @@ skip_add: if (REG_MULTI) { save_multipos = sub->list.multi[subidx]; if (off == -1) { - sub->list.multi[subidx].end_lnum = reglnum + 1; + sub->list.multi[subidx].end_lnum = rex.lnum + 1; sub->list.multi[subidx].end_col = 0; } else { - sub->list.multi[subidx].end_lnum = reglnum; + sub->list.multi[subidx].end_lnum = rex.lnum; sub->list.multi[subidx].end_col = - (colnr_T)(reginput - regline + off); + (colnr_T)(rex.input - rex.line + off); } /* avoid compiler warnings */ save_ptr = NULL; } else { save_ptr = sub->list.line[subidx].end; - sub->list.line[subidx].end = reginput + off; + sub->list.line[subidx].end = rex.input + off; // avoid compiler warnings memset(&save_multipos, 0, sizeof(save_multipos)); } @@ -4497,7 +4485,7 @@ static int check_char_class(int class, int c) /* * Check for a match with subexpression "subidx". - * Return TRUE if it matches. + * Return true if it matches. 
*/ static int match_backref ( @@ -4512,49 +4500,49 @@ match_backref ( retempty: /* backref was not set, match an empty string */ *bytelen = 0; - return TRUE; + return true; } if (REG_MULTI) { if (sub->list.multi[subidx].start_lnum < 0 || sub->list.multi[subidx].end_lnum < 0) goto retempty; - if (sub->list.multi[subidx].start_lnum == reglnum - && sub->list.multi[subidx].end_lnum == reglnum) { + if (sub->list.multi[subidx].start_lnum == rex.lnum + && sub->list.multi[subidx].end_lnum == rex.lnum) { len = sub->list.multi[subidx].end_col - sub->list.multi[subidx].start_col; - if (cstrncmp(regline + sub->list.multi[subidx].start_col, - reginput, &len) == 0) { + if (cstrncmp(rex.line + sub->list.multi[subidx].start_col, + rex.input, &len) == 0) { *bytelen = len; - return TRUE; + return true; } } else { - if (match_with_backref( - sub->list.multi[subidx].start_lnum, - sub->list.multi[subidx].start_col, - sub->list.multi[subidx].end_lnum, - sub->list.multi[subidx].end_col, - bytelen) == RA_MATCH) - return TRUE; + if (match_with_backref(sub->list.multi[subidx].start_lnum, + sub->list.multi[subidx].start_col, + sub->list.multi[subidx].end_lnum, + sub->list.multi[subidx].end_col, + bytelen) == RA_MATCH) { + return true; + } } } else { if (sub->list.line[subidx].start == NULL || sub->list.line[subidx].end == NULL) goto retempty; len = (int)(sub->list.line[subidx].end - sub->list.line[subidx].start); - if (cstrncmp(sub->list.line[subidx].start, reginput, &len) == 0) { + if (cstrncmp(sub->list.line[subidx].start, rex.input, &len) == 0) { *bytelen = len; - return TRUE; + return true; } } - return FALSE; + return false; } /* * Check for a match with \z subexpression "subidx". - * Return TRUE if it matches. + * Return true if it matches. 
*/ static int match_zref ( @@ -4568,15 +4556,15 @@ match_zref ( if (re_extmatch_in == NULL || re_extmatch_in->matches[subidx] == NULL) { /* backref was not set, match an empty string */ *bytelen = 0; - return TRUE; + return true; } len = (int)STRLEN(re_extmatch_in->matches[subidx]); - if (cstrncmp(re_extmatch_in->matches[subidx], reginput, &len) == 0) { + if (cstrncmp(re_extmatch_in->matches[subidx], rex.input, &len) == 0) { *bytelen = len; - return TRUE; + return true; } - return FALSE; + return false; } /* @@ -4629,74 +4617,79 @@ static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos) static int recursive_regmatch( nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len) + FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7) { - int save_reginput_col = (int)(reginput - regline); - int save_reglnum = reglnum; - int save_nfa_match = nfa_match; - int save_nfa_listid = nfa_listid; - save_se_T *save_nfa_endp = nfa_endp; + const int save_reginput_col = (int)(rex.input - rex.line); + const int save_reglnum = rex.lnum; + const int save_nfa_match = nfa_match; + const int save_nfa_listid = rex.nfa_listid; + save_se_T *const save_nfa_endp = nfa_endp; save_se_T endpos; save_se_T *endposp = NULL; - int result; - int need_restore = FALSE; + int need_restore = false; if (pim != NULL) { - /* start at the position where the postponed match was */ - if (REG_MULTI) - reginput = regline + pim->end.pos.col; - else - reginput = pim->end.ptr; + // start at the position where the postponed match was + if (REG_MULTI) { + rex.input = rex.line + pim->end.pos.col; + } else { + rex.input = pim->end.ptr; + } } if (state->c == NFA_START_INVISIBLE_BEFORE || state->c == NFA_START_INVISIBLE_BEFORE_FIRST || state->c == NFA_START_INVISIBLE_BEFORE_NEG || state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST) { - /* The recursive match must end at the current position. When "pim" is - * not NULL it specifies the current position. 
*/ + // The recursive match must end at the current position. When "pim" is + // not NULL it specifies the current position. endposp = &endpos; if (REG_MULTI) { if (pim == NULL) { - endpos.se_u.pos.col = (int)(reginput - regline); - endpos.se_u.pos.lnum = reglnum; - } else + endpos.se_u.pos.col = (int)(rex.input - rex.line); + endpos.se_u.pos.lnum = rex.lnum; + } else { endpos.se_u.pos = pim->end.pos; + } } else { - if (pim == NULL) - endpos.se_u.ptr = reginput; - else + if (pim == NULL) { + endpos.se_u.ptr = rex.input; + } else { endpos.se_u.ptr = pim->end.ptr; + } } - /* Go back the specified number of bytes, or as far as the - * start of the previous line, to try matching "\@<=" or - * not matching "\@<!". This is very inefficient, limit the number of - * bytes if possible. */ + // Go back the specified number of bytes, or as far as the + // start of the previous line, to try matching "\@<=" or + // not matching "\@<!". This is very inefficient, limit the number of + // bytes if possible. if (state->val <= 0) { if (REG_MULTI) { - regline = reg_getline(--reglnum); - if (regline == NULL) - /* can't go before the first line */ - regline = reg_getline(++reglnum); + rex.line = reg_getline(--rex.lnum); + if (rex.line == NULL) { + // can't go before the first line + rex.line = reg_getline(++rex.lnum); + } } - reginput = regline; + rex.input = rex.line; } else { - if (REG_MULTI && (int)(reginput - regline) < state->val) { - /* Not enough bytes in this line, go to end of - * previous line. */ - regline = reg_getline(--reglnum); - if (regline == NULL) { - /* can't go before the first line */ - regline = reg_getline(++reglnum); - reginput = regline; - } else - reginput = regline + STRLEN(regline); + if (REG_MULTI && (int)(rex.input - rex.line) < state->val) { + // Not enough bytes in this line, go to end of + // previous line. 
+ rex.line = reg_getline(--rex.lnum); + if (rex.line == NULL) { + // can't go before the first line + rex.line = reg_getline(++rex.lnum); + rex.input = rex.line; + } else { + rex.input = rex.line + STRLEN(rex.line); + } } - if ((int)(reginput - regline) >= state->val) { - reginput -= state->val; - reginput -= utf_head_off(regline, reginput); + if ((int)(rex.input - rex.line) >= state->val) { + rex.input -= state->val; + rex.input -= utf_head_off(rex.line, rex.input); } else { - reginput = regline; + rex.input = rex.line; } } } @@ -4706,48 +4699,50 @@ static int recursive_regmatch( fclose(log_fd); log_fd = NULL; #endif - /* Have to clear the lastlist field of the NFA nodes, so that - * nfa_regmatch() and addstate() can run properly after recursion. */ + // Have to clear the lastlist field of the NFA nodes, so that + // nfa_regmatch() and addstate() can run properly after recursion. if (nfa_ll_index == 1) { - /* Already calling nfa_regmatch() recursively. Save the lastlist[1] - * values and clear them. */ - if (*listids == NULL || *listids_len < nstate) { + // Already calling nfa_regmatch() recursively. Save the lastlist[1] + // values and clear them. + if (*listids == NULL || *listids_len < prog->nstate) { xfree(*listids); - *listids = xmalloc(sizeof(**listids) * nstate); - *listids_len = nstate; + *listids = xmalloc(sizeof(**listids) * prog->nstate); + *listids_len = prog->nstate; } nfa_save_listids(prog, *listids); - need_restore = TRUE; - /* any value of nfa_listid will do */ + need_restore = true; + // any value of rex.nfa_listid will do } else { - /* First recursive nfa_regmatch() call, switch to the second lastlist - * entry. Make sure nfa_listid is different from a previous recursive - * call, because some states may still have this ID. */ - ++nfa_ll_index; - if (nfa_listid <= nfa_alt_listid) - nfa_listid = nfa_alt_listid; + // First recursive nfa_regmatch() call, switch to the second lastlist + // entry. 
Make sure rex.nfa_listid is different from a previous + // recursive call, because some states may still have this ID. + nfa_ll_index++; + if (rex.nfa_listid <= rex.nfa_alt_listid) { + rex.nfa_listid = rex.nfa_alt_listid; + } } - /* Call nfa_regmatch() to check if the current concat matches at this - * position. The concat ends with the node NFA_END_INVISIBLE */ + // Call nfa_regmatch() to check if the current concat matches at this + // position. The concat ends with the node NFA_END_INVISIBLE nfa_endp = endposp; - result = nfa_regmatch(prog, state->out, submatch, m); + const int result = nfa_regmatch(prog, state->out, submatch, m); - if (need_restore) + if (need_restore) { nfa_restore_listids(prog, *listids); - else { - --nfa_ll_index; - nfa_alt_listid = nfa_listid; + } else { + nfa_ll_index--; + rex.nfa_alt_listid = rex.nfa_listid; } - /* restore position in input text */ - reglnum = save_reglnum; - if (REG_MULTI) - regline = reg_getline(reglnum); - reginput = regline + save_reginput_col; + // restore position in input text + rex.lnum = save_reglnum; + if (REG_MULTI) { + rex.line = reg_getline(rex.lnum); + } + rex.input = rex.line + save_reginput_col; if (result != NFA_TOO_EXPENSIVE) { nfa_match = save_nfa_match; - nfa_listid = save_nfa_listid; + rex.nfa_listid = save_nfa_listid; } nfa_endp = save_nfa_endp; @@ -4756,7 +4751,7 @@ static int recursive_regmatch( if (log_fd != NULL) { fprintf(log_fd, "****************************\n"); fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n"); - fprintf(log_fd, "MATCH = %s\n", !result ? "FALSE" : "OK"); + fprintf(log_fd, "MATCH = %s\n", !result ? 
"false" : "OK"); fprintf(log_fd, "****************************\n"); } else { EMSG(_(e_log_open_failed)); @@ -4930,11 +4925,11 @@ static int failure_chance(nfa_state_T *state, int depth) */ static int skip_to_start(int c, colnr_T *colp) { - const char_u *const s = cstrchr(regline + *colp, c); + const char_u *const s = cstrchr(rex.line + *colp, c); if (s == NULL) { return FAIL; } - *colp = (int)(s - regline); + *colp = (int)(s - rex.line); return OK; } @@ -4948,12 +4943,12 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) #define PTR2LEN(x) utf_ptr2len(x) colnr_T col = startcol; - int regstart_len = PTR2LEN(regline + startcol); + int regstart_len = PTR2LEN(rex.line + startcol); for (;;) { bool match = true; char_u *s1 = match_text; - char_u *s2 = regline + col + regstart_len; // skip regstart + char_u *s2 = rex.line + col + regstart_len; // skip regstart while (*s1) { int c1_len = PTR2LEN(s1); int c1 = PTR2CHAR(s1); @@ -4973,12 +4968,12 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) && !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) { cleanup_subexpr(); if (REG_MULTI) { - rex.reg_startpos[0].lnum = reglnum; + rex.reg_startpos[0].lnum = rex.lnum; rex.reg_startpos[0].col = col; - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = s2 - regline; + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = s2 - rex.line; } else { - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; rex.reg_endp[0] = s2; } return 1L; @@ -5008,17 +5003,18 @@ static int nfa_did_time_out(void) /// Main matching routine. /// -/// Run NFA to determine whether it matches reginput. +/// Run NFA to determine whether it matches rex.input. /// /// When "nfa_endp" is not NULL it is a required end-of-match position. 
/// -/// Return TRUE if there is a match, FALSE if there is no match, +/// Return true if there is a match, false if there is no match, /// NFA_TOO_EXPENSIVE if we end up with too many states. /// When there is a match "submatch" contains the positions. /// /// Note: Caller must ensure that: start != NULL. static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m) + FUNC_ATTR_NONNULL_ARG(1, 2, 4) { int result = false; int flag = 0; @@ -5063,11 +5059,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, nfa_match = false; // Allocate memory for the lists of nodes. - size_t size = (nstate + 1) * sizeof(nfa_thread_T); + size_t size = (prog->nstate + 1) * sizeof(nfa_thread_T); list[0].t = xmalloc(size); - list[0].len = nstate + 1; + list[0].len = prog->nstate + 1; list[1].t = xmalloc(size); - list[1].len = nstate + 1; + list[1].len = prog->nstate + 1; #ifdef REGEXP_DEBUG log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); @@ -5085,23 +5081,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, thislist = &list[0]; thislist->n = 0; - thislist->has_pim = FALSE; + thislist->has_pim = false; nextlist = &list[1]; nextlist->n = 0; - nextlist->has_pim = FALSE; + nextlist->has_pim = false; #ifdef REGEXP_DEBUG fprintf(log_fd, "(---) STARTSTATE first\n"); #endif - thislist->id = nfa_listid + 1; + thislist->id = rex.nfa_listid + 1; - /* Inline optimized code for addstate(thislist, start, m, 0) if we know - * it's the first MOPEN. */ + // Inline optimized code for addstate(thislist, start, m, 0) if we know + // it's the first MOPEN. 
if (toplevel) { if (REG_MULTI) { - m->norm.list.multi[0].start_lnum = reglnum; - m->norm.list.multi[0].start_col = (colnr_T)(reginput - regline); - } else - m->norm.list.line[0].start = reginput; + m->norm.list.multi[0].start_lnum = rex.lnum; + m->norm.list.multi[0].start_col = (colnr_T)(rex.input - rex.line); + } else { + m->norm.list.line[0].start = rex.input; + } m->norm.in_use = 1; r = addstate(thislist, start->out, m, NULL, 0); } else { @@ -5122,8 +5119,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, * Run for each character. */ for (;; ) { - int curc = utf_ptr2char(reginput); - int clen = utfc_ptr2len(reginput); + int curc = utf_ptr2char(rex.input); + int clen = utfc_ptr2len(rex.input); if (curc == NUL) { clen = 0; go_to_nextline = false; @@ -5134,20 +5131,20 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, nextlist = &list[flag ^= 1]; nextlist->n = 0; // clear nextlist nextlist->has_pim = false; - nfa_listid++; + rex.nfa_listid++; if (prog->re_engine == AUTOMATIC_ENGINE - && (nfa_listid >= NFA_MAX_STATES)) { + && (rex.nfa_listid >= NFA_MAX_STATES)) { // Too many states, retry with old engine. nfa_match = NFA_TOO_EXPENSIVE; goto theend; } - thislist->id = nfa_listid; - nextlist->id = nfa_listid + 1; + thislist->id = rex.nfa_listid; + nextlist->id = rex.nfa_listid + 1; #ifdef REGEXP_DEBUG fprintf(log_fd, "------------------------------------------\n"); - fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); + fprintf(log_fd, ">>> Reginput is \"%s\"\n", rex.input); fprintf(log_fd, ">>> Advanced one character... Current char is %c (code %d) \n", curc, @@ -5200,7 +5197,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } else if (REG_MULTI) { col = t->subs.norm.list.multi[0].start_col; } else { - col = (int)(t->subs.norm.list.line[0].start - regline); + col = (int)(t->subs.norm.list.line[0].start - rex.line); } nfa_set_code(t->state->c); fprintf(log_fd, "(%d) char %d %s (start col %d)%s... 
\n", @@ -5226,64 +5223,66 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } nfa_match = true; copy_sub(&submatch->norm, &t->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&submatch->synt, &t->subs.synt); + } #ifdef REGEXP_DEBUG log_subsexpr(&t->subs); #endif - /* Found the left-most longest match, do not look at any other - * states at this position. When the list of states is going - * to be empty quit without advancing, so that "reginput" is - * correct. */ - if (nextlist->n == 0) + // Found the left-most longest match, do not look at any other + // states at this position. When the list of states is going + // to be empty quit without advancing, so that "rex.input" is + // correct. + if (nextlist->n == 0) { clen = 0; + } goto nextchar; } case NFA_END_INVISIBLE: case NFA_END_INVISIBLE_NEG: case NFA_END_PATTERN: - /* - * This is only encountered after a NFA_START_INVISIBLE or - * NFA_START_INVISIBLE_BEFORE node. - * They surround a zero-width group, used with "\@=", "\&", - * "\@!", "\@<=" and "\@<!". - * If we got here, it means that the current "invisible" group - * finished successfully, so return control to the parent - * nfa_regmatch(). For a look-behind match only when it ends - * in the position in "nfa_endp". - * Submatches are stored in *m, and used in the parent call. - */ + // This is only encountered after a NFA_START_INVISIBLE or + // NFA_START_INVISIBLE_BEFORE node. + // They surround a zero-width group, used with "\@=", "\&", + // "\@!", "\@<=" and "\@<!". + // If we got here, it means that the current "invisible" group + // finished successfully, so return control to the parent + // nfa_regmatch(). For a look-behind match only when it ends + // in the position in "nfa_endp". + // Submatches are stored in *m, and used in the parent call. 
#ifdef REGEXP_DEBUG if (nfa_endp != NULL) { - if (REG_MULTI) - fprintf( - log_fd, - "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n", - (int)reglnum, - (int)nfa_endp->se_u.pos.lnum, - (int)(reginput - regline), - nfa_endp->se_u.pos.col); - else + if (REG_MULTI) { + fprintf(log_fd, + "Current lnum: %d, endp lnum: %d;" + " current col: %d, endp col: %d\n", + (int)rex.lnum, + (int)nfa_endp->se_u.pos.lnum, + (int)(rex.input - rex.line), + nfa_endp->se_u.pos.col); + } else { fprintf(log_fd, "Current col: %d, endp col: %d\n", - (int)(reginput - regline), - (int)(nfa_endp->se_u.ptr - reginput)); + (int)(rex.input - rex.line), + (int)(nfa_endp->se_u.ptr - rex.input)); + } } #endif - /* If "nfa_endp" is set it's only a match if it ends at - * "nfa_endp" */ - if (nfa_endp != NULL && (REG_MULTI - ? (reglnum != nfa_endp->se_u.pos.lnum - || (int)(reginput - regline) - != nfa_endp->se_u.pos.col) - : reginput != nfa_endp->se_u.ptr)) + // If "nfa_endp" is set it's only a match if it ends at + // "nfa_endp" + if (nfa_endp != NULL + && (REG_MULTI + ? (rex.lnum != nfa_endp->se_u.pos.lnum + || (int)(rex.input - rex.line) != nfa_endp->se_u.pos.col) + : rex.input != nfa_endp->se_u.ptr)) { break; - - /* do not set submatches for \@! */ + } + // do not set submatches for \@! if (t->state->c != NFA_END_INVISIBLE_NEG) { copy_sub(&m->norm, &t->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&m->synt, &t->subs.synt); + } } #ifdef REGEXP_DEBUG fprintf(log_fd, "Match found:\n"); @@ -5322,9 +5321,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Copy submatch info for the recursive call, opposite // of what happens on success below. copy_sub_off(&m->norm, &t->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&m->synt, &t->subs.synt); - + } // First try matching the invisible match, then what // follows. 
result = recursive_regmatch(t->state, NULL, prog, submatch, m, @@ -5335,7 +5334,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } // for \@! and \@<! it is a match when the result is - // FALSE + // false if (result != (t->state->c == NFA_START_INVISIBLE_NEG || t->state->c == NFA_START_INVISIBLE_NEG_FIRST || t->state->c @@ -5344,8 +5343,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { // Copy submatch info from the recursive call copy_sub_off(&t->subs.norm, &m->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&t->subs.synt, &m->synt); + } // If the pattern has \ze and it matched in the // sub pattern, use it. copy_ze_off(&t->subs.norm, &m->norm); @@ -5369,11 +5369,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, pim.subs.norm.in_use = 0; pim.subs.synt.in_use = 0; if (REG_MULTI) { - pim.end.pos.col = (int)(reginput - regline); - pim.end.pos.lnum = reglnum; - } else - pim.end.ptr = reginput; - + pim.end.pos.col = (int)(rex.input - rex.line); + pim.end.pos.lnum = rex.lnum; + } else { + pim.end.ptr = rex.input; + } // t->state->out1 is the corresponding END_INVISIBLE // node; Add its out to the current list (zero-width // match). @@ -5426,7 +5426,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Copy submatch info to the recursive call, opposite of what // happens afterwards. 
copy_sub_off(&m->norm, &t->subs.norm); - if (nfa_has_zsubexpr) { + if (rex.nfa_has_zsubexpr) { copy_sub_off(&m->synt, &t->subs.synt); } @@ -5446,7 +5446,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, #endif // Copy submatch info from the recursive call copy_sub_off(&t->subs.norm, &m->norm); - if (nfa_has_zsubexpr) { + if (rex.nfa_has_zsubexpr) { copy_sub_off(&t->subs.synt, &m->synt); } // Now we need to skip over the matched text and then @@ -5454,9 +5454,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, if (REG_MULTI) { // TODO(RE): multi-line match bytelen = m->norm.list.multi[0].end_col - - (int)(reginput - regline); + - (int)(rex.input - rex.line); } else { - bytelen = (int)(m->norm.list.line[0].end - reginput); + bytelen = (int)(m->norm.list.line[0].end - rex.input); } #ifdef REGEXP_DEBUG @@ -5485,7 +5485,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } case NFA_BOL: - if (reginput == regline) { + if (rex.input == rex.line) { add_here = true; add_state = t->state->out; } @@ -5503,20 +5503,16 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, if (curc == NUL) { result = false; - } else if (has_mbyte) { + } else { int this_class; // Get class of current and previous char (if it exists). 
- this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); if (this_class <= 1) { result = false; } else if (reg_prev_class() == this_class) { result = false; } - } else if (!vim_iswordc_buf(curc, rex.reg_buf) - || (reginput > regline - && vim_iswordc_buf(reginput[-1], rex.reg_buf))) { - result = false; } if (result) { add_here = true; @@ -5526,22 +5522,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_EOW: result = true; - if (reginput == regline) { + if (rex.input == rex.line) { result = false; - } else if (has_mbyte) { + } else { int this_class, prev_class; // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) { result = false; } - } else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) - || (reginput[0] != NUL - && vim_iswordc_buf(curc, rex.reg_buf))) { - result = false; } if (result) { add_here = true; @@ -5550,7 +5542,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_BOF: - if (reglnum == 0 && reginput == regline + if (rex.lnum == 0 && rex.input == rex.line && (!REG_MULTI || rex.reg_firstlnum == 1)) { add_here = true; add_state = t->state->out; @@ -5558,7 +5550,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_EOF: - if (reglnum == rex.reg_maxline && curc == NUL) { + if (rex.lnum == rex.reg_maxline && curc == NUL) { add_here = true; add_state = t->state->out; } @@ -5603,7 +5595,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // We don't care about the order of composing characters. // Get them into cchars[] first. 
while (len < clen) { - mc = utf_ptr2char(reginput + len); + mc = utf_ptr2char(rex.input + len); cchars[ccount++] = mc; len += mb_char2len(mc); if (ccount == MAX_MCO) @@ -5634,7 +5626,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_NEWL: if (curc == NUL && !rex.reg_line_lbr && REG_MULTI - && reglnum <= rex.reg_maxline) { + && rex.lnum <= rex.reg_maxline) { go_to_nextline = true; // Pass -1 for the offset, which means taking the position // at the start of the next line. @@ -5688,7 +5680,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, for (; c1 <= c2; c1++) { if (utf_fold(c1) == curc_low) { result = result_if_matched; - done = TRUE; + done = true; break; } } @@ -5746,13 +5738,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_KWORD: // \k - result = vim_iswordp_buf(reginput, rex.reg_buf); + result = vim_iswordp_buf(rex.input, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; case NFA_SKWORD: // \K result = !ascii_isdigit(curc) - && vim_iswordp_buf(reginput, rex.reg_buf); + && vim_iswordp_buf(rex.input, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; @@ -5767,12 +5759,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_PRINT: // \p - result = vim_isprintc(PTR2CHAR(reginput)); + result = vim_isprintc(PTR2CHAR(rex.input)); ADD_STATE_IF_MATCH(t->state); break; case NFA_SPRINT: // \P - result = !ascii_isdigit(curc) && vim_isprintc(PTR2CHAR(reginput)); + result = !ascii_isdigit(curc) && vim_isprintc(PTR2CHAR(rex.input)); ADD_STATE_IF_MATCH(t->state); break; @@ -5959,14 +5951,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_LNUM_LT: assert(t->state->val >= 0 && !((rex.reg_firstlnum > 0 - && reglnum > LONG_MAX - rex.reg_firstlnum) + && rex.lnum > LONG_MAX - rex.reg_firstlnum) || (rex.reg_firstlnum < 0 - && reglnum < LONG_MIN + rex.reg_firstlnum)) - && reglnum + rex.reg_firstlnum >= 0); + && rex.lnum < LONG_MIN + 
rex.reg_firstlnum)) + && rex.lnum + rex.reg_firstlnum >= 0); result = (REG_MULTI && nfa_re_num_cmp((uintmax_t)t->state->val, t->state->c - NFA_LNUM, - (uintmax_t)(reglnum + rex.reg_firstlnum))); + (uintmax_t)(rex.lnum + rex.reg_firstlnum))); if (result) { add_here = true; add_state = t->state->out; @@ -5977,11 +5969,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_COL_GT: case NFA_COL_LT: assert(t->state->val >= 0 - && reginput >= regline - && (uintmax_t)(reginput - regline) <= UINTMAX_MAX - 1); + && rex.input >= rex.line + && (uintmax_t)(rex.input - rex.line) <= UINTMAX_MAX - 1); result = nfa_re_num_cmp((uintmax_t)t->state->val, t->state->c - NFA_COL, - (uintmax_t)(reginput - regline + 1)); + (uintmax_t)(rex.input - rex.line + 1)); if (result) { add_here = true; add_state = t->state->out; @@ -5993,7 +5985,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_VCOL_LT: { int op = t->state->c - NFA_VCOL; - colnr_T col = (colnr_T)(reginput - regline); + colnr_T col = (colnr_T)(rex.input - rex.line); // Bail out quickly when there can't be a match, avoid the overhead of // win_linetabsize() on long lines. @@ -6014,7 +6006,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, result = col > t->state->val * ts; } if (!result) { - uintmax_t lts = win_linetabsize(wp, regline, col); + uintmax_t lts = win_linetabsize(wp, rex.line, col); assert(t->state->val >= 0); result = nfa_re_num_cmp((uintmax_t)t->state->val, op, lts + 1); } @@ -6034,13 +6026,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Compare the mark position to the match position. result = (pos != NULL // mark doesn't exist && pos->lnum > 0 // mark isn't set in reg_buf - && (pos->lnum == reglnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(reginput - regline) + && (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos->col == (colnr_T)(rex.input - rex.line) ? 
t->state->c == NFA_MARK - : (pos->col < (colnr_T)(reginput - regline) + : (pos->col < (colnr_T)(rex.input - rex.line) ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT)) - : (pos->lnum < reglnum + rex.reg_firstlnum + : (pos->lnum < rex.lnum + rex.reg_firstlnum ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT))); if (result) { @@ -6051,10 +6043,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } case NFA_CURSOR: - result = (rex.reg_win != NULL - && (reglnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) - && ((colnr_T)(reginput - regline) - == rex.reg_win->w_cursor.col)); + result = rex.reg_win != NULL + && (rex.lnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) + && ((colnr_T)(rex.input - rex.line) == rex.reg_win->w_cursor.col); if (result) { add_here = true; add_state = t->state->out; @@ -6112,7 +6103,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // If rex.reg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. if (result && enc_utf8 && !rex.reg_icombine) { - clen = utf_ptr2len(reginput); + clen = utf_ptr2len(rex.input); } ADD_STATE_IF_MATCH(t->state); @@ -6143,7 +6134,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, &listids, &listids_len); pim->result = result ? NFA_PIM_MATCH : NFA_PIM_NOMATCH; // for \@! and \@<! 
it is a match when the result is - // FALSE + // false if (result != (pim->state->c == NFA_START_INVISIBLE_NEG || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST || pim->state->c @@ -6152,8 +6143,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { // Copy submatch info from the recursive call copy_sub_off(&pim->subs.norm, &m->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&pim->subs.synt, &m->synt); + } } } else { result = (pim->result == NFA_PIM_MATCH); @@ -6163,12 +6155,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, log_fd, "Using previous recursive nfa_regmatch() result, result == %d\n", pim->result); - fprintf(log_fd, "MATCH = %s\n", result ? "OK" : "FALSE"); + fprintf(log_fd, "MATCH = %s\n", result ? "OK" : "false"); fprintf(log_fd, "\n"); #endif } - // for \@! and \@<! it is a match when result is FALSE + // for \@! and \@<! it is a match when result is false if (result != (pim->state->c == NFA_START_INVISIBLE_NEG || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST || pim->state->c @@ -6177,8 +6169,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { // Copy submatch info from the recursive call copy_sub_off(&t->subs.norm, &pim->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&t->subs.synt, &pim->subs.synt); + } } else { // look-behind match failed, don't add the state continue; @@ -6222,29 +6215,28 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Also don't start a match past the first line. if (!nfa_match && ((toplevel - && reglnum == 0 + && rex.lnum == 0 && clen != 0 && (rex.reg_maxcol == 0 - || (colnr_T)(reginput - regline) < rex.reg_maxcol)) + || (colnr_T)(rex.input - rex.line) < rex.reg_maxcol)) || (nfa_endp != NULL && (REG_MULTI - ? 
(reglnum < nfa_endp->se_u.pos.lnum - || (reglnum == nfa_endp->se_u.pos.lnum - && (int)(reginput - regline) + ? (rex.lnum < nfa_endp->se_u.pos.lnum + || (rex.lnum == nfa_endp->se_u.pos.lnum + && (int)(rex.input - rex.line) < nfa_endp->se_u.pos.col)) - : reginput < nfa_endp->se_u.ptr)))) { + : rex.input < nfa_endp->se_u.ptr)))) { #ifdef REGEXP_DEBUG fprintf(log_fd, "(---) STARTSTATE\n"); #endif // Inline optimized code for addstate() if we know the state is // the first MOPEN. if (toplevel) { - int add = TRUE; - int c; + int add = true; if (prog->regstart != NUL && clen != 0) { if (nextlist->n == 0) { - colnr_T col = (colnr_T)(reginput - regline) + clen; + colnr_T col = (colnr_T)(rex.input - rex.line) + clen; // Nextlist is empty, we can skip ahead to the // character that must appear at the start. @@ -6253,13 +6245,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } #ifdef REGEXP_DEBUG fprintf(log_fd, " Skipping ahead %d bytes to regstart\n", - col - ((colnr_T)(reginput - regline) + clen)); + col - ((colnr_T)(rex.input - rex.line) + clen)); #endif - reginput = regline + col - clen; + rex.input = rex.line + col - clen; } else { // Checking if the required start character matches is // cheaper than adding a state that won't match. 
- c = PTR2CHAR(reginput + clen); + const int c = PTR2CHAR(rex.input + clen); if (c != prog->regstart && (!rex.reg_ic || utf_fold(c) != utf_fold(prog->regstart))) { @@ -6267,17 +6259,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, fprintf(log_fd, " Skipping start state, regstart does not match\n"); #endif - add = FALSE; + add = false; } } } if (add) { - if (REG_MULTI) + if (REG_MULTI) { m->norm.list.multi[0].start_col = - (colnr_T)(reginput - regline) + clen; - else - m->norm.list.line[0].start = reginput + clen; + (colnr_T)(rex.input - rex.line) + clen; + } else { + m->norm.list.line[0].start = rex.input + clen; + } if (addstate(nextlist, start->out, m, NULL, clen) == NULL) { nfa_match = NFA_TOO_EXPENSIVE; goto theend; @@ -6306,9 +6299,9 @@ nextchar: // Advance to the next character, or advance to the next line, or // finish. if (clen != 0) { - reginput += clen; + rex.input += clen; } else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI - && reglnum < nfa_endp->se_u.pos.lnum)) { + && rex.lnum < nfa_endp->se_u.pos.lnum)) { reg_nextline(); } else { break; @@ -6347,7 +6340,7 @@ theend: return nfa_match; } -// Try match of "prog" with at regline["col"]. +// Try match of "prog" with at rex.line["col"]. // Returns <= 0 for failure, number of lines contained in the match otherwise. 
static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, @@ -6361,7 +6354,7 @@ static long nfa_regtry(nfa_regprog_T *prog, FILE *f; #endif - reginput = regline + col; + rex.input = rex.line + col; nfa_time_limit = tm; nfa_timed_out = timed_out; nfa_time_count = 0; @@ -6374,7 +6367,7 @@ static long nfa_regtry(nfa_regprog_T *prog, #ifdef REGEXP_DEBUG fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); #endif - fprintf(f, "\tInput text is \"%s\" \n", reginput); + fprintf(f, "\tInput text is \"%s\" \n", rex.input); fprintf(f, "\t=======================================================\n\n"); nfa_print_state(f, start); fprintf(f, "\n\n"); @@ -6412,11 +6405,11 @@ static long nfa_regtry(nfa_regprog_T *prog, } if (rex.reg_endpos[0].lnum < 0) { // pattern has a \ze but it didn't match, use current end - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = (int)(reginput - regline); + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = (int)(rex.input - rex.line); } else { // Use line number of "\ze". 
- reglnum = rex.reg_endpos[0].lnum; + rex.lnum = rex.reg_endpos[0].lnum; } } else { for (i = 0; i < subs.norm.in_use; i++) { @@ -6425,10 +6418,10 @@ static long nfa_regtry(nfa_regprog_T *prog, } if (rex.reg_startp[0] == NULL) { - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; } if (rex.reg_endp[0] == NULL) { - rex.reg_endp[0] = reginput; + rex.reg_endp[0] = rex.input; } } @@ -6463,7 +6456,7 @@ static long nfa_regtry(nfa_regprog_T *prog, } } - return 1 + reglnum; + return 1 + rex.lnum; } /// Match a regexp against a string ("line" points to the string) or multiple @@ -6481,7 +6474,6 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, { nfa_regprog_T *prog; long retval = 0L; - int i; colnr_T col = startcol; if (REG_MULTI) { @@ -6513,26 +6505,30 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, rex.reg_icombine = true; } - regline = line; - reglnum = 0; /* relative to line */ + rex.line = line; + rex.lnum = 0; // relative to line - nfa_has_zend = prog->has_zend; - nfa_has_backref = prog->has_backref; - nfa_nsubexpr = prog->nsubexp; - nfa_listid = 1; - nfa_alt_listid = 2; + rex.nfa_has_zend = prog->has_zend; + rex.nfa_has_backref = prog->has_backref; + rex.nfa_nsubexpr = prog->nsubexp; + rex.nfa_listid = 1; + rex.nfa_alt_listid = 2; +#ifdef REGEXP_DEBUG nfa_regengine.expr = prog->pattern; +#endif if (prog->reganch && col > 0) return 0L; - need_clear_subexpr = TRUE; - /* Clear the external match subpointers if necessary. */ + rex.need_clear_subexpr = true; + // Clear the external match subpointers if necessary. if (prog->reghasz == REX_SET) { - nfa_has_zsubexpr = TRUE; - need_clear_zsubexpr = TRUE; - } else - nfa_has_zsubexpr = FALSE; + rex.nfa_has_zsubexpr = true; + rex.need_clear_zsubexpr = true; + } else { + rex.nfa_has_zsubexpr = false; + rex.need_clear_zsubexpr = false; + } if (prog->regstart != NUL) { /* Skip ahead until a character we know the match must start with. 
@@ -6552,8 +6548,10 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, goto theend; } - nstate = prog->nstate; - for (i = 0; i < nstate; ++i) { + // Set the "nstate" used by nfa_regcomp() to zero to trigger an error when + // it's accidentally used during execution. + nstate = 0; + for (int i = 0; i < prog->nstate; i++) { prog->state[i].id = i; prog->state[i].lastlist[0] = 0; prog->state[i].lastlist[1] = 0; @@ -6561,7 +6559,9 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, retval = nfa_regtry(prog, col, tm, timed_out); +#ifdef REGEXP_DEBUG nfa_regengine.expr = NULL; +#endif theend: return retval; @@ -6579,7 +6579,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) if (expr == NULL) return NULL; +#ifdef REGEXP_DEBUG nfa_regengine.expr = expr; +#endif nfa_re_flags = re_flags; init_class_tab(); @@ -6616,26 +6618,27 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) * PASS 1 * Count number of NFA states in "nstate". Do not build the NFA. */ - post2nfa(postfix, post_ptr, TRUE); + post2nfa(postfix, post_ptr, true); /* allocate the regprog with space for the compiled regexp */ size_t prog_size = sizeof(nfa_regprog_T) + sizeof(nfa_state_T) * (nstate - 1); prog = xmalloc(prog_size); state_ptr = prog->state; + prog->re_in_use = false; /* * PASS 2 * Build the NFA */ - prog->start = post2nfa(postfix, post_ptr, FALSE); - if (prog->start == NULL) + prog->start = post2nfa(postfix, post_ptr, false); + if (prog->start == NULL) { goto fail; - + } prog->regflags = regflags; prog->engine = &nfa_regengine; prog->nstate = nstate; - prog->has_zend = nfa_has_zend; - prog->has_backref = nfa_has_backref; + prog->has_zend = rex.nfa_has_zend; + prog->has_backref = rex.nfa_has_backref; prog->nsubexp = regnpar; nfa_postprocess(prog); @@ -6651,7 +6654,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) /* Remember whether this pattern has any \z specials in it. 
*/ prog->reghasz = re_has_z; prog->pattern = vim_strsave(expr); +#ifdef REGEXP_DEBUG nfa_regengine.expr = NULL; +#endif out: xfree(post_start); @@ -6663,8 +6668,8 @@ fail: XFREE_CLEAR(prog); #ifdef REGEXP_DEBUG nfa_postfix_dump(expr, FAIL); -#endif nfa_regengine.expr = NULL; +#endif goto out; } |