diff options
Diffstat (limited to 'src/nvim/regexp_nfa.c')
| -rw-r--r-- | src/nvim/regexp_nfa.c | 383 | 
1 files changed, 205 insertions, 178 deletions
| diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 474f3df32a..93ba9ce097 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -1,3 +1,6 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com +  /*   * NFA regular expression implementation.   * @@ -53,13 +56,13 @@ enum {    NFA_RANGE_MIN,                    /* low end of a range  */    NFA_RANGE_MAX,                    /* high end of a range  */ -  NFA_CONCAT,                       /* concatenate two previous items (postfix -                                     * only) */ -  NFA_OR,                           /* \| (postfix only) */ -  NFA_STAR,                         /* greedy * (posfix only) */ -  NFA_STAR_NONGREEDY,               /* non-greedy * (postfix only) */ -  NFA_QUEST,                        /* greedy \? (postfix only) */ -  NFA_QUEST_NONGREEDY,              /* non-greedy \? (postfix only) */ +  NFA_CONCAT,                       // concatenate two previous items (postfix +                                    // only) +  NFA_OR,                           // \| (postfix only) +  NFA_STAR,                         // greedy * (postfix only) +  NFA_STAR_NONGREEDY,               // non-greedy * (postfix only) +  NFA_QUEST,                        // greedy \? (postfix only) +  NFA_QUEST_NONGREEDY,              // non-greedy \? (postfix only)    NFA_BOL,                          /* ^    Begin line */    NFA_EOL,                          /* $    End line */ @@ -631,6 +634,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)            config |= CLASS_o7;            break;          } +        return FAIL;        case 'a':          if (*(p + 2) == 'z') {            config |= CLASS_az; @@ -639,6 +643,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)            config |= CLASS_af;            break;          } +        return FAIL;        case 'A':          if (*(p + 2) == 'Z') {            config |= CLASS_AZ; @@ -647,7 +652,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl)            config |= CLASS_AF;            break;          } -      /* FALLTHROUGH */ +        return FAIL;        default:          return FAIL;        } @@ -1983,7 +1988,7 @@ static int nfa_regpiece(void)      // The engine is very inefficient (uses too many states) when the maximum      // is much larger than the minimum and when the maximum is large. Bail out      // if we can use the other engine. -    if ((nfa_re_flags & RE_AUTO) && (maxval > minval + 200 || maxval > 500)) { +    if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) {        return FAIL;      } @@ -2772,15 +2777,10 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)      case NFA_ANY:      case NFA_START_COLL:      case NFA_START_NEG_COLL: -      /* matches some character, including composing chars */ -      if (enc_utf8) -        len += MB_MAXBYTES; -      else if (has_mbyte) -        len += 2; -      else -        ++len; +      // Matches some character, including composing chars. +      len += MB_MAXBYTES;        if (state->c != NFA_ANY) { -        /* skip over the characters */ +        // Skip over the characters.          state = state->out1->out;          continue;        } @@ -3893,23 +3893,27 @@ state_in_list (    return FALSE;  } -/* - * Add "state" and possibly what follows to state list ".". - * Returns "subs_arg", possibly copied into temp_subs. - */ +// Offset used for "off" by addstate_here(). +#define ADDSTATE_HERE_OFFSET 10 +// Add "state" and possibly what follows to state list ".". +// Returns "subs_arg", possibly copied into temp_subs.  static regsubs_T *  addstate (      nfa_list_T *l,             /* runtime state list */      nfa_state_T *state,         /* state to update */      regsubs_T *subs_arg,      /* pointers to subexpressions */      nfa_pim_T *pim,           /* postponed look-behind match */ -    int off                            /* byte offset, when -1 go to next line */ -) +    int off_arg)    /* byte offset, when -1 go to next line */  {    int subidx; +  int off = off_arg; +  int add_here = FALSE; +  int listindex = 0; +  int k; +  int found = FALSE;    nfa_thread_T        *thread; -  lpos_T save_lpos; +  struct multipos     save_multipos;    int save_in_use;    char_u              *save_ptr;    int i; @@ -3920,6 +3924,12 @@ addstate (    int did_print = FALSE;  #endif +  if (off_arg <= -ADDSTATE_HERE_OFFSET) { +    add_here = true; +    off = 0; +    listindex = -(off_arg + ADDSTATE_HERE_OFFSET); +  } +    switch (state->c) {    case NFA_NCLOSE:    case NFA_MCLOSE: @@ -3996,13 +4006,28 @@ addstate (         * lower position is preferred. */        if (!nfa_has_backref && pim == NULL && !l->has_pim            && state->c != NFA_MATCH) { + +        /* When called from addstate_here() do insert before +         * existing states. */ +        if (add_here) { +          for (k = 0; k < l->n && k < listindex; ++k) { +            if (l->t[k].state->id == state->id) { +              found = TRUE; +              break; +            } +          } +        } + +        if (!add_here || found) {  skip_add:  #ifdef REGEXP_DEBUG -        nfa_set_code(state->c); -        fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s\n", -            abs(state->id), l->id, state->c, code); +          nfa_set_code(state->c); +          fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s pim: %s has_pim: %d found: %d\n", +                  abs(state->id), l->id, state->c, code, +                  pim == NULL ? "NULL" : "yes", l->has_pim, found);  #endif          return subs; +        }        }        /* Do not add the state again when it exists with the same @@ -4058,14 +4083,14 @@ skip_add:    case NFA_SPLIT:      /* order matters here */ -    subs = addstate(l, state->out, subs, pim, off); -    subs = addstate(l, state->out1, subs, pim, off); +    subs = addstate(l, state->out, subs, pim, off_arg); +    subs = addstate(l, state->out1, subs, pim, off_arg);      break;    case NFA_EMPTY:    case NFA_NOPEN:    case NFA_NCLOSE: -    subs = addstate(l, state->out, subs, pim, off); +    subs = addstate(l, state->out, subs, pim, off_arg);      break;    case NFA_MOPEN: @@ -4102,15 +4127,13 @@ skip_add:      /* avoid compiler warnings */      save_ptr = NULL; -    save_lpos.lnum = 0; -    save_lpos.col = 0; +    memset(&save_multipos, 0, sizeof(save_multipos));      /* Set the position (with "off" added) in the subexpression.  Save       * and restore it when it was in use.  Otherwise fill any gap. */      if (REG_MULTI) {        if (subidx < sub->in_use) { -        save_lpos.lnum = sub->list.multi[subidx].start_lnum; -        save_lpos.col = sub->list.multi[subidx].start_col; +        save_multipos = sub->list.multi[subidx];          save_in_use = -1;        } else {          save_in_use = sub->in_use; @@ -4145,7 +4168,7 @@ skip_add:        sub->list.line[subidx].start = reginput + off;      } -    subs = addstate(l, state->out, subs, pim, off); +    subs = addstate(l, state->out, subs, pim, off_arg);      /* "subs" may have changed, need to set "sub" again */      if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9)        sub = &subs->synt; @@ -4153,9 +4176,8 @@ skip_add:        sub = &subs->norm;      if (save_in_use == -1) { -      if (REG_MULTI){ -        sub->list.multi[subidx].start_lnum = save_lpos.lnum; -        sub->list.multi[subidx].start_col = save_lpos.col; +      if (REG_MULTI) { +        sub->list.multi[subidx] = save_multipos;        }        else          sub->list.line[subidx].start = save_ptr; @@ -4168,9 +4190,10 @@ skip_add:                           ? subs->norm.list.multi[0].end_lnum >= 0                           : subs->norm.list.line[0].end != NULL)) {        /* Do not overwrite the position set by \ze. */ -      subs = addstate(l, state->out, subs, pim, off); +      subs = addstate(l, state->out, subs, pim, off_arg);        break;      } +    // fallthrough    case NFA_MCLOSE1:    case NFA_MCLOSE2:    case NFA_MCLOSE3: @@ -4208,8 +4231,7 @@ skip_add:      if (sub->in_use <= subidx)        sub->in_use = subidx + 1;      if (REG_MULTI) { -      save_lpos.lnum = sub->list.multi[subidx].end_lnum; -      save_lpos.col = sub->list.multi[subidx].end_col; +      save_multipos = sub->list.multi[subidx];        if (off == -1) {          sub->list.multi[subidx].end_lnum = reglnum + 1;          sub->list.multi[subidx].end_col = 0; @@ -4223,21 +4245,19 @@ skip_add:      } else {        save_ptr = sub->list.line[subidx].end;        sub->list.line[subidx].end = reginput + off; -      /* avoid compiler warnings */ -      save_lpos.lnum = 0; -      save_lpos.col = 0; +      // avoid compiler warnings +      memset(&save_multipos, 0, sizeof(save_multipos));      } -    subs = addstate(l, state->out, subs, pim, off); +    subs = addstate(l, state->out, subs, pim, off_arg);      /* "subs" may have changed, need to set "sub" again */      if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9)        sub = &subs->synt;      else        sub = &subs->norm; -    if (REG_MULTI){ -      sub->list.multi[subidx].end_lnum = save_lpos.lnum; -      sub->list.multi[subidx].end_col = save_lpos.col; +    if (REG_MULTI) { +      sub->list.multi[subidx] = save_multipos;      }      else        sub->list.line[subidx].end = save_ptr; @@ -4266,8 +4286,10 @@ addstate_here (    int count;    int listidx = *ip; -  /* first add the state(s) at the end, so that we know how many there are */ -  addstate(l, state, subs, pim, 0); +  /* First add the state(s) at the end, so that we know how many there are. +   * Pass the listidx as offset (avoids adding another argument to +   * addstate(). */ +  addstate(l, state, subs, pim, -listidx - ADDSTATE_HERE_OFFSET);    /* when "*ip" was at the end of the list, nothing to do */    if (listidx + 1 == tlen) @@ -4346,7 +4368,7 @@ static int check_char_class(int class, int c)        return OK;      break;    case NFA_CLASS_LOWER: -    if (vim_islower(c) && c != 170 && c != 186) { +    if (mb_islower(c) && c != 170 && c != 186) {        return OK;      }      break; @@ -4364,8 +4386,9 @@ static int check_char_class(int class, int c)        return OK;      break;    case NFA_CLASS_UPPER: -    if (vim_isupper(c)) +    if (mb_isupper(c)) {        return OK; +    }      break;    case NFA_CLASS_XDIGIT:      if (ascii_isxdigit(c)) @@ -4384,8 +4407,9 @@ static int check_char_class(int class, int c)        return OK;      break;    case NFA_CLASS_ESCAPE: -    if (c == '\033') +    if (c == ESC) {        return OK; +    }      break;    default: @@ -4828,17 +4852,10 @@ static int failure_chance(nfa_state_T *state, int depth)   */  static int skip_to_start(int c, colnr_T *colp)  { -  char_u *s; - -  /* Used often, do some work to avoid call overhead. */ -  if (!ireg_ic -      && !has_mbyte -      ) -    s = vim_strbyte(regline + *colp, c); -  else -    s = cstrchr(regline + *colp, c); -  if (s == NULL) +  const char_u *const s = cstrchr(regline + *colp, c); +  if (s == NULL) {      return FAIL; +  }    *colp = (int)(s - regline);    return OK;  } @@ -4865,7 +4882,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)        int c2_len = PTR2LEN(s2);        int c2 = PTR2CHAR(s2); -      if ((c1 != c2 && (!ireg_ic || vim_tolower(c1) != vim_tolower(c2))) +      if ((c1 != c2 && (!rex.reg_ic || mb_tolower(c1) != mb_tolower(c2)))            || c1_len != c2_len) {          match = false;          break; @@ -4878,13 +4895,13 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)          && !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) {        cleanup_subexpr();        if (REG_MULTI) { -        reg_startpos[0].lnum = reglnum; -        reg_startpos[0].col = col; -        reg_endpos[0].lnum = reglnum; -        reg_endpos[0].col = s2 - regline; +        rex.reg_startpos[0].lnum = reglnum; +        rex.reg_startpos[0].col = col; +        rex.reg_endpos[0].lnum = reglnum; +        rex.reg_endpos[0].col = s2 - regline;        } else { -        reg_startp[0] = regline + col; -        reg_endp[0] = s2; +        rex.reg_startp[0] = regline + col; +        rex.reg_endp[0] = s2;        }        return 1L;      } @@ -5099,8 +5116,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,        case NFA_MATCH:        {          // If the match ends before a composing characters and -        // ireg_icombine is not set, that is not really a match. -        if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) { +        // rex.reg_icombine is not set, that is not really a match. +        if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) {            break;          }          nfa_match = true; @@ -5383,15 +5400,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,            int this_class;            // Get class of current and previous char (if it exists). -          this_class = mb_get_class_buf(reginput, reg_buf); +          this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);            if (this_class <= 1) {              result = false;            } else if (reg_prev_class() == this_class) {              result = false;            } -        } else if (!vim_iswordc_buf(curc, reg_buf) +        } else if (!vim_iswordc_buf(curc, rex.reg_buf)                     || (reginput > regline -                       && vim_iswordc_buf(reginput[-1], reg_buf))) { +                       && vim_iswordc_buf(reginput[-1], rex.reg_buf))) {            result = false;          }          if (result) { @@ -5408,15 +5425,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,            int this_class, prev_class;            // Get class of current and previous char (if it exists). -          this_class = mb_get_class_buf(reginput, reg_buf); +          this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab);            prev_class = reg_prev_class();            if (this_class == prev_class                || prev_class == 0 || prev_class == 1) {              result = false;            } -        } else if (!vim_iswordc_buf(reginput[-1], reg_buf) +        } else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf)                     || (reginput[0] != NUL -                       && vim_iswordc_buf(curc, reg_buf))) { +                       && vim_iswordc_buf(curc, rex.reg_buf))) {            result = false;          }          if (result) { @@ -5427,14 +5444,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,        case NFA_BOF:          if (reglnum == 0 && reginput == regline -            && (!REG_MULTI || reg_firstlnum == 1)) { +            && (!REG_MULTI || rex.reg_firstlnum == 1)) {            add_here = true;            add_state = t->state->out;          }          break;        case NFA_EOF: -        if (reglnum == reg_maxline && curc == NUL) { +        if (reglnum == rex.reg_maxline && curc == NUL) {            add_here = true;            add_state = t->state->out;          } @@ -5458,7 +5475,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,            // (no preceding character).            len += mb_char2len(mc);          } -        if (ireg_icombine && len == 0) { +        if (rex.reg_icombine && len == 0) {            // If \Z was present, then ignore composing characters.            // When ignoring the base character this always matches.            if (sta->c != curc) { @@ -5509,14 +5526,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,        }        case NFA_NEWL: -        if (curc == NUL && !reg_line_lbr && REG_MULTI -            && reglnum <= reg_maxline) { +        if (curc == NUL && !rex.reg_line_lbr && REG_MULTI +            && reglnum <= rex.reg_maxline) {            go_to_nextline = true;            // Pass -1 for the offset, which means taking the position            // at the start of the next line.            add_state = t->state->out;            add_off = -1; -        } else if (curc == '\n' && reg_line_lbr) { +        } else if (curc == '\n' && rex.reg_line_lbr) {            // match \n as if it is an ordinary character            add_state = t->state->out;            add_off = 1; @@ -5557,23 +5574,25 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,                result = result_if_matched;                break;              } -            if (ireg_ic) { -              int curc_low = vim_tolower(curc); -              int done = FALSE; +            if (rex.reg_ic) { +              int curc_low = mb_tolower(curc); +              int done = false; -              for (; c1 <= c2; ++c1) -                if (vim_tolower(c1) == curc_low) { +              for (; c1 <= c2; c1++) { +                if (mb_tolower(c1) == curc_low) {                    result = result_if_matched;                    done = TRUE;                    break;                  } -              if (done) +              } +              if (done) {                  break; +              }              }            } else if (state->c < 0 ? check_char_class(state->c, curc)                       : (curc == state->c -                        || (ireg_ic && vim_tolower(curc) -                            == vim_tolower(state->c)))) { +                        || (rex.reg_ic && mb_tolower(curc) +                            == mb_tolower(state->c)))) {              result = result_if_matched;              break;            } @@ -5620,13 +5639,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,          break;        case NFA_KWORD:           //  \k -        result = vim_iswordp_buf(reginput, reg_buf); +        result = vim_iswordp_buf(reginput, rex.reg_buf);          ADD_STATE_IF_MATCH(t->state);          break;        case NFA_SKWORD:          //  \K          result = !ascii_isdigit(curc) -                 && vim_iswordp_buf(reginput, reg_buf); +                 && vim_iswordp_buf(reginput, rex.reg_buf);          ADD_STATE_IF_MATCH(t->state);          break; @@ -5741,24 +5760,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,          break;        case NFA_LOWER_IC:        // [a-z] -        result = ri_lower(curc) || (ireg_ic && ri_upper(curc)); +        result = ri_lower(curc) || (rex.reg_ic && ri_upper(curc));          ADD_STATE_IF_MATCH(t->state);          break;        case NFA_NLOWER_IC:       // [^a-z]          result = curc != NUL -                 && !(ri_lower(curc) || (ireg_ic && ri_upper(curc))); +                 && !(ri_lower(curc) || (rex.reg_ic && ri_upper(curc)));          ADD_STATE_IF_MATCH(t->state);          break;        case NFA_UPPER_IC:        // [A-Z] -        result = ri_upper(curc) || (ireg_ic && ri_lower(curc)); +        result = ri_upper(curc) || (rex.reg_ic && ri_lower(curc));          ADD_STATE_IF_MATCH(t->state);          break;        case NFA_NUPPER_IC:       // [^A-Z]          result = curc != NUL -                 && !(ri_upper(curc) || (ireg_ic && ri_lower(curc))); +                 && !(ri_upper(curc) || (rex.reg_ic && ri_lower(curc)));          ADD_STATE_IF_MATCH(t->state);          break; @@ -5832,13 +5851,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,        case NFA_LNUM_GT:        case NFA_LNUM_LT:          assert(t->state->val >= 0 -               && !((reg_firstlnum > 0 && reglnum > LONG_MAX - reg_firstlnum) -                    || (reg_firstlnum <0 && reglnum < LONG_MIN + reg_firstlnum)) -               && reglnum + reg_firstlnum >= 0); +               && !((rex.reg_firstlnum > 0 +                     && reglnum > LONG_MAX - rex.reg_firstlnum) +                    || (rex.reg_firstlnum < 0 +                        && reglnum < LONG_MIN + rex.reg_firstlnum)) +               && reglnum + rex.reg_firstlnum >= 0);          result = (REG_MULTI                    && nfa_re_num_cmp((uintmax_t)t->state->val,                                      t->state->c - NFA_LNUM, -                                    (uintmax_t)(reglnum + reg_firstlnum))); +                                    (uintmax_t)(reglnum + rex.reg_firstlnum)));          if (result) {            add_here = true;            add_state = t->state->out; @@ -5874,7 +5895,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,            }            result = false; -          win_T *wp = reg_win == NULL ? curwin : reg_win; +          win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win;            if (op == 1 && col - 1 > t->state->val && col > 100) {              long ts = wp->w_buffer->b_p_ts; @@ -5901,18 +5922,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,        case NFA_MARK_GT:        case NFA_MARK_LT:        { -        pos_T   *pos = getmark_buf(reg_buf, t->state->val, FALSE); +        pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false);          // Compare the mark position to the match position.          result = (pos != NULL                        // mark doesn't exist                    && pos->lnum > 0          // mark isn't set in reg_buf -                  && (pos->lnum == reglnum + reg_firstlnum +                  && (pos->lnum == reglnum + rex.reg_firstlnum                        ? (pos->col == (colnr_T)(reginput - regline)                           ? t->state->c == NFA_MARK                           : (pos->col < (colnr_T)(reginput - regline)                              ? t->state->c == NFA_MARK_GT                              : t->state->c == NFA_MARK_LT)) -                      : (pos->lnum < reglnum + reg_firstlnum +                      : (pos->lnum < reglnum + rex.reg_firstlnum                           ? t->state->c == NFA_MARK_GT                           : t->state->c == NFA_MARK_LT)));          if (result) { @@ -5923,10 +5944,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,        }        case NFA_CURSOR: -        result = (reg_win != NULL -                  && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) +        result = (rex.reg_win != NULL +                  && (reglnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum)                    && ((colnr_T)(reginput - regline) -                      == reg_win->w_cursor.col)); +                      == rex.reg_win->w_cursor.col));          if (result) {            add_here = true;            add_state = t->state->out; @@ -5976,12 +5997,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,  #endif          result = (c == curc); -        if (!result && ireg_ic) -          result = vim_tolower(c) == vim_tolower(curc); +        if (!result && rex.reg_ic) { +          result = mb_tolower(c) == mb_tolower(curc); +        } -        // If ireg_icombine is not set only skip over the character +        // If rex.reg_icombine is not set only skip over the character          // itself.  When it is set skip over composing characters. -        if (result && enc_utf8 && !ireg_icombine) { +        if (result && enc_utf8 && !rex.reg_icombine) {            clen = utf_ptr2len(reginput);          } @@ -6089,8 +6111,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,          && ((toplevel               && reglnum == 0               && clen != 0 -             && (ireg_maxcol == 0 -                 || (colnr_T)(reginput - regline) < ireg_maxcol)) +             && (rex.reg_maxcol == 0 +                 || (colnr_T)(reginput - regline) < rex.reg_maxcol))              || (nfa_endp != NULL                  && (REG_MULTI                      ? (reglnum < nfa_endp->se_u.pos.lnum @@ -6125,8 +6147,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,              // Checking if the required start character matches is              // cheaper than adding a state that won't match.              c = PTR2CHAR(reginput + clen); -            if (c != prog->regstart && (!ireg_ic || vim_tolower(c) -                                        != vim_tolower(prog->regstart))) { +            if (c != prog->regstart && (!rex.reg_ic || mb_tolower(c) +                                        != mb_tolower(prog->regstart))) {  #ifdef REGEXP_DEBUG                fprintf(log_fd,                    "  Skipping start state, regstart does not match\n"); @@ -6251,34 +6273,37 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm)    cleanup_subexpr();    if (REG_MULTI) {      for (i = 0; i < subs.norm.in_use; i++) { -      reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum; -      reg_startpos[i].col = subs.norm.list.multi[i].start_col; +      rex.reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum; +      rex.reg_startpos[i].col = subs.norm.list.multi[i].start_col; -      reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum; -      reg_endpos[i].col = subs.norm.list.multi[i].end_col; +      rex.reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum; +      rex.reg_endpos[i].col = subs.norm.list.multi[i].end_col;      } -    if (reg_startpos[0].lnum < 0) { -      reg_startpos[0].lnum = 0; -      reg_startpos[0].col = col; +    if (rex.reg_startpos[0].lnum < 0) { +      rex.reg_startpos[0].lnum = 0; +      rex.reg_startpos[0].col = col; +    } +    if (rex.reg_endpos[0].lnum < 0) { +      // pattern has a \ze but it didn't match, use current end +      rex.reg_endpos[0].lnum = reglnum; +      rex.reg_endpos[0].col = (int)(reginput - regline); +    } else { +      // Use line number of "\ze". +      reglnum = rex.reg_endpos[0].lnum;      } -    if (reg_endpos[0].lnum < 0) { -      /* pattern has a \ze but it didn't match, use current end */ -      reg_endpos[0].lnum = reglnum; -      reg_endpos[0].col = (int)(reginput - regline); -    } else -      /* Use line number of "\ze". */ -      reglnum = reg_endpos[0].lnum;    } else {      for (i = 0; i < subs.norm.in_use; i++) { -      reg_startp[i] = subs.norm.list.line[i].start; -      reg_endp[i] = subs.norm.list.line[i].end; +      rex.reg_startp[i] = subs.norm.list.line[i].start; +      rex.reg_endp[i] = subs.norm.list.line[i].end;      } -    if (reg_startp[0] == NULL) -      reg_startp[0] = regline + col; -    if (reg_endp[0] == NULL) -      reg_endp[0] = reginput; +    if (rex.reg_startp[0] == NULL) { +      rex.reg_startp[0] = regline + col; +    } +    if (rex.reg_endp[0] == NULL) { +      rex.reg_endp[0] = reginput; +    }    }    /* Package any found \z(...\) matches for export. Default is none. */ @@ -6332,14 +6357,14 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)    colnr_T col = startcol;    if (REG_MULTI) { -    prog = (nfa_regprog_T *)reg_mmatch->regprog; -    line = reg_getline((linenr_T)0);        /* relative to the cursor */ -    reg_startpos = reg_mmatch->startpos; -    reg_endpos = reg_mmatch->endpos; +    prog = (nfa_regprog_T *)rex.reg_mmatch->regprog; +    line = reg_getline((linenr_T)0);  // relative to the cursor +    rex.reg_startpos = rex.reg_mmatch->startpos; +    rex.reg_endpos = rex.reg_mmatch->endpos;    } else { -    prog = (nfa_regprog_T *)reg_match->regprog; -    reg_startp = reg_match->startp; -    reg_endp = reg_match->endp; +    prog = (nfa_regprog_T *)rex.reg_match->regprog; +    rex.reg_startp = rex.reg_match->startp; +    rex.reg_endp = rex.reg_match->endp;    }    /* Be paranoid... */ @@ -6348,15 +6373,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)      goto theend;    } -  /* If pattern contains "\c" or "\C": overrule value of ireg_ic */ -  if (prog->regflags & RF_ICASE) -    ireg_ic = TRUE; -  else if (prog->regflags & RF_NOICASE) -    ireg_ic = FALSE; +  // If pattern contains "\c" or "\C": overrule value of rex.reg_ic +  if (prog->regflags & RF_ICASE) { +    rex.reg_ic = true; +  } else if (prog->regflags & RF_NOICASE) { +    rex.reg_ic = false; +  } -  /* If pattern contains "\Z" overrule value of ireg_icombine */ -  if (prog->regflags & RF_ICOMBINE) -    ireg_icombine = TRUE; +  // If pattern contains "\Z" overrule value of rex.reg_icombine +  if (prog->regflags & RF_ICOMBINE) { +    rex.reg_icombine = true; +  }    regline = line;    reglnum = 0;      /* relative to line */ @@ -6385,17 +6412,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm)      if (skip_to_start(prog->regstart, &col) == FAIL)        return 0L; -    /* If match_text is set it contains the full text that must match. -     * Nothing else to try. Doesn't handle combining chars well. */ -    if (prog->match_text != NULL -        && !ireg_icombine -        ) +    // If match_text is set it contains the full text that must match. +    // Nothing else to try. Doesn't handle combining chars well. +    if (prog->match_text != NULL && !rex.reg_icombine) {        return find_match_text(col, prog->regstart, prog->match_text); +    }    } -  /* If the start column is past the maximum column: no need to try. */ -  if (ireg_maxcol > 0 && col >= ireg_maxcol) +  // If the start column is past the maximum column: no need to try. +  if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) {      goto theend; +  }    nstate = prog->nstate;    for (i = 0; i < nstate; ++i) { @@ -6547,15 +6574,15 @@ nfa_regexec_nl (      bool line_lbr  )  { -  reg_match = rmp; -  reg_mmatch = NULL; -  reg_maxline = 0; -  reg_line_lbr = line_lbr; -  reg_buf = curbuf; -  reg_win = NULL; -  ireg_ic = rmp->rm_ic; -  ireg_icombine = FALSE; -  ireg_maxcol = 0; +  rex.reg_match = rmp; +  rex.reg_mmatch = NULL; +  rex.reg_maxline = 0; +  rex.reg_line_lbr = line_lbr; +  rex.reg_buf = curbuf; +  rex.reg_win = NULL; +  rex.reg_ic = rmp->rm_ic; +  rex.reg_icombine = false; +  rex.reg_maxcol = 0;    return nfa_regexec_both(line, col, NULL);  } @@ -6596,16 +6623,16 @@ nfa_regexec_nl (  static long nfa_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf,                                linenr_T lnum, colnr_T col, proftime_T *tm)  { -  reg_match = NULL; -  reg_mmatch = rmp; -  reg_buf = buf; -  reg_win = win; -  reg_firstlnum = lnum; -  reg_maxline = reg_buf->b_ml.ml_line_count - lnum; -  reg_line_lbr = FALSE; -  ireg_ic = rmp->rmm_ic; -  ireg_icombine = FALSE; -  ireg_maxcol = rmp->rmm_maxcol; +  rex.reg_match = NULL; +  rex.reg_mmatch = rmp; +  rex.reg_buf = buf; +  rex.reg_win = win; +  rex.reg_firstlnum = lnum; +  rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; +  rex.reg_line_lbr = false; +  rex.reg_ic = rmp->rmm_ic; +  rex.reg_icombine = false; +  rex.reg_maxcol = rmp->rmm_maxcol;    return nfa_regexec_both(NULL, col, tm);  } | 
