diff options
Diffstat (limited to 'src/nvim/regexp_nfa.c')
-rw-r--r-- | src/nvim/regexp_nfa.c | 137 |
1 files changed, 72 insertions, 65 deletions
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 2eb0ca9313..fe18cb4389 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -563,10 +563,7 @@ static char_u *nfa_get_match_text(nfa_state_T *start) p = start->out->out; /* skip first char, it goes into regstart */ s = ret; while (p->c > 0) { - if (has_mbyte) - s += (*mb_char2bytes)(p->c, s); - else - *s++ = p->c; + s += utf_char2bytes(p->c, s); p = p->out; } *s = NUL; @@ -1223,8 +1220,8 @@ static int nfa_regatom(void) if (c == '[') goto collection; - /* "\_x" is character class plus newline */ - /*FALLTHROUGH*/ + // "\_x" is character class plus newline + FALLTHROUGH; /* * Character classes. @@ -1387,8 +1384,8 @@ static int nfa_regatom(void) re_has_z = REX_SET; break; default: - EMSGN(_("E867: (NFA) Unknown operator '\\z%c'"), - no_Magic(c)); + emsgf(_("E867: (NFA) Unknown operator '\\z%c'"), + no_Magic(c)); return FAIL; } break; @@ -1409,7 +1406,7 @@ static int nfa_regatom(void) case 'u': /* %uabcd hex 4 */ case 'U': /* %U1234abcd hex 8 */ { - int nr; + int64_t nr; switch (c) { case 'd': nr = getdecchrs(); break; @@ -1485,7 +1482,7 @@ static int nfa_regatom(void) default: { - int n = 0; + long n = 0; int cmp = c; if (c == '<' || c == '>') @@ -1511,7 +1508,13 @@ static int nfa_regatom(void) EMIT(cmp == '<' ? NFA_VCOL_LT : cmp == '>' ? NFA_VCOL_GT : NFA_VCOL); } - EMIT(n); +#if SIZEOF_INT < SIZEOF_LONG + if (n > INT_MAX) { + EMSG(_("E951: \\% value too large")); + return FAIL; + } +#endif + EMIT((int)n); break; } else if (c == '\'' && n == 0) { /* \%'m \%<'m \%>'m */ @@ -1521,8 +1524,8 @@ static int nfa_regatom(void) break; } } - EMSGN(_("E867: (NFA) Unknown operator '\\%%%c'"), - no_Magic(c)); + emsgf(_("E867: (NFA) Unknown operator '\\%%%c'"), + no_Magic(c)); return FAIL; } break; @@ -1798,7 +1801,7 @@ collection: if (reg_strict) EMSG_RET_FAIL(_(e_missingbracket)); - /* FALLTHROUGH */ + FALLTHROUGH; default: { @@ -1859,7 +1862,7 @@ static int nfa_regpiece(void) int greedy = TRUE; /* Braces are prefixed with '-' ? */ parse_state_T old_state; parse_state_T new_state; - int c2; + int64_t c2; int old_post_pos; int my_post_start; int quest; @@ -1934,7 +1937,7 @@ static int nfa_regpiece(void) break; } if (i == 0) { - EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op); + emsgf(_("E869: (NFA) Unknown operator '\\@%c'"), op); return FAIL; } EMIT(i); @@ -2032,9 +2035,10 @@ static int nfa_regpiece(void) break; } /* end switch */ - if (re_multi_type(peekchr()) != NOT_MULTI) - /* Can't have a multi follow a multi. */ - EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi !")); + if (re_multi_type(peekchr()) != NOT_MULTI) { + // Can't have a multi follow a multi. + EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi")); + } return OK; } @@ -2124,7 +2128,6 @@ static int nfa_regconcat(void) */ static int nfa_regbranch(void) { - int ch; int old_post_pos; old_post_pos = (int)(post_ptr - post_start); @@ -2133,10 +2136,13 @@ static int nfa_regbranch(void) if (nfa_regconcat() == FAIL) return FAIL; - ch = peekchr(); - /* Try next concats */ - while (ch == Magic('&')) { + // Try next concats + while (peekchr() == Magic('&')) { skipchr(); + // if concat is empty do emit a node + if (old_post_pos == (int)(post_ptr - post_start)) { + EMIT(NFA_EMPTY); + } EMIT(NFA_NOPEN); EMIT(NFA_PREV_ATOM_NO_WIDTH); old_post_pos = (int)(post_ptr - post_start); @@ -2146,7 +2152,6 @@ static int nfa_regbranch(void) if (old_post_pos == (int)(post_ptr - post_start)) EMIT(NFA_EMPTY); EMIT(NFA_CONCAT); - ch = peekchr(); } /* if a branch is empty, emit one node for it */ @@ -2447,6 +2452,8 @@ static void nfa_set_code(int c) } static FILE *log_fd; +static char_u e_log_open_failed[] = N_( + "Could not open temporary log file for writing, displaying on stderr... "); /* * Print the postfix notation of the current regexp. @@ -2459,10 +2466,11 @@ static void nfa_postfix_dump(char_u *expr, int retval) f = fopen(NFA_REGEXP_DUMP_LOG, "a"); if (f != NULL) { fprintf(f, "\n-------------------------\n"); - if (retval == FAIL) - fprintf(f, ">>> NFA engine failed ... \n"); - else if (retval == OK) + if (retval == FAIL) { + fprintf(f, ">>> NFA engine failed... \n"); + } else if (retval == OK) { fprintf(f, ">>> NFA engine succeeded !\n"); + } fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr); for (p = post_start; *p && p < post_ptr; p++) { nfa_set_code(*p); @@ -2716,7 +2724,7 @@ static void st_error(int *postfix, int *end, int *p) fclose(df); } #endif - EMSG(_("E874: (NFA) Could not pop the stack !")); + EMSG(_("E874: (NFA) Could not pop the stack!")); } /* @@ -3224,7 +3232,13 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) if (pattern) { /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */ skip = alloc_state(NFA_SKIP, NULL, NULL); + if (skip == NULL) { + goto theend; + } zend = alloc_state(NFA_ZEND, s1, NULL); + if (zend == NULL) { + goto theend; + } s1->out= skip; patch(e.out, zend); PUSH(frag(s, list1(&skip->out))); @@ -3242,8 +3256,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; } - case NFA_COMPOSING: /* char with composing char */ - /* FALLTHROUGH */ + case NFA_COMPOSING: // char with composing char + FALLTHROUGH; case NFA_MOPEN: /* \( \) Submatch */ case NFA_MOPEN1: @@ -3972,7 +3986,7 @@ addstate ( || !REG_MULTI || reglnum == nfa_endp->se_u.pos.lnum)) goto skip_add; - /* FALLTHROUGH */ + FALLTHROUGH; case NFA_MOPEN1: case NFA_MOPEN2: @@ -4195,7 +4209,7 @@ skip_add: subs = addstate(l, state->out, subs, pim, off_arg); break; } - // fallthrough + FALLTHROUGH; case NFA_MCLOSE1: case NFA_MCLOSE2: case NFA_MCLOSE3: @@ -4622,10 +4636,10 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T } if ((int)(reginput - regline) >= state->val) { reginput -= state->val; - if (has_mbyte) - reginput -= mb_head_off(regline, reginput); - } else + reginput -= utf_head_off(regline, reginput); + } else { reginput = regline; + } } } @@ -4685,8 +4699,7 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T fprintf(log_fd, "MATCH = %s\n", !result ? "FALSE" : "OK"); fprintf(log_fd, "****************************\n"); } else { - EMSG(_( - "Could not open temporary log file for writing, displaying on stderr ... ")); + EMSG(_(e_log_open_failed)); log_fd = stderr; } #endif @@ -4952,7 +4965,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); if (debug == NULL) { - EMSG2(_("(NFA) COULD NOT OPEN %s !"), NFA_REGEXP_DEBUG_LOG); + EMSG2("(NFA) COULD NOT OPEN %s!", NFA_REGEXP_DEBUG_LOG); return false; } #endif @@ -4990,8 +5003,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, abs(start->id), code); fprintf(log_fd, "**********************************\n"); } else { - EMSG(_( - "Could not open temporary log file for writing, displaying on stderr ... ")); + EMSG(_(e_log_open_failed)); log_fd = stderr; } #endif @@ -5030,16 +5042,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, * Run for each character. */ for (;; ) { - int curc; - int clen; - - if (has_mbyte) { - curc = (*mb_ptr2char)(reginput); - clen = (*mb_ptr2len)(reginput); - } else { - curc = *reginput; - clen = 1; - } + int curc = utf_ptr2char(reginput); + int clen = utfc_ptr2len(reginput); if (curc == NUL) { clen = 0; go_to_nextline = false; @@ -5064,8 +5068,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, fprintf(log_fd, "------------------------------------------\n"); fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); fprintf(log_fd, - ">>> Advanced one character ... Current char is %c (code %d) \n", curc, - (int)curc); + ">>> Advanced one character... Current char is %c (code %d) \n", + curc, + (int)curc); fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n); { int i; @@ -5097,16 +5102,17 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, { int col; - if (t->subs.norm.in_use <= 0) + if (t->subs.norm.in_use <= 0) { col = -1; - else if (REG_MULTI) + } else if (REG_MULTI) { col = t->subs.norm.list.multi[0].start_col; - else + } else { col = (int)(t->subs.norm.list.line[0].start - regline); + } nfa_set_code(t->state->c); - fprintf(log_fd, "(%d) char %d %s (start col %d)%s ... \n", - abs(t->state->id), (int)t->state->c, code, col, - pim_info(&t->pim)); + fprintf(log_fd, "(%d) char %d %s (start col %d)%s... \n", + abs(t->state->id), (int)t->state->c, code, col, + pim_info(&t->pim)); } #endif @@ -5501,7 +5507,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // We don't care about the order of composing characters. // Get them into cchars[] first. while (len < clen) { - mc = mb_ptr2char(reginput + len); + mc = utf_ptr2char(reginput + len); cchars[ccount++] = mc; len += mb_char2len(mc); if (ccount == MAX_MCO) @@ -6260,8 +6266,9 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm) nfa_print_state(f, start); fprintf(f, "\n\n"); fclose(f); - } else - EMSG(_("Could not open temporary log file for writing ")); + } else { + EMSG("Could not open temporary log file for writing"); + } #endif clear_sub(&subs.norm); @@ -6487,10 +6494,10 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a"); if (f != NULL) { - fprintf( - f, - "\n*****************************\n\n\n\n\tCompiling regexp \"%s\" ... hold on !\n", - expr); + fprintf(f, + "\n*****************************\n\n\n\n\t" + "Compiling regexp \"%s\"... hold on !\n", + expr); fclose(f); } } |