diff options
Diffstat (limited to 'src/nvim/regexp_nfa.c')
-rw-r--r-- | src/nvim/regexp_nfa.c | 957 |
1 files changed, 548 insertions, 409 deletions
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index f97dce9e0d..08ef7da9c1 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -1,3 +1,6 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + /* * NFA regular expression implementation. * @@ -11,7 +14,6 @@ #include <limits.h> #include "nvim/ascii.h" -#include "nvim/misc2.h" #include "nvim/garray.h" /* @@ -54,13 +56,13 @@ enum { NFA_RANGE_MIN, /* low end of a range */ NFA_RANGE_MAX, /* high end of a range */ - NFA_CONCAT, /* concatenate two previous items (postfix - * only) */ - NFA_OR, /* \| (postfix only) */ - NFA_STAR, /* greedy * (posfix only) */ - NFA_STAR_NONGREEDY, /* non-greedy * (postfix only) */ - NFA_QUEST, /* greedy \? (postfix only) */ - NFA_QUEST_NONGREEDY, /* non-greedy \? (postfix only) */ + NFA_CONCAT, // concatenate two previous items (postfix + // only) + NFA_OR, // \| (postfix only) + NFA_STAR, // greedy * (postfix only) + NFA_STAR_NONGREEDY, // non-greedy * (postfix only) + NFA_QUEST, // greedy \? (postfix only) + NFA_QUEST_NONGREEDY, // non-greedy \? (postfix only) NFA_BOL, /* ^ Begin line */ NFA_EOL, /* $ End line */ @@ -561,10 +563,7 @@ static char_u *nfa_get_match_text(nfa_state_T *start) p = start->out->out; /* skip first char, it goes into regstart */ s = ret; while (p->c > 0) { - if (has_mbyte) - s += (*mb_char2bytes)(p->c, s); - else - *s++ = p->c; + s += utf_char2bytes(p->c, s); p = p->out; } *s = NUL; @@ -632,6 +631,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) config |= CLASS_o7; break; } + return FAIL; case 'a': if (*(p + 2) == 'z') { config |= CLASS_az; @@ -640,6 +640,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) config |= CLASS_af; break; } + return FAIL; case 'A': if (*(p + 2) == 'Z') { config |= CLASS_AZ; @@ -648,7 +649,7 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) config |= CLASS_AF; break; } - /* FALLTHROUGH */ + return FAIL; default: return FAIL; } @@ -724,13 +725,70 @@ static void nfa_emit_equi_class(int c) if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 || STRCMP(p_enc, "iso-8859-15") == 0) { +#define A_grave 0xc0 +#define A_acute 0xc1 +#define A_circumflex 0xc2 +#define A_virguilla 0xc3 +#define A_diaeresis 0xc4 +#define A_ring 0xc5 +#define C_cedilla 0xc7 +#define E_grave 0xc8 +#define E_acute 0xc9 +#define E_circumflex 0xca +#define E_diaeresis 0xcb +#define I_grave 0xcc +#define I_acute 0xcd +#define I_circumflex 0xce +#define I_diaeresis 0xcf +#define N_virguilla 0xd1 +#define O_grave 0xd2 +#define O_acute 0xd3 +#define O_circumflex 0xd4 +#define O_virguilla 0xd5 +#define O_diaeresis 0xd6 +#define O_slash 0xd8 +#define U_grave 0xd9 +#define U_acute 0xda +#define U_circumflex 0xdb +#define U_diaeresis 0xdc +#define Y_acute 0xdd +#define a_grave 0xe0 +#define a_acute 0xe1 +#define a_circumflex 0xe2 +#define a_virguilla 0xe3 +#define a_diaeresis 0xe4 +#define a_ring 0xe5 +#define c_cedilla 0xe7 +#define e_grave 0xe8 +#define e_acute 0xe9 +#define e_circumflex 0xea +#define e_diaeresis 0xeb +#define i_grave 0xec +#define i_acute 0xed +#define i_circumflex 0xee +#define i_diaeresis 0xef +#define n_virguilla 0xf1 +#define o_grave 0xf2 +#define o_acute 0xf3 +#define o_circumflex 0xf4 +#define o_virguilla 0xf5 +#define o_diaeresis 0xf6 +#define o_slash 0xf8 +#define u_grave 0xf9 +#define u_acute 0xfa +#define u_circumflex 0xfb +#define u_diaeresis 0xfc +#define y_acute 0xfd +#define y_diaeresis 0xff switch (c) { - case 'A': case 0300: case 0301: case 0302: - case 0303: case 0304: case 0305: - CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd) - CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2) - EMIT2('A'); EMIT2(0300); EMIT2(0301); EMIT2(0302); - EMIT2(0303); EMIT2(0304); EMIT2(0305); + case 'A': case A_grave: case A_acute: case A_circumflex: + case A_virguilla: case A_diaeresis: case A_ring: + CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) + CASEMBC(0x1cd) CASEMBC(0x1de) CASEMBC(0x1e0) + CASEMBC(0x1ea2) + EMIT2('A'); EMIT2(A_grave); EMIT2(A_acute); + EMIT2(A_circumflex); EMIT2(A_virguilla); + EMIT2(A_diaeresis); EMIT2(A_ring); EMITMBC(0x100) EMITMBC(0x102) EMITMBC(0x104) EMITMBC(0x1cd) EMITMBC(0x1de) EMITMBC(0x1e0) EMITMBC(0x1ea2) @@ -740,23 +798,24 @@ static void nfa_emit_equi_class(int c) EMIT2('B'); EMITMBC(0x1e02) EMITMBC(0x1e06) return; - case 'C': case 0307: - CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c) - EMIT2('C'); EMIT2(0307); EMITMBC(0x106) EMITMBC(0x108) + case 'C': case C_cedilla: CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) + CASEMBC(0x10c) + EMIT2('C'); EMIT2(C_cedilla); EMITMBC(0x106) EMITMBC(0x108) EMITMBC(0x10a) EMITMBC(0x10c) return; case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a) - CASEMBC(0x1e0e) CASEMBC(0x1e10) + CASEMBC(0x1e0e) CASEMBC(0x1e10) EMIT2('D'); EMITMBC(0x10e) EMITMBC(0x110) EMITMBC(0x1e0a) EMITMBC(0x1e0e) EMITMBC(0x1e10) return; - case 'E': case 0310: case 0311: case 0312: case 0313: - CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118) - CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc) - EMIT2('E'); EMIT2(0310); EMIT2(0311); EMIT2(0312); - EMIT2(0313); + case 'E': case E_grave: case E_acute: case E_circumflex: + case E_diaeresis: CASEMBC(0x112) CASEMBC(0x114) + CASEMBC(0x116) CASEMBC(0x118) CASEMBC(0x11a) + CASEMBC(0x1eba) CASEMBC(0x1ebc) + EMIT2('E'); EMIT2(E_grave); EMIT2(E_acute); + EMIT2(E_circumflex); EMIT2(E_diaeresis); EMITMBC(0x112) EMITMBC(0x114) EMITMBC(0x116) EMITMBC(0x118) EMITMBC(0x11a) EMITMBC(0x1eba) EMITMBC(0x1ebc) @@ -767,24 +826,26 @@ static void nfa_emit_equi_class(int c) return; case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120) - CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4) - CASEMBC(0x1e20) + CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) + CASEMBC(0x1f4) CASEMBC(0x1e20) EMIT2('G'); EMITMBC(0x11c) EMITMBC(0x11e) EMITMBC(0x120) EMITMBC(0x122) EMITMBC(0x1e4) EMITMBC(0x1e6) EMITMBC(0x1f4) EMITMBC(0x1e20) return; case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22) - CASEMBC(0x1e26) CASEMBC(0x1e28) + CASEMBC(0x1e26) CASEMBC(0x1e28) EMIT2('H'); EMITMBC(0x124) EMITMBC(0x126) EMITMBC(0x1e22) EMITMBC(0x1e26) EMITMBC(0x1e28) return; - case 'I': case 0314: case 0315: case 0316: case 0317: - CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e) - CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8) - EMIT2('I'); EMIT2(0314); EMIT2(0315); EMIT2(0316); - EMIT2(0317); EMITMBC(0x128) EMITMBC(0x12a) + case 'I': case I_grave: case I_acute: case I_circumflex: + case I_diaeresis: CASEMBC(0x128) CASEMBC(0x12a) + CASEMBC(0x12c) CASEMBC(0x12e) CASEMBC(0x130) + CASEMBC(0x1cf) CASEMBC(0x1ec8) + EMIT2('I'); EMIT2(I_grave); EMIT2(I_acute); + EMIT2(I_circumflex); EMIT2(I_diaeresis); + EMITMBC(0x128) EMITMBC(0x12a) EMITMBC(0x12c) EMITMBC(0x12e) EMITMBC(0x130) EMITMBC(0x1cf) EMITMBC(0x1ec8) return; @@ -794,13 +855,13 @@ static void nfa_emit_equi_class(int c) return; case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30) - CASEMBC(0x1e34) + CASEMBC(0x1e34) EMIT2('K'); EMITMBC(0x136) EMITMBC(0x1e8) EMITMBC(0x1e30) EMITMBC(0x1e34) return; case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d) - CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a) + CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a) EMIT2('L'); EMITMBC(0x139) EMITMBC(0x13b) EMITMBC(0x13d) EMITMBC(0x13f) EMITMBC(0x141) EMITMBC(0x1e3a) return; @@ -809,19 +870,21 @@ static void nfa_emit_equi_class(int c) EMIT2('M'); EMITMBC(0x1e3e) EMITMBC(0x1e40) return; - case 'N': case 0321: - CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44) - CASEMBC(0x1e48) - EMIT2('N'); EMIT2(0321); EMITMBC(0x143) EMITMBC(0x145) + case 'N': case N_virguilla: CASEMBC(0x143) CASEMBC(0x145) + CASEMBC(0x147) CASEMBC(0x1e44) CASEMBC(0x1e48) + EMIT2('N'); EMIT2(N_virguilla); + EMITMBC(0x143) EMITMBC(0x145) EMITMBC(0x147) EMITMBC(0x1e44) EMITMBC(0x1e48) return; - case 'O': case 0322: case 0323: case 0324: case 0325: - case 0326: case 0330: - CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0) - CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece) - EMIT2('O'); EMIT2(0322); EMIT2(0323); EMIT2(0324); - EMIT2(0325); EMIT2(0326); EMIT2(0330); + case 'O': case O_grave: case O_acute: case O_circumflex: + case O_virguilla: case O_diaeresis: case O_slash: + CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) + CASEMBC(0x1a0) CASEMBC(0x1d1) CASEMBC(0x1ea) + CASEMBC(0x1ec) CASEMBC(0x1ece) + EMIT2('O'); EMIT2(O_grave); EMIT2(O_acute); + EMIT2(O_circumflex); EMIT2(O_virguilla); + EMIT2(O_diaeresis); EMIT2(O_slash); EMITMBC(0x14c) EMITMBC(0x14e) EMITMBC(0x150) EMITMBC(0x1a0) EMITMBC(0x1d1) EMITMBC(0x1ea) EMITMBC(0x1ec) EMITMBC(0x1ece) @@ -832,29 +895,31 @@ static void nfa_emit_equi_class(int c) return; case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158) - CASEMBC(0x1e58) CASEMBC(0x1e5e) + CASEMBC(0x1e58) CASEMBC(0x1e5e) EMIT2('R'); EMITMBC(0x154) EMITMBC(0x156) EMITMBC(0x158) EMITMBC(0x1e58) EMITMBC(0x1e5e) return; case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e) - CASEMBC(0x160) CASEMBC(0x1e60) + CASEMBC(0x160) CASEMBC(0x1e60) EMIT2('S'); EMITMBC(0x15a) EMITMBC(0x15c) EMITMBC(0x15e) EMITMBC(0x160) EMITMBC(0x1e60) return; case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166) - CASEMBC(0x1e6a) CASEMBC(0x1e6e) + CASEMBC(0x1e6a) CASEMBC(0x1e6e) EMIT2('T'); EMITMBC(0x162) EMITMBC(0x164) EMITMBC(0x166) EMITMBC(0x1e6a) EMITMBC(0x1e6e) return; - case 'U': case 0331: case 0332: case 0333: case 0334: - CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e) - CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) - CASEMBC(0x1ee6) - EMIT2('U'); EMIT2(0331); EMIT2(0332); EMIT2(0333); - EMIT2(0334); EMITMBC(0x168) EMITMBC(0x16a) + case 'U': case U_grave: case U_acute: case U_diaeresis: + case U_circumflex: CASEMBC(0x168) CASEMBC(0x16a) + CASEMBC(0x16c) CASEMBC(0x16e) CASEMBC(0x170) + CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) + CASEMBC(0x1ee6) + EMIT2('U'); EMIT2(U_grave); EMIT2(U_acute); + EMIT2(U_diaeresis); EMIT2(U_circumflex); + EMITMBC(0x168) EMITMBC(0x16a) EMITMBC(0x16c) EMITMBC(0x16e) EMITMBC(0x170) EMITMBC(0x172) EMITMBC(0x1af) EMITMBC(0x1d3) EMITMBC(0x1ee6) @@ -865,7 +930,7 @@ static void nfa_emit_equi_class(int c) return; case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82) - CASEMBC(0x1e84) CASEMBC(0x1e86) + CASEMBC(0x1e84) CASEMBC(0x1e86) EMIT2('W'); EMITMBC(0x174) EMITMBC(0x1e80) EMITMBC(0x1e82) EMITMBC(0x1e84) EMITMBC(0x1e86) return; @@ -874,26 +939,29 @@ static void nfa_emit_equi_class(int c) EMIT2('X'); EMITMBC(0x1e8a) EMITMBC(0x1e8c) return; - case 'Y': case 0335: - CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2) - CASEMBC(0x1ef6) CASEMBC(0x1ef8) - EMIT2('Y'); EMIT2(0335); EMITMBC(0x176) EMITMBC(0x178) + case 'Y': case Y_acute: CASEMBC(0x176) CASEMBC(0x178) + CASEMBC(0x1e8e) CASEMBC(0x1ef2) CASEMBC(0x1ef6) + CASEMBC(0x1ef8) + EMIT2('Y'); EMIT2(Y_acute); + EMITMBC(0x176) EMITMBC(0x178) EMITMBC(0x1e8e) EMITMBC(0x1ef2) EMITMBC(0x1ef6) EMITMBC(0x1ef8) return; case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d) - CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94) + CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94) EMIT2('Z'); EMITMBC(0x179) EMITMBC(0x17b) EMITMBC(0x17d) EMITMBC(0x1b5) EMITMBC(0x1e90) EMITMBC(0x1e94) return; - case 'a': case 0340: case 0341: case 0342: - case 0343: case 0344: case 0345: - CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce) - CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3) - EMIT2('a'); EMIT2(0340); EMIT2(0341); EMIT2(0342); - EMIT2(0343); EMIT2(0344); EMIT2(0345); + case 'a': case a_grave: case a_acute: case a_circumflex: + case a_virguilla: case a_diaeresis: case a_ring: + CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) + CASEMBC(0x1ce) CASEMBC(0x1df) CASEMBC(0x1e1) + CASEMBC(0x1ea3) + EMIT2('a'); EMIT2(a_grave); EMIT2(a_acute); + EMIT2(a_circumflex); EMIT2(a_virguilla); + EMIT2(a_diaeresis); EMIT2(a_ring); EMITMBC(0x101) EMITMBC(0x103) EMITMBC(0x105) EMITMBC(0x1ce) EMITMBC(0x1df) EMITMBC(0x1e1) EMITMBC(0x1ea3) @@ -903,23 +971,26 @@ static void nfa_emit_equi_class(int c) EMIT2('b'); EMITMBC(0x1e03) EMITMBC(0x1e07) return; - case 'c': case 0347: - CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d) - EMIT2('c'); EMIT2(0347); EMITMBC(0x107) EMITMBC(0x109) + case 'c': case c_cedilla: CASEMBC(0x107) CASEMBC(0x109) + CASEMBC(0x10b) CASEMBC(0x10d) + EMIT2('c'); EMIT2(c_cedilla); + EMITMBC(0x107) EMITMBC(0x109) EMITMBC(0x10b) EMITMBC(0x10d) return; case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b) - CASEMBC(0x1e0f) CASEMBC(0x1e11) + CASEMBC(0x1e0f) CASEMBC(0x1e11) EMIT2('d'); EMITMBC(0x10f) EMITMBC(0x111) EMITMBC(0x1e0b) EMITMBC(0x1e0f) EMITMBC(0x1e11) return; - case 'e': case 0350: case 0351: case 0352: case 0353: - CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119) - CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd) - EMIT2('e'); EMIT2(0350); EMIT2(0351); EMIT2(0352); - EMIT2(0353); EMITMBC(0x113) EMITMBC(0x115) + case 'e': case e_grave: case e_acute: case e_circumflex: + case e_diaeresis: CASEMBC(0x113) CASEMBC(0x115) + CASEMBC(0x117) CASEMBC(0x119) CASEMBC(0x11b) + CASEMBC(0x1ebb) CASEMBC(0x1ebd) + EMIT2('e'); EMIT2(e_grave); EMIT2(e_acute); + EMIT2(e_circumflex); EMIT2(e_diaeresis); + EMITMBC(0x113) EMITMBC(0x115) EMITMBC(0x117) EMITMBC(0x119) EMITMBC(0x11b) EMITMBC(0x1ebb) EMITMBC(0x1ebd) return; @@ -929,24 +1000,26 @@ static void nfa_emit_equi_class(int c) return; case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121) - CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5) - CASEMBC(0x1e21) + CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) + CASEMBC(0x1f5) CASEMBC(0x1e21) EMIT2('g'); EMITMBC(0x11d) EMITMBC(0x11f) EMITMBC(0x121) EMITMBC(0x123) EMITMBC(0x1e5) EMITMBC(0x1e7) EMITMBC(0x1f5) EMITMBC(0x1e21) return; case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23) - CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96) + CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96) EMIT2('h'); EMITMBC(0x125) EMITMBC(0x127) EMITMBC(0x1e23) EMITMBC(0x1e27) EMITMBC(0x1e29) EMITMBC(0x1e96) return; - case 'i': case 0354: case 0355: case 0356: case 0357: - CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f) - CASEMBC(0x1d0) CASEMBC(0x1ec9) - EMIT2('i'); EMIT2(0354); EMIT2(0355); EMIT2(0356); - EMIT2(0357); EMITMBC(0x129) EMITMBC(0x12b) + case 'i': case i_grave: case i_acute: case i_circumflex: + case i_diaeresis: CASEMBC(0x129) CASEMBC(0x12b) + CASEMBC(0x12d) CASEMBC(0x12f) CASEMBC(0x1d0) + CASEMBC(0x1ec9) + EMIT2('i'); EMIT2(i_grave); EMIT2(i_acute); + EMIT2(i_circumflex); EMIT2(i_diaeresis); + EMITMBC(0x129) EMITMBC(0x12b) EMITMBC(0x12d) EMITMBC(0x12f) EMITMBC(0x1d0) EMITMBC(0x1ec9) return; @@ -956,13 +1029,13 @@ static void nfa_emit_equi_class(int c) return; case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31) - CASEMBC(0x1e35) + CASEMBC(0x1e35) EMIT2('k'); EMITMBC(0x137) EMITMBC(0x1e9) EMITMBC(0x1e31) EMITMBC(0x1e35) return; case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e) - CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b) + CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b) EMIT2('l'); EMITMBC(0x13a) EMITMBC(0x13c) EMITMBC(0x13e) EMITMBC(0x140) EMITMBC(0x142) EMITMBC(0x1e3b) return; @@ -971,20 +1044,23 @@ static void nfa_emit_equi_class(int c) EMIT2('m'); EMITMBC(0x1e3f) EMITMBC(0x1e41) return; - case 'n': case 0361: - CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149) - CASEMBC(0x1e45) CASEMBC(0x1e49) - EMIT2('n'); EMIT2(0361); EMITMBC(0x144) EMITMBC(0x146) + case 'n': case n_virguilla: CASEMBC(0x144) CASEMBC(0x146) + CASEMBC(0x148) CASEMBC(0x149) CASEMBC(0x1e45) + CASEMBC(0x1e49) + EMIT2('n'); EMIT2(n_virguilla); + EMITMBC(0x144) EMITMBC(0x146) EMITMBC(0x148) EMITMBC(0x149) EMITMBC(0x1e45) EMITMBC(0x1e49) return; - case 'o': case 0362: case 0363: case 0364: case 0365: - case 0366: case 0370: - CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1) - CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf) - EMIT2('o'); EMIT2(0362); EMIT2(0363); EMIT2(0364); - EMIT2(0365); EMIT2(0366); EMIT2(0370); + case 'o': case o_grave: case o_acute: case o_circumflex: + case o_virguilla: case o_diaeresis: case o_slash: + CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) + CASEMBC(0x1a1) CASEMBC(0x1d2) CASEMBC(0x1eb) + CASEMBC(0x1ed) CASEMBC(0x1ecf) + EMIT2('o'); EMIT2(o_grave); EMIT2(o_acute); + EMIT2(o_circumflex); EMIT2(o_virguilla); + EMIT2(o_diaeresis); EMIT2(o_slash); EMITMBC(0x14d) EMITMBC(0x14f) EMITMBC(0x151) EMITMBC(0x1a1) EMITMBC(0x1d2) EMITMBC(0x1eb) EMITMBC(0x1ed) EMITMBC(0x1ecf) @@ -995,29 +1071,31 @@ static void nfa_emit_equi_class(int c) return; case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159) - CASEMBC(0x1e59) CASEMBC(0x1e5f) + CASEMBC(0x1e59) CASEMBC(0x1e5f) EMIT2('r'); EMITMBC(0x155) EMITMBC(0x157) EMITMBC(0x159) EMITMBC(0x1e59) EMITMBC(0x1e5f) return; case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f) - CASEMBC(0x161) CASEMBC(0x1e61) + CASEMBC(0x161) CASEMBC(0x1e61) EMIT2('s'); EMITMBC(0x15b) EMITMBC(0x15d) EMITMBC(0x15f) EMITMBC(0x161) EMITMBC(0x1e61) return; case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167) - CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97) + CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97) EMIT2('t'); EMITMBC(0x163) EMITMBC(0x165) EMITMBC(0x167) EMITMBC(0x1e6b) EMITMBC(0x1e6f) EMITMBC(0x1e97) return; - case 'u': case 0371: case 0372: case 0373: case 0374: - CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f) - CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) - CASEMBC(0x1ee7) - EMIT2('u'); EMIT2(0371); EMIT2(0372); EMIT2(0373); - EMIT2(0374); EMITMBC(0x169) EMITMBC(0x16b) + case 'u': case u_grave: case u_acute: case u_circumflex: + case u_diaeresis: CASEMBC(0x169) CASEMBC(0x16b) + CASEMBC(0x16d) CASEMBC(0x16f) CASEMBC(0x171) + CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) + CASEMBC(0x1ee7) + EMIT2('u'); EMIT2(u_grave); EMIT2(u_acute); + EMIT2(u_circumflex); EMIT2(u_diaeresis); + EMITMBC(0x169) EMITMBC(0x16b) EMITMBC(0x16d) EMITMBC(0x16f) EMITMBC(0x171) EMITMBC(0x173) EMITMBC(0x1b0) EMITMBC(0x1d4) EMITMBC(0x1ee7) @@ -1028,7 +1106,7 @@ static void nfa_emit_equi_class(int c) return; case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83) - CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98) + CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98) EMIT2('w'); EMITMBC(0x175) EMITMBC(0x1e81) EMITMBC(0x1e83) EMITMBC(0x1e85) EMITMBC(0x1e87) EMITMBC(0x1e98) return; @@ -1037,16 +1115,17 @@ static void nfa_emit_equi_class(int c) EMIT2('x'); EMITMBC(0x1e8b) EMITMBC(0x1e8d) return; - case 'y': case 0375: case 0377: - CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99) - CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9) - EMIT2('y'); EMIT2(0375); EMIT2(0377); EMITMBC(0x177) + case 'y': case y_acute: case y_diaeresis: CASEMBC(0x177) + CASEMBC(0x1e8f) CASEMBC(0x1e99) CASEMBC(0x1ef3) + CASEMBC(0x1ef7) CASEMBC(0x1ef9) + EMIT2('y'); EMIT2(y_acute); EMIT2(y_diaeresis); + EMITMBC(0x177) EMITMBC(0x1e8f) EMITMBC(0x1e99) EMITMBC(0x1ef3) EMITMBC(0x1ef7) EMITMBC(0x1ef9) return; case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e) - CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95) + CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95) EMIT2('z'); EMITMBC(0x17a) EMITMBC(0x17c) EMITMBC(0x17e) EMITMBC(0x1b6) EMITMBC(0x1e91) EMITMBC(0x1e95) return; @@ -1096,6 +1175,7 @@ static int nfa_regatom(void) int startc = -1; int endc = -1; int oldstartc = -1; + int save_prev_at_start = prev_at_start; c = getchr(); switch (c) { @@ -1140,8 +1220,8 @@ static int nfa_regatom(void) if (c == '[') goto collection; - /* "\_x" is character class plus newline */ - /*FALLTHROUGH*/ + // "\_x" is character class plus newline + FALLTHROUGH; /* * Character classes. @@ -1180,7 +1260,7 @@ static int nfa_regatom(void) rc_did_emsg = TRUE; return FAIL; } - EMSGN("INTERNAL: Unknown character class char: %" PRId64, c); + IEMSGN("INTERNAL: Unknown character class char: %" PRId64, c); return FAIL; } /* When '.' is followed by a composing char ignore the dot, so that @@ -1240,7 +1320,7 @@ static int nfa_regatom(void) EMSG(_(e_nopresub)); return FAIL; } - for (lp = reg_prev_sub; *lp != NUL; mb_cptr_adv(lp)) { + for (lp = reg_prev_sub; *lp != NUL; MB_CPTR_ADV(lp)) { EMIT(PTR2CHAR(lp)); if (lp != reg_prev_sub) EMIT(NFA_CONCAT); @@ -1287,25 +1367,28 @@ static int nfa_regatom(void) case '7': case '8': case '9': - /* \z1...\z9 */ - if (reg_do_extmatch != REX_USE) + // \z1...\z9 + if ((reg_do_extmatch & REX_USE) == 0) { EMSG_RET_FAIL(_(e_z1_not_allowed)); + } EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); /* No need to set nfa_has_backref, the sub-matches don't * change when \z1 .. \z9 matches or not. */ re_has_z = REX_USE; break; case '(': - /* \z( */ - if (reg_do_extmatch != REX_SET) + // \z( + if (reg_do_extmatch != REX_SET) { EMSG_RET_FAIL(_(e_z_not_allowed)); - if (nfa_reg(REG_ZPAREN) == FAIL) - return FAIL; /* cascaded error */ + } + if (nfa_reg(REG_ZPAREN) == FAIL) { + return FAIL; // cascaded error + } re_has_z = REX_SET; break; default: - EMSGN(_("E867: (NFA) Unknown operator '\\z%c'"), - no_Magic(c)); + emsgf(_("E867: (NFA) Unknown operator '\\z%c'"), + no_Magic(c)); return FAIL; } break; @@ -1326,7 +1409,7 @@ static int nfa_regatom(void) case 'u': /* %uabcd hex 4 */ case 'U': /* %U1234abcd hex 8 */ { - int nr; + int64_t nr; switch (c) { case 'd': nr = getdecchrs(); break; @@ -1402,7 +1485,7 @@ static int nfa_regatom(void) default: { - int n = 0; + long n = 0; int cmp = c; if (c == '<' || c == '>') @@ -1412,19 +1495,29 @@ static int nfa_regatom(void) c = getchr(); } if (c == 'l' || c == 'c' || c == 'v') { - if (c == 'l') - /* \%{n}l \%{n}<l \%{n}>l */ + if (c == 'l') { + // \%{n}l \%{n}<l \%{n}>l EMIT(cmp == '<' ? NFA_LNUM_LT : - cmp == '>' ? NFA_LNUM_GT : NFA_LNUM); - else if (c == 'c') - /* \%{n}c \%{n}<c \%{n}>c */ + cmp == '>' ? NFA_LNUM_GT : NFA_LNUM); + if (save_prev_at_start) { + at_start = true; + } + } else if (c == 'c') { + // \%{n}c \%{n}<c \%{n}>c EMIT(cmp == '<' ? NFA_COL_LT : - cmp == '>' ? NFA_COL_GT : NFA_COL); - else - /* \%{n}v \%{n}<v \%{n}>v */ + cmp == '>' ? NFA_COL_GT : NFA_COL); + } else { + // \%{n}v \%{n}<v \%{n}>v EMIT(cmp == '<' ? NFA_VCOL_LT : - cmp == '>' ? NFA_VCOL_GT : NFA_VCOL); - EMIT(n); + cmp == '>' ? NFA_VCOL_GT : NFA_VCOL); + } +#if SIZEOF_INT < SIZEOF_LONG + if (n > INT_MAX) { + EMSG(_("E951: \\% value too large")); + return FAIL; + } +#endif + EMIT((int)n); break; } else if (c == '\'' && n == 0) { /* \%'m \%<'m \%>'m */ @@ -1434,8 +1527,8 @@ static int nfa_regatom(void) break; } } - EMSGN(_("E867: (NFA) Unknown operator '\\%%%c'"), - no_Magic(c)); + emsgf(_("E867: (NFA) Unknown operator '\\%%%c'"), + no_Magic(c)); return FAIL; } break; @@ -1470,7 +1563,7 @@ collection: } else EMIT(result); regparse = endp; - mb_ptr_adv(regparse); + MB_PTR_ADV(regparse); return OK; } /* @@ -1478,10 +1571,10 @@ collection: * version that turns [abc] into 'a' OR 'b' OR 'c' */ startc = endc = oldstartc = -1; - negated = FALSE; - if (*regparse == '^') { /* negated range */ - negated = TRUE; - mb_ptr_adv(regparse); + negated = false; + if (*regparse == '^') { // negated range + negated = true; + MB_PTR_ADV(regparse); EMIT(NFA_START_NEG_COLL); } else EMIT(NFA_START_COLL); @@ -1489,7 +1582,7 @@ collection: startc = '-'; EMIT(startc); EMIT(NFA_CONCAT); - mb_ptr_adv(regparse); + MB_PTR_ADV(regparse); } /* Emit the OR branches for each character in the [] */ emit_range = FALSE; @@ -1579,8 +1672,8 @@ collection: if (*regparse == '-' && oldstartc != -1) { emit_range = TRUE; startc = oldstartc; - mb_ptr_adv(regparse); - continue; /* reading the end of the range */ + MB_PTR_ADV(regparse); + continue; // reading the end of the range } /* Now handle simple and escaped characters. @@ -1596,7 +1689,7 @@ collection: != NULL) ) ) { - mb_ptr_adv(regparse); + MB_PTR_ADV(regparse); if (*regparse == 'n') startc = reg_string ? NL : NFA_NEWL; @@ -1608,8 +1701,8 @@ collection: ) { /* TODO(RE) This needs more testing */ startc = coll_get_char(); - got_coll_char = TRUE; - mb_ptr_back(old_regparse, regparse); + got_coll_char = true; + MB_PTR_BACK(old_regparse, regparse); } else { /* \r,\t,\e,\b */ startc = backslash_trans(*regparse); @@ -1624,8 +1717,9 @@ collection: if (emit_range) { endc = startc; startc = oldstartc; - if (startc > endc) - EMSG_RET_FAIL(_(e_invrange)); + if (startc > endc) { + EMSG_RET_FAIL(_(e_reverse_range)); + } if (endc > startc + 2) { /* Emit a range instead of the sequence of @@ -1680,18 +1774,18 @@ collection: } } - mb_ptr_adv(regparse); - } /* while (p < endp) */ + MB_PTR_ADV(regparse); + } // while (p < endp) - mb_ptr_back(old_regparse, regparse); - if (*regparse == '-') { /* if last, '-' is just a char */ + MB_PTR_BACK(old_regparse, regparse); + if (*regparse == '-') { // if last, '-' is just a char EMIT('-'); EMIT(NFA_CONCAT); } /* skip the trailing ] */ regparse = endp; - mb_ptr_adv(regparse); + MB_PTR_ADV(regparse); /* Mark end of the collection. */ if (negated == TRUE) @@ -1710,16 +1804,16 @@ collection: if (reg_strict) EMSG_RET_FAIL(_(e_missingbracket)); - /* FALLTHROUGH */ + FALLTHROUGH; default: { int plen; nfa_do_multibyte: - /* plen is length of current char with composing chars */ + // plen is length of current char with composing chars if (enc_utf8 && ((*mb_char2len)(c) - != (plen = (*mb_ptr2len)(old_regparse)) + != (plen = utfc_ptr2len(old_regparse)) || utf_iscomposing(c))) { int i = 0; @@ -1771,7 +1865,7 @@ static int nfa_regpiece(void) int greedy = TRUE; /* Braces are prefixed with '-' ? */ parse_state_T old_state; parse_state_T new_state; - int c2; + int64_t c2; int old_post_pos; int my_post_start; int quest; @@ -1846,7 +1940,7 @@ static int nfa_regpiece(void) break; } if (i == 0) { - EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op); + emsgf(_("E869: (NFA) Unknown operator '\\@%c'"), op); return FAIL; } EMIT(i); @@ -1901,7 +1995,7 @@ static int nfa_regpiece(void) // The engine is very inefficient (uses too many states) when the maximum // is much larger than the minimum and when the maximum is large. Bail out // if we can use the other engine. - if ((nfa_re_flags & RE_AUTO) && (maxval > minval + 200 || maxval > 500)) { + if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) { return FAIL; } @@ -1944,9 +2038,10 @@ static int nfa_regpiece(void) break; } /* end switch */ - if (re_multi_type(peekchr()) != NOT_MULTI) - /* Can't have a multi follow a multi. */ - EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi !")); + if (re_multi_type(peekchr()) != NOT_MULTI) { + // Can't have a multi follow a multi. + EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi")); + } return OK; } @@ -2036,7 +2131,6 @@ static int nfa_regconcat(void) */ static int nfa_regbranch(void) { - int ch; int old_post_pos; old_post_pos = (int)(post_ptr - post_start); @@ -2045,10 +2139,13 @@ static int nfa_regbranch(void) if (nfa_regconcat() == FAIL) return FAIL; - ch = peekchr(); - /* Try next concats */ - while (ch == Magic('&')) { + // Try next concats + while (peekchr() == Magic('&')) { skipchr(); + // if concat is empty do emit a node + if (old_post_pos == (int)(post_ptr - post_start)) { + EMIT(NFA_EMPTY); + } EMIT(NFA_NOPEN); EMIT(NFA_PREV_ATOM_NO_WIDTH); old_post_pos = (int)(post_ptr - post_start); @@ -2058,7 +2155,6 @@ static int nfa_regbranch(void) if (old_post_pos == (int)(post_ptr - post_start)) EMIT(NFA_EMPTY); EMIT(NFA_CONCAT); - ch = peekchr(); } /* if a branch is empty, emit one node for it */ @@ -2359,6 +2455,8 @@ static void nfa_set_code(int c) } static FILE *log_fd; +static char_u e_log_open_failed[] = N_( + "Could not open temporary log file for writing, displaying on stderr... "); /* * Print the postfix notation of the current regexp. @@ -2371,10 +2469,11 @@ static void nfa_postfix_dump(char_u *expr, int retval) f = fopen(NFA_REGEXP_DUMP_LOG, "a"); if (f != NULL) { fprintf(f, "\n-------------------------\n"); - if (retval == FAIL) - fprintf(f, ">>> NFA engine failed ... \n"); - else if (retval == OK) + if (retval == FAIL) { + fprintf(f, ">>> NFA engine failed... \n"); + } else if (retval == OK) { fprintf(f, ">>> NFA engine succeeded !\n"); + } fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr); for (p = post_start; *p && p < post_ptr; p++) { nfa_set_code(*p); @@ -2628,7 +2727,7 @@ static void st_error(int *postfix, int *end, int *p) fclose(df); } #endif - EMSG(_("E874: (NFA) Could not pop the stack !")); + EMSG(_("E874: (NFA) Could not pop the stack!")); } /* @@ -2690,15 +2789,10 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_ANY: case NFA_START_COLL: case NFA_START_NEG_COLL: - /* matches some character, including composing chars */ - if (enc_utf8) - len += MB_MAXBYTES; - else if (has_mbyte) - len += 2; - else - ++len; + // Matches some character, including composing chars. + len += MB_MAXBYTES; if (state->c != NFA_ANY) { - /* skip over the characters */ + // Skip over the characters. state = state->out1->out; continue; } @@ -3141,7 +3235,13 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) if (pattern) { /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */ skip = alloc_state(NFA_SKIP, NULL, NULL); + if (skip == NULL) { + goto theend; + } zend = alloc_state(NFA_ZEND, s1, NULL); + if (zend == NULL) { + goto theend; + } s1->out= skip; patch(e.out, zend); PUSH(frag(s, list1(&skip->out))); @@ -3159,8 +3259,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; } - case NFA_COMPOSING: /* char with composing char */ - /* FALLTHROUGH */ + case NFA_COMPOSING: // char with composing char + FALLTHROUGH; case NFA_MOPEN: /* \( \) Submatch */ case NFA_MOPEN1: @@ -3811,23 +3911,27 @@ state_in_list ( return FALSE; } -/* - * Add "state" and possibly what follows to state list ".". - * Returns "subs_arg", possibly copied into temp_subs. - */ +// Offset used for "off" by addstate_here(). +#define ADDSTATE_HERE_OFFSET 10 +// Add "state" and possibly what follows to state list ".". +// Returns "subs_arg", possibly copied into temp_subs. static regsubs_T * addstate ( nfa_list_T *l, /* runtime state list */ nfa_state_T *state, /* state to update */ regsubs_T *subs_arg, /* pointers to subexpressions */ nfa_pim_T *pim, /* postponed look-behind match */ - int off /* byte offset, when -1 go to next line */ -) + int off_arg) /* byte offset, when -1 go to next line */ { int subidx; + int off = off_arg; + int add_here = FALSE; + int listindex = 0; + int k; + int found = FALSE; nfa_thread_T *thread; - lpos_T save_lpos; + struct multipos save_multipos; int save_in_use; char_u *save_ptr; int i; @@ -3838,6 +3942,12 @@ addstate ( int did_print = FALSE; #endif + if (off_arg <= -ADDSTATE_HERE_OFFSET) { + add_here = true; + off = 0; + listindex = -(off_arg + ADDSTATE_HERE_OFFSET); + } + switch (state->c) { case NFA_NCLOSE: case NFA_MCLOSE: @@ -3879,7 +3989,7 @@ addstate ( || !REG_MULTI || reglnum == nfa_endp->se_u.pos.lnum)) goto skip_add; - /* FALLTHROUGH */ + FALLTHROUGH; case NFA_MOPEN1: case NFA_MOPEN2: @@ -3914,13 +4024,28 @@ addstate ( * lower position is preferred. */ if (!nfa_has_backref && pim == NULL && !l->has_pim && state->c != NFA_MATCH) { + + /* When called from addstate_here() do insert before + * existing states. */ + if (add_here) { + for (k = 0; k < l->n && k < listindex; ++k) { + if (l->t[k].state->id == state->id) { + found = TRUE; + break; + } + } + } + + if (!add_here || found) { skip_add: #ifdef REGEXP_DEBUG - nfa_set_code(state->c); - fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s\n", - abs(state->id), l->id, state->c, code); + nfa_set_code(state->c); + fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s pim: %s has_pim: %d found: %d\n", + abs(state->id), l->id, state->c, code, + pim == NULL ? "NULL" : "yes", l->has_pim, found); #endif return subs; + } } /* Do not add the state again when it exists with the same @@ -3976,14 +4101,14 @@ skip_add: case NFA_SPLIT: /* order matters here */ - subs = addstate(l, state->out, subs, pim, off); - subs = addstate(l, state->out1, subs, pim, off); + subs = addstate(l, state->out, subs, pim, off_arg); + subs = addstate(l, state->out1, subs, pim, off_arg); break; case NFA_EMPTY: case NFA_NOPEN: case NFA_NCLOSE: - subs = addstate(l, state->out, subs, pim, off); + subs = addstate(l, state->out, subs, pim, off_arg); break; case NFA_MOPEN: @@ -4010,7 +4135,7 @@ skip_add: if (state->c == NFA_ZSTART) { subidx = 0; sub = &subs->norm; - } else if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9) { + } else if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9) { // -V560 subidx = state->c - NFA_ZOPEN; sub = &subs->synt; } else { @@ -4020,15 +4145,13 @@ skip_add: /* avoid compiler warnings */ save_ptr = NULL; - save_lpos.lnum = 0; - save_lpos.col = 0; + memset(&save_multipos, 0, sizeof(save_multipos)); /* Set the position (with "off" added) in the subexpression. Save * and restore it when it was in use. Otherwise fill any gap. */ if (REG_MULTI) { if (subidx < sub->in_use) { - save_lpos.lnum = sub->list.multi[subidx].start_lnum; - save_lpos.col = sub->list.multi[subidx].start_col; + save_multipos = sub->list.multi[subidx]; save_in_use = -1; } else { save_in_use = sub->in_use; @@ -4063,17 +4186,17 @@ skip_add: sub->list.line[subidx].start = reginput + off; } - subs = addstate(l, state->out, subs, pim, off); - /* "subs" may have changed, need to set "sub" again */ - if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9) + subs = addstate(l, state->out, subs, pim, off_arg); + // "subs" may have changed, need to set "sub" again. + if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9) { // -V560 sub = &subs->synt; - else + } else { sub = &subs->norm; + } if (save_in_use == -1) { - if (REG_MULTI){ - sub->list.multi[subidx].start_lnum = save_lpos.lnum; - sub->list.multi[subidx].start_col = save_lpos.col; + if (REG_MULTI) { + sub->list.multi[subidx] = save_multipos; } else sub->list.line[subidx].start = save_ptr; @@ -4086,9 +4209,10 @@ skip_add: ? subs->norm.list.multi[0].end_lnum >= 0 : subs->norm.list.line[0].end != NULL)) { /* Do not overwrite the position set by \ze. */ - subs = addstate(l, state->out, subs, pim, off); + subs = addstate(l, state->out, subs, pim, off_arg); break; } + FALLTHROUGH; case NFA_MCLOSE1: case NFA_MCLOSE2: case NFA_MCLOSE3: @@ -4112,7 +4236,7 @@ skip_add: if (state->c == NFA_ZEND) { subidx = 0; sub = &subs->norm; - } else if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9) { + } else if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9) { // -V560 subidx = state->c - NFA_ZCLOSE; sub = &subs->synt; } else { @@ -4126,8 +4250,7 @@ skip_add: if (sub->in_use <= subidx) sub->in_use = subidx + 1; if (REG_MULTI) { - save_lpos.lnum = sub->list.multi[subidx].end_lnum; - save_lpos.col = sub->list.multi[subidx].end_col; + save_multipos = sub->list.multi[subidx]; if (off == -1) { sub->list.multi[subidx].end_lnum = reglnum + 1; sub->list.multi[subidx].end_col = 0; @@ -4141,21 +4264,20 @@ skip_add: } else { save_ptr = sub->list.line[subidx].end; sub->list.line[subidx].end = reginput + off; - /* avoid compiler warnings */ - save_lpos.lnum = 0; - save_lpos.col = 0; + // avoid compiler warnings + memset(&save_multipos, 0, sizeof(save_multipos)); } - subs = addstate(l, state->out, subs, pim, off); - /* "subs" may have changed, need to set "sub" again */ - if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9) + subs = addstate(l, state->out, subs, pim, off_arg); + // "subs" may have changed, need to set "sub" again. + if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9) { // -V560 sub = &subs->synt; - else + } else { sub = &subs->norm; + } - if (REG_MULTI){ - sub->list.multi[subidx].end_lnum = save_lpos.lnum; - sub->list.multi[subidx].end_col = save_lpos.col; + if (REG_MULTI) { + sub->list.multi[subidx] = save_multipos; } else sub->list.line[subidx].end = save_ptr; @@ -4184,8 +4306,10 @@ addstate_here ( int count; int listidx = *ip; - /* first add the state(s) at the end, so that we know how many there are */ - addstate(l, state, subs, pim, 0); + /* First add the state(s) at the end, so that we know how many there are. + * Pass the listidx as offset (avoids adding another argument to + * addstate(). */ + addstate(l, state, subs, pim, -listidx - ADDSTATE_HERE_OFFSET); /* when "*ip" was at the end of the list, nothing to do */ if (listidx + 1 == tlen) @@ -4238,48 +4362,55 @@ static int check_char_class(int class, int c) { switch (class) { case NFA_CLASS_ALNUM: - if (c >= 1 && c <= 255 && isalnum(c)) + if (c >= 1 && c < 128 && isalnum(c)) { return OK; + } break; case NFA_CLASS_ALPHA: - if (c >= 1 && c <= 255 && isalpha(c)) + if (c >= 1 && c < 128 && isalpha(c)) { return OK; + } break; case NFA_CLASS_BLANK: if (c == ' ' || c == '\t') return OK; break; case NFA_CLASS_CNTRL: - if (c >= 1 && c <= 255 && iscntrl(c)) + if (c >= 1 && c <= 127 && iscntrl(c)) { return OK; + } break; case NFA_CLASS_DIGIT: if (ascii_isdigit(c)) return OK; break; case NFA_CLASS_GRAPH: - if (c >= 1 && c <= 255 && isgraph(c)) + if (c >= 1 && c <= 127 && isgraph(c)) { return OK; + } break; case NFA_CLASS_LOWER: - if (vim_islower(c)) + if (mb_islower(c) && c != 170 && c != 186) { return OK; + } break; case NFA_CLASS_PRINT: if (vim_isprintc(c)) return OK; break; case NFA_CLASS_PUNCT: - if (c >= 1 && c <= 255 && ispunct(c)) + if (c >= 1 && c < 128 && ispunct(c)) { return OK; + } break; case NFA_CLASS_SPACE: if ((c >= 9 && c <= 13) || (c == ' ')) return OK; break; case NFA_CLASS_UPPER: - if (vim_isupper(c)) + if (mb_isupper(c)) { return OK; + } break; case NFA_CLASS_XDIGIT: if (ascii_isxdigit(c)) @@ -4298,13 +4429,14 @@ static int check_char_class(int class, int c) return OK; break; case NFA_CLASS_ESCAPE: - if (c == '\033') + if (c == ESC) { return OK; + } break; default: - /* should not be here :P */ - EMSGN(_(e_ill_char_class), class); + // should not be here :P + IEMSGN(_(e_ill_char_class), class); return FAIL; } return FAIL; @@ -4507,10 +4639,10 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T } if ((int)(reginput - regline) >= state->val) { reginput -= state->val; - if (has_mbyte) - reginput -= mb_head_off(regline, reginput); - } else + reginput -= utf_head_off(regline, reginput); + } else { reginput = regline; + } } } @@ -4556,9 +4688,11 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T if (REG_MULTI) regline = reg_getline(reglnum); reginput = regline + save_reginput_col; - nfa_match = save_nfa_match; + if (result != NFA_TOO_EXPENSIVE) { + nfa_match = save_nfa_match; + nfa_listid = save_nfa_listid; + } nfa_endp = save_nfa_endp; - nfa_listid = save_nfa_listid; #ifdef REGEXP_DEBUG log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); @@ -4568,8 +4702,7 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T fprintf(log_fd, "MATCH = %s\n", !result ? "FALSE" : "OK"); fprintf(log_fd, "****************************\n"); } else { - EMSG(_( - "Could not open temporary log file for writing, displaying on stderr ... ")); + EMSG(_(e_log_open_failed)); log_fd = stderr; } #endif @@ -4740,17 +4873,10 @@ static int failure_chance(nfa_state_T *state, int depth) */ static int skip_to_start(int c, colnr_T *colp) { - char_u *s; - - /* Used often, do some work to avoid call overhead. */ - if (!ireg_ic - && !has_mbyte - ) - s = vim_strbyte(regline + *colp, c); - else - s = cstrchr(regline + *colp, c); - if (s == NULL) + const char_u *const s = cstrchr(regline + *colp, c); + if (s == NULL) { return FAIL; + } *colp = (int)(s - regline); return OK; } @@ -4777,7 +4903,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) int c2_len = PTR2LEN(s2); int c2 = PTR2CHAR(s2); - if ((c1 != c2 && (!ireg_ic || vim_tolower(c1) != vim_tolower(c2))) + if ((c1 != c2 && (!rex.reg_ic || mb_tolower(c1) != mb_tolower(c2))) || c1_len != c2_len) { match = false; break; @@ -4790,13 +4916,13 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) && !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) { cleanup_subexpr(); if (REG_MULTI) { - reg_startpos[0].lnum = reglnum; - reg_startpos[0].col = col; - reg_endpos[0].lnum = reglnum; - reg_endpos[0].col = s2 - regline; + rex.reg_startpos[0].lnum = reglnum; + rex.reg_startpos[0].col = col; + rex.reg_endpos[0].lnum = reglnum; + rex.reg_endpos[0].col = s2 - regline; } else { - reg_startp[0] = regline + col; - reg_endp[0] = s2; + rex.reg_startp[0] = regline + col; + rex.reg_endp[0] = s2; } return 1L; } @@ -4842,7 +4968,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); if (debug == NULL) { - EMSG2(_("(NFA) COULD NOT OPEN %s !"), NFA_REGEXP_DEBUG_LOG); + EMSG2("(NFA) COULD NOT OPEN %s!", NFA_REGEXP_DEBUG_LOG); return false; } #endif @@ -4850,9 +4976,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // recursive_regmatch(). Allow interrupting them with CTRL-C. fast_breakcheck(); if (got_int) { +#ifdef NFA_REGEXP_DEBUG_LOG + fclose(debug); +#endif return false; } if (nfa_time_limit != NULL && profile_passed_limit(*nfa_time_limit)) { +#ifdef NFA_REGEXP_DEBUG_LOG + fclose(debug); +#endif return false; } @@ -4874,8 +5006,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, abs(start->id), code); fprintf(log_fd, "**********************************\n"); } else { - EMSG(_( - "Could not open temporary log file for writing, displaying on stderr ... ")); + EMSG(_(e_log_open_failed)); log_fd = stderr; } #endif @@ -4914,16 +5045,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, * Run for each character. */ for (;; ) { - int curc; - int clen; - - if (has_mbyte) { - curc = (*mb_ptr2char)(reginput); - clen = (*mb_ptr2len)(reginput); - } else { - curc = *reginput; - clen = 1; - } + int curc = utf_ptr2char(reginput); + int clen = utfc_ptr2len(reginput); if (curc == NUL) { clen = 0; go_to_nextline = false; @@ -4932,10 +5055,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, /* swap lists */ thislist = &list[flag]; nextlist = &list[flag ^= 1]; - nextlist->n = 0; /* clear nextlist */ - nextlist->has_pim = FALSE; - ++nfa_listid; - if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) { + nextlist->n = 0; // clear nextlist + nextlist->has_pim = false; + nfa_listid++; + if (prog->re_engine == AUTOMATIC_ENGINE + && (nfa_listid >= NFA_MAX_STATES)) { // Too many states, retry with old engine. nfa_match = NFA_TOO_EXPENSIVE; goto theend; @@ -4948,8 +5072,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, fprintf(log_fd, "------------------------------------------\n"); fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); fprintf(log_fd, - ">>> Advanced one character ... Current char is %c (code %d) \n", curc, - (int)curc); + ">>> Advanced one character... Current char is %c (code %d) \n", + curc, + (int)curc); fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n); { int i; @@ -4981,16 +5106,17 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, { int col; - if (t->subs.norm.in_use <= 0) + if (t->subs.norm.in_use <= 0) { col = -1; - else if (REG_MULTI) + } else if (REG_MULTI) { col = t->subs.norm.list.multi[0].start_col; - else + } else { col = (int)(t->subs.norm.list.line[0].start - regline); + } nfa_set_code(t->state->c); - fprintf(log_fd, "(%d) char %d %s (start col %d)%s ... \n", - abs(t->state->id), (int)t->state->c, code, col, - pim_info(&t->pim)); + fprintf(log_fd, "(%d) char %d %s (start col %d)%s... \n", + abs(t->state->id), (int)t->state->c, code, col, + pim_info(&t->pim)); } #endif @@ -5005,8 +5131,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_MATCH: { // If the match ends before a composing characters and - // ireg_icombine is not set, that is not really a match. - if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) { + // rex.reg_icombine is not set, that is not really a match. + if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) { break; } nfa_match = true; @@ -5289,15 +5415,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, int this_class; // Get class of current and previous char (if it exists). - this_class = mb_get_class_buf(reginput, reg_buf); + this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); if (this_class <= 1) { result = false; } else if (reg_prev_class() == this_class) { result = false; } - } else if (!vim_iswordc_buf(curc, reg_buf) + } else if (!vim_iswordc_buf(curc, rex.reg_buf) || (reginput > regline - && vim_iswordc_buf(reginput[-1], reg_buf))) { + && vim_iswordc_buf(reginput[-1], rex.reg_buf))) { result = false; } if (result) { @@ -5314,15 +5440,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, int this_class, prev_class; // Get class of current and previous char (if it exists). - this_class = mb_get_class_buf(reginput, reg_buf); + this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) { result = false; } - } else if (!vim_iswordc_buf(reginput[-1], reg_buf) + } else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) || (reginput[0] != NUL - && vim_iswordc_buf(curc, reg_buf))) { + && vim_iswordc_buf(curc, rex.reg_buf))) { result = false; } if (result) { @@ -5333,14 +5459,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_BOF: if (reglnum == 0 && reginput == regline - && (!REG_MULTI || reg_firstlnum == 1)) { + && (!REG_MULTI || rex.reg_firstlnum == 1)) { add_here = true; add_state = t->state->out; } break; case NFA_EOF: - if (reglnum == reg_maxline && curc == NUL) { + if (reglnum == rex.reg_maxline && curc == NUL) { add_here = true; add_state = t->state->out; } @@ -5364,7 +5490,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // (no preceding character). len += mb_char2len(mc); } - if (ireg_icombine && len == 0) { + if (rex.reg_icombine && len == 0) { // If \Z was present, then ignore composing characters. // When ignoring the base character this always matches. if (sta->c != curc) { @@ -5385,7 +5511,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // We don't care about the order of composing characters. // Get them into cchars[] first. while (len < clen) { - mc = mb_ptr2char(reginput + len); + mc = utf_ptr2char(reginput + len); cchars[ccount++] = mc; len += mb_char2len(mc); if (ccount == MAX_MCO) @@ -5415,14 +5541,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } case NFA_NEWL: - if (curc == NUL && !reg_line_lbr && REG_MULTI - && reglnum <= reg_maxline) { + if (curc == NUL && !rex.reg_line_lbr && REG_MULTI + && reglnum <= rex.reg_maxline) { go_to_nextline = true; // Pass -1 for the offset, which means taking the position // at the start of the next line. add_state = t->state->out; add_off = -1; - } else if (curc == '\n' && reg_line_lbr) { + } else if (curc == '\n' && rex.reg_line_lbr) { // match \n as if it is an ordinary character add_state = t->state->out; add_off = 1; @@ -5463,23 +5589,25 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, result = result_if_matched; break; } - if (ireg_ic) { - int curc_low = vim_tolower(curc); - int done = FALSE; + if (rex.reg_ic) { + int curc_low = mb_tolower(curc); + int done = false; - for (; c1 <= c2; ++c1) - if (vim_tolower(c1) == curc_low) { + for (; c1 <= c2; c1++) { + if (mb_tolower(c1) == curc_low) { result = result_if_matched; done = TRUE; break; } - if (done) + } + if (done) { break; + } } } else if (state->c < 0 ? check_char_class(state->c, curc) : (curc == state->c - || (ireg_ic && vim_tolower(curc) - == vim_tolower(state->c)))) { + || (rex.reg_ic && mb_tolower(curc) + == mb_tolower(state->c)))) { result = result_if_matched; break; } @@ -5526,13 +5654,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_KWORD: // \k - result = vim_iswordp_buf(reginput, reg_buf); + result = vim_iswordp_buf(reginput, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; case NFA_SKWORD: // \K result = !ascii_isdigit(curc) - && vim_iswordp_buf(reginput, reg_buf); + && vim_iswordp_buf(reginput, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; @@ -5647,24 +5775,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_LOWER_IC: // [a-z] - result = ri_lower(curc) || (ireg_ic && ri_upper(curc)); + result = ri_lower(curc) || (rex.reg_ic && ri_upper(curc)); ADD_STATE_IF_MATCH(t->state); break; case NFA_NLOWER_IC: // [^a-z] result = curc != NUL - && !(ri_lower(curc) || (ireg_ic && ri_upper(curc))); + && !(ri_lower(curc) || (rex.reg_ic && ri_upper(curc))); ADD_STATE_IF_MATCH(t->state); break; case NFA_UPPER_IC: // [A-Z] - result = ri_upper(curc) || (ireg_ic && ri_lower(curc)); + result = ri_upper(curc) || (rex.reg_ic && ri_lower(curc)); ADD_STATE_IF_MATCH(t->state); break; case NFA_NUPPER_IC: // [^A-Z] result = curc != NUL - && !(ri_upper(curc) || (ireg_ic && ri_lower(curc))); + && !(ri_upper(curc) || (rex.reg_ic && ri_lower(curc))); ADD_STATE_IF_MATCH(t->state); break; @@ -5738,13 +5866,15 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_LNUM_GT: case NFA_LNUM_LT: assert(t->state->val >= 0 - && !((reg_firstlnum > 0 && reglnum > LONG_MAX - reg_firstlnum) - || (reg_firstlnum <0 && reglnum < LONG_MIN + reg_firstlnum)) - && reglnum + reg_firstlnum >= 0); + && !((rex.reg_firstlnum > 0 + && reglnum > LONG_MAX - rex.reg_firstlnum) + || (rex.reg_firstlnum < 0 + && reglnum < LONG_MIN + rex.reg_firstlnum)) + && reglnum + rex.reg_firstlnum >= 0); result = (REG_MULTI && nfa_re_num_cmp((uintmax_t)t->state->val, t->state->c - NFA_LNUM, - (uintmax_t)(reglnum + reg_firstlnum))); + (uintmax_t)(reglnum + rex.reg_firstlnum))); if (result) { add_here = true; add_state = t->state->out; @@ -5780,7 +5910,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } result = false; - win_T *wp = reg_win == NULL ? curwin : reg_win; + win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win; if (op == 1 && col - 1 > t->state->val && col > 100) { long ts = wp->w_buffer->b_p_ts; @@ -5807,18 +5937,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_MARK_GT: case NFA_MARK_LT: { - pos_T *pos = getmark_buf(reg_buf, t->state->val, FALSE); + pos_T *pos = getmark_buf(rex.reg_buf, t->state->val, false); // Compare the mark position to the match position. result = (pos != NULL // mark doesn't exist && pos->lnum > 0 // mark isn't set in reg_buf - && (pos->lnum == reglnum + reg_firstlnum + && (pos->lnum == reglnum + rex.reg_firstlnum ? (pos->col == (colnr_T)(reginput - regline) ? t->state->c == NFA_MARK : (pos->col < (colnr_T)(reginput - regline) ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT)) - : (pos->lnum < reglnum + reg_firstlnum + : (pos->lnum < reglnum + rex.reg_firstlnum ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT))); if (result) { @@ -5829,10 +5959,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } case NFA_CURSOR: - result = (reg_win != NULL - && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) + result = (rex.reg_win != NULL + && (reglnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) && ((colnr_T)(reginput - regline) - == reg_win->w_cursor.col)); + == rex.reg_win->w_cursor.col)); if (result) { add_here = true; add_state = t->state->out; @@ -5877,17 +6007,19 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, int c = t->state->c; #ifdef REGEXP_DEBUG - if (c < 0) - EMSGN("INTERNAL: Negative state char: %" PRId64, c); + if (c < 0) { + IEMSGN("INTERNAL: Negative state char: %" PRId64, c); + } #endif result = (c == curc); - if (!result && ireg_ic) - result = vim_tolower(c) == vim_tolower(curc); + if (!result && rex.reg_ic) { + result = mb_tolower(c) == mb_tolower(curc); + } - // If ireg_icombine is not set only skip over the character + // If rex.reg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. - if (result && enc_utf8 && !ireg_icombine) { + if (result && enc_utf8 && !rex.reg_icombine) { clen = utf_ptr2len(reginput); } @@ -5995,8 +6127,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, && ((toplevel && reglnum == 0 && clen != 0 - && (ireg_maxcol == 0 - || (colnr_T)(reginput - regline) < ireg_maxcol)) + && (rex.reg_maxcol == 0 + || (colnr_T)(reginput - regline) < rex.reg_maxcol)) || (nfa_endp != NULL && (REG_MULTI ? (reglnum < nfa_endp->se_u.pos.lnum @@ -6031,8 +6163,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Checking if the required start character matches is // cheaper than adding a state that won't match. c = PTR2CHAR(reginput + clen); - if (c != prog->regstart && (!ireg_ic || vim_tolower(c) - != vim_tolower(prog->regstart))) { + if (c != prog->regstart && (!rex.reg_ic || mb_tolower(c) + != mb_tolower(prog->regstart))) { #ifdef REGEXP_DEBUG fprintf(log_fd, " Skipping start state, regstart does not match\n"); @@ -6138,8 +6270,9 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm) nfa_print_state(f, start); fprintf(f, "\n\n"); fclose(f); - } else - EMSG(_("Could not open temporary log file for writing ")); + } else { + EMSG("Could not open temporary log file for writing"); + } #endif clear_sub(&subs.norm); @@ -6157,34 +6290,37 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm) cleanup_subexpr(); if (REG_MULTI) { for (i = 0; i < subs.norm.in_use; i++) { - reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum; - reg_startpos[i].col = subs.norm.list.multi[i].start_col; + rex.reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum; + rex.reg_startpos[i].col = subs.norm.list.multi[i].start_col; - reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum; - reg_endpos[i].col = subs.norm.list.multi[i].end_col; + rex.reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum; + rex.reg_endpos[i].col = subs.norm.list.multi[i].end_col; } - if (reg_startpos[0].lnum < 0) { - reg_startpos[0].lnum = 0; - reg_startpos[0].col = col; + if (rex.reg_startpos[0].lnum < 0) { + rex.reg_startpos[0].lnum = 0; + rex.reg_startpos[0].col = col; + } + if (rex.reg_endpos[0].lnum < 0) { + // pattern has a \ze but it didn't match, use current end + rex.reg_endpos[0].lnum = reglnum; + rex.reg_endpos[0].col = (int)(reginput - regline); + } else { + // Use line number of "\ze". + reglnum = rex.reg_endpos[0].lnum; } - if (reg_endpos[0].lnum < 0) { - /* pattern has a \ze but it didn't match, use current end */ - reg_endpos[0].lnum = reglnum; - reg_endpos[0].col = (int)(reginput - regline); - } else - /* Use line number of "\ze". */ - reglnum = reg_endpos[0].lnum; } else { for (i = 0; i < subs.norm.in_use; i++) { - reg_startp[i] = subs.norm.list.line[i].start; - reg_endp[i] = subs.norm.list.line[i].end; + rex.reg_startp[i] = subs.norm.list.line[i].start; + rex.reg_endp[i] = subs.norm.list.line[i].end; } - if (reg_startp[0] == NULL) - reg_startp[0] = regline + col; - if (reg_endp[0] == NULL) - reg_endp[0] = reginput; + if (rex.reg_startp[0] == NULL) { + rex.reg_startp[0] = regline + col; + } + if (rex.reg_endp[0] == NULL) { + rex.reg_endp[0] = reginput; + } } /* Package any found \z(...\) matches for export. Default is none. */ @@ -6238,14 +6374,14 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm) colnr_T col = startcol; if (REG_MULTI) { - prog = (nfa_regprog_T *)reg_mmatch->regprog; - line = reg_getline((linenr_T)0); /* relative to the cursor */ - reg_startpos = reg_mmatch->startpos; - reg_endpos = reg_mmatch->endpos; + prog = (nfa_regprog_T *)rex.reg_mmatch->regprog; + line = reg_getline((linenr_T)0); // relative to the cursor + rex.reg_startpos = rex.reg_mmatch->startpos; + rex.reg_endpos = rex.reg_mmatch->endpos; } else { - prog = (nfa_regprog_T *)reg_match->regprog; - reg_startp = reg_match->startp; - reg_endp = reg_match->endp; + prog = (nfa_regprog_T *)rex.reg_match->regprog; + rex.reg_startp = rex.reg_match->startp; + rex.reg_endp = rex.reg_match->endp; } /* Be paranoid... */ @@ -6254,15 +6390,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm) goto theend; } - /* If pattern contains "\c" or "\C": overrule value of ireg_ic */ - if (prog->regflags & RF_ICASE) - ireg_ic = TRUE; - else if (prog->regflags & RF_NOICASE) - ireg_ic = FALSE; + // If pattern contains "\c" or "\C": overrule value of rex.reg_ic + if (prog->regflags & RF_ICASE) { + rex.reg_ic = true; + } else if (prog->regflags & RF_NOICASE) { + rex.reg_ic = false; + } - /* If pattern contains "\Z" overrule value of ireg_icombine */ - if (prog->regflags & RF_ICOMBINE) - ireg_icombine = TRUE; + // If pattern contains "\Z" overrule value of rex.reg_icombine + if (prog->regflags & RF_ICOMBINE) { + rex.reg_icombine = true; + } regline = line; reglnum = 0; /* relative to line */ @@ -6291,17 +6429,17 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm) if (skip_to_start(prog->regstart, &col) == FAIL) return 0L; - /* If match_text is set it contains the full text that must match. - * Nothing else to try. Doesn't handle combining chars well. */ - if (prog->match_text != NULL - && !ireg_icombine - ) + // If match_text is set it contains the full text that must match. + // Nothing else to try. Doesn't handle combining chars well. + if (prog->match_text != NULL && !rex.reg_icombine) { return find_match_text(col, prog->regstart, prog->match_text); + } } - /* If the start column is past the maximum column: no need to try. */ - if (ireg_maxcol > 0 && col >= ireg_maxcol) + // If the start column is past the maximum column: no need to try. + if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { goto theend; + } nstate = prog->nstate; for (i = 0; i < nstate; ++i) { @@ -6341,12 +6479,13 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) * (and count its size). */ postfix = re2post(); if (postfix == NULL) { - /* TODO: only give this error for debugging? */ - if (post_ptr >= post_end) - EMSGN("Internal error: estimated max number " - "of states insufficient: %" PRId64, - post_end - post_start); - goto fail; /* Cascaded (syntax?) error */ + // TODO(vim): only give this error for debugging? + if (post_ptr >= post_end) { + IEMSGN("Internal error: estimated max number " + "of states insufficient: %" PRId64, + post_end - post_start); + } + goto fail; // Cascaded (syntax?) error } /* @@ -6359,10 +6498,10 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a"); if (f != NULL) { - fprintf( - f, - "\n*****************************\n\n\n\n\tCompiling regexp \"%s\" ... hold on !\n", - expr); + fprintf(f, + "\n*****************************\n\n\n\n\t" + "Compiling regexp \"%s\"... hold on !\n", + expr); fclose(f); } } @@ -6453,15 +6592,15 @@ nfa_regexec_nl ( bool line_lbr ) { - reg_match = rmp; - reg_mmatch = NULL; - reg_maxline = 0; - reg_line_lbr = line_lbr; - reg_buf = curbuf; - reg_win = NULL; - ireg_ic = rmp->rm_ic; - ireg_icombine = FALSE; - ireg_maxcol = 0; + rex.reg_match = rmp; + rex.reg_mmatch = NULL; + rex.reg_maxline = 0; + rex.reg_line_lbr = line_lbr; + rex.reg_buf = curbuf; + rex.reg_win = NULL; + rex.reg_ic = rmp->rm_ic; + rex.reg_icombine = false; + rex.reg_maxcol = 0; return nfa_regexec_both(line, col, NULL); } @@ -6502,16 +6641,16 @@ nfa_regexec_nl ( static long nfa_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm) { - reg_match = NULL; - reg_mmatch = rmp; - reg_buf = buf; - reg_win = win; - reg_firstlnum = lnum; - reg_maxline = reg_buf->b_ml.ml_line_count - lnum; - reg_line_lbr = FALSE; - ireg_ic = rmp->rmm_ic; - ireg_icombine = FALSE; - ireg_maxcol = rmp->rmm_maxcol; + rex.reg_match = NULL; + rex.reg_mmatch = rmp; + rex.reg_buf = buf; + rex.reg_win = win; + rex.reg_firstlnum = lnum; + rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; + rex.reg_line_lbr = false; + rex.reg_ic = rmp->rmm_ic; + rex.reg_icombine = false; + rex.reg_maxcol = rmp->rmm_maxcol; return nfa_regexec_both(NULL, col, tm); } |