diff options
author | dundargoc <33953936+dundargoc@users.noreply.github.com> | 2022-05-14 20:35:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-14 20:35:39 +0200 |
commit | 0adc66171a355a12494d87ebb767d509540c7ef9 (patch) | |
tree | d53d4ff9e04d4a570b38ea3f7a2133d0896acebf /src | |
parent | 83da441d165e6ed28c8515934000e3b57e1a7598 (diff) | |
download | rneovim-0adc66171a355a12494d87ebb767d509540c7ef9.tar.gz rneovim-0adc66171a355a12494d87ebb767d509540c7ef9.tar.bz2 rneovim-0adc66171a355a12494d87ebb767d509540c7ef9.zip |
refactor(uncrustify): enable formatting for regexp and indent files (#18549)
The formatting for these files was originally disabled to signal
that "we don't own these files", meaning we intentionally want to
minimize the amount of work put into these files, as the return will be
very little. This unfortunately conflicts with other refactoring efforts
that happen to touch these files, and it's easier to simply enable
formatting.
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/indent_c.c | 1046 | ||||
-rw-r--r-- | src/nvim/regexp.c | 342 | ||||
-rw-r--r-- | src/nvim/regexp_bt.c | 2043 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 2768 |
4 files changed, 3835 insertions, 2364 deletions
diff --git a/src/nvim/indent_c.c b/src/nvim/indent_c.c index 06153267bc..854487ccdb 100644 --- a/src/nvim/indent_c.c +++ b/src/nvim/indent_c.c @@ -1,12 +1,9 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -// uncrustify:off - #include <assert.h> #include <inttypes.h> -#include "nvim/vim.h" #include "nvim/ascii.h" #include "nvim/charset.h" #include "nvim/cursor.h" @@ -19,11 +16,12 @@ #include "nvim/option.h" #include "nvim/search.h" #include "nvim/strings.h" +#include "nvim/vim.h" // Find result cache for cpp_baseclass typedef struct { - int found; - lpos_T lpos; + int found; + lpos_T lpos; } cpp_baseclass_cache_T; #ifdef INCLUDE_GENERATED_DECLARATIONS @@ -44,10 +42,11 @@ pos_T *find_start_comment(int ind_maxcomment) // XXX pos_T *pos; int64_t cur_maxcomment = ind_maxcomment; - for (;; ) { + for (;;) { pos = findmatchlimit(NULL, '*', FM_BACKWARD, cur_maxcomment); - if (pos == NULL) + if (pos == NULL) { break; + } /* * Check if the comment start we found is inside a string. @@ -105,30 +104,27 @@ static pos_T *ind_find_start_CORS(linenr_T *is_raw) */ static pos_T *find_start_rawstring(int ind_maxcomment) // XXX { - pos_T *pos; - long cur_maxcomment = ind_maxcomment; + pos_T *pos; + long cur_maxcomment = ind_maxcomment; - for (;;) - { - pos = findmatchlimit(NULL, 'R', FM_BACKWARD, cur_maxcomment); - if (pos == NULL) - break; + for (;;) { + pos = findmatchlimit(NULL, 'R', FM_BACKWARD, cur_maxcomment); + if (pos == NULL) { + break; + } - /* - * Check if the raw string start we found is inside a string. - * If it is then restrict the search to below this line and try again. - */ - if (!is_pos_in_string(ml_get(pos->lnum), pos->col)) { - break; - } - cur_maxcomment = curwin->w_cursor.lnum - pos->lnum - 1; - if (cur_maxcomment <= 0) - { - pos = NULL; - break; - } + // Check if the raw string start we found is inside a string. 
+ // If it is then restrict the search to below this line and try again. + if (!is_pos_in_string(ml_get(pos->lnum), pos->col)) { + break; } - return pos; + cur_maxcomment = curwin->w_cursor.lnum - pos->lnum - 1; + if (cur_maxcomment <= 0) { + pos = NULL; + break; + } + } + return pos; } /* @@ -228,7 +224,7 @@ bool cin_is_cinword(const char_u *line) char_u *cinw_buf = xmalloc(cinw_len); line = (char_u *)skipwhite((char *)line); - for (char_u *cinw = curbuf->b_p_cinw; *cinw; ) { + for (char_u *cinw = curbuf->b_p_cinw; *cinw;) { size_t len = copy_option_part(&cinw, cinw_buf, cinw_len, ","); if (STRNCMP(line, cinw_buf, len) == 0 && (!vim_iswordc(line[len]) || !vim_iswordc(line[len - 1]))) { @@ -243,7 +239,6 @@ bool cin_is_cinword(const char_u *line) } - /* * Skip over white space and C comments within the line. * Also skip over Perl/shell comments if desired. @@ -269,8 +264,9 @@ static const char_u *cin_skipcomment(const char_u *s) s += STRLEN(s); break; } - if (*s != '*') + if (*s != '*') { break; + } for (++s; *s; s++) { // skip slash-star comment if (s[0] == '*' && s[1] == '/') { s += 2; @@ -296,8 +292,8 @@ static int cin_nocode(const char_u *s) static pos_T *find_line_comment(void) // XXX { static pos_T pos; - char_u *line; - char_u *p; + char_u *line; + char_u *p; pos = curwin->w_cursor; while (--pos.lnum > 0) { @@ -307,8 +303,9 @@ static pos_T *find_line_comment(void) // XXX pos.col = (int)(p - line); return &pos; } - if (*p != NUL) + if (*p != NUL) { break; + } } return NULL; } @@ -324,8 +321,8 @@ static bool cin_has_js_key(const char_u *text) quote = *s; ++s; } - if (!vim_isIDc(*s)) { // need at least one ID character - return FALSE; + if (!vim_isIDc(*s)) { // need at least one ID character + return false; } while (vim_isIDc(*s)) { @@ -350,8 +347,9 @@ static bool cin_islabel_skip(const char_u **s) return false; } - while (vim_isIDc(**s)) + while (vim_isIDc(**s)) { (*s)++; + } *s = cin_skipcomment(*s); @@ -431,10 +429,11 @@ static int cin_isinit(void) s = 
cin_skipcomment(get_cursor_line_ptr()); - if (cin_starts_with(s, "typedef")) + if (cin_starts_with(s, "typedef")) { s = cin_skipcomment(s + 7); + } - for (;; ) { + for (;;) { int i, l; for (i = 0; i < (int)ARRAY_SIZE(skip); ++i) { @@ -445,26 +444,26 @@ static int cin_isinit(void) break; } } - if (l != 0) + if (l != 0) { break; + } } - if (cin_starts_with(s, "enum")) - return TRUE; + if (cin_starts_with(s, "enum")) { + return true; + } - if (cin_ends_in(s, (char_u *)"=", (char_u *)"{")) - return TRUE; + if (cin_ends_in(s, (char_u *)"=", (char_u *)"{")) { + return true; + } return FALSE; } -/* - * Recognize a switch label: "case .*:" or "default:". - */ -bool cin_iscase( - const char_u *s, - bool strict // Allow relaxed check of case statement for JS -) +/// Recognize a switch label: "case .*:" or "default:". +/// +/// @param strict Allow relaxed check of case statement for JS +bool cin_iscase(const char_u *s, bool strict) { s = cin_skipcomment(s); if (cin_starts_with(s, "case")) { @@ -522,7 +521,7 @@ bool cin_isscopedecl(const char_u *p) bool found = false; - for (char_u *cinsd = curbuf->b_p_cinsd; *cinsd; ) { + for (char_u *cinsd = curbuf->b_p_cinsd; *cinsd;) { const size_t len = copy_option_part(&cinsd, cinsd_buf, cinsd_len, ","); if (STRNCMP(s, cinsd_buf, len) == 0) { const char_u *skip = cin_skipcomment(s + len); @@ -586,8 +585,8 @@ static bool cin_is_cpp_namespace(const char_u *s) /* * Return a pointer to the first non-empty non-comment character after a ':'. * Return NULL if not found. 
- * case 234: a = b; - * ^ + * case 234: a = b; + * ^ */ static const char_u *after_label(const char_u *l) { @@ -602,11 +601,13 @@ static const char_u *after_label(const char_u *l) l += 2; // skip over 'x' } } - if (*l == NUL) + if (*l == NUL) { return NULL; + } l = cin_skipcomment(l + 1); - if (*l == NUL) + if (*l == NUL) { return NULL; + } return l; } @@ -623,8 +624,9 @@ static int get_indent_nolabel(linenr_T lnum) // XXX l = ml_get(lnum); p = after_label(l); - if (p == NULL) + if (p == NULL) { return 0; + } fp.col = (colnr_T)(p - l); fp.lnum = lnum; @@ -635,8 +637,8 @@ static int get_indent_nolabel(linenr_T lnum) // XXX /* * Find indent for line "lnum", ignoring any case or jump label. * Also return a pointer to the text (after the label) in "pp". - * label: if (asdf && asdfasdf) - * ^ + * label: if (asdf && asdfasdf) + * ^ */ static int skip_label(linenr_T lnum, const char_u **pp) { @@ -666,14 +668,14 @@ static int skip_label(linenr_T lnum, const char_u **pp) /* * Return the indent of the first variable name after a type in a declaration. - * int a, indent of "a" - * static struct foo b, indent of "b" - * enum bla c, indent of "c" + * int a, indent of "a" + * static struct foo b, indent of "b" + * enum bla c, indent of "c" * Returns zero when it doesn't look like a declaration. */ static int cin_first_id_amount(void) { - char_u *line, *p, *s; + char_u *line, *p, *s; int len; pos_T fp; colnr_T col; @@ -699,10 +701,10 @@ static int cin_first_id_amount(void) p = s; } } - for (len = 0; vim_isIDc(p[len]); ++len) - ; - if (len == 0 || !ascii_iswhite(p[len]) || cin_nocode(p)) + for (len = 0; vim_isIDc(p[len]); len++) {} + if (len == 0 || !ascii_iswhite(p[len]) || cin_nocode(p)) { return 0; + } p = (char_u *)skipwhite((char *)p + len); fp.lnum = curwin->w_cursor.lnum; @@ -717,8 +719,8 @@ static int cin_first_id_amount(void) * Return zero if no (useful) equal sign found. * Return -1 if the line above "lnum" ends in a backslash. 
* foo = "asdf\ - * asdf\ - * here"; + * asdf\ + * here"; */ static int cin_get_equal_amount(linenr_T lnum) { @@ -729,8 +731,9 @@ static int cin_get_equal_amount(linenr_T lnum) if (lnum > 1) { line = ml_get(lnum - 1); - if (*line != NUL && line[STRLEN(line) - 1] == '\\') + if (*line != NUL && line[STRLEN(line) - 1] == '\\') { return -1; + } } line = s = ml_get(lnum); @@ -741,8 +744,9 @@ static int cin_get_equal_amount(linenr_T lnum) s++; } } - if (*s != '=') + if (*s != '=') { return 0; + } s = (char_u *)skipwhite((char *)s + 1); if (cin_nocode(s)) { @@ -785,17 +789,19 @@ static int cin_ispreproc_cont(const char_u **pp, linenr_T *lnump, int *amount) candidate_amount = get_indent_lnum(lnum); } - for (;; ) { + for (;;) { if (cin_ispreproc(line)) { retval = TRUE; *lnump = lnum; break; } - if (lnum == 1) + if (lnum == 1) { break; + } line = ml_get(--lnum); - if (*line == NUL || line[STRLEN(line) - 1] != '\\') + if (*line == NUL || line[STRLEN(line) - 1] != '\\') { break; + } } if (lnum != *lnump) { @@ -823,21 +829,18 @@ static int cin_islinecomment(const char_u *p) return p[0] == '/' && p[1] == '/'; } -/* - * Recognize a line that starts with '{' or '}', or ends with ';', ',', '{' or - * '}'. - * Don't consider "} else" a terminated line. - * If a line begins with an "else", only consider it terminated if no unmatched - * opening braces follow (handle "else { foo();" correctly). - * Return the character terminating the line (ending char's have precedence if - * both apply in order to determine initializations). - */ -static char_u -cin_isterminated( - const char_u *s, - int incl_open, // include '{' at the end as terminator - int incl_comma // recognize a trailing comma -) +/// Recognize a line that starts with '{' or '}', or ends with ';', ',', '{' or +/// '}'. +/// Don't consider "} else" a terminated line. +/// If a line begins with an "else", only consider it terminated if no unmatched +/// opening braces follow (handle "else { foo();" correctly). 
+/// +/// @param incl_open include '{' at the end as terminator +/// @param incl_comma recognize a trailing comma +/// +/// @return the character terminating the line (ending char's have precedence if +/// both apply in order to determine initializations). +static char_u cin_isterminated(const char_u *s, int incl_open, int incl_comma) { char_u found_start = 0; unsigned n_open = 0; @@ -845,30 +848,35 @@ cin_isterminated( s = cin_skipcomment(s); - if (*s == '{' || (*s == '}' && !cin_iselse(s))) + if (*s == '{' || (*s == '}' && !cin_iselse(s))) { found_start = *s; + } - if (!found_start) + if (!found_start) { is_else = cin_iselse(s); + } while (*s) { // skip over comments, "" strings and 'c'haracters s = skip_string(cin_skipcomment(s)); - if (*s == '}' && n_open > 0) - --n_open; + if (*s == '}' && n_open > 0) { + n_open--; + } if ((!is_else || n_open == 0) && (*s == ';' || *s == '}' || (incl_comma && *s == ',')) - && cin_nocode(s + 1)) + && cin_nocode(s + 1)) { return *s; - else if (*s == '{') { - if (incl_open && cin_nocode(s + 1)) + } else if (*s == '{') { + if (incl_open && cin_nocode(s + 1)) { return *s; - else - ++n_open; + } else { + n_open++; + } } - if (*s) + if (*s) { s++; + } } return found_start; } @@ -889,13 +897,14 @@ static int cin_isfuncdecl(const char_u **sp, linenr_T first_lnum, linenr_T min_l linenr_T lnum = first_lnum; linenr_T save_lnum = curwin->w_cursor.lnum; int retval = false; - pos_T *trypos; - int just_started = TRUE; + pos_T *trypos; + int just_started = true; - if (sp == NULL) + if (sp == NULL) { s = ml_get(lnum); - else + } else { s = *sp; + } curwin->w_cursor.lnum = lnum; if (find_last_paren(s, '(', ')') @@ -945,8 +954,9 @@ static int cin_isfuncdecl(const char_u **sp, linenr_T first_lnum, linenr_T min_l // defined(y) lnum = first_lnum - 1; s = ml_get(lnum); - if (*s == NUL || s[STRLEN(s) - 1] != '\\') - retval = TRUE; + if (*s == NUL || s[STRLEN(s) - 1] != '\\') { + retval = true; + } goto done; } if ((*s == ',' && cin_nocode(s + 1)) || 
s[1] == NUL || cin_nocode(s)) { @@ -956,17 +966,20 @@ static int cin_isfuncdecl(const char_u **sp, linenr_T first_lnum, linenr_T min_l * At the end: check for ',' in the next line, for this style: * func(arg1 * , arg2) */ - for (;; ) { - if (lnum >= curbuf->b_ml.ml_line_count) + for (;;) { + if (lnum >= curbuf->b_ml.ml_line_count) { break; + } s = ml_get(++lnum); - if (!cin_ispreproc(s)) + if (!cin_ispreproc(s)) { break; + } } - if (lnum >= curbuf->b_ml.ml_line_count) + if (lnum >= curbuf->b_ml.ml_line_count) { break; - /* Require a comma at end of the line or a comma or ')' at the - * start of next line. */ + } + // Require a comma at end of the line or a comma or ')' at the + // start of next line. s = (char_u *)skipwhite((char *)s); if (!just_started && (!comma && *s != ',' && *s != ')')) { break; @@ -981,8 +994,9 @@ static int cin_isfuncdecl(const char_u **sp, linenr_T first_lnum, linenr_T min_l } done: - if (lnum != first_lnum && sp != NULL) + if (lnum != first_lnum && sp != NULL) { *sp = ml_get(first_lnum); + } return retval; } @@ -1013,8 +1027,8 @@ static int cin_isdo(const char_u *p) static int cin_iswhileofdo(const char_u *p, linenr_T lnum) // XXX { pos_T cursor_save; - pos_T *trypos; - int retval = FALSE; + pos_T *trypos; + int retval = false; p = cin_skipcomment(p); if (*p == '}') { // accept "} while (cond);" @@ -1029,10 +1043,10 @@ static int cin_iswhileofdo(const char_u *p, linenr_T lnum) // XXX p++; curwin->w_cursor.col++; } - if ((trypos = findmatchlimit(NULL, 0, 0, - curbuf->b_ind_maxparen)) != NULL - && *cin_skipcomment(ml_get_pos(trypos) + 1) == ';') - retval = TRUE; + if ((trypos = findmatchlimit(NULL, 0, 0, curbuf->b_ind_maxparen)) != NULL + && *cin_skipcomment(ml_get_pos(trypos) + 1) == ';') { + retval = true; + } curwin->w_cursor = cursor_save; } return retval; @@ -1048,24 +1062,29 @@ static int cin_is_if_for_while_before_offset(const char_u *line, int *poffset) { int offset = *poffset; - if (offset-- < 2) + if (offset-- < 2) { return 0; - 
while (offset > 2 && ascii_iswhite(line[offset])) - --offset; + } + while (offset > 2 && ascii_iswhite(line[offset])) { + offset--; + } offset -= 1; - if (!STRNCMP(line + offset, "if", 2)) + if (!STRNCMP(line + offset, "if", 2)) { goto probablyFound; + } if (offset >= 1) { offset -= 1; - if (!STRNCMP(line + offset, "for", 3)) + if (!STRNCMP(line + offset, "for", 3)) { goto probablyFound; + } if (offset >= 2) { offset -= 2; - if (!STRNCMP(line + offset, "while", 5)) + if (!STRNCMP(line + offset, "while", 5)) { goto probablyFound; + } } } return 0; @@ -1083,7 +1102,7 @@ probablyFound: * do * nothing; * while (foo - * && bar); <-- here + * && bar); <-- here * Adjust the cursor to the line with "while". */ static int cin_iswhileofdo_end(int terminated) @@ -1125,8 +1144,9 @@ static int cin_iswhileofdo_end(int terminated) p = line + i; } } - if (*p != NUL) - ++p; + if (*p != NUL) { + p++; + } } return FALSE; } @@ -1141,15 +1161,16 @@ static int cin_isbreak(const char_u *p) * constructor-initialization. eg: * * class MyClass : - * baseClass <-- here + * baseClass <-- here * class MyClass : public baseClass, - * anotherBaseClass <-- here (should probably lineup ??) + * anotherBaseClass <-- here (should probably lineup ??) * MyClass::MyClass(...) : - * baseClass(...) <-- here (constructor-initialization) + * baseClass(...) <-- here (constructor-initialization) * * This is a lot of guessing. Watch out for "cond ? func() : foo". 
*/ -static int cin_is_cpp_baseclass(cpp_baseclass_cache_T *cached) { +static int cin_is_cpp_baseclass(cpp_baseclass_cache_T *cached) +{ lpos_T *pos = &cached->lpos; // find position const char_u *s; int class_or_struct, lookfor_ctor_init, cpp_base_class; @@ -1167,23 +1188,24 @@ static int cin_is_cpp_baseclass(cpp_baseclass_cache_T *cached) { return false; } s = cin_skipcomment(s); - if (*s == NUL) - return FALSE; + if (*s == NUL) { + return false; + } cpp_base_class = lookfor_ctor_init = class_or_struct = FALSE; /* Search for a line starting with '#', empty, ending in ';' or containing * '{' or '}' and start below it. This handles the following situations: - * a = cond ? - * func() : - * asdf; - * func::foo() - * : something - * {} - * Foo::Foo (int one, int two) - * : something(4), - * somethingelse(3) - * {} + * a = cond ? + * func() : + * asdf; + * func::foo() + * : something + * {} + * Foo::Foo (int one, int two) + * : something(4), + * somethingelse(3) + * {} */ while (lnum > 1) { line = ml_get(lnum - 1); @@ -1194,20 +1216,23 @@ static int cin_is_cpp_baseclass(cpp_baseclass_cache_T *cached) { while (*s != NUL) { s = cin_skipcomment(s); if (*s == '{' || *s == '}' - || (*s == ';' && cin_nocode(s + 1))) + || (*s == ';' && cin_nocode(s + 1))) { break; - if (*s != NUL) - ++s; + } + if (*s != NUL) { + s++; + } } - if (*s != NUL) + if (*s != NUL) { break; - --lnum; + } + lnum--; } pos->lnum = lnum; line = ml_get(lnum); s = line; - for (;; ) { + for (;;) { if (*s == NUL) { if (lnum == curwin->w_cursor.lnum) { break; @@ -1222,13 +1247,14 @@ static int cin_is_cpp_baseclass(cpp_baseclass_cache_T *cached) { break; } s = cin_skipcomment(line); - if (*s == NUL) + if (*s == NUL) { continue; + } } - if (s[0] == '"' || (s[0] == 'R' && s[1] == '"')) + if (s[0] == '"' || (s[0] == 'R' && s[1] == '"')) { s = skip_string(s) + 1; - else if (s[0] == ':') { + } else if (s[0] == ':') { if (s[1] == ':') { /* skip double colon. 
It can't be a constructor * initialization any more */ @@ -1241,17 +1267,19 @@ static int cin_is_cpp_baseclass(cpp_baseclass_cache_T *cached) { lookfor_ctor_init = class_or_struct = false; pos->col = 0; s = cin_skipcomment(s + 1); - } else + } else { s = cin_skipcomment(s + 1); + } } else if ((STRNCMP(s, "class", 5) == 0 && !vim_isIDc(s[5])) || (STRNCMP(s, "struct", 6) == 0 && !vim_isIDc(s[6]))) { class_or_struct = TRUE; lookfor_ctor_init = FALSE; - if (*s == 'c') + if (*s == 'c') { s = cin_skipcomment(s + 5); - else + } else { s = cin_skipcomment(s + 6); + } } else { if (s[0] == '{' || s[0] == '}' || s[0] == ';') { cpp_base_class = lookfor_ctor_init = class_or_struct = FALSE; @@ -1297,7 +1325,7 @@ static int get_baseclass_amount(int col) { int amount; colnr_T vcol; - pos_T *trypos; + pos_T *trypos; if (col == 0) { amount = get_indent(); @@ -1313,8 +1341,9 @@ static int get_baseclass_amount(int col) getvcol(curwin, &curwin->w_cursor, &vcol, NULL, NULL); amount = (int)vcol; } - if (amount < curbuf->b_ind_cpp_baseclass) + if (amount < curbuf->b_ind_cpp_baseclass) { amount = curbuf->b_ind_cpp_baseclass; + } return amount; } @@ -1340,8 +1369,9 @@ static int cin_ends_in(const char_u *s, const char_u *find, const char_u *ignore return true; } } - if (*p != NUL) - ++p; + if (*p != NUL) { + p++; + } } return FALSE; } @@ -1461,10 +1491,10 @@ static pos_T *find_match_paren(int ind_maxparen) return find_match_char('(', ind_maxparen); } -static pos_T * find_match_char(char_u c, int ind_maxparen) +static pos_T *find_match_char(char_u c, int ind_maxparen) { pos_T cursor_save; - pos_T *trypos; + pos_T *trypos; static pos_T pos_copy; int ind_maxp_wk; @@ -1488,8 +1518,7 @@ retry: trypos = &pos_copy; curwin->w_cursor = *trypos; if ((trypos_wk = ind_find_start_CORS(NULL)) != NULL) { // XXX - ind_maxp_wk = ind_maxparen - (int)(cursor_save.lnum - - trypos_wk->lnum); + ind_maxp_wk = ind_maxparen - (int)(cursor_save.lnum - trypos_wk->lnum); if (ind_maxp_wk > 0) { curwin->w_cursor = 
*trypos_wk; goto retry; @@ -1535,8 +1564,9 @@ static int corr_ind_maxparen(pos_T *startpos) { long n = (long)startpos->lnum - (long)curwin->w_cursor.lnum; - if (n > 0 && n < curbuf->b_ind_maxparen / 2) + if (n > 0 && n < curbuf->b_ind_maxparen / 2) { return curbuf->b_ind_maxparen - (int)n; + } return curbuf->b_ind_maxparen; } @@ -1575,8 +1605,8 @@ static int find_last_paren(const char_u *l, int start, int end) */ void parse_cino(buf_T *buf) { - char_u *p; - char_u *l; + char_u *p; + char_u *l; int divider; int fraction = 0; int sw = get_sw_value(buf); @@ -1714,7 +1744,7 @@ void parse_cino(buf_T *buf) // Handle C #pragma directives buf->b_ind_pragma = 0; - for (p = buf->b_p_cino; *p; ) { + for (p = buf->b_p_cino; *p;) { l = p++; if (*p == '-') { p++; @@ -1738,57 +1768,134 @@ void parse_cino(buf_T *buf) n = sw; // just "s" is one 'shiftwidth'. } else { n *= sw; - if (divider) + if (divider) { n += (sw * fraction + divider / 2) / divider; + } } ++p; } - if (l[1] == '-') + if (l[1] == '-') { n = -n; + } /* When adding an entry here, also update the default 'cinoptions' in * doc/indent.txt, and add explanation for it! 
*/ switch (*l) { - case '>': buf->b_ind_level = n; break; - case 'e': buf->b_ind_open_imag = n; break; - case 'n': buf->b_ind_no_brace = n; break; - case 'f': buf->b_ind_first_open = n; break; - case '{': buf->b_ind_open_extra = n; break; - case '}': buf->b_ind_close_extra = n; break; - case '^': buf->b_ind_open_left_imag = n; break; - case 'L': buf->b_ind_jump_label = n; break; - case ':': buf->b_ind_case = n; break; - case '=': buf->b_ind_case_code = n; break; - case 'b': buf->b_ind_case_break = n; break; - case 'p': buf->b_ind_param = n; break; - case 't': buf->b_ind_func_type = n; break; - case '/': buf->b_ind_comment = n; break; - case 'c': buf->b_ind_in_comment = n; break; - case 'C': buf->b_ind_in_comment2 = n; break; - case 'i': buf->b_ind_cpp_baseclass = n; break; - case '+': buf->b_ind_continuation = n; break; - case '(': buf->b_ind_unclosed = n; break; - case 'u': buf->b_ind_unclosed2 = n; break; - case 'U': buf->b_ind_unclosed_noignore = n; break; - case 'W': buf->b_ind_unclosed_wrapped = n; break; - case 'w': buf->b_ind_unclosed_whiteok = n; break; - case 'm': buf->b_ind_matching_paren = n; break; - case 'M': buf->b_ind_paren_prev = n; break; - case ')': buf->b_ind_maxparen = n; break; - case '*': buf->b_ind_maxcomment = n; break; - case 'g': buf->b_ind_scopedecl = n; break; - case 'h': buf->b_ind_scopedecl_code = n; break; - case 'j': buf->b_ind_java = n; break; - case 'J': buf->b_ind_js = n; break; - case 'l': buf->b_ind_keep_case_label = n; break; - case '#': buf->b_ind_hash_comment = n; break; - case 'N': buf->b_ind_cpp_namespace = n; break; - case 'k': buf->b_ind_if_for_while = n; break; - case 'E': buf->b_ind_cpp_extern_c = n; break; - case 'P': buf->b_ind_pragma = n; break; - } - if (*p == ',') - ++p; + case '>': + buf->b_ind_level = n; + break; + case 'e': + buf->b_ind_open_imag = n; + break; + case 'n': + buf->b_ind_no_brace = n; + break; + case 'f': + buf->b_ind_first_open = n; + break; + case '{': + buf->b_ind_open_extra = n; + break; + case 
'}': + buf->b_ind_close_extra = n; + break; + case '^': + buf->b_ind_open_left_imag = n; + break; + case 'L': + buf->b_ind_jump_label = n; + break; + case ':': + buf->b_ind_case = n; + break; + case '=': + buf->b_ind_case_code = n; + break; + case 'b': + buf->b_ind_case_break = n; + break; + case 'p': + buf->b_ind_param = n; + break; + case 't': + buf->b_ind_func_type = n; + break; + case '/': + buf->b_ind_comment = n; + break; + case 'c': + buf->b_ind_in_comment = n; + break; + case 'C': + buf->b_ind_in_comment2 = n; + break; + case 'i': + buf->b_ind_cpp_baseclass = n; + break; + case '+': + buf->b_ind_continuation = n; + break; + case '(': + buf->b_ind_unclosed = n; + break; + case 'u': + buf->b_ind_unclosed2 = n; + break; + case 'U': + buf->b_ind_unclosed_noignore = n; + break; + case 'W': + buf->b_ind_unclosed_wrapped = n; + break; + case 'w': + buf->b_ind_unclosed_whiteok = n; + break; + case 'm': + buf->b_ind_matching_paren = n; + break; + case 'M': + buf->b_ind_paren_prev = n; + break; + case ')': + buf->b_ind_maxparen = n; + break; + case '*': + buf->b_ind_maxcomment = n; + break; + case 'g': + buf->b_ind_scopedecl = n; + break; + case 'h': + buf->b_ind_scopedecl_code = n; + break; + case 'j': + buf->b_ind_java = n; + break; + case 'J': + buf->b_ind_js = n; + break; + case 'l': + buf->b_ind_keep_case_label = n; + break; + case '#': + buf->b_ind_hash_comment = n; + break; + case 'N': + buf->b_ind_cpp_namespace = n; + break; + case 'k': + buf->b_ind_if_for_while = n; + break; + case 'E': + buf->b_ind_cpp_extern_c = n; + break; + case 'P': + buf->b_ind_pragma = n; + break; + } + if (*p == ',') { + p++; + } } } @@ -1803,14 +1910,14 @@ int get_c_indent(void) int scope_amount; int cur_amount = MAXCOL; colnr_T col; - char_u *theline; - char_u *linecopy; - pos_T *trypos; - pos_T *comment_pos; - pos_T *tryposBrace = NULL; - pos_T tryposCopy; + char_u *theline; + char_u *linecopy; + pos_T *trypos; + pos_T *comment_pos; + pos_T *tryposBrace = NULL; + pos_T tryposCopy; 
pos_T our_paren_pos; - char_u *start; + char_u *start; int start_brace; #define BRACE_IN_COL0 1 // '{' is in column 0 #define BRACE_AT_START 2 // '{' is at start of line @@ -1908,13 +2015,10 @@ int get_c_indent(void) } } - /* - * Is it a non-case label? Then that goes at the left margin too unless: - * - JS flag is set. - * - 'L' item has a positive value. - */ - if (original_line_islabel && !curbuf->b_ind_js - && curbuf->b_ind_jump_label < 0) { + // Is it a non-case label? Then that goes at the left margin too unless: + // - JS flag is set. + // - 'L' item has a positive value. + if (original_line_islabel && !curbuf->b_ind_js && curbuf->b_ind_jump_label < 0) { amount = 0; goto theend; } @@ -1952,7 +2056,7 @@ int get_c_indent(void) char_u lead_start[COM_MAX_LEN]; // start-comment string char_u lead_middle[COM_MAX_LEN]; // middle-comment string char_u lead_end[COM_MAX_LEN]; // end-comment string - char_u *p; + char_u *p; int start_align = 0; int start_off = 0; int done = FALSE; @@ -1981,8 +2085,9 @@ int get_c_indent(void) } } - if (*p == ':') - ++p; + if (*p == ':') { + p++; + } (void)copy_option_part(&p, lead_end, COM_MAX_LEN, ","); if (what == COM_START) { STRCPY(lead_start, lead_end); @@ -2010,17 +2115,18 @@ int get_c_indent(void) amount = get_indent_lnum(curwin->w_cursor.lnum - 1); break; } else if (STRNCMP(ml_get(comment_pos->lnum) + comment_pos->col, - lead_start, lead_start_len) != 0) { - /* If the start comment string doesn't match with the - * start of the comment, skip this entry. XXX */ + lead_start, lead_start_len) != 0) { + // If the start comment string doesn't match with the + // start of the comment, skip this entry. 
XXX continue; } } - if (start_off != 0) + if (start_off != 0) { amount += start_off; - else if (start_align == COM_RIGHT) + } else if (start_align == COM_RIGHT) { amount += vim_strsize(lead_start) - vim_strsize(lead_middle); + } break; } @@ -2045,18 +2151,16 @@ int get_c_indent(void) * asterisk in the comment opener; otherwise, line up * with the first character of the comment text. */ - if (done) - ; - else if (theline[0] == '*') + if (done) { + // skip + } else if (theline[0] == '*') { amount += 1; - else { - /* - * If we are more than one line away from the comment opener, take - * the indent of the previous non-empty line. If 'cino' has "CO" - * and we are just below the comment opener and there are any - * white characters after it line up with the text after it; - * otherwise, add the amount specified by "c" in 'cino' - */ + } else { + // If we are more than one line away from the comment opener, take + // the indent of the previous non-empty line. If 'cino' has "CO" + // and we are just below the comment opener and there are any + // white characters after it line up with the text after it; + // otherwise, add the amount specified by "c" in 'cino' amount = -1; for (lnum = cur_curpos.lnum - 1; lnum > comment_pos->lnum; lnum--) { if (linewhite(lnum)) { // skip blank lines @@ -2075,11 +2179,12 @@ int get_c_indent(void) } getvcol(curwin, comment_pos, &col, NULL, NULL); amount = col; - if (curbuf->b_ind_in_comment2 || *look == NUL) + if (curbuf->b_ind_in_comment2 || *look == NUL) { amount += curbuf->b_ind_in_comment; + } } } - goto theend; + goto theend; } // Are we looking at a ']' that has a match? if (*skipwhite((char *)theline) == ']' @@ -2091,18 +2196,19 @@ int get_c_indent(void) // Are we inside parentheses or braces? 
// XXX if (((trypos = find_match_paren(curbuf->b_ind_maxparen)) != NULL - && curbuf->b_ind_java == 0) - || (tryposBrace = find_start_brace()) != NULL - || trypos != NULL) { + && curbuf->b_ind_java == 0) + || (tryposBrace = find_start_brace()) != NULL + || trypos != NULL) { if (trypos != NULL && tryposBrace != NULL) { /* Both an unmatched '(' and '{' is found. Use the one which is * closer to the current cursor position, set the other to NULL. */ if (trypos->lnum != tryposBrace->lnum ? trypos->lnum < tryposBrace->lnum - : trypos->col < tryposBrace->col) + : trypos->col < tryposBrace->col) { trypos = NULL; - else + } else { tryposBrace = NULL; + } } if (trypos != NULL) { @@ -2133,16 +2239,16 @@ int get_c_indent(void) } // XXX - if ((trypos = find_match_paren( - corr_ind_maxparen(&cur_curpos))) != NULL + if ((trypos = find_match_paren(corr_ind_maxparen(&cur_curpos))) != NULL && trypos->lnum == our_paren_pos.lnum && trypos->col == our_paren_pos.col) { amount = get_indent_lnum(lnum); // XXX if (theline[0] == ')') { if (our_paren_pos.lnum != lnum - && cur_amount > amount) + && cur_amount > amount) { cur_amount = amount; + } amount = -1; } break; @@ -2165,7 +2271,7 @@ int get_c_indent(void) pos_T cursor_save = curwin->w_cursor; pos_T outermost; - char_u *line; + char_u *line; trypos = &our_paren_pos; do { @@ -2188,7 +2294,7 @@ int get_c_indent(void) look = (char_u *)skipwhite((char *)look); if (*look == '(') { linenr_T save_lnum = curwin->w_cursor.lnum; - char_u *line; + char_u *line; int look_col; /* Ignore a '(' in front of the line that has a match before @@ -2198,11 +2304,12 @@ int get_c_indent(void) look_col = (int)(look - line); curwin->w_cursor.col = look_col + 1; if ((trypos = findmatchlimit(NULL, ')', 0, - curbuf->b_ind_maxparen)) + curbuf->b_ind_maxparen)) != NULL && trypos->lnum == our_paren_pos.lnum - && trypos->col < our_paren_pos.col) + && trypos->col < our_paren_pos.col) { ignore_paren_col = trypos->col + 1; + } curwin->w_cursor.lnum = save_lnum; look = 
ml_get(our_paren_pos.lnum) + look_col; @@ -2231,24 +2338,28 @@ int get_c_indent(void) for (col = 0; col < our_paren_pos.col; ++col) { switch (l[col]) { case '(': - case '{': ++n; + case '{': + n++; break; case ')': - case '}': if (n > 1) - --n; + case '}': + if (n > 1) { + n--; + } break; } } our_paren_pos.col = 0; amount += n * curbuf->b_ind_unclosed_wrapped; - } else if (curbuf->b_ind_unclosed_whiteok) + } else if (curbuf->b_ind_unclosed_whiteok) { our_paren_pos.col++; - else { + } else { col = our_paren_pos.col + 1; - while (ascii_iswhite(l[col])) + while (ascii_iswhite(l[col])) { col++; + } if (l[col] != NUL) { // In case of trailing space our_paren_pos.col = col; } else { @@ -2263,8 +2374,9 @@ int get_c_indent(void) */ if (our_paren_pos.col > 0) { getvcol(curwin, &our_paren_pos, &col, NULL, NULL); - if (cur_amount > (int)col) + if (cur_amount > (int)col) { cur_amount = col; + } } } @@ -2273,8 +2385,9 @@ int get_c_indent(void) } else if ((curbuf->b_ind_unclosed == 0 && is_if_for_while == 0) || (!curbuf->b_ind_unclosed_noignore && *look == '(' && ignore_paren_col == 0)) { - if (cur_amount != MAXCOL) + if (cur_amount != MAXCOL) { amount = cur_amount; + } } else { /* Add b_ind_unclosed2 for each '(' before our matching one, * but ignore (void) before the line (ignore_paren_col). 
*/ @@ -2282,10 +2395,12 @@ int get_c_indent(void) while ((int)our_paren_pos.col > ignore_paren_col) { --our_paren_pos.col; switch (*ml_get_pos(&our_paren_pos)) { - case '(': amount += curbuf->b_ind_unclosed2; + case '(': + amount += curbuf->b_ind_unclosed2; col = our_paren_pos.col; break; - case ')': amount -= curbuf->b_ind_unclosed2; + case ')': + amount -= curbuf->b_ind_unclosed2; col = MAXCOL; break; } @@ -2293,9 +2408,9 @@ int get_c_indent(void) /* Use b_ind_unclosed once, when the first '(' is not inside * braces */ - if (col == MAXCOL) + if (col == MAXCOL) { amount += curbuf->b_ind_unclosed; - else { + } else { curwin->w_cursor.lnum = our_paren_pos.lnum; curwin->w_cursor.col = col; if (find_match_paren_after_brace(curbuf->b_ind_maxparen)) { @@ -2312,12 +2427,13 @@ int get_c_indent(void) * For a line starting with ')' use the minimum of the two * positions, to avoid giving it more indent than the previous * lines: - * func_long_name( if (x - * arg && yy - * ) ^ not here ) ^ not here + * func_long_name( if (x + * arg && yy + * ) ^ not here ) ^ not here */ - if (cur_amount < amount) + if (cur_amount < amount) { amount = cur_amount; + } } } @@ -2346,10 +2462,11 @@ int get_c_indent(void) if (*look == '{') { getvcol(curwin, trypos, &col, NULL, NULL); amount = col; - if (*start == '{') + if (*start == '{') { start_brace = BRACE_IN_COL0; - else + } else { start_brace = BRACE_AT_START; + } } else { // That opening brace might have been on a continuation // line. If so, find the start of the line. @@ -2364,9 +2481,9 @@ int get_c_indent(void) } // It could have been something like - // case 1: if (asdf && - // ldfd) { - // } + // case 1: if (asdf && + // ldfd) { + // } if ((curbuf->b_ind_js || curbuf->b_ind_keep_case_label) && cin_iscase((char_u *)skipwhite((char *)get_cursor_line_ptr()), false)) { amount = get_indent(); @@ -2444,8 +2561,9 @@ int get_c_indent(void) } else { // Compensate for adding b_ind_open_extra later. 
amount -= curbuf->b_ind_open_extra; - if (amount < 0) + if (amount < 0) { amount = 0; + } } } @@ -2477,7 +2595,7 @@ int get_c_indent(void) // the usual amount relative to the conditional // that opens the block. curwin->w_cursor = cur_curpos; - for (;; ) { + for (;;) { curwin->w_cursor.lnum--; curwin->w_cursor.col = 0; @@ -2501,10 +2619,11 @@ int get_c_indent(void) /* nothing found (abuse curbuf->b_ind_maxparen as * limit) assume terminated line (i.e. a variable * initialization) */ - if (cont_amount > 0) + if (cont_amount > 0) { amount = cont_amount; - else if (!curbuf->b_ind_js) + } else if (!curbuf->b_ind_js) { amount += ind_continuation; + } break; } @@ -2528,8 +2647,9 @@ int get_c_indent(void) continue; } - if (cin_nocode(l)) + if (cin_nocode(l)) { continue; + } terminated = cin_isterminated(l, FALSE, TRUE); @@ -2547,14 +2667,16 @@ int get_c_indent(void) * declaration is split over multiple lines: * cin_isfuncdecl returns FALSE then. */ - if (terminated == ',') + if (terminated == ',') { break; + } /* if it is an enum declaration or an assignment, * we are done. */ - if (terminated != ';' && cin_isinit()) + if (terminated != ';' && cin_isinit()) { break; + } // nothing useful found if (terminated == 0 || terminated == '{') { @@ -2568,12 +2690,13 @@ int get_c_indent(void) // will take us back to the start of the line. 
// XXX trypos = NULL; - if (find_last_paren(l, '(', ')')) - trypos = find_match_paren( - curbuf->b_ind_maxparen); + if (find_last_paren(l, '(', ')')) { + trypos = find_match_paren(curbuf->b_ind_maxparen); + } - if (trypos == NULL && find_last_paren(l, '{', '}')) + if (trypos == NULL && find_last_paren(l, '{', '}')) { trypos = find_start_brace(); + } if (trypos != NULL) { curwin->w_cursor.lnum = trypos->lnum + 1; @@ -2587,15 +2710,17 @@ int get_c_indent(void) * int a, * b; */ - if (cont_amount > 0) + if (cont_amount > 0) { amount = cont_amount; - else + } else { amount += ind_continuation; + } } else if (lookfor == LOOKFOR_UNTERM) { - if (cont_amount > 0) + if (cont_amount > 0) { amount = cont_amount; - else + } else { amount += ind_continuation; + } } else { if (lookfor != LOOKFOR_TERM && lookfor != LOOKFOR_CPP_BASECLASS @@ -2612,13 +2737,15 @@ int get_c_indent(void) * Looking for C++ namespace, need to look further * back. */ - if (curwin->w_cursor.lnum == ourscope) + if (curwin->w_cursor.lnum == ourscope) { continue; + } if (curwin->w_cursor.lnum == 0 || curwin->w_cursor.lnum - < ourscope - FIND_NAMESPACE_LIM) + < ourscope - FIND_NAMESPACE_LIM) { break; + } l = get_cursor_line_ptr(); @@ -2646,8 +2773,9 @@ int get_c_indent(void) break; } - if (cin_nocode(l)) + if (cin_nocode(l)) { continue; + } } } break; @@ -2672,25 +2800,25 @@ int get_c_indent(void) if (iscase || cin_isscopedecl(l)) { /* we are only looking for cpp base class * declaration/initialization any longer */ - if (lookfor == LOOKFOR_CPP_BASECLASS) + if (lookfor == LOOKFOR_CPP_BASECLASS) { break; + } /* When looking for a "do" we are not interested in * labels. 
*/ - if (whilelevel > 0) + if (whilelevel > 0) { continue; + } - /* - * case xx: - * c = 99 + <- this indent plus continuation - **-> here; - */ - if (lookfor == LOOKFOR_UNTERM - || lookfor == LOOKFOR_ENUM_OR_INIT) { - if (cont_amount > 0) + // case xx: + // c = 99 + <- this indent plus continuation + // -> here; + if (lookfor == LOOKFOR_UNTERM || lookfor == LOOKFOR_ENUM_OR_INIT) { + if (cont_amount > 0) { amount = cont_amount; - else + } else { amount += ind_continuation; + } break; } @@ -2713,40 +2841,39 @@ int get_c_indent(void) n = get_indent_nolabel(curwin->w_cursor.lnum); // XXX - /* - * case xx: if (cond) <- line up with this if - * y = y + 1; - * -> s = 99; - * - * case xx: - * if (cond) <- line up with this line - * y = y + 1; - * -> s = 99; - */ + // case xx: if (cond) <- line up with this if + // y = y + 1; + // -> s = 99; + // + // case xx: + // if (cond) <- line up with this line + // y = y + 1; + // -> s = 99; if (lookfor == LOOKFOR_TERM) { - if (n) + if (n) { amount = n; + } - if (!lookfor_break) + if (!lookfor_break) { break; + } } - /* - * case xx: x = x + 1; <- line up with this x - * -> y = y + 1; - * - * case xx: if (cond) <- line up with this if - * -> y = y + 1; - */ + // case xx: x = x + 1; <- line up with this x + // -> y = y + 1; + // + // case xx: if (cond) <- line up with this if + // -> y = y + 1; if (n) { amount = n; l = after_label(get_cursor_line_ptr()); if (l != NULL && cin_is_cinword(l)) { - if (theline[0] == '{') + if (theline[0] == '{') { amount += curbuf->b_ind_open_extra; - else + } else { amount += curbuf->b_ind_level + curbuf->b_ind_no_brace; + } } break; } @@ -2755,8 +2882,8 @@ int get_c_indent(void) * Try to get the indent of a statement before the switch * label. If nothing is found, line up relative to the * switch label. 
- * break; <- may line up with this line - * case xx: + * break; <- may line up with this line + * case xx: * -> y = 1; */ scope_amount = get_indent() + (iscase // XXX @@ -2785,8 +2912,9 @@ int get_c_indent(void) */ if (!curbuf->b_ind_js && cin_islabel()) { l = after_label(get_cursor_line_ptr()); - if (l == NULL || cin_nocode(l)) + if (l == NULL || cin_nocode(l)) { continue; + } } /* @@ -2811,10 +2939,11 @@ int get_c_indent(void) } if (n) { if (lookfor == LOOKFOR_UNTERM) { - if (cont_amount > 0) + if (cont_amount > 0) { amount = cont_amount; - else + } else { amount += ind_continuation; + } } else if (theline[0] == '{') { // Need to find start of the declaration. lookfor = LOOKFOR_UNTERM; @@ -2829,10 +2958,11 @@ int get_c_indent(void) /* only look, whether there is a cpp base class * declaration or initialization before the opening brace. */ - if (cin_isterminated(l, TRUE, FALSE)) + if (cin_isterminated(l, true, false)) { break; - else + } else { continue; + } } /* @@ -2841,7 +2971,7 @@ int get_c_indent(void) * there is another unterminated statement behind, eg: * 123, * sizeof - * here + * here * Otherwise check whether it is an enumeration or structure * initialisation (not indented) or a variable declaration * (indented). @@ -2849,7 +2979,7 @@ int get_c_indent(void) terminated = cin_isterminated(l, FALSE, TRUE); if (js_cur_has_key) { - js_cur_has_key = false; // only check the first line + js_cur_has_key = false; // only check the first line if (curbuf->b_ind_js && terminated == ',') { // For Javascript we might be inside an object: // key: something, <- align with this @@ -2893,8 +3023,8 @@ int get_c_indent(void) } // If we're in the middle of a paren thing, Go back to the line // that starts it so we can get the right prevailing indent - // if ( foo && - // bar ) + // if ( foo && + // bar ) // Position the cursor over the rightmost paren, so that // matching it will take us back to the start of the line. 
@@ -2910,15 +3040,16 @@ int get_c_indent(void) // If we are looking for ',', we also look for matching // braces. if (trypos == NULL && terminated == ',' - && find_last_paren(l, '{', '}')) + && find_last_paren(l, '{', '}')) { trypos = find_start_brace(); + } if (trypos != NULL) { /* * Check if we are on a case label now. This is * handled above. * case xx: if ( asdf && - * asdf) + * asdf) */ curwin->w_cursor = *trypos; l = get_cursor_line_ptr(); @@ -2933,22 +3064,23 @@ int get_c_indent(void) * Skip over continuation lines to find the one to get the * indent from * char *usethis = "bla\ - * bla", + * bla", * here; */ if (terminated == ',') { while (curwin->w_cursor.lnum > 1) { l = ml_get(curwin->w_cursor.lnum - 1); - if (*l == NUL || l[STRLEN(l) - 1] != '\\') + if (*l == NUL || l[STRLEN(l) - 1] != '\\') { break; - --curwin->w_cursor.lnum; + } + curwin->w_cursor.lnum--; curwin->w_cursor.col = 0; } } /* * Get indent and pointer to text for current line, - * ignoring any jump label. XXX + * ignoring any jump label. XXX */ if (curbuf->b_ind_js) { cur_amount = get_indent(); @@ -2958,9 +3090,9 @@ int get_c_indent(void) /* * If this is just above the line we are indenting, and it * starts with a '{', line it up with this line. - * while (not) - * -> { - * } + * while (not) + * -> { + * } */ if (terminated != ',' && lookfor != LOOKFOR_TERM && theline[0] == '{') { @@ -2969,8 +3101,8 @@ int get_c_indent(void) * Only add b_ind_open_extra when the current line * doesn't start with a '{', which must have a match * in the same line (scope is the same). 
Probably: - * { 1, 2 }, - * -> { 3, 4 } + * { 1, 2 }, + * -> { 3, 4 } */ if (*skipwhite((char *)l) != '{') { amount += curbuf->b_ind_open_extra; @@ -2997,29 +3129,31 @@ int get_c_indent(void) // -> here; if (lookfor == LOOKFOR_UNTERM || lookfor == LOOKFOR_ENUM_OR_INIT) { - if (cont_amount > 0) + if (cont_amount > 0) { amount = cont_amount; - else + } else { amount += ind_continuation; + } break; } /* * If this is just above the line we are indenting, we * are finished. - * while (not) - * -> here; + * while (not) + * -> here; * Otherwise this indent can be used when the line * before this is terminated. - * yyy; - * if (stat) - * while (not) - * xxx; - * -> here; + * yyy; + * if (stat) + * while (not) + * xxx; + * -> here; */ amount = cur_amount; - if (theline[0] == '{') + if (theline[0] == '{') { amount += curbuf->b_ind_open_extra; + } if (lookfor != LOOKFOR_TERM) { amount += curbuf->b_ind_level + curbuf->b_ind_no_brace; @@ -3030,14 +3164,15 @@ int get_c_indent(void) * Special trick: when expecting the while () after a * do, line up with the while() * do - * x = 1; + * x = 1; * -> here */ l = (char_u *)skipwhite((char *)get_cursor_line_ptr()); if (cin_isdo(l)) { - if (whilelevel == 0) + if (whilelevel == 0) { break; - --whilelevel; + } + whilelevel--; } /* @@ -3050,14 +3185,16 @@ int get_c_indent(void) /* If we're looking at "} else", let's make sure we * find the opening brace of the enclosing scope, * not the one from "if () {". */ - if (*l == '}') + if (*l == '}') { curwin->w_cursor.col = (colnr_T)(l - get_cursor_line_ptr()) + 1; + } if ((trypos = find_start_brace()) == NULL || find_match(LOOKFOR_IF, trypos->lnum) - == FAIL) + == FAIL) { break; + } } } /* @@ -3071,8 +3208,8 @@ int get_c_indent(void) * Found two unterminated lines on a row, line up with * the last one. 
* c = 99 + - * 100 + - * -> here; + * 100 + + * -> here; */ if (lookfor == LOOKFOR_UNTERM) { // When line ends in a comma add extra indent @@ -3089,8 +3226,9 @@ int get_c_indent(void) * opening brace or we are looking just for * enumerations/initializations. */ if (terminated == ',') { - if (curbuf->b_ind_cpp_baseclass == 0) + if (curbuf->b_ind_cpp_baseclass == 0) { break; + } lookfor = LOOKFOR_CPP_BASECLASS; continue; @@ -3104,15 +3242,15 @@ int get_c_indent(void) } else { // Found first unterminated line on a row, may // line up with this line, remember its indent - // 100 + // NOLINT(whitespace/tab) - // -> here; // NOLINT(whitespace/tab) + // 100 + // NOLINT(whitespace/tab) + // -> here; // NOLINT(whitespace/tab) l = get_cursor_line_ptr(); amount = cur_amount; n = (int)STRLEN(l); if (terminated == ',' && (*skipwhite((char *)l) == ']' - || (n >=2 && l[n - 2] == ']'))) { + || (n >= 2 && l[n - 2] == ']'))) { break; } @@ -3160,7 +3298,7 @@ int get_c_indent(void) && *l != NUL && l[STRLEN(l) - 1] == '\\') { // XXX - cont_amount = cin_get_equal_amount( curwin->w_cursor.lnum); + cont_amount = cin_get_equal_amount(curwin->w_cursor.lnum); } if (lookfor != LOOKFOR_TERM && lookfor != LOOKFOR_JS_KEY @@ -3180,16 +3318,17 @@ int get_c_indent(void) /* * Found an unterminated line after a while ();, line up * with the last one. - * while (cond); - * 100 + <- line up with this one - * -> here; + * while (cond); + * 100 + <- line up with this one + * -> here; */ if (lookfor == LOOKFOR_UNTERM || lookfor == LOOKFOR_ENUM_OR_INIT) { - if (cont_amount > 0) + if (cont_amount > 0) { amount = cont_amount; - else + } else { amount += ind_continuation; + } break; } @@ -3235,35 +3374,37 @@ int get_c_indent(void) /* * Found a terminated line above an unterminated line. Add * the amount for a continuation line. 
- * x = 1; - * y = foo + - * -> here; + * x = 1; + * y = foo + + * -> here; * or - * int x = 1; - * int foo, - * -> here; + * int x = 1; + * int foo, + * -> here; */ if (lookfor == LOOKFOR_UNTERM || lookfor == LOOKFOR_ENUM_OR_INIT) { - if (cont_amount > 0) + if (cont_amount > 0) { amount = cont_amount; - else + } else { amount += ind_continuation; + } break; } /* * Found a terminated line above a terminated line or "if" * etc. line. Use the amount of the line below us. - * x = 1; x = 1; - * if (asdf) y = 2; - * while (asdf) ->here; - * here; + * x = 1; x = 1; + * if (asdf) y = 2; + * while (asdf) ->here; + * here; * ->foo; */ if (lookfor == LOOKFOR_TERM) { - if (!lookfor_break && whilelevel == 0) + if (!lookfor_break && whilelevel == 0) { break; + } } /* * First line above the one we're indenting is terminated. @@ -3276,20 +3417,17 @@ int get_c_indent(void) * that matching it will take us back to the start of * the line. Helps for: * func(asdr, - * asdfasdf); + * asdfasdf); * here; */ term_again: l = get_cursor_line_ptr(); if (find_last_paren(l, '(', ')') - && (trypos = find_match_paren( - curbuf->b_ind_maxparen)) != NULL) { - /* - * Check if we are on a case label now. This is - * handled above. - * case xx: if ( asdf && - * asdf) - */ + && (trypos = find_match_paren(curbuf->b_ind_maxparen)) != NULL) { + // Check if we are on a case label now. This is + // handled above. + // case xx: if ( asdf && + // asdf) curwin->w_cursor = *trypos; l = get_cursor_line_ptr(); if (cin_iscase(l, false) || cin_isscopedecl(l)) { @@ -3302,10 +3440,10 @@ term_again: /* When aligning with the case statement, don't align * with a statement after it. 
* case 1: { <-- don't use this { position - * stat; + * stat; * } * case 2: - * stat; + * stat; * } */ iscase = curbuf->b_ind_keep_case_label && cin_iscase(l, false); @@ -3316,8 +3454,9 @@ term_again: */ amount = skip_label(curwin->w_cursor.lnum, &l); - if (theline[0] == '{') + if (theline[0] == '{') { amount += curbuf->b_ind_open_extra; + } // See remark above: "Only add b_ind_open_extra.." l = (char_u *)skipwhite((char *)l); if (*l == '{') { @@ -3329,7 +3468,7 @@ term_again: * When a terminated line starts with "else" skip to * the matching "if": * else 3; - * indent this; + * indent this; * Need to use the scope of this "else". XXX * If whilelevel != 0 continue looking for a "do {". */ @@ -3339,8 +3478,9 @@ term_again: && whilelevel == 0) { if ((trypos = find_start_brace()) == NULL || find_match(LOOKFOR_IF, trypos->lnum) - == FAIL) + == FAIL) { break; + } continue; } @@ -3355,9 +3495,10 @@ term_again: // if not "else {" check for terminated again // but skip block for "} else {" l = cin_skipcomment(get_cursor_line_ptr()); - if (*l == '}' || !cin_iselse(l)) + if (*l == '}' || !cin_iselse(l)) { goto term_again; - ++curwin->w_cursor.lnum; + } + curwin->w_cursor.lnum++; curwin->w_cursor.col = 0; } } @@ -3390,8 +3531,8 @@ term_again: // of a function if (theline[0] == '{') { - amount = curbuf->b_ind_first_open; - goto theend; + amount = curbuf->b_ind_first_open; + goto theend; } /* * If the NEXT line is a function declaration, the current @@ -3401,14 +3542,13 @@ term_again: * contains { or }: "void f() {\n if (1)" */ if (cur_curpos.lnum < curbuf->b_ml.ml_line_count - && !cin_nocode(theline) - && vim_strchr(theline, '{') == NULL - && vim_strchr(theline, '}') == NULL - && !cin_ends_in(theline, (char_u *)":", NULL) - && !cin_ends_in(theline, (char_u *)",", NULL) - && cin_isfuncdecl(NULL, cur_curpos.lnum + 1, - cur_curpos.lnum + 1) - && !cin_isterminated(theline, false, true)) { + && !cin_nocode(theline) + && vim_strchr(theline, '{') == NULL + && vim_strchr(theline, '}') 
== NULL + && !cin_ends_in(theline, (char_u *)":", NULL) + && !cin_ends_in(theline, (char_u *)",", NULL) + && cin_isfuncdecl(NULL, cur_curpos.lnum + 1, cur_curpos.lnum + 1) + && !cin_isterminated(theline, false, true)) { amount = curbuf->b_ind_func_type; goto theend; } @@ -3451,8 +3591,9 @@ term_again: continue; } - if (cin_nocode(l)) + if (cin_nocode(l)) { continue; + } /* * If the previous line ends in ',', use one level of @@ -3477,23 +3618,26 @@ term_again: /* For a line ending in ',' that is a continuation line go * back to the first line with a backslash: * char *foo = "bla\ - * bla", + * bla", * here; */ while (n == 0 && curwin->w_cursor.lnum > 1) { l = ml_get(curwin->w_cursor.lnum - 1); - if (*l == NUL || l[STRLEN(l) - 1] != '\\') + if (*l == NUL || l[STRLEN(l) - 1] != '\\') { break; - --curwin->w_cursor.lnum; + } + curwin->w_cursor.lnum--; curwin->w_cursor.col = 0; } amount = get_indent(); // XXX - if (amount == 0) + if (amount == 0) { amount = cin_first_id_amount(); - if (amount == 0) + } + if (amount == 0) { amount = ind_continuation; + } break; } @@ -3514,14 +3658,14 @@ term_again: break; } - /* (matching {) - * If the previous line ends on '};' (maybe followed by - * comments) align at column 0. For example: - * char *string_array[] = { "foo", - * / * x * / "b};ar" }; / * foobar * / - */ - if (cin_ends_in(l, (char_u *)"};", NULL)) + // (matching {) + // If the previous line ends on '};' (maybe followed by + // comments) align at column 0. 
For example: + // char *string_array[] = { "foo", + // / * x * / "b};ar" }; / * foobar * / + if (cin_ends_in(l, (char_u *)"};", NULL)) { break; + } // If the previous line ends on '[' we are probably in an // array constant: @@ -3548,8 +3692,9 @@ term_again: } } if (curwin->w_cursor.lnum > 0 - && cin_ends_in(look, (char_u *)"}", NULL)) + && cin_ends_in(look, (char_u *)"}", NULL)) { break; + } curwin->w_cursor = curpos_save; } @@ -3574,8 +3719,9 @@ term_again: if (cin_ends_in(l, (char_u *)";", NULL)) { l = ml_get(curwin->w_cursor.lnum - 1); if (cin_ends_in(l, (char_u *)",", NULL) - || (*l != NUL && l[STRLEN(l) - 1] == '\\')) + || (*l != NUL && l[STRLEN(l) - 1] == '\\')) { break; + } l = get_cursor_line_ptr(); } @@ -3588,8 +3734,9 @@ term_again: */ (void)find_last_paren(l, '(', ')'); - if ((trypos = find_match_paren(curbuf->b_ind_maxparen)) != NULL) + if ((trypos = find_match_paren(curbuf->b_ind_maxparen)) != NULL) { curwin->w_cursor = *trypos; + } amount = get_indent(); // XXX break; } @@ -3599,26 +3746,28 @@ term_again: amount += curbuf->b_ind_comment; } - /* add extra indent if the previous line ended in a backslash: - * "asdfasdf\ - * here"; - * char *foo = "asdf\ - * here"; - */ + + // add extra indent if the previous line ended in a backslash: + // "asdfasdf{backslash} + // here"; + // char *foo = "asdf{backslash} + // here"; if (cur_curpos.lnum > 1) { l = ml_get(cur_curpos.lnum - 1); if (*l != NUL && l[STRLEN(l) - 1] == '\\') { cur_amount = cin_get_equal_amount(cur_curpos.lnum - 1); - if (cur_amount > 0) + if (cur_amount > 0) { amount = cur_amount; - else if (cur_amount == 0) + } else if (cur_amount == 0) { amount += ind_continuation; + } } } theend: - if (amount < 0) + if (amount < 0) { amount = 0; + } laterend: // put the cursor back where it belongs @@ -3673,16 +3822,18 @@ static int find_match(int lookfor, linenr_T ourscope) * back than the one enclosing the else, we're * out of luck too. 
*/ - if (theirscope->lnum < ourscope) + if (theirscope->lnum < ourscope) { break; + } /* * and if they're enclosed in a *deeper* brace, * then we can ignore it because it's in a * different scope... */ - if (theirscope->lnum > ourscope) + if (theirscope->lnum > ourscope) { continue; + } /* * if it was an "else" (that's not an "else if") @@ -3692,8 +3843,9 @@ static int find_match(int lookfor, linenr_T ourscope) look = cin_skipcomment(get_cursor_line_ptr()); if (cin_iselse(look)) { mightbeif = cin_skipcomment(look + 4); - if (!cin_isif(mightbeif)) - ++elselevel; + if (!cin_isif(mightbeif)) { + elselevel++; // NOLINT(readability/braces) + } continue; } @@ -3714,8 +3866,9 @@ static int find_match(int lookfor, linenr_T ourscope) * When looking for an "if" ignore "while"s that * get in the way. */ - if (elselevel == 0 && lookfor == LOOKFOR_IF) + if (elselevel == 0 && lookfor == LOOKFOR_IF) { whilelevel = 0; + } } // If it's a "do" decrement whilelevel @@ -3740,8 +3893,9 @@ static int find_match(int lookfor, linenr_T ourscope) */ void do_c_expr_indent(void) { - if (*curbuf->b_p_inde != NUL) + if (*curbuf->b_p_inde != NUL) { fixthisline(get_expr_indent); - else + } else { fixthisline(get_c_indent); + } } diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 2052493e3f..118e344641 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -1,8 +1,6 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. 
PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -// uncrustify:off - /* * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() */ @@ -18,21 +16,21 @@ #include <stdbool.h> #include <string.h> -#include "nvim/vim.h" #include "nvim/ascii.h" -#include "nvim/regexp.h" #include "nvim/charset.h" #include "nvim/eval.h" #include "nvim/eval/userfunc.h" #include "nvim/ex_cmds2.h" +#include "nvim/garray.h" #include "nvim/mark.h" #include "nvim/memline.h" #include "nvim/memory.h" #include "nvim/message.h" #include "nvim/os/input.h" #include "nvim/plines.h" -#include "nvim/garray.h" +#include "nvim/regexp.h" #include "nvim/strings.h" +#include "nvim/vim.h" #ifdef REGEXP_DEBUG // show/save debugging data when BT engine is used @@ -62,15 +60,17 @@ typedef void (*(*fptr_T)(int *, int))(void); static int no_Magic(int x) { - if (is_Magic(x)) + if (is_Magic(x)) { return un_Magic(x); + } return x; } static int toggle_Magic(int x) { - if (is_Magic(x)) + if (is_Magic(x)) { return un_Magic(x); + } return Magic(x); } @@ -88,13 +88,12 @@ static int toggle_Magic(int x) #define IEMSG_RET_NULL(m) return (iemsg(m), rc_did_emsg = true, (void *)NULL) #define EMSG_RET_FAIL(m) return (emsg(m), rc_did_emsg = true, FAIL) #define EMSG2_RET_NULL(m, c) \ - return (semsg((m), (c) ? "" : "\\"), rc_did_emsg = true, (void *)NULL) + return (semsg((m), (c) ? "" : "\\"), rc_did_emsg = true, (void *)NULL) #define EMSG3_RET_NULL(m, c, a) \ - return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = true, (void *)NULL) + return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = true, (void *)NULL) #define EMSG2_RET_FAIL(m, c) \ - return (semsg((m), (c) ? "" : "\\"), rc_did_emsg = true, FAIL) -#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_( \ - "E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) + return (semsg((m), (c) ? 
"" : "\\"), rc_did_emsg = true, FAIL) +#define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) #define MAX_LIMIT (32767L << 16L) @@ -128,30 +127,32 @@ static char_u e_regexp_number_after_dot_pos_search[] /// Return MULTI_MULT if c is a multi "multi" operator. static int re_multi_type(int c) { - if (c == Magic('@') || c == Magic('=') || c == Magic('?')) + if (c == Magic('@') || c == Magic('=') || c == Magic('?')) { return MULTI_ONE; - if (c == Magic('*') || c == Magic('+') || c == Magic('{')) + } + if (c == Magic('*') || c == Magic('+') || c == Magic('{')) { return MULTI_MULT; + } return NOT_MULTI; } -static char_u *reg_prev_sub = NULL; +static char_u *reg_prev_sub = NULL; /* * REGEXP_INRANGE contains all characters which are always special in a [] * range after '\'. * REGEXP_ABBR contains all characters which act as abbreviations after '\'. * These are: - * \n - New line (NL). - * \r - Carriage Return (CR). - * \t - Tab (TAB). - * \e - Escape (ESC). - * \b - Backspace (Ctrl_H). + * \n - New line (NL). + * \r - Carriage Return (CR). + * \t - Tab (TAB). + * \e - Escape (ESC). + * \b - Backspace (Ctrl_H). 
* \d - Character code in decimal, eg \d123 - * \o - Character code in octal, eg \o80 - * \x - Character code in hex, eg \x4a - * \u - Multibyte character code, eg \u20ac - * \U - Long multibyte character code, eg \U12345678 + * \o - Character code in octal, eg \o80 + * \x - Character code in hex, eg \x4a + * \u - Multibyte character code, eg \u20ac + * \U - Long multibyte character code, eg \U12345678 */ static char_u REGEXP_INRANGE[] = "]^-n\\"; static char_u REGEXP_ABBR[] = "nrtebdoxuU"; @@ -163,10 +164,14 @@ static char_u REGEXP_ABBR[] = "nrtebdoxuU"; static int backslash_trans(int c) { switch (c) { - case 'r': return CAR; - case 't': return TAB; - case 'e': return ESC; - case 'b': return BS; + case 'r': + return CAR; + case 't': + return TAB; + case 'e': + return ESC; + case 'b': + return BS; } return c; } @@ -223,11 +228,12 @@ static int get_char_class(char_u **pp) int i; if ((*pp)[1] == ':') { - for (i = 0; i < (int)ARRAY_SIZE(class_names); ++i) + for (i = 0; i < (int)ARRAY_SIZE(class_names); i++) { if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0) { *pp += STRLEN(class_names[i]) + 2; return i; } + } } return CLASS_NONE; } @@ -253,41 +259,43 @@ static void init_class_tab(void) int i; static int done = false; - if (done) + if (done) { return; + } - for (i = 0; i < 256; ++i) { - if (i >= '0' && i <= '7') + for (i = 0; i < 256; i++) { + if (i >= '0' && i <= '7') { class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD; - else if (i >= '8' && i <= '9') + } else if (i >= '8' && i <= '9') { class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD; - else if (i >= 'a' && i <= 'f') + } else if (i >= 'a' && i <= 'f') { class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER; - else if (i >= 'g' && i <= 'z') + } else if (i >= 'g' && i <= 'z') { class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER; - else if (i >= 'A' && i <= 'F') + } else if (i >= 'A' && i <= 'F') { class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER; - else if (i >= 'G' && i <= 
'Z') + } else if (i >= 'G' && i <= 'Z') { class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER; - else if (i == '_') + } else if (i == '_') { class_tab[i] = RI_WORD + RI_HEAD; - else + } else { class_tab[i] = 0; + } } class_tab[' '] |= RI_WHITE; class_tab['\t'] |= RI_WHITE; done = true; } -# define ri_digit(c) ((c) < 0x100 && (class_tab[c] & RI_DIGIT)) -# define ri_hex(c) ((c) < 0x100 && (class_tab[c] & RI_HEX)) -# define ri_octal(c) ((c) < 0x100 && (class_tab[c] & RI_OCTAL)) -# define ri_word(c) ((c) < 0x100 && (class_tab[c] & RI_WORD)) -# define ri_head(c) ((c) < 0x100 && (class_tab[c] & RI_HEAD)) -# define ri_alpha(c) ((c) < 0x100 && (class_tab[c] & RI_ALPHA)) -# define ri_lower(c) ((c) < 0x100 && (class_tab[c] & RI_LOWER)) -# define ri_upper(c) ((c) < 0x100 && (class_tab[c] & RI_UPPER)) -# define ri_white(c) ((c) < 0x100 && (class_tab[c] & RI_WHITE)) +#define ri_digit(c) ((c) < 0x100 && (class_tab[c] & RI_DIGIT)) +#define ri_hex(c) ((c) < 0x100 && (class_tab[c] & RI_HEX)) +#define ri_octal(c) ((c) < 0x100 && (class_tab[c] & RI_OCTAL)) +#define ri_word(c) ((c) < 0x100 && (class_tab[c] & RI_WORD)) +#define ri_head(c) ((c) < 0x100 && (class_tab[c] & RI_HEAD)) +#define ri_alpha(c) ((c) < 0x100 && (class_tab[c] & RI_ALPHA)) +#define ri_lower(c) ((c) < 0x100 && (class_tab[c] & RI_LOWER)) +#define ri_upper(c) ((c) < 0x100 && (class_tab[c] & RI_UPPER)) +#define ri_white(c) ((c) < 0x100 && (class_tab[c] & RI_WHITE)) // flags for regflags #define RF_ICASE 1 // ignore case @@ -320,6 +328,8 @@ static int reg_strict; // "[abc" is illegal * META contains all characters that may be magic, except '^' and '$'. */ +// uncrustify:off + // META[] is used often enough to justify turning it into a table. static char_u META_flags[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -338,6 +348,8 @@ static char_u META_flags[] = { 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 }; +// uncrustify:on + static int curchr; // currently parsed character // Previous character. 
Note: prevchr is sometimes -1 when we are not at the // start, eg in /[ ^I]^ the pattern was never found even if it existed, @@ -389,7 +401,7 @@ static int get_equi_class(char_u **pp) { int c; int l = 1; - char_u *p = *pp; + char_u *p = *pp; if (p[1] == '=' && p[2] != NUL) { l = utfc_ptr2len((char *)p + 2); @@ -413,7 +425,7 @@ static int get_coll_element(char_u **pp) { int c; int l = 1; - char_u *p = *pp; + char_u *p = *pp; if (p[0] != NUL && p[1] == '.' && p[2] != NUL) { l = utfc_ptr2len((char *)p + 2); @@ -451,7 +463,7 @@ static char_u *skip_anyof(char_u *p) while (*p != NUL && *p != ']') { if ((l = utfc_ptr2len((char *)p)) > 1) { p += l; - } else if (*p == '-') { + } else if (*p == '-') { p++; if (*p != ']' && *p != NUL) { MB_PTR_ADV(p); @@ -488,12 +500,13 @@ static char_u *skip_anyof(char_u *p) char_u *skip_regexp(char_u *startp, int dirc, int magic, char_u **newp) { int mymagic; - char_u *p = startp; + char_u *p = startp; - if (magic) + if (magic) { mymagic = MAGIC_ON; - else + } else { mymagic = MAGIC_OFF; + } get_cpo_flags(); for (; p[0] != NUL; MB_PTR_ADV(p)) { @@ -503,9 +516,10 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char_u **newp) if ((p[0] == '[' && mymagic >= MAGIC_ON) || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF)) { p = skip_anyof(p + 1); - if (p[0] == NUL) + if (p[0] == NUL) { break; - } else if (p[0] == '\\' && p[1] != NUL) { + } + } else if (p[0] == '\\' && p[1] != NUL) { if (dirc == '?' && newp != NULL && p[1] == '?') { // change "\?" to "?", make a copy first. 
if (*newp == NULL) { @@ -686,8 +700,7 @@ static int peekchr(void) } } break; - case '\\': - { + case '\\': { int c = regparse[1]; if (c == NUL) { @@ -712,13 +725,11 @@ static int peekchr(void) * Handle abbreviations, like "\t" for TAB -- webb */ curchr = backslash_trans(c); - } else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) + } else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) { curchr = toggle_Magic(c); - else { - /* - * Next character can never be (made) magic? - * Then backslashing it won't do anything. - */ + } else { + // Next character can never be (made) magic? + // Then backslashing it won't do anything. curchr = utf_ptr2char((char *)regparse + 1); } break; @@ -804,7 +815,7 @@ static void ungetchr(void) * Return -1 if there is no valid hex number. * The position is updated: * blahblah\%x20asdf - * before-^ ^-after + * before-^ ^-after * The parameter controls the maximum number of input characters. This will be * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. */ @@ -816,15 +827,17 @@ static int64_t gethexchrs(int maxinputlen) for (i = 0; i < maxinputlen; ++i) { c = regparse[0]; - if (!ascii_isxdigit(c)) + if (!ascii_isxdigit(c)) { break; + } nr <<= 4; nr |= hex2nr(c); ++regparse; } - if (i == 0) + if (i == 0) { return -1; + } return nr; } @@ -840,16 +853,18 @@ static int64_t getdecchrs(void) for (i = 0;; ++i) { c = regparse[0]; - if (c < '0' || c > '9') + if (c < '0' || c > '9') { break; + } nr *= 10; nr += c - '0'; regparse++; curchr = -1; // no longer valid } - if (i == 0) + if (i == 0) { return -1; + } return nr; } @@ -859,7 +874,7 @@ static int64_t getdecchrs(void) * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't * treat 8 or 9 as recognised characters. 
Position is updated: * blahblah\%o210asdf - * before-^ ^-after + * before-^ ^-after */ static int64_t getoctchrs(void) { @@ -869,15 +884,17 @@ static int64_t getoctchrs(void) for (i = 0; i < 3 && nr < 040; i++) { // -V536 c = regparse[0]; - if (c < '0' || c > '7') + if (c < '0' || c > '7') { break; + } nr <<= 3; nr |= hex2nr(c); ++regparse; } - if (i == 0) + if (i == 0) { return -1; + } return nr; } @@ -891,7 +908,7 @@ static int64_t getoctchrs(void) static int read_limits(long *minval, long *maxval) { int reverse = false; - char_u *first_char; + char_u *first_char; long tmp; if (*regparse == '-') { @@ -943,7 +960,7 @@ static int read_limits(long *minval, long *maxval) // Sometimes need to save a copy of a line. Since alloc()/free() is very // slow, we keep one allocated piece of memory and only re-allocate it when // it's too small. It's freed in bt_regexec_both() when finished. -static char_u *reg_tofree = NULL; +static char_u *reg_tofree = NULL; static unsigned reg_tofreelen; // Structure used to store the execution state of the regex engine. 
@@ -1039,8 +1056,8 @@ static char_u *reg_getline(linenr_T lnum) return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false); } -static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning -static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches +static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning +static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches static lpos_T reg_startzpos[NSUBEXP]; // idem, beginning pos static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos @@ -1063,8 +1080,9 @@ static reg_extmatch_T *make_extmatch(void) */ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) { - if (em != NULL) + if (em != NULL) { em->refcnt++; + } return em; } @@ -1077,8 +1095,9 @@ void unref_extmatch(reg_extmatch_T *em) int i; if (em != NULL && --em->refcnt <= 0) { - for (i = 0; i < NSUBEXP; ++i) + for (i = 0; i < NSUBEXP; i++) { xfree(em->matches[i]); + } xfree(em); } } @@ -1087,9 +1106,8 @@ void unref_extmatch(reg_extmatch_T *em) static int reg_prev_class(void) { if (rex.input > rex.line) { - return mb_get_class_tab( - rex.input - 1 - utf_head_off(rex.line, rex.input - 1), - rex.reg_buf->b_chartab); + return mb_get_class_tab(rex.input - 1 - utf_head_off(rex.line, rex.input - 1), + rex.reg_buf->b_chartab); } return -1; } @@ -1147,10 +1165,12 @@ static bool reg_match_visual(void) } else if (mode == Ctrl_V) { getvvcol(wp, &top, &start, NULL, &end); getvvcol(wp, &bot, &start2, NULL, &end2); - if (start2 < start) + if (start2 < start) { start = start2; - if (end2 > end) + } + if (end2 > end) { end = end2; + } if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL) { end = MAXCOL; } @@ -1175,7 +1195,7 @@ static bool reg_match_visual(void) */ static int prog_magic_wrong(void) { - regprog_T *prog; + regprog_T *prog; prog = REG_MULTI ? 
rex.reg_mmatch->regprog : rex.reg_match->regprog; if (prog->engine == &nfa_regengine) { @@ -1241,21 +1261,18 @@ static void reg_nextline(void) * If "bytelen" is not NULL, it is set to the byte length of the match in the * last line. */ -static int match_with_backref( - linenr_T start_lnum, - colnr_T start_col, - linenr_T end_lnum, - colnr_T end_col, - int *bytelen) +static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, + colnr_T end_col, int *bytelen) { linenr_T clnum = start_lnum; colnr_T ccol = start_col; int len; - char_u *p; + char_u *p; - if (bytelen != NULL) + if (bytelen != NULL) { *bytelen = 0; - for (;; ) { + } + for (;;) { // Since getting one line may invalidate the other, need to make copy. // Slow! if (rex.line != reg_tofree) { @@ -1275,10 +1292,11 @@ static int match_with_backref( p = reg_getline(clnum); assert(p); - if (clnum == end_lnum) + if (clnum == end_lnum) { len = end_col - ccol; - else + } else { len = (int)STRLEN(p + ccol); + } if (cstrncmp(p + ccol, rex.input, &len) != 0) { return RA_NOMATCH; // doesn't match @@ -1295,12 +1313,14 @@ static int match_with_backref( // Advance to next line. reg_nextline(); - if (bytelen != NULL) + if (bytelen != NULL) { *bytelen = 0; - ++clnum; + } + clnum++; ccol = 0; - if (got_int) + if (got_int) { return RA_FAIL; + } } // found a match! 
Note that rex.line may now point to a copy of the line, @@ -1324,7 +1344,7 @@ typedef struct { } decomp_T; // 0xfb20 - 0xfb4f -static decomp_T decomp_table[0xfb4f-0xfb20+1] = +static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] = { { 0x5e2, 0, 0 }, // 0xfb20 alt ayin { 0x5d0, 0, 0 }, // 0xfb21 alt alef @@ -1407,7 +1427,7 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n) // if it failed and it's utf8 and we want to combineignore: if (result != 0 && rex.reg_icombine) { - char_u *str1, *str2; + char_u *str1, *str2; int c1, c2, c11, c12; int junk; @@ -1435,8 +1455,9 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n) } } result = c2 - c1; - if (result == 0) + if (result == 0) { *n = (int)(str2 - s2); + } } return result; @@ -1533,9 +1554,9 @@ static fptr_T do_Lower(int *d, int c) */ char_u *regtilde(char_u *source, int magic, bool preview) { - char_u *newsub = source; - char_u *tmpsub; - char_u *p; + char_u *newsub = source; + char_u *tmpsub; + char_u *p; int len; int prevlen; @@ -1605,8 +1626,7 @@ static regsubmatch_T rsm; // can only be used when can_f_submatch is true /// Put the submatches in "argv[argskip]" which is a list passed into /// call_func() by vim_regsub_both(). -static int fill_submatch_list(int argc FUNC_ATTR_UNUSED, typval_T *argv, - int argskip, int argcount) +static int fill_submatch_list(int argc FUNC_ATTR_UNUSED, typval_T *argv, int argskip, int argcount) FUNC_ATTR_NONNULL_ALL { typval_T *listarg = argv + argskip; @@ -1658,8 +1678,8 @@ static void clear_submatch_list(staticList10_T *sl) /// references invalid! /// /// Returns the size of the replacement, including terminating NUL. 
-int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, - int copy, int magic, int backslash) +int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, int copy, int magic, + int backslash) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; @@ -1685,8 +1705,8 @@ int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, return result; } -int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, - int copy, int magic, int backslash) +int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int copy, + int magic, int backslash) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; @@ -1713,12 +1733,12 @@ int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *de return result; } -static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, - int copy, int magic, int backslash) +static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, + int backslash) { - char_u *src; - char_u *dst; - char_u *s; + char_u *src; + char_u *dst; + char_u *s; int c; int cc; int no = -1; @@ -1737,8 +1757,9 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, emsg(_(e_null)); return 0; } - if (prog_magic_wrong()) + if (prog_magic_wrong()) { return 0; + } src = source; dst = dest; @@ -1851,11 +1872,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, rsm = rsm_save; } } - } else + } else { while ((c = *src++) != NUL) { - if (c == '&' && magic) + if (c == '&' && magic) { no = 0; - else if (c == '\\' && *src != NUL) { + } else if (c == '\\' && *src != NUL) { if (*src == '&' && !magic) { ++src; no = 0; @@ -1863,16 +1884,21 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, no = *src++ - '0'; } else if (vim_strchr((char_u *)"uUlLeE", *src)) { switch (*src++) { - case 'u': func_one = (fptr_T)do_upper; + case 'u': + func_one = 
(fptr_T)do_upper; continue; - case 'U': func_all = (fptr_T)do_Upper; + case 'U': + func_all = (fptr_T)do_Upper; continue; - case 'l': func_one = (fptr_T)do_lower; + case 'l': + func_one = (fptr_T)do_lower; continue; - case 'L': func_all = (fptr_T)do_Lower; + case 'L': + func_all = (fptr_T)do_Lower; continue; case 'e': - case 'E': func_one = func_all = (fptr_T)NULL; + case 'E': + func_one = func_all = (fptr_T)NULL; continue; } } @@ -1894,12 +1920,16 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, if (c == '\\' && *src != NUL) { // Check for abbreviations -- webb switch (*src) { - case 'r': c = CAR; ++src; break; - case 'n': c = NL; ++src; break; - case 't': c = TAB; ++src; break; + case 'r': + c = CAR; ++src; break; + case 'n': + c = NL; ++src; break; + case 't': + c = TAB; ++src; break; // Oh no! \e already has meaning in subst pat :-( // case 'e': c = ESC; ++src; break; - case 'b': c = Ctrl_H; ++src; break; + case 'b': + c = Ctrl_H; ++src; break; // If "backslash" is true the backslash will be removed // later. Used to insert a literal CR. 
@@ -1967,7 +1997,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } } if (s != NULL) { - for (;; ) { + for (;;) { if (len == 0) { if (REG_MULTI) { if (rex.reg_mmatch->endpos[no].lnum == clnum) { @@ -2042,8 +2072,10 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, no = -1; } } - if (copy) + } + if (copy) { *dst = NUL; + } exit: return (int)((dst - dest) + 1 - num_escaped); @@ -2078,13 +2110,14 @@ static char_u *reg_getline_submatch(linenr_T lnum) */ char_u *reg_submatch(int no) { - char_u *retval = NULL; - char_u *s; + char_u *retval = NULL; + char_u *s; int round; linenr_T lnum; - if (!can_f_submatch || no < 0) + if (!can_f_submatch || no < 0) { return NULL; + } if (rsm.sm_match == NULL) { ssize_t len; @@ -2123,12 +2156,14 @@ char_u *reg_submatch(int no) lnum++; while (lnum < rsm.sm_mmatch->endpos[no].lnum) { s = reg_getline_submatch(lnum++); - if (round == 2) + if (round == 2) { STRCPY(retval + len, s); + } len += STRLEN(s); - if (round == 2) + if (round == 2) { retval[len] = '\n'; - ++len; + } + len++; } if (round == 2) { STRNCPY(retval + len, reg_getline_submatch(lnum), @@ -2252,9 +2287,9 @@ static char_u regname[][30] = { */ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) { - regprog_T *prog = NULL; - char_u *expr = expr_arg; - int save_called_emsg; + regprog_T *prog = NULL; + char_u *expr = expr_arg; + int save_called_emsg; regexp_engine = p_re; @@ -2273,8 +2308,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) regname[newengine]); #endif } else { - emsg(_( - "E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); + emsg(_("E864: \\%#= can only be followed by 0, 1, or 2. 
The automatic engine will be used ")); regexp_engine = AUTOMATIC_ENGINE; } } @@ -2292,7 +2326,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) called_emsg = false; if (regexp_engine != BACKTRACKING_ENGINE) { prog = nfa_regengine.regcomp(expr, - re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0)); + re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0)); } else { prog = bt_regengine.regcomp(expr, re_flags); } @@ -2339,8 +2373,9 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) */ void vim_regfree(regprog_T *prog) { - if (prog != NULL) + if (prog != NULL) { prog->engine->regfree(prog); + } } @@ -2462,16 +2497,17 @@ bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) /// Note: "rmp->regprog" may be freed and changed, even set to NULL. /// Uses curbuf for line count and 'iskeyword'. /// -/// Return zero if there is no match. Return number of lines contained in the -/// match otherwise. -long vim_regexec_multi( - regmmatch_T *rmp, - win_T *win, // window in which to search or NULL - buf_T *buf, // buffer in which to search - linenr_T lnum, // nr of line to start looking for match - colnr_T col, // column to start looking for match - proftime_T *tm, // timeout limit or NULL - int *timed_out) // flag is set when timeout limit reached +/// @param win window in which to search or NULL +/// @param buf buffer in which to search +/// @param lnum nr of line to start looking for match +/// @param col column to start looking for match +/// @param tm timeout limit or NULL +/// @param timed_out flag is set when timeout limit reached +/// +/// @return zero if there is no match. Return number of lines contained in the +/// match otherwise. 
+long vim_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, + proftime_T *tm, int *timed_out) FUNC_ATTR_NONNULL_ARG(1) { regexec_T rex_save; diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index ed03bb6172..e5964e6f88 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -1,8 +1,6 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -// uncrustify:off - /* * * Backtracking regular expression implementation. @@ -19,22 +17,22 @@ * * END NOTICE * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. + * Copyright (c) 1986 by University of Toronto. + * Written by Henry Spencer. Not derived from licensed software. * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: + * Permission is granted to anyone to use this software for any + * purpose on any computer system, and to redistribute it freely, + * subject to the following restrictions: * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. + * 1. The author is not responsible for the consequences of use of + * this software, no matter how awful, even if they arise + * from defects in it. * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. + * 2. The origin of this software must not be misrepresented, either + * by explicit claim or by omission. * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. + * 3. Altered versions must be plainly marked as such, and must not + * be misrepresented as being the original software. 
* * Beware that some of this code is subtly aware of the way operator * precedence is structured in regular expressions. Serious changes in @@ -51,12 +49,12 @@ * compile to execute that permits the execute phase to run lots faster on * simple cases. They are: * - * regstart char that must begin a match; NUL if none obvious; Can be a - * multi-byte character. - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * regmlen length of regmust string - * regflags RF_ values or'ed together + * regstart char that must begin a match; NUL if none obvious; Can be a + * multi-byte character. + * reganch is the match anchored (at beginning-of-line only)? + * regmust string (pointer into program) that match must include, or NULL + * regmlen length of regmust string + * regflags RF_ values or'ed together * * Regstart and reganch permit very fast decisions on suitable starting points * for a match, cutting down the work a lot. Regmust permits fast rejection @@ -75,7 +73,7 @@ * plus a "next" pointer, possibly plus an operand. "Next" pointers of * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" * pointer with a BRANCH on both ends of it is connecting two alternatives. - * (Here we have one of the subtle syntax dependencies: an individual BRANCH + * (Here we have one of the subtle syntax dependencies: an individual BRANCH * (as opposed to a collection of them) is never concatenated with anything * because of operator precedence). The "next" pointer of a BRACES_COMPLEX * node points to the node after the stuff to be repeated. @@ -85,52 +83,52 @@ * (NB this is *not* a tree structure: the tail of the branch connects to the * thing following the set of BRANCHes.) 
* - * pattern is coded like: + * pattern is coded like: * - * +-----------------+ - * | V - * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END - * | ^ | ^ - * +------+ +----------+ + * +-----------------+ + * | V + * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END + * | ^ | ^ + * +------+ +----------+ * * - * +------------------+ - * V | - * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +---------------+ | - * +---------------------------------------------+ + * +------------------+ + * V | + * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END + * | | ^ ^ + * | +---------------+ | + * +---------------------------------------------+ * * - * +----------------------+ - * V | - * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +-----------+ | - * +--------------------------------------------------+ + * +----------------------+ + * V | + * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END + * | | ^ ^ + * | +-----------+ | + * +--------------------------------------------------+ * * - * +-------------------------+ - * V | - * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END - * | | ^ - * | +----------------+ - * +-----------------------------------------------+ + * +-------------------------+ + * V | + * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END + * | | ^ + * | +----------------+ + * +-----------------------------------------------+ * * - * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END - * | | ^ ^ - * | +----------------+ | - * +--------------------------------+ + * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END + * | | ^ ^ + * | +----------------+ | + * +--------------------------------+ * - * +---------+ - * | V - * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END - * | | | | ^ ^ - * | | | +-----+ | - * | | +----------------+ | - * | +---------------------------+ | - * 
+------------------------------------------------------+ + * +---------+ + * | V + * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END + * | | | | ^ ^ + * | | | +-----+ | + * | | +----------------+ | + * | +---------------------------+ | + * +------------------------------------------------------+ * * They all start with a BRANCH for "\|" alternatives, even when there is only * one alternative. @@ -141,8 +139,8 @@ #include <stdbool.h> #include <string.h> -#include "nvim/regexp.h" #include "nvim/garray.h" +#include "nvim/regexp.h" /* * The opcodes are: @@ -220,10 +218,10 @@ // end of match. #define BACKREF 100 // -109 node Match same string again \1-\9. -# define ZOPEN 110 // -119 Mark this point in input as start of - // \z( … \) subexpr. -# define ZCLOSE 120 // -129 Analogous to ZOPEN. -# define ZREF 130 // -139 node Match external submatch \z1-\z9 +#define ZOPEN 110 // -119 Mark this point in input as start of + // \z( … \) subexpr. +#define ZCLOSE 120 // -129 Analogous to ZOPEN. +#define ZREF 130 // -139 node Match external submatch \z1-\z9 #define BRACE_COMPLEX 140 // -149 node Match nodes between m & n times @@ -267,8 +265,8 @@ static int brace_count[10]; ///< Current counts for complex brace repeats static int one_exactly = false; ///< only do one char for EXACTLY // When making changes to classchars also change nfa_classcodes. -static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; -static int classcodes[] = { +static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; +static int classcodes[] = { ANY, IDENT, SIDENT, KWORD, SKWORD, FNAME, SFNAME, PRINT, SPRINT, WHITE, NWHITE, DIGIT, NDIGIT, @@ -282,7 +280,7 @@ static int classcodes[] = { * When regcode is set to this value, code is not emitted and size is computed * instead. */ -#define JUST_CALC_SIZE ((char_u *) -1) +#define JUST_CALC_SIZE ((char_u *)-1) // Values for rs_state in regitem_T. 
typedef enum regstate_E { @@ -299,7 +297,7 @@ typedef enum regstate_E { RS_BEHIND1, // BEHIND / NOBEHIND matching rest RS_BEHIND2, // BEHIND / NOBEHIND matching behind part RS_STAR_LONG, // STAR/PLUS/BRACE_SIMPLE longest match - RS_STAR_SHORT // STAR/PLUS/BRACE_SIMPLE shortest match + RS_STAR_SHORT, // STAR/PLUS/BRACE_SIMPLE shortest match } regstate_T; /* @@ -307,34 +305,29 @@ typedef enum regstate_E { * restored after trying a match. Used by reg_save() and reg_restore(). * Also stores the length of "backpos". */ -typedef struct -{ - union - { - char_u *ptr; // rex.input pointer, for single-line regexp - lpos_T pos; // rex.input pos, for multi-line regexp +typedef struct { + union { + char_u *ptr; // rex.input pointer, for single-line regexp + lpos_T pos; // rex.input pos, for multi-line regexp } rs_u; - int rs_len; + int rs_len; } regsave_T; // struct to save start/end pointer/position in for \(\) -typedef struct -{ - union - { - char_u *ptr; - lpos_T pos; - } se_u; +typedef struct { + union { + char_u *ptr; + lpos_T pos; + } se_u; } save_se_T; // used for BEHIND and NOBEHIND matching -typedef struct regbehind_S -{ - regsave_T save_after; - regsave_T save_behind; - int save_need_clear_subexpr; - save_se_T save_start[NSUBEXP]; - save_se_T save_end[NSUBEXP]; +typedef struct regbehind_S { + regsave_T save_after; + regsave_T save_behind; + int save_need_clear_subexpr; + save_se_T save_start[NSUBEXP]; + save_se_T save_end[NSUBEXP]; } regbehind_T; /* @@ -343,35 +336,31 @@ typedef struct regbehind_S * Before it may be another type of item, depending on rs_state, to remember * more things. 
*/ -typedef struct regitem_S -{ - regstate_T rs_state; // what we are doing, one of RS_ above - short rs_no; // submatch nr or BEHIND/NOBEHIND - char_u *rs_scan; // current node in program - union - { - save_se_T sesave; - regsave_T regsave; +typedef struct regitem_S { + regstate_T rs_state; // what we are doing, one of RS_ above + int16_t rs_no; // submatch nr or BEHIND/NOBEHIND + char_u *rs_scan; // current node in program + union { + save_se_T sesave; + regsave_T regsave; } rs_un; // room for saving rex.input } regitem_T; // used for STAR, PLUS and BRACE_SIMPLE matching -typedef struct regstar_S -{ - int nextb; // next byte - int nextb_ic; // next byte reverse case - long count; - long minval; - long maxval; +typedef struct regstar_S { + int nextb; // next byte + int nextb_ic; // next byte reverse case + long count; + long minval; + long maxval; } regstar_T; // used to store input position when a BACK was encountered, so that we now if // we made any progress since the last time. -typedef struct backpos_S -{ - char_u *bp_scan; // "scan" where BACK was encountered - regsave_T bp_pos; // last input position +typedef struct backpos_S { + char_u *bp_scan; // "scan" where BACK was encountered + regsave_T bp_pos; // last input position } backpos_T; /* @@ -397,34 +386,34 @@ static regsave_T behind_pos; * This makes it fast while not keeping a lot of memory allocated. * A three times speed increase was observed when using many simple patterns. */ -#define REGSTACK_INITIAL 2048 -#define BACKPOS_INITIAL 64 +#define REGSTACK_INITIAL 2048 +#define BACKPOS_INITIAL 64 /* * Opcode notes: * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. 
This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. + * BRANCH The set of branches constituting a single choice are hooked + * together with their "next" pointers, since precedence prevents + * anything being concatenated to any individual branch. The + * "next" pointer of the last BRANCH in a choice points to the + * thing following the whole choice. This is also where the + * final "next" pointer of each individual branch points; each + * branch starts with the operand node of a BRANCH node. * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. + * BACK Normal "next" pointers all implicitly point forward; BACK + * exists to make loop structures possible. * - * STAR,PLUS '=', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. + * STAR,PLUS '=', and complex '*' and '+', are implemented as circular + * BRANCH structures using BACK. Simple cases (one character + * per match) are implemented with STAR and PLUS for speed + * and to minimize recursive plunges. * - * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX - * node, and defines the min and max limits to be used for that - * node. + * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX + * node, and defines the min and max limits to be used for that + * node. * - * MOPEN,MCLOSE ...are numbered at compile time. - * ZOPEN,ZCLOSE ...ditto + * MOPEN,MCLOSE ...are numbered at compile time. + * ZOPEN,ZCLOSE ...ditto */ /* @@ -442,7 +431,7 @@ static regsave_T behind_pos; #define OPERAND(p) ((p) + 3) // Obtain an operand that was stored as four bytes, MSB first. 
#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \ - + ((long)(p)[5] << 8) + (long)(p)[6]) + + ((long)(p)[5] << 8) + (long)(p)[6]) // Obtain a second operand stored as four bytes. #define OPERAND_MAX(p) OPERAND_MIN((p) + 4) // Obtain a second single-byte operand stored after a four bytes operand. @@ -451,14 +440,14 @@ static regsave_T behind_pos; static char_u *reg(int paren, int *flagp); #ifdef BT_REGEXP_DUMP -static void regdump(char_u *, bt_regprog_T *); +static void regdump(char_u *, bt_regprog_T *); #endif #ifdef REGEXP_DEBUG -static char_u *regprop(char_u *); +static char_u *regprop(char_u *); -static int regnarrate = 0; +static int regnarrate = 0; #endif #ifdef INCLUDE_GENERATED_DECLARATIONS @@ -469,15 +458,14 @@ static int regnarrate = 0; /* * Setup to parse the regexp. Used once to get the length and once to do it. */ -static void regcomp_start( - char_u *expr, - int re_flags) // see vim_regcomp() +static void regcomp_start(char_u *expr, int re_flags) // see vim_regcomp() { initchr(expr); - if (re_flags & RE_MAGIC) + if (re_flags & RE_MAGIC) { reg_magic = MAGIC_ON; - else + } else { reg_magic = MAGIC_OFF; + } reg_string = (re_flags & RE_STRING); reg_strict = (re_flags & RE_STRICT); get_cpo_flags(); @@ -508,10 +496,11 @@ static bool use_multibytecode(int c) */ static void regc(int b) { - if (regcode == JUST_CALC_SIZE) + if (regcode == JUST_CALC_SIZE) { regsize++; - else + } else { *regcode++ = b; + } } /* @@ -537,12 +526,36 @@ static void reg_equi_class(int c) { switch (c) { // Do not use '\300' style, it results in a negative number. 
- case 'A': case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc4: - case 0xc5: case 0x100: case 0x102: case 0x104: case 0x1cd: - case 0x1de: case 0x1e0: case 0x1fa: case 0x202: case 0x226: - case 0x23a: case 0x1e00: case 0x1ea0: case 0x1ea2: case 0x1ea4: - case 0x1ea6: case 0x1ea8: case 0x1eaa: case 0x1eac: case 0x1eae: - case 0x1eb0: case 0x1eb2: case 0x1eb4: case 0x1eb6: + case 'A': + case 0xc0: + case 0xc1: + case 0xc2: + case 0xc3: + case 0xc4: + case 0xc5: + case 0x100: + case 0x102: + case 0x104: + case 0x1cd: + case 0x1de: + case 0x1e0: + case 0x1fa: + case 0x202: + case 0x226: + case 0x23a: + case 0x1e00: + case 0x1ea0: + case 0x1ea2: + case 0x1ea4: + case 0x1ea6: + case 0x1ea8: + case 0x1eaa: + case 0x1eac: + case 0x1eae: + case 0x1eb0: + case 0x1eb2: + case 0x1eb4: + case 0x1eb6: regmbc('A'); regmbc(0xc0); regmbc(0xc1); regmbc(0xc2); regmbc(0xc3); regmbc(0xc4); regmbc(0xc5); regmbc(0x100); regmbc(0x102); regmbc(0x104); @@ -554,33 +567,71 @@ static void reg_equi_class(int c) regmbc(0x1eae); regmbc(0x1eb0); regmbc(0x1eb2); regmbc(0x1eb4); regmbc(0x1eb6); return; - case 'B': case 0x181: case 0x243: case 0x1e02: - case 0x1e04: case 0x1e06: + case 'B': + case 0x181: + case 0x243: + case 0x1e02: + case 0x1e04: + case 0x1e06: regmbc('B'); regmbc(0x181); regmbc(0x243); regmbc(0x1e02); regmbc(0x1e04); regmbc(0x1e06); return; - case 'C': case 0xc7: - case 0x106: case 0x108: case 0x10a: case 0x10c: case 0x187: - case 0x23b: case 0x1e08: case 0xa792: + case 'C': + case 0xc7: + case 0x106: + case 0x108: + case 0x10a: + case 0x10c: + case 0x187: + case 0x23b: + case 0x1e08: + case 0xa792: regmbc('C'); regmbc(0xc7); regmbc(0x106); regmbc(0x108); regmbc(0x10a); regmbc(0x10c); regmbc(0x187); regmbc(0x23b); regmbc(0x1e08); regmbc(0xa792); return; - case 'D': case 0x10e: case 0x110: case 0x18a: - case 0x1e0a: case 0x1e0c: case 0x1e0e: case 0x1e10: + case 'D': + case 0x10e: + case 0x110: + case 0x18a: + case 0x1e0a: + case 0x1e0c: + case 0x1e0e: + case 0x1e10: case 0x1e12: 
regmbc('D'); regmbc(0x10e); regmbc(0x110); regmbc(0x18a); regmbc(0x1e0a); regmbc(0x1e0c); regmbc(0x1e0e); regmbc(0x1e10); regmbc(0x1e12); return; - case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb: - case 0x112: case 0x114: case 0x116: case 0x118: case 0x11a: - case 0x204: case 0x206: case 0x228: case 0x246: case 0x1e14: - case 0x1e16: case 0x1e18: case 0x1e1a: case 0x1e1c: - case 0x1eb8: case 0x1eba: case 0x1ebc: case 0x1ebe: - case 0x1ec0: case 0x1ec2: case 0x1ec4: case 0x1ec6: + case 'E': + case 0xc8: + case 0xc9: + case 0xca: + case 0xcb: + case 0x112: + case 0x114: + case 0x116: + case 0x118: + case 0x11a: + case 0x204: + case 0x206: + case 0x228: + case 0x246: + case 0x1e14: + case 0x1e16: + case 0x1e18: + case 0x1e1a: + case 0x1e1c: + case 0x1eb8: + case 0x1eba: + case 0x1ebc: + case 0x1ebe: + case 0x1ec0: + case 0x1ec2: + case 0x1ec4: + case 0x1ec6: regmbc('E'); regmbc(0xc8); regmbc(0xc9); regmbc(0xca); regmbc(0xcb); regmbc(0x112); regmbc(0x114); regmbc(0x116); regmbc(0x118); @@ -591,30 +642,61 @@ static void reg_equi_class(int c) regmbc(0x1ebc); regmbc(0x1ebe); regmbc(0x1ec0); regmbc(0x1ec2); regmbc(0x1ec4); regmbc(0x1ec6); return; - case 'F': case 0x191: case 0x1e1e: case 0xa798: + case 'F': + case 0x191: + case 0x1e1e: + case 0xa798: regmbc('F'); regmbc(0x191); regmbc(0x1e1e); regmbc(0xa798); return; - case 'G': case 0x11c: case 0x11e: case 0x120: - case 0x122: case 0x193: case 0x1e4: case 0x1e6: - case 0x1f4: case 0x1e20: case 0xa7a0: + case 'G': + case 0x11c: + case 0x11e: + case 0x120: + case 0x122: + case 0x193: + case 0x1e4: + case 0x1e6: + case 0x1f4: + case 0x1e20: + case 0xa7a0: regmbc('G'); regmbc(0x11c); regmbc(0x11e); regmbc(0x120); regmbc(0x122); regmbc(0x193); regmbc(0x1e4); regmbc(0x1e6); regmbc(0x1f4); regmbc(0x1e20); regmbc(0xa7a0); return; - case 'H': case 0x124: case 0x126: case 0x21e: - case 0x1e22: case 0x1e24: case 0x1e26: - case 0x1e28: case 0x1e2a: case 0x2c67: + case 'H': + case 0x124: + case 0x126: + case 0x21e: + case 
0x1e22: + case 0x1e24: + case 0x1e26: + case 0x1e28: + case 0x1e2a: + case 0x2c67: regmbc('H'); regmbc(0x124); regmbc(0x126); regmbc(0x21e); regmbc(0x1e22); regmbc(0x1e24); regmbc(0x1e26); regmbc(0x1e28); regmbc(0x1e2a); regmbc(0x2c67); return; - case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf: - case 0x128: case 0x12a: case 0x12c: case 0x12e: - case 0x130: case 0x197: case 0x1cf: case 0x208: - case 0x20a: case 0x1e2c: case 0x1e2e: case 0x1ec8: + case 'I': + case 0xcc: + case 0xcd: + case 0xce: + case 0xcf: + case 0x128: + case 0x12a: + case 0x12c: + case 0x12e: + case 0x130: + case 0x197: + case 0x1cf: + case 0x208: + case 0x20a: + case 0x1e2c: + case 0x1e2e: + case 0x1ec8: case 0x1eca: regmbc('I'); regmbc(0xcc); regmbc(0xcd); regmbc(0xce); regmbc(0xcf); regmbc(0x128); @@ -623,44 +705,102 @@ static void reg_equi_class(int c) regmbc(0x208); regmbc(0x20a); regmbc(0x1e2c); regmbc(0x1e2e); regmbc(0x1ec8); regmbc(0x1eca); return; - case 'J': case 0x134: case 0x248: + case 'J': + case 0x134: + case 0x248: regmbc('J'); regmbc(0x134); regmbc(0x248); return; - case 'K': case 0x136: case 0x198: case 0x1e8: case 0x1e30: - case 0x1e32: case 0x1e34: case 0x2c69: case 0xa740: + case 'K': + case 0x136: + case 0x198: + case 0x1e8: + case 0x1e30: + case 0x1e32: + case 0x1e34: + case 0x2c69: + case 0xa740: regmbc('K'); regmbc(0x136); regmbc(0x198); regmbc(0x1e8); regmbc(0x1e30); regmbc(0x1e32); regmbc(0x1e34); regmbc(0x2c69); regmbc(0xa740); return; - case 'L': case 0x139: case 0x13b: case 0x13d: case 0x13f: - case 0x141: case 0x23d: case 0x1e36: case 0x1e38: - case 0x1e3a: case 0x1e3c: case 0x2c60: + case 'L': + case 0x139: + case 0x13b: + case 0x13d: + case 0x13f: + case 0x141: + case 0x23d: + case 0x1e36: + case 0x1e38: + case 0x1e3a: + case 0x1e3c: + case 0x2c60: regmbc('L'); regmbc(0x139); regmbc(0x13b); regmbc(0x13d); regmbc(0x13f); regmbc(0x141); regmbc(0x23d); regmbc(0x1e36); regmbc(0x1e38); regmbc(0x1e3a); regmbc(0x1e3c); regmbc(0x2c60); return; - case 'M': case 
0x1e3e: case 0x1e40: case 0x1e42: + case 'M': + case 0x1e3e: + case 0x1e40: + case 0x1e42: regmbc('M'); regmbc(0x1e3e); regmbc(0x1e40); regmbc(0x1e42); return; - case 'N': case 0xd1: - case 0x143: case 0x145: case 0x147: case 0x1f8: - case 0x1e44: case 0x1e46: case 0x1e48: case 0x1e4a: + case 'N': + case 0xd1: + case 0x143: + case 0x145: + case 0x147: + case 0x1f8: + case 0x1e44: + case 0x1e46: + case 0x1e48: + case 0x1e4a: case 0xa7a4: regmbc('N'); regmbc(0xd1); regmbc(0x143); regmbc(0x145); regmbc(0x147); regmbc(0x1f8); regmbc(0x1e44); regmbc(0x1e46); regmbc(0x1e48); regmbc(0x1e4a); regmbc(0xa7a4); return; - case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd6: - case 0xd8: case 0x14c: case 0x14e: case 0x150: case 0x19f: - case 0x1a0: case 0x1d1: case 0x1ea: case 0x1ec: case 0x1fe: - case 0x20c: case 0x20e: case 0x22a: case 0x22c: case 0x22e: - case 0x230: case 0x1e4c: case 0x1e4e: case 0x1e50: case 0x1e52: - case 0x1ecc: case 0x1ece: case 0x1ed0: case 0x1ed2: case 0x1ed4: - case 0x1ed6: case 0x1ed8: case 0x1eda: case 0x1edc: case 0x1ede: - case 0x1ee0: case 0x1ee2: + case 'O': + case 0xd2: + case 0xd3: + case 0xd4: + case 0xd5: + case 0xd6: + case 0xd8: + case 0x14c: + case 0x14e: + case 0x150: + case 0x19f: + case 0x1a0: + case 0x1d1: + case 0x1ea: + case 0x1ec: + case 0x1fe: + case 0x20c: + case 0x20e: + case 0x22a: + case 0x22c: + case 0x22e: + case 0x230: + case 0x1e4c: + case 0x1e4e: + case 0x1e50: + case 0x1e52: + case 0x1ecc: + case 0x1ece: + case 0x1ed0: + case 0x1ed2: + case 0x1ed4: + case 0x1ed6: + case 0x1ed8: + case 0x1eda: + case 0x1edc: + case 0x1ede: + case 0x1ee0: + case 0x1ee2: regmbc('O'); regmbc(0xd2); regmbc(0xd3); regmbc(0xd4); regmbc(0xd5); regmbc(0xd6); regmbc(0xd8); regmbc(0x14c); regmbc(0x14e); regmbc(0x150); @@ -675,47 +815,105 @@ static void reg_equi_class(int c) regmbc(0x1edc); regmbc(0x1ede); regmbc(0x1ee0); regmbc(0x1ee2); return; - case 'P': case 0x1a4: case 0x1e54: case 0x1e56: case 0x2c63: + case 'P': + case 0x1a4: + 
case 0x1e54: + case 0x1e56: + case 0x2c63: regmbc('P'); regmbc(0x1a4); regmbc(0x1e54); regmbc(0x1e56); regmbc(0x2c63); return; - case 'Q': case 0x24a: + case 'Q': + case 0x24a: regmbc('Q'); regmbc(0x24a); return; - case 'R': case 0x154: case 0x156: case 0x158: case 0x210: - case 0x212: case 0x24c: case 0x1e58: case 0x1e5a: - case 0x1e5c: case 0x1e5e: case 0x2c64: case 0xa7a6: + case 'R': + case 0x154: + case 0x156: + case 0x158: + case 0x210: + case 0x212: + case 0x24c: + case 0x1e58: + case 0x1e5a: + case 0x1e5c: + case 0x1e5e: + case 0x2c64: + case 0xa7a6: regmbc('R'); regmbc(0x154); regmbc(0x156); regmbc(0x210); regmbc(0x212); regmbc(0x158); regmbc(0x24c); regmbc(0x1e58); regmbc(0x1e5a); regmbc(0x1e5c); regmbc(0x1e5e); regmbc(0x2c64); regmbc(0xa7a6); return; - case 'S': case 0x15a: case 0x15c: case 0x15e: case 0x160: - case 0x218: case 0x1e60: case 0x1e62: case 0x1e64: - case 0x1e66: case 0x1e68: case 0x2c7e: case 0xa7a8: + case 'S': + case 0x15a: + case 0x15c: + case 0x15e: + case 0x160: + case 0x218: + case 0x1e60: + case 0x1e62: + case 0x1e64: + case 0x1e66: + case 0x1e68: + case 0x2c7e: + case 0xa7a8: regmbc('S'); regmbc(0x15a); regmbc(0x15c); regmbc(0x15e); regmbc(0x160); regmbc(0x218); regmbc(0x1e60); regmbc(0x1e62); regmbc(0x1e64); regmbc(0x1e66); regmbc(0x1e68); regmbc(0x2c7e); regmbc(0xa7a8); return; - case 'T': case 0x162: case 0x164: case 0x166: case 0x1ac: - case 0x1ae: case 0x21a: case 0x23e: case 0x1e6a: case 0x1e6c: - case 0x1e6e: case 0x1e70: + case 'T': + case 0x162: + case 0x164: + case 0x166: + case 0x1ac: + case 0x1ae: + case 0x21a: + case 0x23e: + case 0x1e6a: + case 0x1e6c: + case 0x1e6e: + case 0x1e70: regmbc('T'); regmbc(0x162); regmbc(0x164); regmbc(0x166); regmbc(0x1ac); regmbc(0x23e); regmbc(0x1ae); regmbc(0x21a); regmbc(0x1e6a); regmbc(0x1e6c); regmbc(0x1e6e); regmbc(0x1e70); return; - case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc: - case 0x168: case 0x16a: case 0x16c: case 0x16e: - case 0x170: case 0x172: case 0x1af: case 
0x1d3: - case 0x1d5: case 0x1d7: case 0x1d9: case 0x1db: - case 0x214: case 0x216: case 0x244: case 0x1e72: - case 0x1e74: case 0x1e76: case 0x1e78: case 0x1e7a: - case 0x1ee4: case 0x1ee6: case 0x1ee8: case 0x1eea: - case 0x1eec: case 0x1eee: case 0x1ef0: + case 'U': + case 0xd9: + case 0xda: + case 0xdb: + case 0xdc: + case 0x168: + case 0x16a: + case 0x16c: + case 0x16e: + case 0x170: + case 0x172: + case 0x1af: + case 0x1d3: + case 0x1d5: + case 0x1d7: + case 0x1d9: + case 0x1db: + case 0x214: + case 0x216: + case 0x244: + case 0x1e72: + case 0x1e74: + case 0x1e76: + case 0x1e78: + case 0x1e7a: + case 0x1ee4: + case 0x1ee6: + case 0x1ee8: + case 0x1eea: + case 0x1eec: + case 0x1eee: + case 0x1ef0: regmbc('U'); regmbc(0xd9); regmbc(0xda); regmbc(0xdb); regmbc(0xdc); regmbc(0x168); regmbc(0x16a); regmbc(0x16c); regmbc(0x16e); @@ -728,41 +926,92 @@ static void reg_equi_class(int c) regmbc(0x1ee8); regmbc(0x1eea); regmbc(0x1eec); regmbc(0x1eee); regmbc(0x1ef0); return; - case 'V': case 0x1b2: case 0x1e7c: case 0x1e7e: + case 'V': + case 0x1b2: + case 0x1e7c: + case 0x1e7e: regmbc('V'); regmbc(0x1b2); regmbc(0x1e7c); regmbc(0x1e7e); return; - case 'W': case 0x174: case 0x1e80: case 0x1e82: - case 0x1e84: case 0x1e86: case 0x1e88: + case 'W': + case 0x174: + case 0x1e80: + case 0x1e82: + case 0x1e84: + case 0x1e86: + case 0x1e88: regmbc('W'); regmbc(0x174); regmbc(0x1e80); regmbc(0x1e82); regmbc(0x1e84); regmbc(0x1e86); regmbc(0x1e88); return; - case 'X': case 0x1e8a: case 0x1e8c: + case 'X': + case 0x1e8a: + case 0x1e8c: regmbc('X'); regmbc(0x1e8a); regmbc(0x1e8c); return; - case 'Y': case 0xdd: - case 0x176: case 0x178: case 0x1b3: case 0x232: case 0x24e: - case 0x1e8e: case 0x1ef2: case 0x1ef6: case 0x1ef4: case 0x1ef8: + case 'Y': + case 0xdd: + case 0x176: + case 0x178: + case 0x1b3: + case 0x232: + case 0x24e: + case 0x1e8e: + case 0x1ef2: + case 0x1ef6: + case 0x1ef4: + case 0x1ef8: regmbc('Y'); regmbc(0xdd); regmbc(0x176); regmbc(0x178); regmbc(0x1b3); 
regmbc(0x232); regmbc(0x24e); regmbc(0x1e8e); regmbc(0x1ef2); regmbc(0x1ef4); regmbc(0x1ef6); regmbc(0x1ef8); return; - case 'Z': case 0x179: case 0x17b: case 0x17d: case 0x1b5: - case 0x1e90: case 0x1e92: case 0x1e94: case 0x2c6b: + case 'Z': + case 0x179: + case 0x17b: + case 0x17d: + case 0x1b5: + case 0x1e90: + case 0x1e92: + case 0x1e94: + case 0x2c6b: regmbc('Z'); regmbc(0x179); regmbc(0x17b); regmbc(0x17d); regmbc(0x1b5); regmbc(0x1e90); regmbc(0x1e92); regmbc(0x1e94); regmbc(0x2c6b); return; - case 'a': case 0xe0: case 0xe1: case 0xe2: - case 0xe3: case 0xe4: case 0xe5: case 0x101: case 0x103: - case 0x105: case 0x1ce: case 0x1df: case 0x1e1: case 0x1fb: - case 0x201: case 0x203: case 0x227: case 0x1d8f: case 0x1e01: - case 0x1e9a: case 0x1ea1: case 0x1ea3: case 0x1ea5: - case 0x1ea7: case 0x1ea9: case 0x1eab: case 0x1ead: - case 0x1eaf: case 0x1eb1: case 0x1eb3: case 0x1eb5: - case 0x1eb7: case 0x2c65: + case 'a': + case 0xe0: + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0x101: + case 0x103: + case 0x105: + case 0x1ce: + case 0x1df: + case 0x1e1: + case 0x1fb: + case 0x201: + case 0x203: + case 0x227: + case 0x1d8f: + case 0x1e01: + case 0x1e9a: + case 0x1ea1: + case 0x1ea3: + case 0x1ea5: + case 0x1ea7: + case 0x1ea9: + case 0x1eab: + case 0x1ead: + case 0x1eaf: + case 0x1eb1: + case 0x1eb3: + case 0x1eb5: + case 0x1eb7: + case 0x2c65: regmbc('a'); regmbc(0xe0); regmbc(0xe1); regmbc(0xe2); regmbc(0xe3); regmbc(0xe4); regmbc(0xe5); regmbc(0x101); regmbc(0x103); @@ -775,36 +1024,80 @@ static void reg_equi_class(int c) regmbc(0x1eaf); regmbc(0x1eb1); regmbc(0x1eb3); regmbc(0x1eb5); regmbc(0x1eb7); regmbc(0x2c65); return; - case 'b': case 0x180: case 0x253: case 0x1d6c: case 0x1d80: - case 0x1e03: case 0x1e05: case 0x1e07: + case 'b': + case 0x180: + case 0x253: + case 0x1d6c: + case 0x1d80: + case 0x1e03: + case 0x1e05: + case 0x1e07: regmbc('b'); regmbc(0x180); regmbc(0x253); regmbc(0x1d6c); regmbc(0x1d80); regmbc(0x1e03); 
regmbc(0x1e05); regmbc(0x1e07); return; - case 'c': case 0xe7: - case 0x107: case 0x109: case 0x10b: case 0x10d: case 0x188: - case 0x23c: case 0x1e09: case 0xa793: case 0xa794: + case 'c': + case 0xe7: + case 0x107: + case 0x109: + case 0x10b: + case 0x10d: + case 0x188: + case 0x23c: + case 0x1e09: + case 0xa793: + case 0xa794: regmbc('c'); regmbc(0xe7); regmbc(0x107); regmbc(0x109); regmbc(0x10b); regmbc(0x10d); regmbc(0x188); regmbc(0x23c); regmbc(0x1e09); regmbc(0xa793); regmbc(0xa794); return; - case 'd': case 0x10f: case 0x111: case 0x257: case 0x1d6d: - case 0x1d81: case 0x1d91: case 0x1e0b: case 0x1e0d: - case 0x1e0f: case 0x1e11: case 0x1e13: + case 'd': + case 0x10f: + case 0x111: + case 0x257: + case 0x1d6d: + case 0x1d81: + case 0x1d91: + case 0x1e0b: + case 0x1e0d: + case 0x1e0f: + case 0x1e11: + case 0x1e13: regmbc('d'); regmbc(0x10f); regmbc(0x111); regmbc(0x257); regmbc(0x1d6d); regmbc(0x1d81); regmbc(0x1d91); regmbc(0x1e0b); regmbc(0x1e0d); regmbc(0x1e0f); regmbc(0x1e11); regmbc(0x1e13); return; - case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb: - case 0x113: case 0x115: case 0x117: case 0x119: - case 0x11b: case 0x205: case 0x207: case 0x229: - case 0x247: case 0x1d92: case 0x1e15: case 0x1e17: - case 0x1e19: case 0x1e1b: case 0x1eb9: case 0x1ebb: - case 0x1e1d: case 0x1ebd: case 0x1ebf: case 0x1ec1: - case 0x1ec3: case 0x1ec5: case 0x1ec7: + case 'e': + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0x113: + case 0x115: + case 0x117: + case 0x119: + case 0x11b: + case 0x205: + case 0x207: + case 0x229: + case 0x247: + case 0x1d92: + case 0x1e15: + case 0x1e17: + case 0x1e19: + case 0x1e1b: + case 0x1eb9: + case 0x1ebb: + case 0x1e1d: + case 0x1ebd: + case 0x1ebf: + case 0x1ec1: + case 0x1ec3: + case 0x1ec5: + case 0x1ec7: regmbc('e'); regmbc(0xe8); regmbc(0xe9); regmbc(0xea); regmbc(0xeb); regmbc(0x113); regmbc(0x115); regmbc(0x117); regmbc(0x119); @@ -816,31 +1109,66 @@ static void reg_equi_class(int c) regmbc(0x1ec1); 
regmbc(0x1ec3); regmbc(0x1ec5); regmbc(0x1ec7); return; - case 'f': case 0x192: case 0x1d6e: case 0x1d82: - case 0x1e1f: case 0xa799: + case 'f': + case 0x192: + case 0x1d6e: + case 0x1d82: + case 0x1e1f: + case 0xa799: regmbc('f'); regmbc(0x192); regmbc(0x1d6e); regmbc(0x1d82); regmbc(0x1e1f); regmbc(0xa799); return; - case 'g': case 0x11d: case 0x11f: case 0x121: case 0x123: - case 0x1e5: case 0x1e7: case 0x260: case 0x1f5: case 0x1d83: - case 0x1e21: case 0xa7a1: + case 'g': + case 0x11d: + case 0x11f: + case 0x121: + case 0x123: + case 0x1e5: + case 0x1e7: + case 0x260: + case 0x1f5: + case 0x1d83: + case 0x1e21: + case 0xa7a1: regmbc('g'); regmbc(0x11d); regmbc(0x11f); regmbc(0x121); regmbc(0x123); regmbc(0x1e5); regmbc(0x1e7); regmbc(0x1f5); regmbc(0x260); regmbc(0x1d83); regmbc(0x1e21); regmbc(0xa7a1); return; - case 'h': case 0x125: case 0x127: case 0x21f: case 0x1e23: - case 0x1e25: case 0x1e27: case 0x1e29: case 0x1e2b: - case 0x1e96: case 0x2c68: case 0xa795: + case 'h': + case 0x125: + case 0x127: + case 0x21f: + case 0x1e23: + case 0x1e25: + case 0x1e27: + case 0x1e29: + case 0x1e2b: + case 0x1e96: + case 0x2c68: + case 0xa795: regmbc('h'); regmbc(0x125); regmbc(0x127); regmbc(0x21f); regmbc(0x1e23); regmbc(0x1e25); regmbc(0x1e27); regmbc(0x1e29); regmbc(0x1e2b); regmbc(0x1e96); regmbc(0x2c68); regmbc(0xa795); return; - case 'i': case 0xec: case 0xed: case 0xee: case 0xef: - case 0x129: case 0x12b: case 0x12d: case 0x12f: - case 0x1d0: case 0x209: case 0x20b: case 0x268: - case 0x1d96: case 0x1e2d: case 0x1e2f: case 0x1ec9: + case 'i': + case 0xec: + case 0xed: + case 0xee: + case 0xef: + case 0x129: + case 0x12b: + case 0x12d: + case 0x12f: + case 0x1d0: + case 0x209: + case 0x20b: + case 0x268: + case 0x1d96: + case 0x1e2d: + case 0x1e2f: + case 0x1ec9: case 0x1ecb: regmbc('i'); regmbc(0xec); regmbc(0xed); regmbc(0xee); regmbc(0xef); regmbc(0x129); @@ -849,33 +1177,66 @@ static void reg_equi_class(int c) regmbc(0x268); regmbc(0x1d96); regmbc(0x1e2d); 
regmbc(0x1e2f); regmbc(0x1ec9); regmbc(0x1ecb); return; - case 'j': case 0x135: case 0x1f0: case 0x249: + case 'j': + case 0x135: + case 0x1f0: + case 0x249: regmbc('j'); regmbc(0x135); regmbc(0x1f0); regmbc(0x249); return; - case 'k': case 0x137: case 0x199: case 0x1e9: - case 0x1d84: case 0x1e31: case 0x1e33: case 0x1e35: - case 0x2c6a: case 0xa741: + case 'k': + case 0x137: + case 0x199: + case 0x1e9: + case 0x1d84: + case 0x1e31: + case 0x1e33: + case 0x1e35: + case 0x2c6a: + case 0xa741: regmbc('k'); regmbc(0x137); regmbc(0x199); regmbc(0x1e9); regmbc(0x1d84); regmbc(0x1e31); regmbc(0x1e33); regmbc(0x1e35); regmbc(0x2c6a); regmbc(0xa741); return; - case 'l': case 0x13a: case 0x13c: case 0x13e: - case 0x140: case 0x142: case 0x19a: case 0x1e37: - case 0x1e39: case 0x1e3b: case 0x1e3d: case 0x2c61: + case 'l': + case 0x13a: + case 0x13c: + case 0x13e: + case 0x140: + case 0x142: + case 0x19a: + case 0x1e37: + case 0x1e39: + case 0x1e3b: + case 0x1e3d: + case 0x2c61: regmbc('l'); regmbc(0x13a); regmbc(0x13c); regmbc(0x13e); regmbc(0x140); regmbc(0x142); regmbc(0x19a); regmbc(0x1e37); regmbc(0x1e39); regmbc(0x1e3b); regmbc(0x1e3d); regmbc(0x2c61); return; - case 'm': case 0x1d6f: case 0x1e3f: case 0x1e41: case 0x1e43: + case 'm': + case 0x1d6f: + case 0x1e3f: + case 0x1e41: + case 0x1e43: regmbc('m'); regmbc(0x1d6f); regmbc(0x1e3f); regmbc(0x1e41); regmbc(0x1e43); return; - case 'n': case 0xf1: case 0x144: case 0x146: case 0x148: - case 0x149: case 0x1f9: case 0x1d70: case 0x1d87: - case 0x1e45: case 0x1e47: case 0x1e49: case 0x1e4b: + case 'n': + case 0xf1: + case 0x144: + case 0x146: + case 0x148: + case 0x149: + case 0x1f9: + case 0x1d70: + case 0x1d87: + case 0x1e45: + case 0x1e47: + case 0x1e49: + case 0x1e4b: case 0xa7a5: regmbc('n'); regmbc(0xf1); regmbc(0x144); regmbc(0x146); regmbc(0x148); regmbc(0x149); @@ -883,15 +1244,44 @@ static void reg_equi_class(int c) regmbc(0x1e45); regmbc(0x1e47); regmbc(0x1e49); regmbc(0x1e4b); regmbc(0xa7a5); return; - case 
'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5: - case 0xf6: case 0xf8: case 0x14d: case 0x14f: case 0x151: - case 0x1a1: case 0x1d2: case 0x1eb: case 0x1ed: case 0x1ff: - case 0x20d: case 0x20f: case 0x22b: case 0x22d: case 0x22f: - case 0x231: case 0x275: case 0x1e4d: case 0x1e4f: - case 0x1e51: case 0x1e53: case 0x1ecd: case 0x1ecf: - case 0x1ed1: case 0x1ed3: case 0x1ed5: case 0x1ed7: - case 0x1ed9: case 0x1edb: case 0x1edd: case 0x1edf: - case 0x1ee1: case 0x1ee3: + case 'o': + case 0xf2: + case 0xf3: + case 0xf4: + case 0xf5: + case 0xf6: + case 0xf8: + case 0x14d: + case 0x14f: + case 0x151: + case 0x1a1: + case 0x1d2: + case 0x1eb: + case 0x1ed: + case 0x1ff: + case 0x20d: + case 0x20f: + case 0x22b: + case 0x22d: + case 0x22f: + case 0x231: + case 0x275: + case 0x1e4d: + case 0x1e4f: + case 0x1e51: + case 0x1e53: + case 0x1ecd: + case 0x1ecf: + case 0x1ed1: + case 0x1ed3: + case 0x1ed5: + case 0x1ed7: + case 0x1ed9: + case 0x1edb: + case 0x1edd: + case 0x1edf: + case 0x1ee1: + case 0x1ee3: regmbc('o'); regmbc(0xf2); regmbc(0xf3); regmbc(0xf4); regmbc(0xf5); regmbc(0xf6); regmbc(0xf8); regmbc(0x14d); regmbc(0x14f); @@ -906,18 +1296,37 @@ static void reg_equi_class(int c) regmbc(0x1edb); regmbc(0x1edd); regmbc(0x1edf); regmbc(0x1ee1); regmbc(0x1ee3); return; - case 'p': case 0x1a5: case 0x1d71: case 0x1d88: case 0x1d7d: - case 0x1e55: case 0x1e57: + case 'p': + case 0x1a5: + case 0x1d71: + case 0x1d88: + case 0x1d7d: + case 0x1e55: + case 0x1e57: regmbc('p'); regmbc(0x1a5); regmbc(0x1d71); regmbc(0x1d7d); regmbc(0x1d88); regmbc(0x1e55); regmbc(0x1e57); return; - case 'q': case 0x24b: case 0x2a0: + case 'q': + case 0x24b: + case 0x2a0: regmbc('q'); regmbc(0x24b); regmbc(0x2a0); return; - case 'r': case 0x155: case 0x157: case 0x159: case 0x211: - case 0x213: case 0x24d: case 0x27d: case 0x1d72: case 0x1d73: - case 0x1d89: case 0x1e59: case 0x1e5b: case 0x1e5d: case 0x1e5f: + case 'r': + case 0x155: + case 0x157: + case 0x159: + case 0x211: + case 0x213: + 
case 0x24d: + case 0x27d: + case 0x1d72: + case 0x1d73: + case 0x1d89: + case 0x1e59: + case 0x1e5b: + case 0x1e5d: + case 0x1e5f: case 0xa7a7: regmbc('r'); regmbc(0x155); regmbc(0x157); regmbc(0x159); regmbc(0x211); regmbc(0x213); @@ -926,32 +1335,81 @@ static void reg_equi_class(int c) regmbc(0x1e5b); regmbc(0x1e5d); regmbc(0x1e5f); regmbc(0xa7a7); return; - case 's': case 0x15b: case 0x15d: case 0x15f: case 0x161: - case 0x1e61: case 0x219: case 0x23f: case 0x1d74: case 0x1d8a: - case 0x1e63: case 0x1e65: case 0x1e67: case 0x1e69: case 0xa7a9: + case 's': + case 0x15b: + case 0x15d: + case 0x15f: + case 0x161: + case 0x1e61: + case 0x219: + case 0x23f: + case 0x1d74: + case 0x1d8a: + case 0x1e63: + case 0x1e65: + case 0x1e67: + case 0x1e69: + case 0xa7a9: regmbc('s'); regmbc(0x15b); regmbc(0x15d); regmbc(0x15f); regmbc(0x161); regmbc(0x23f); regmbc(0x219); regmbc(0x1d74); regmbc(0x1d8a); regmbc(0x1e61); regmbc(0x1e63); regmbc(0x1e65); regmbc(0x1e67); regmbc(0x1e69); regmbc(0xa7a9); return; - case 't': case 0x163: case 0x165: case 0x167: case 0x1ab: - case 0x1ad: case 0x21b: case 0x288: case 0x1d75: case 0x1e6b: - case 0x1e6d: case 0x1e6f: case 0x1e71: case 0x1e97: case 0x2c66: + case 't': + case 0x163: + case 0x165: + case 0x167: + case 0x1ab: + case 0x1ad: + case 0x21b: + case 0x288: + case 0x1d75: + case 0x1e6b: + case 0x1e6d: + case 0x1e6f: + case 0x1e71: + case 0x1e97: + case 0x2c66: regmbc('t'); regmbc(0x163); regmbc(0x165); regmbc(0x167); regmbc(0x1ab); regmbc(0x21b); regmbc(0x1ad); regmbc(0x288); regmbc(0x1d75); regmbc(0x1e6b); regmbc(0x1e6d); regmbc(0x1e6f); regmbc(0x1e71); regmbc(0x1e97); regmbc(0x2c66); return; - case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc: - case 0x169: case 0x16b: case 0x16d: case 0x16f: - case 0x171: case 0x173: case 0x1b0: case 0x1d4: - case 0x1d6: case 0x1d8: case 0x1da: case 0x1dc: - case 0x215: case 0x217: case 0x289: case 0x1e73: - case 0x1d7e: case 0x1d99: case 0x1e75: case 0x1e77: - case 0x1e79: case 0x1e7b: case 
0x1ee5: case 0x1ee7: - case 0x1ee9: case 0x1eeb: case 0x1eed: case 0x1eef: + case 'u': + case 0xf9: + case 0xfa: + case 0xfb: + case 0xfc: + case 0x169: + case 0x16b: + case 0x16d: + case 0x16f: + case 0x171: + case 0x173: + case 0x1b0: + case 0x1d4: + case 0x1d6: + case 0x1d8: + case 0x1da: + case 0x1dc: + case 0x215: + case 0x217: + case 0x289: + case 0x1e73: + case 0x1d7e: + case 0x1d99: + case 0x1e75: + case 0x1e77: + case 0x1e79: + case 0x1e7b: + case 0x1ee5: + case 0x1ee7: + case 0x1ee9: + case 0x1eeb: + case 0x1eed: + case 0x1eef: case 0x1ef1: regmbc('u'); regmbc(0xf9); regmbc(0xfa); regmbc(0xfb); regmbc(0xfc); regmbc(0x169); @@ -966,31 +1424,61 @@ static void reg_equi_class(int c) regmbc(0x1eeb); regmbc(0x1eed); regmbc(0x1eef); regmbc(0x1ef1); return; - case 'v': case 0x28b: case 0x1d8c: case 0x1e7d: case 0x1e7f: + case 'v': + case 0x28b: + case 0x1d8c: + case 0x1e7d: + case 0x1e7f: regmbc('v'); regmbc(0x28b); regmbc(0x1d8c); regmbc(0x1e7d); regmbc(0x1e7f); return; - case 'w': case 0x175: case 0x1e81: case 0x1e83: - case 0x1e85: case 0x1e87: case 0x1e89: case 0x1e98: + case 'w': + case 0x175: + case 0x1e81: + case 0x1e83: + case 0x1e85: + case 0x1e87: + case 0x1e89: + case 0x1e98: regmbc('w'); regmbc(0x175); regmbc(0x1e81); regmbc(0x1e83); regmbc(0x1e85); regmbc(0x1e87); regmbc(0x1e89); regmbc(0x1e98); return; - case 'x': case 0x1e8b: case 0x1e8d: + case 'x': + case 0x1e8b: + case 0x1e8d: regmbc('x'); regmbc(0x1e8b); regmbc(0x1e8d); return; - case 'y': case 0xfd: case 0xff: case 0x177: case 0x1b4: - case 0x233: case 0x24f: case 0x1e8f: case 0x1e99: case 0x1ef3: - case 0x1ef5: case 0x1ef7: case 0x1ef9: + case 'y': + case 0xfd: + case 0xff: + case 0x177: + case 0x1b4: + case 0x233: + case 0x24f: + case 0x1e8f: + case 0x1e99: + case 0x1ef3: + case 0x1ef5: + case 0x1ef7: + case 0x1ef9: regmbc('y'); regmbc(0xfd); regmbc(0xff); regmbc(0x177); regmbc(0x1b4); regmbc(0x233); regmbc(0x24f); regmbc(0x1e8f); regmbc(0x1e99); regmbc(0x1ef3); regmbc(0x1ef5); 
regmbc(0x1ef7); regmbc(0x1ef9); return; - case 'z': case 0x17a: case 0x17c: case 0x17e: case 0x1b6: - case 0x1d76: case 0x1d8e: case 0x1e91: case 0x1e93: - case 0x1e95: case 0x2c6c: + case 'z': + case 0x17a: + case 0x17c: + case 0x17e: + case 0x1b6: + case 0x1d76: + case 0x1d8e: + case 0x1e91: + case 0x1e93: + case 0x1e95: + case 0x2c6c: regmbc('z'); regmbc(0x17a); regmbc(0x17c); regmbc(0x17e); regmbc(0x1b6); regmbc(0x1d76); regmbc(0x1d8e); regmbc(0x1e91); regmbc(0x1e93); @@ -1002,19 +1490,18 @@ static void reg_equi_class(int c) } - /* * Emit a node. * Return pointer to generated code. */ static char_u *regnode(int op) { - char_u *ret; + char_u *ret; ret = regcode; - if (ret == JUST_CALC_SIZE) + if (ret == JUST_CALC_SIZE) { regsize += 3; - else { + } else { *regcode++ = op; *regcode++ = NUL; // Null "next" pointer. *regcode++ = NUL; @@ -1044,17 +1531,20 @@ static char_u *regnext(char_u *p) { int offset; - if (p == JUST_CALC_SIZE || reg_toolong) + if (p == JUST_CALC_SIZE || reg_toolong) { return NULL; + } offset = NEXT(p); - if (offset == 0) + if (offset == 0) { return NULL; + } - if (OP(p) == BACK) + if (OP(p) == BACK) { return p - offset; - else + } else { return p + offset; + } } // Set the next-pointer at the end of a node chain. @@ -1068,7 +1558,7 @@ static void regtail(char_u *p, char_u *val) // Find last node. 
char_u *scan = p; - for (;; ) { + for (;;) { char_u *temp = regnext(scan); if (temp == NULL) { break; @@ -1100,8 +1590,9 @@ static void regoptail(char_u *p, char_u *val) // When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" if (p == NULL || p == JUST_CALC_SIZE || (OP(p) != BRANCH - && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9))) + && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9))) { return; + } regtail(OPERAND(p), val); } @@ -1113,9 +1604,9 @@ static void regoptail(char_u *p, char_u *val) */ static void reginsert(int op, char_u *opnd) { - char_u *src; - char_u *dst; - char_u *place; + char_u *src; + char_u *dst; + char_u *place; if (regcode == JUST_CALC_SIZE) { regsize += 3; @@ -1124,8 +1615,9 @@ static void reginsert(int op, char_u *opnd) src = regcode; regcode += 3; dst = regcode; - while (src > opnd) + while (src > opnd) { *--dst = *--src; + } place = opnd; // Op node, where operand used to be. *place++ = op; @@ -1139,9 +1631,9 @@ static void reginsert(int op, char_u *opnd) */ static void reginsert_nr(int op, long val, char_u *opnd) { - char_u *src; - char_u *dst; - char_u *place; + char_u *src; + char_u *dst; + char_u *place; if (regcode == JUST_CALC_SIZE) { regsize += 7; @@ -1150,8 +1642,9 @@ static void reginsert_nr(int op, long val, char_u *opnd) src = regcode; regcode += 7; dst = regcode; - while (src > opnd) + while (src > opnd) { *--dst = *--src; + } place = opnd; // Op node, where operand used to be. 
*place++ = op; @@ -1169,9 +1662,9 @@ static void reginsert_nr(int op, long val, char_u *opnd) */ static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) { - char_u *src; - char_u *dst; - char_u *place; + char_u *src; + char_u *dst; + char_u *place; if (regcode == JUST_CALC_SIZE) { regsize += 11; @@ -1180,8 +1673,9 @@ static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) src = regcode; regcode += 11; dst = regcode; - while (src > opnd) + while (src > opnd) { *--dst = *--src; + } place = opnd; // Op node, where operand used to be. *place++ = op; @@ -1201,15 +1695,15 @@ static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) static int seen_endbrace(int refnum) { if (!had_endbrace[refnum]) { - char_u *p; + char_u *p; - // Trick: check if "@<=" or "@<!" follows, in which case - // the \1 can appear before the referenced match. - for (p = regparse; *p != NUL; p++) { - if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '=')) { - break; - } + // Trick: check if "@<=" or "@<!" follows, in which case + // the \1 can appear before the referenced match. + for (p = regparse; *p != NUL; p++) { + if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '=')) { + break; } + } if (*p == NUL) { emsg(_("E65: Illegal back reference")); @@ -1229,10 +1723,10 @@ static int seen_endbrace(int refnum) */ static char_u *regatom(int *flagp) { - char_u *ret; + char_u *ret; int flags; int c; - char_u *p; + char_u *p; int extra = 0; int save_prev_at_start = prev_at_start; @@ -1273,10 +1767,11 @@ static char_u *regatom(int *flagp) *flagp |= HASNL; // "\_[" is character range plus newline - if (c == '[') + if (c == '[') { goto collection; + } - // "\_x" is character class plus newline + // "\_x" is character class plus newline FALLTHROUGH; // Character classes. 
@@ -1308,8 +1803,9 @@ static char_u *regatom(int *flagp) case Magic('u'): case Magic('U'): p = vim_strchr(classchars, no_Magic(c)); - if (p == NULL) + if (p == NULL) { EMSG_RET_NULL(_("E63: invalid use of \\_")); + } // When '.' is followed by a composing char ignore the dot, so that // the composing char is matched here. if (c == Magic('.') && utf_iscomposing(peekchr())) { @@ -1335,11 +1831,13 @@ static char_u *regatom(int *flagp) break; case Magic('('): - if (one_exactly) + if (one_exactly) { EMSG_ONE_RET_NULL; + } ret = reg(REG_PAREN, &flags); - if (ret == NULL) + if (ret == NULL) { return NULL; + } *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); break; @@ -1347,8 +1845,9 @@ static char_u *regatom(int *flagp) case Magic('|'): case Magic('&'): case Magic(')'): - if (one_exactly) + if (one_exactly) { EMSG_ONE_RET_NULL; + } IEMSG_RET_NULL(_(e_internal)); // Supposed to be caught earlier. // NOTREACHED @@ -1365,20 +1864,23 @@ static char_u *regatom(int *flagp) case Magic('~'): // previous substitute pattern if (reg_prev_sub != NULL) { - char_u *lp; + char_u *lp; ret = regnode(EXACTLY); lp = reg_prev_sub; - while (*lp != NUL) + while (*lp != NUL) { regc(*lp++); + } regc(NUL); if (*reg_prev_sub != NUL) { *flagp |= HASWIDTH; - if ((lp - reg_prev_sub) == 1) + if ((lp - reg_prev_sub) == 1) { *flagp |= SIMPLE; + } } - } else + } else { EMSG_RET_NULL(_(e_nopresub)); + } break; case Magic('1'): @@ -1389,8 +1891,7 @@ static char_u *regatom(int *flagp) case Magic('6'): case Magic('7'): case Magic('8'): - case Magic('9'): - { + case Magic('9'): { int refnum; refnum = c - Magic('0'); @@ -1402,16 +1903,19 @@ static char_u *regatom(int *flagp) break; case Magic('z'): - { c = no_Magic(getchr()); switch (c) { - case '(': if ((reg_do_extmatch & REX_SET) == 0) + case '(': + if ((reg_do_extmatch & REX_SET) == 0) { EMSG_RET_NULL(_(e_z_not_allowed)); - if (one_exactly) + } + if (one_exactly) { EMSG_ONE_RET_NULL; + } ret = reg(REG_ZPAREN, &flags); - if (ret == NULL) + if (ret 
== NULL) { return NULL; + } *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH); re_has_z = REX_SET; break; @@ -1424,40 +1928,45 @@ static char_u *regatom(int *flagp) case '6': case '7': case '8': - case '9': if ((reg_do_extmatch & REX_USE) == 0) + case '9': + if ((reg_do_extmatch & REX_USE) == 0) { EMSG_RET_NULL(_(e_z1_not_allowed)); + } ret = regnode(ZREF + c - '0'); re_has_z = REX_USE; break; - case 's': ret = regnode(MOPEN + 0); + case 's': + ret = regnode(MOPEN + 0); if (!re_mult_next("\\zs")) { return NULL; } break; - case 'e': ret = regnode(MCLOSE + 0); + case 'e': + ret = regnode(MCLOSE + 0); if (!re_mult_next("\\ze")) { return NULL; } break; - default: EMSG_RET_NULL(_("E68: Invalid character after \\z")); + default: + EMSG_RET_NULL(_("E68: Invalid character after \\z")); } - } - break; + break; case Magic('%'): - { c = no_Magic(getchr()); switch (c) { // () without a back reference case '(': - if (one_exactly) + if (one_exactly) { EMSG_ONE_RET_NULL; + } ret = reg(REG_NPAREN, &flags); - if (ret == NULL) + if (ret == NULL) { return NULL; + } *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); break; @@ -1486,18 +1995,20 @@ static char_u *regatom(int *flagp) // \%[abc]: Emit as a list of branches, all ending at the last // branch which matches nothing. 
case '[': - if (one_exactly) // doesn't nest + if (one_exactly) { // doesn't nest EMSG_ONE_RET_NULL; + } { - char_u *lastbranch; - char_u *lastnode = NULL; - char_u *br; + char_u *lastbranch; + char_u *lastnode = NULL; + char_u *br; ret = NULL; while ((c = getchr()) != ']') { - if (c == NUL) + if (c == NUL) { EMSG2_RET_NULL(_(e_missing_sb), - reg_magic == MAGIC_ALL); + reg_magic == MAGIC_ALL); + } br = regnode(BRANCH); if (ret == NULL) { ret = br; @@ -1516,9 +2027,10 @@ static char_u *regatom(int *flagp) return NULL; } } - if (ret == NULL) + if (ret == NULL) { EMSG2_RET_NULL(_(e_empty_sb), - reg_magic == MAGIC_ALL); + reg_magic == MAGIC_ALL); + } lastbranch = regnode(BRANCH); br = regnode(NOTHING); if (ret != JUST_CALC_SIZE) { @@ -1526,15 +2038,16 @@ static char_u *regatom(int *flagp) regtail(lastbranch, br); // connect all branches to the NOTHING // branch at the end - for (br = ret; br != lastnode; ) { + for (br = ret; br != lastnode;) { if (OP(br) == BRANCH) { regtail(br, lastbranch); if (reg_toolong) { return NULL; } br = OPERAND(br); - } else + } else { br = regnext(br); + } } } *flagp &= ~(HASWIDTH | SIMPLE); @@ -1550,12 +2063,18 @@ static char_u *regatom(int *flagp) int64_t i; switch (c) { - case 'd': i = getdecchrs(); break; - case 'o': i = getoctchrs(); break; - case 'x': i = gethexchrs(2); break; - case 'u': i = gethexchrs(4); break; - case 'U': i = gethexchrs(8); break; - default: i = -1; break; + case 'd': + i = getdecchrs(); break; + case 'o': + i = getoctchrs(); break; + case 'x': + i = gethexchrs(2); break; + case 'u': + i = gethexchrs(4); break; + case 'U': + i = gethexchrs(8); break; + default: + i = -1; break; } if (i < 0 || i > INT_MAX) { @@ -1599,9 +2118,9 @@ static char_u *regatom(int *flagp) // "\%'m", "\%<'m" and "\%>'m": Mark c = getchr(); ret = regnode(RE_MARK); - if (ret == JUST_CALC_SIZE) + if (ret == JUST_CALC_SIZE) { regsize += 2; - else { + } else { *regcode++ = c; *regcode++ = cmp; } @@ -1647,15 +2166,14 @@ static char_u *regatom(int 
*flagp) } EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), - reg_magic == MAGIC_ALL); + reg_magic == MAGIC_ALL); } - } - break; + break; case Magic('['): collection: { - char_u *lp; + char_u *lp; // If there is no matching ']', we assume the '[' is a normal // character. This makes 'incsearch' and ":help [" work. @@ -1669,8 +2187,9 @@ collection: if (*regparse == '^') { // Complement of range. ret = regnode(ANYBUT + extra); regparse++; - } else + } else { ret = regnode(ANYOF + extra); + } // At the start ']' and '-' mean the literal character. if (*regparse == ']' || *regparse == '-') { @@ -1691,15 +2210,17 @@ collection: } else { // Also accept "a-[.z.]" endc = 0; - if (*regparse == '[') + if (*regparse == '[') { endc = get_coll_element(®parse); + } if (endc == 0) { endc = mb_ptr2char_adv((const char_u **)®parse); } // Handle \o40, \x20 and \u20AC style sequences - if (endc == '\\' && !reg_cpo_lit) + if (endc == '\\' && !reg_cpo_lit) { endc = coll_get_char(); + } if (startc > endc) { EMSG_RET_NULL(_(e_reverse_range)); @@ -1714,8 +2235,9 @@ collection: regmbc(startc); } } else { - while (++startc <= endc) + while (++startc <= endc) { regc(startc); + } } startc = -1; } @@ -1727,7 +2249,7 @@ collection: && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, - regparse[1]) != NULL))) { + regparse[1]) != NULL))) { regparse++; if (*regparse == 'n') { // '\n' in range: also match NL @@ -1748,10 +2270,11 @@ collection: || *regparse == 'u' || *regparse == 'U') { startc = coll_get_char(); - if (startc == 0) + if (startc == 0) { regc(0x0a); - else + } else { regmbc(startc); + } } else { startc = backslash_trans(*regparse++); regc(startc); @@ -1840,8 +2363,9 @@ collection: } break; case CLASS_SPACE: - for (cu = 9; cu <= 13; cu++) + for (cu = 9; cu <= 13; cu++) { regc(cu); + } regc(' '); break; case CLASS_UPPER: @@ -1908,18 +2432,19 @@ collection: } regc(NUL); prevchr_len = 1; // last char was the ']' - if (*regparse != ']') + if 
(*regparse != ']') { EMSG_RET_NULL(_(e_toomsbra)); // Cannot happen? + } skipchr(); // let's be friends with the lexer again *flagp |= HASWIDTH | SIMPLE; break; - } else if (reg_strict) + } else if (reg_strict) { EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); + } } FALLTHROUGH; - default: - { + default: { int len; // A multi-byte character is handled as a separate atom if it's @@ -1952,7 +2477,7 @@ do_multibyte: int l; // Need to get composing character too. - for (;; ) { + for (;;) { l = utf_ptr2len((char *)regparse); if (!utf_composinglike(regparse, regparse + l)) { break; @@ -1968,8 +2493,9 @@ do_multibyte: regc(NUL); *flagp |= HASWIDTH; - if (len == 1) + if (len == 1) { *flagp |= SIMPLE; + } } break; } @@ -1996,8 +2522,9 @@ static char_u *regpiece(int *flagp) long maxval; ret = regatom(&flags); - if (ret == NULL) + if (ret == NULL) { return NULL; + } op = peekchr(); if (re_multi_type(op) == NOT_MULTI) { @@ -2010,9 +2537,9 @@ static char_u *regpiece(int *flagp) skipchr(); switch (op) { case Magic('*'): - if (flags & SIMPLE) + if (flags & SIMPLE) { reginsert(STAR, ret); - else { + } else { // Emit x* as (x&|), where & means "self". reginsert(BRANCH, ret); // Either x regoptail(ret, regnode(BACK)); // and loop @@ -2023,9 +2550,9 @@ static char_u *regpiece(int *flagp) break; case Magic('+'): - if (flags & SIMPLE) + if (flags & SIMPLE) { reginsert(PLUS, ret); - else { + } else { // Emit x+ as x(&|), where & means "self". next = regnode(BRANCH); // Either regtail(ret, next); @@ -2036,23 +2563,29 @@ static char_u *regpiece(int *flagp) *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH))); break; - case Magic('@'): - { + case Magic('@'): { int lop = END; int64_t nr = getdecchrs(); switch (no_Magic(getchr())) { - case '=': lop = MATCH; break; // \@= - case '!': lop = NOMATCH; break; // \@! 
- case '>': lop = SUBPAT; break; // \@> - case '<': switch (no_Magic(getchr())) { - case '=': lop = BEHIND; break; // \@<= - case '!': lop = NOBEHIND; break; // \@<! - } + case '=': + lop = MATCH; break; // \@= + case '!': + lop = NOMATCH; break; // \@! + case '>': + lop = SUBPAT; break; // \@> + case '<': + switch (no_Magic(getchr())) { + case '=': + lop = BEHIND; break; // \@<= + case '!': + lop = NOBEHIND; break; // \@<! + } } - if (lop == END) + if (lop == END) { EMSG2_RET_NULL(_("E59: invalid character after %s@"), - reg_magic == MAGIC_ALL); + reg_magic == MAGIC_ALL); + } // Look behind must match with behind_pos. if (lop == BEHIND || lop == NOBEHIND) { regtail(ret, regnode(BHPOS)); @@ -2060,11 +2593,13 @@ static char_u *regpiece(int *flagp) } regtail(ret, regnode(END)); // operand ends if (lop == BEHIND || lop == NOBEHIND) { - if (nr < 0) + if (nr < 0) { nr = 0; // no limit is same as zero limit + } reginsert_nr(lop, (uint32_t)nr, ret); - } else + } else { reginsert(lop, ret); + } break; } @@ -2079,23 +2614,26 @@ static char_u *regpiece(int *flagp) break; case Magic('{'): - if (!read_limits(&minval, &maxval)) + if (!read_limits(&minval, &maxval)) { return NULL; + } if (flags & SIMPLE) { reginsert(BRACE_SIMPLE, ret); reginsert_limits(BRACE_LIMITS, minval, maxval, ret); } else { - if (num_complex_braces >= 10) + if (num_complex_braces >= 10) { EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), - reg_magic == MAGIC_ALL); + reg_magic == MAGIC_ALL); + } reginsert(BRACE_COMPLEX + num_complex_braces, ret); regoptail(ret, regnode(BACK)); regoptail(ret, ret); reginsert_limits(BRACE_LIMITS, minval, maxval, ret); ++num_complex_braces; } - if (minval > 0 && maxval > 0) + if (minval > 0 && maxval > 0) { *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH))); + } break; } if (re_multi_type(peekchr()) != NOT_MULTI) { @@ -2115,9 +2653,9 @@ static char_u *regpiece(int *flagp) */ static char_u *regconcat(int *flagp) { - char_u *first = NULL; - char_u *chain = NULL; - char_u 
*latest; + char_u *first = NULL; + char_u *chain = NULL; + char_u *latest; int flags; int cont = true; @@ -2165,21 +2703,25 @@ static char_u *regconcat(int *flagp) break; default: latest = regpiece(&flags); - if (latest == NULL || reg_toolong) + if (latest == NULL || reg_toolong) { return NULL; + } *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH); - if (chain == NULL) // First piece. + if (chain == NULL) { // First piece. *flagp |= flags & SPSTART; - else + } else { regtail(chain, latest); + } chain = latest; - if (first == NULL) + if (first == NULL) { first = latest; + } break; } } - if (first == NULL) // Loop ran zero times. + if (first == NULL) { // Loop ran zero times. first = regnode(NOTHING); + } return first; } @@ -2189,18 +2731,19 @@ static char_u *regconcat(int *flagp) */ static char_u *regbranch(int *flagp) { - char_u *ret; - char_u *chain = NULL; - char_u *latest; + char_u *ret; + char_u *chain = NULL; + char_u *latest; int flags; *flagp = WORST | HASNL; // Tentatively. ret = regnode(BRANCH); - for (;; ) { + for (;;) { latest = regconcat(&flags); - if (latest == NULL) + if (latest == NULL) { return NULL; + } // If one of the branches has width, the whole thing has. If one of // the branches anchors at start-of-line, the whole thing does. // If one of the branches uses look-behind, the whole thing does. @@ -2208,14 +2751,17 @@ static char_u *regbranch(int *flagp) // If one of the branches doesn't match a line-break, the whole thing // doesn't. *flagp &= ~HASNL | (flags & HASNL); - if (chain != NULL) + if (chain != NULL) { regtail(chain, latest); - if (peekchr() != Magic('&')) + } + if (peekchr() != Magic('&')) { break; + } skipchr(); regtail(latest, regnode(END)); // operand ends - if (reg_toolong) + if (reg_toolong) { break; + } reginsert(MATCH, latest); chain = latest; } @@ -2223,22 +2769,20 @@ static char_u *regbranch(int *flagp) return ret; } -/* - * Parse regular expression, i.e. main body or parenthesized thing. 
- * - * Caller must absorb opening parenthesis. - * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char_u *reg( - int paren, // REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN - int *flagp) +/// Parse regular expression, i.e. main body or parenthesized thing. +/// +/// Caller must absorb opening parenthesis. +/// +/// Combining parenthesis handling with the base level of regular expression +/// is a trifle forced, but the need to tie the tails of the branches to what +/// follows makes it hard to avoid. +/// +/// @param paren REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN +static char_u *reg(int paren, int *flagp) { - char_u *ret; - char_u *br; - char_u *ender; + char_u *ret; + char_u *br; + char_u *ender; int parno = 0; int flags; @@ -2246,73 +2790,83 @@ static char_u *reg( if (paren == REG_ZPAREN) { // Make a ZOPEN node. - if (regnzpar >= NSUBEXP) + if (regnzpar >= NSUBEXP) { EMSG_RET_NULL(_("E50: Too many \\z(")); + } parno = regnzpar; regnzpar++; ret = regnode(ZOPEN + parno); - } else if (paren == REG_PAREN) { + } else if (paren == REG_PAREN) { // Make a MOPEN node. - if (regnpar >= NSUBEXP) + if (regnpar >= NSUBEXP) { EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); + } parno = regnpar; ++regnpar; ret = regnode(MOPEN + parno); - } else if (paren == REG_NPAREN) { + } else if (paren == REG_NPAREN) { // Make a NOPEN node. ret = regnode(NOPEN); - } else + } else { ret = NULL; + } // Pick up the branches, linking them together. br = regbranch(&flags); - if (br == NULL) + if (br == NULL) { return NULL; - if (ret != NULL) + } + if (ret != NULL) { regtail(ret, br); // [MZ]OPEN -> first. - else + } else { ret = br; + } // If one of the branches can be zero-width, the whole thing can. // If one of the branches has * at start or matches a line-break, the // whole thing can. 
- if (!(flags & HASWIDTH)) + if (!(flags & HASWIDTH)) { *flagp &= ~HASWIDTH; + } *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); while (peekchr() == Magic('|')) { skipchr(); br = regbranch(&flags); - if (br == NULL || reg_toolong) + if (br == NULL || reg_toolong) { return NULL; + } regtail(ret, br); // BRANCH -> BRANCH. - if (!(flags & HASWIDTH)) + if (!(flags & HASWIDTH)) { *flagp &= ~HASWIDTH; + } *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); } // Make a closing node, and hook it on the end. - ender = regnode( - paren == REG_ZPAREN ? ZCLOSE + parno : - paren == REG_PAREN ? MCLOSE + parno : - paren == REG_NPAREN ? NCLOSE : END); + ender = regnode(paren == REG_ZPAREN ? ZCLOSE + parno : + paren == REG_PAREN ? MCLOSE + parno : + paren == REG_NPAREN ? NCLOSE : END); regtail(ret, ender); // Hook the tails of the branches to the closing node. - for (br = ret; br != NULL; br = regnext(br)) + for (br = ret; br != NULL; br = regnext(br)) { regoptail(br, ender); + } // Check for proper termination. if (paren != REG_NOPAREN && getchr() != Magic(')')) { - if (paren == REG_ZPAREN) + if (paren == REG_ZPAREN) { EMSG_RET_NULL(_("E52: Unmatched \\z(")); - else if (paren == REG_NPAREN) + } else if (paren == REG_NPAREN) { EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); - else + } else { EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); + } } else if (paren == REG_NOPAREN && peekchr() != NUL) { - if (curchr == Magic(')')) + if (curchr == Magic(')')) { EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); - else + } else { EMSG_RET_NULL(_(e_trailing)); // "Can't happen". 
+ } // NOTREACHED } // Here we set the flag allowing back references to this set of @@ -2347,8 +2901,8 @@ static char_u *reg( */ static regprog_T *bt_regcomp(char_u *expr, int re_flags) { - char_u *scan; - char_u *longest; + char_u *scan; + char_u *longest; int len; int flags; @@ -2362,8 +2916,9 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) regcomp_start(expr, re_flags); regcode = JUST_CALC_SIZE; regc(REGMAGIC); - if (reg(REG_NOPAREN, &flags) == NULL) + if (reg(REG_NOPAREN, &flags) == NULL) { return NULL; + } // Allocate space. bt_regprog_T *r = xmalloc(sizeof(bt_regprog_T) + regsize); @@ -2375,8 +2930,9 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) regc(REGMAGIC); if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong) { xfree(r); - if (reg_toolong) + if (reg_toolong) { EMSG_RET_NULL(_("E339: Pattern too long")); + } return NULL; } @@ -2386,10 +2942,12 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) r->regmust = NULL; r->regmlen = 0; r->regflags = regflags; - if (flags & HASNL) + if (flags & HASNL) { r->regflags |= RF_HASNL; - if (flags & HASLOOKBH) + } + if (flags & HASLOOKBH) { r->regflags |= RF_LOOKBH; + } // Remember whether this pattern has any \z specials in it. r->reghasz = re_has_z; scan = r->program + 1; // First BRANCH. 
@@ -2407,7 +2965,7 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) } else if (OP(scan) == BOW || OP(scan) == EOW || OP(scan) == NOTHING - || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN + || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) { char_u *regnext_scan = regnext(scan); if (OP(regnext_scan) == EXACTLY) { @@ -2428,11 +2986,12 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) && !(flags & HASNL)) { longest = NULL; len = 0; - for (; scan != NULL; scan = regnext(scan)) + for (; scan != NULL; scan = regnext(scan)) { if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len) { longest = OPERAND(scan); len = (int)STRLEN(OPERAND(scan)); } + } r->regmust = longest; r->regmlen = len; } @@ -2462,11 +3021,16 @@ static int coll_get_char(void) int64_t nr = -1; switch (*regparse++) { - case 'd': nr = getdecchrs(); break; - case 'o': nr = getoctchrs(); break; - case 'x': nr = gethexchrs(2); break; - case 'u': nr = gethexchrs(4); break; - case 'U': nr = gethexchrs(8); break; + case 'd': + nr = getdecchrs(); break; + case 'o': + nr = getoctchrs(); break; + case 'x': + nr = gethexchrs(2); break; + case 'u': + nr = gethexchrs(4); break; + case 'U': + nr = gethexchrs(8); break; } if (nr < 0 || nr > INT_MAX) { // If getting the number fails be backwards compatible: the character @@ -2492,8 +3056,8 @@ static void bt_regfree(regprog_T *prog) * to regmatch(), but they are here to reduce the amount of stack space used * (it can be called recursively many times). */ -static long bl_minval; -static long bl_maxval; +static long bl_minval; +static long bl_maxval; // Save the input line and position in a regsave_T. static void reg_save(regsave_T *save, garray_T *gap) @@ -2543,10 +3107,10 @@ static bool reg_save_equal(const regsave_T *save) // After a failed match restore the sub-expressions. 
#define restore_se(savep, posp, pp) { \ - if (REG_MULTI) \ - *(posp) = (savep)->se_u.pos; \ - else \ - *(pp) = (savep)->se_u.ptr; } + if (REG_MULTI) /* NOLINT(readability/braces) */ \ + *(posp) = (savep)->se_u.pos; \ + else /* NOLINT */ \ + *(pp) = (savep)->se_u.ptr; } /* * Tentatively set the sub-expression start to the current position (after @@ -2568,17 +3132,14 @@ static void save_se_one(save_se_T *savep, char_u **pp) *pp = rex.input; } -/* - * regrepeat - repeatedly match something simple, return how many. - * Advances rex.input (and rex.lnum) to just after the matched chars. - */ - static int -regrepeat( - char_u *p, - long maxcount) // maximum number of matches allowed +/// regrepeat - repeatedly match something simple, return how many. +/// Advances rex.input (and rex.lnum) to just after the matched chars. +/// +/// @param maxcount maximum number of matches allowed +static int regrepeat(char_u *p, long maxcount) { long count = 0; - char_u *opnd; + char_u *opnd; int mask; int testval = 0; @@ -2822,8 +3383,7 @@ do_class: mask = RI_UPPER; goto do_class; - case EXACTLY: - { + case EXACTLY: { int cu, cl; // This doesn't do a multi-byte character, because a MULTIBYTECODE @@ -2846,8 +3406,7 @@ do_class: break; } - case MULTIBYTECODE: - { + case MULTIBYTECODE: { int i, len, cf = 0; // Safety check (just in case 'encoding' was changed since @@ -2900,9 +3459,10 @@ do_class: } scan += len; } else { - if ((cstrchr(opnd, *scan) == NULL) == testval) + if ((cstrchr(opnd, *scan) == NULL) == testval) { break; - ++scan; + } + scan++; } ++count; } @@ -2944,7 +3504,7 @@ do_class: */ static regitem_T *regstack_push(regstate_T state, char_u *scan) { - regitem_T *rp; + regitem_T *rp; if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { emsg(_(e_maxmempat)); @@ -2965,7 +3525,7 @@ static regitem_T *regstack_push(regstate_T state, char_u *scan) */ static void regstack_pop(char_u **scan) { - regitem_T *rp; + regitem_T *rp; rp = (regitem_T *)((char *)regstack.ga_data + 
regstack.ga_len) - 1; *scan = rp->rs_scan; @@ -3021,19 +3581,20 @@ static void restore_subexpr(regbehind_T *bp) /// (that don't need to know whether the rest of the match failed) by a nested /// loop. /// -/// Returns true when there is a match. Leaves rex.input and rex.lnum -/// just after the last matched character. -/// Returns false when there is no match. Leaves rex.input and rex.lnum in an -/// undefined state! -static bool regmatch( - char_u *scan, // Current node. - proftime_T *tm, // timeout limit or NULL - int *timed_out) // flag set on timeout or NULL +/// @param scan Current node. +/// @param tm timeout limit or NULL +/// @param timed_out flag set on timeout or NULL +/// +/// @return - true when there is a match. Leaves rex.input and rex.lnum +/// just after the last matched character. +/// - false when there is no match. Leaves rex.input and rex.lnum in an +/// undefined state! +static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) { - char_u *next; // Next node. + char_u *next; // Next node. int op; int c; - regitem_T *rp; + regitem_T *rp; int no; int status; // one of the RA_ values: int tm_count = 0; @@ -3044,7 +3605,7 @@ static bool regmatch( backpos.ga_len = 0; // Repeat until "regstack" is empty. - for (;; ) { + for (;;) { // Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q". // Allow interrupting them with CTRL-C. fast_breakcheck(); @@ -3058,7 +3619,7 @@ static bool regmatch( // Repeat for items that can be matched sequentially, without using the // regstack. 
- for (;; ) { + for (;;) { if (got_int || scan == NULL) { status = RA_FAIL; break; @@ -3086,8 +3647,9 @@ static bool regmatch( mch_errmsg(_("External submatches:\n")); for (i = 0; i < NSUBEXP; i++) { mch_errmsg(" \""); - if (re_extmatch_in->matches[i] != NULL) + if (re_extmatch_in->matches[i] != NULL) { mch_errmsg((char *)re_extmatch_in->matches[i]); + } mch_errmsg("\"\n"); } } @@ -3168,7 +3730,7 @@ static bool regmatch( status = RA_NOMATCH; } else { const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum - && pos->col == MAXCOL + && pos->col == MAXCOL ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) : pos->col; @@ -3188,8 +3750,9 @@ static bool regmatch( break; case RE_VISUAL: - if (!reg_match_visual()) + if (!reg_match_visual()) { status = RA_NOMATCH; + } break; case RE_LNUM: @@ -3260,10 +3823,11 @@ static bool regmatch( break; case IDENT: - if (!vim_isIDc(c)) + if (!vim_isIDc(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case SIDENT: @@ -3324,135 +3888,152 @@ static bool regmatch( break; case WHITE: - if (!ascii_iswhite(c)) + if (!ascii_iswhite(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NWHITE: - if (c == NUL || ascii_iswhite(c)) + if (c == NUL || ascii_iswhite(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case DIGIT: - if (!ri_digit(c)) + if (!ri_digit(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NDIGIT: - if (c == NUL || ri_digit(c)) + if (c == NUL || ri_digit(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case HEX: - if (!ri_hex(c)) + if (!ri_hex(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NHEX: - if (c == NUL || ri_hex(c)) + if (c == NUL || ri_hex(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case OCTAL: - if (!ri_octal(c)) + if (!ri_octal(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } 
break; case NOCTAL: - if (c == NUL || ri_octal(c)) + if (c == NUL || ri_octal(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case WORD: - if (!ri_word(c)) + if (!ri_word(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NWORD: - if (c == NUL || ri_word(c)) + if (c == NUL || ri_word(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case HEAD: - if (!ri_head(c)) + if (!ri_head(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NHEAD: - if (c == NUL || ri_head(c)) + if (c == NUL || ri_head(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case ALPHA: - if (!ri_alpha(c)) + if (!ri_alpha(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NALPHA: - if (c == NUL || ri_alpha(c)) + if (c == NUL || ri_alpha(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case LOWER: - if (!ri_lower(c)) + if (!ri_lower(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NLOWER: - if (c == NUL || ri_lower(c)) + if (c == NUL || ri_lower(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case UPPER: - if (!ri_upper(c)) + if (!ri_upper(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case NUPPER: - if (c == NUL || ri_upper(c)) + if (c == NUL || ri_upper(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; - case EXACTLY: - { + case EXACTLY: { int len; - char_u *opnd; + char_u *opnd; opnd = OPERAND(scan); // Inline the first byte, for speed. 
@@ -3492,29 +4073,29 @@ static bool regmatch( case ANYOF: case ANYBUT: - if (c == NUL) + if (c == NUL) { status = RA_NOMATCH; - else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF)) + } else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; - case MULTIBYTECODE: - { + case MULTIBYTECODE: { int i, len; const char_u *opnd = OPERAND(scan); - // Safety check (just in case 'encoding' was changed since - // compiling the program). + // Safety check (just in case 'encoding' was changed since + // compiling the program). if ((len = utfc_ptr2len((char *)opnd)) < 2) { status = RA_NOMATCH; break; } const int opndc = utf_ptr2char((char *)opnd); if (utf_iscomposing(opndc)) { - // When only a composing char is given match at any - // position where that composing char appears. + // When only a composing char is given match at any + // position where that composing char appears. status = RA_NOMATCH; for (i = 0; rex.input[i] != NUL; i += utf_ptr2len((char *)rex.input + i)) { @@ -3524,7 +4105,7 @@ static bool regmatch( break; } } else if (opndc == inpc) { - // Include all following composing chars. + // Include all following composing chars. len = i + utfc_ptr2len((char *)rex.input + i); status = RA_MATCH; break; @@ -3543,19 +4124,16 @@ static bool regmatch( break; case RE_COMPOSING: - { - // Skip composing characters. + // Skip composing characters. while (utf_iscomposing(utf_ptr2char((char *)rex.input))) { MB_CPTR_ADV(rex.input); } - } - break; + break; case NOTHING: break; - case BACK: - { + case BACK: { int i; // When we run into BACK we need to check if we don't keep @@ -3565,15 +4143,18 @@ static bool regmatch( // The positions are stored in "backpos" and found by the // current value of "scan", the position in the RE program. 
backpos_T *bp = (backpos_T *)backpos.ga_data; - for (i = 0; i < backpos.ga_len; ++i) - if (bp[i].bp_scan == scan) + for (i = 0; i < backpos.ga_len; i++) { + if (bp[i].bp_scan == scan) { break; + } + } if (i == backpos.ga_len) { backpos_T *p = GA_APPEND_VIA_PTR(backpos_T, &backpos); p->bp_scan = scan; - } else if (reg_save_equal(&bp[i].bp_pos)) + } else if (reg_save_equal(&bp[i].bp_pos)) { // Still at same position as last time, fail. status = RA_NOMATCH; + } assert(status != RA_FAIL); if (status != RA_NOMATCH) { @@ -3592,25 +4173,24 @@ static bool regmatch( case MOPEN + 7: case MOPEN + 8: case MOPEN + 9: - { no = op - MOPEN; cleanup_subexpr(); rp = regstack_push(RS_MOPEN, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { rp->rs_no = no; save_se(&rp->rs_un.sesave, &rex.reg_startpos[no], &rex.reg_startp[no]); // We simply continue and handle the result when done. } - } - break; + break; case NOPEN: // \%( case NCLOSE: // \) after \%( - if (regstack_push(RS_NOPEN, scan) == NULL) + if (regstack_push(RS_NOPEN, scan) == NULL) { status = RA_FAIL; + } // We simply continue and handle the result when done. break; @@ -3623,20 +4203,18 @@ static bool regmatch( case ZOPEN + 7: case ZOPEN + 8: case ZOPEN + 9: - { no = op - ZOPEN; cleanup_zsubexpr(); rp = regstack_push(RS_ZOPEN, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { rp->rs_no = no; save_se(&rp->rs_un.sesave, ®_startzpos[no], - ®_startzp[no]); + ®_startzp[no]); // We simply continue and handle the result when done. } - } - break; + break; case MCLOSE + 0: // Match end: \ze case MCLOSE + 1: // \) @@ -3648,7 +4226,6 @@ static bool regmatch( case MCLOSE + 7: case MCLOSE + 8: case MCLOSE + 9: - { no = op - MCLOSE; cleanup_subexpr(); rp = regstack_push(RS_MCLOSE, scan); @@ -3659,8 +4236,7 @@ static bool regmatch( save_se(&rp->rs_un.sesave, &rex.reg_endpos[no], &rex.reg_endp[no]); // We simply continue and handle the result when done. 
} - } - break; + break; case ZCLOSE + 1: // \) after \z( case ZCLOSE + 2: @@ -3671,20 +4247,18 @@ static bool regmatch( case ZCLOSE + 7: case ZCLOSE + 8: case ZCLOSE + 9: - { no = op - ZCLOSE; cleanup_zsubexpr(); rp = regstack_push(RS_ZCLOSE, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { rp->rs_no = no; save_se(&rp->rs_un.sesave, ®_endzpos[no], - ®_endzp[no]); + ®_endzp[no]); // We simply continue and handle the result when done. } - } - break; + break; case BACKREF + 1: case BACKREF + 2: @@ -3694,8 +4268,7 @@ static bool regmatch( case BACKREF + 6: case BACKREF + 7: case BACKREF + 8: - case BACKREF + 9: - { + case BACKREF + 9: { int len; no = op - BACKREF; @@ -3752,7 +4325,6 @@ static bool regmatch( case ZREF + 7: case ZREF + 8: case ZREF + 9: - { cleanup_zsubexpr(); no = op - ZREF; if (re_extmatch_in != NULL @@ -3766,25 +4338,22 @@ static bool regmatch( } else { // Backref was not set: Match an empty string. } - } - break; + break; case BRANCH: - { - if (OP(next) != BRANCH) // No choice. + if (OP(next) != BRANCH) { // No choice. next = OPERAND(scan); // Avoid recursion. - else { + } else { rp = regstack_push(RS_BRANCH, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else + } else { status = RA_BREAK; // rest is below + } } - } - break; + break; case BRACE_LIMITS: - { if (OP(next) == BRACE_SIMPLE) { bl_minval = OPERAND_MIN(scan); bl_maxval = OPERAND_MAX(scan); @@ -3798,8 +4367,7 @@ static bool regmatch( internal_error("BRACE_LIMITS"); status = RA_FAIL; } - } - break; + break; case BRACE_COMPLEX + 0: case BRACE_COMPLEX + 1: @@ -3811,7 +4379,6 @@ static bool regmatch( case BRACE_COMPLEX + 7: case BRACE_COMPLEX + 8: case BRACE_COMPLEX + 9: - { no = op - BRACE_COMPLEX; ++brace_count[no]; @@ -3819,9 +4386,9 @@ static bool regmatch( if (brace_count[no] <= (brace_min[no] <= brace_max[no] ? 
brace_min[no] : brace_max[no])) { rp = regstack_push(RS_BRCPLX_MORE, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { rp->rs_no = no; reg_save(&rp->rs_un.regsave, &backpos); next = OPERAND(scan); @@ -3835,9 +4402,9 @@ static bool regmatch( // Range is the normal way around, use longest match if (brace_count[no] <= brace_max[no]) { rp = regstack_push(RS_BRCPLX_LONG, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { rp->rs_no = no; reg_save(&rp->rs_un.regsave, &backpos); next = OPERAND(scan); @@ -3848,21 +4415,19 @@ static bool regmatch( // Range is backwards, use shortest match first if (brace_count[no] <= brace_min[no]) { rp = regstack_push(RS_BRCPLX_SHORT, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { reg_save(&rp->rs_un.regsave, &backpos); // We continue and handle the result when done. } } } - } - break; + break; case BRACE_SIMPLE: case STAR: - case PLUS: - { + case PLUS: { regstar_T rst; // Lookahead to avoid useless match attempts when we know @@ -3910,18 +4475,17 @@ static bool regmatch( } else { ga_grow(®stack, sizeof(regstar_T)); regstack.ga_len += sizeof(regstar_T); - rp = regstack_push(rst.minval <= rst.maxval - ? RS_STAR_LONG : RS_STAR_SHORT, scan); - if (rp == NULL) + rp = regstack_push(rst.minval <= rst.maxval ? 
RS_STAR_LONG : RS_STAR_SHORT, scan); + if (rp == NULL) { status = RA_FAIL; - else { + } else { *(((regstar_T *)rp) - 1) = rst; status = RA_BREAK; // skip the restore bits } } - } else + } else { status = RA_NOMATCH; - + } } break; @@ -3929,9 +4493,9 @@ static bool regmatch( case MATCH: case SUBPAT: rp = regstack_push(RS_NOMATCH, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { rp->rs_no = op; reg_save(&rp->rs_un.regsave, &backpos); next = OPERAND(scan); @@ -3949,9 +4513,9 @@ static bool regmatch( ga_grow(®stack, sizeof(regbehind_T)); regstack.ga_len += sizeof(regbehind_T); rp = regstack_push(RS_BEHIND1, scan); - if (rp == NULL) + if (rp == NULL) { status = RA_FAIL; - else { + } else { // Need to save the subexpr to be able to restore them // when there is a match but we don't use it. save_subexpr(((regbehind_T *)rp) - 1); @@ -4001,13 +4565,13 @@ static bool regmatch( } // If we can't continue sequentially, break the inner loop. - if (status != RA_CONT) + if (status != RA_CONT) { break; + } // Continue in inner loop, advance to next item. scan = next; - - } // end of inner loop + } // end of inner loop // If there is something on the regstack execute the code for the state. // If the state is popped then loop and use the older state. @@ -4030,9 +4594,10 @@ static bool regmatch( case RS_ZOPEN: // Pop the state. Restore pointers when there is no match. - if (status == RA_NOMATCH) + if (status == RA_NOMATCH) { restore_se(&rp->rs_un.sesave, ®_startzpos[rp->rs_no], - ®_startzp[rp->rs_no]); + ®_startzp[rp->rs_no]); + } regstack_pop(&scan); break; @@ -4047,17 +4612,18 @@ static bool regmatch( case RS_ZCLOSE: // Pop the state. Restore pointers when there is no match. 
- if (status == RA_NOMATCH) + if (status == RA_NOMATCH) { restore_se(&rp->rs_un.sesave, ®_endzpos[rp->rs_no], - ®_endzp[rp->rs_no]); + ®_endzp[rp->rs_no]); + } regstack_pop(&scan); break; case RS_BRANCH: - if (status == RA_MATCH) + if (status == RA_MATCH) { // this branch matched, use it regstack_pop(&scan); - else { + } else { if (status != RA_BREAK) { // After a non-matching branch: try next one. reg_restore(&rp->rs_un.regsave, &backpos); @@ -4095,15 +4661,17 @@ static bool regmatch( status = RA_CONT; } regstack_pop(&scan); - if (status == RA_CONT) + if (status == RA_CONT) { scan = regnext(scan); + } break; case RS_BRCPLX_SHORT: // Pop the state. Restore pointers when there is no match. - if (status == RA_NOMATCH) + if (status == RA_NOMATCH) { // There was no match, try to match one more item. reg_restore(&rp->rs_un.regsave, &backpos); + } regstack_pop(&scan); if (status == RA_NOMATCH) { scan = OPERAND(scan); @@ -4115,16 +4683,18 @@ static bool regmatch( // Pop the state. If the operand matches for NOMATCH or // doesn't match for MATCH/SUBPAT, we fail. Otherwise backup, // except for SUBPAT, and continue with the next item. - if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH)) + if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH)) { status = RA_NOMATCH; - else { + } else { status = RA_CONT; - if (rp->rs_no != SUBPAT) // zero-width + if (rp->rs_no != SUBPAT) { // zero-width reg_restore(&rp->rs_un.regsave, &backpos); + } } regstack_pop(&scan); - if (status == RA_CONT) + if (status == RA_CONT) { scan = regnext(scan); + } break; case RS_BEHIND1: @@ -4162,10 +4732,10 @@ static bool regmatch( if (status == RA_MATCH && reg_save_equal(&behind_pos)) { // found a match that ends where "next" started behind_pos = (((regbehind_T *)rp) - 1)->save_behind; - if (rp->rs_no == BEHIND) + if (rp->rs_no == BEHIND) { reg_restore(&(((regbehind_T *)rp) - 1)->save_after, - &backpos); - else { + &backpos); + } else { // But we didn't want a match. 
Need to restore the // subexpr, because what follows matched, so they have // been set. @@ -4187,16 +4757,15 @@ static bool regmatch( < behind_pos.rs_u.pos.lnum ? (colnr_T)STRLEN(rex.line) : behind_pos.rs_u.pos.col) - - rp->rs_un.regsave.rs_u.pos.col >= limit)) + - rp->rs_un.regsave.rs_u.pos.col >= limit)) { no = FAIL; - else if (rp->rs_un.regsave.rs_u.pos.col == 0) { + } else if (rp->rs_un.regsave.rs_u.pos.col == 0) { if (rp->rs_un.regsave.rs_u.pos.lnum < behind_pos.rs_u.pos.lnum - || reg_getline( - --rp->rs_un.regsave.rs_u.pos.lnum) - == NULL) + || reg_getline(--rp->rs_un.regsave.rs_u.pos.lnum) + == NULL) { no = FAIL; - else { + } else { reg_restore(&rp->rs_un.regsave, &backpos); rp->rs_un.regsave.rs_u.pos.col = (colnr_T)STRLEN(rex.line); @@ -4207,7 +4776,7 @@ static bool regmatch( rp->rs_un.regsave.rs_u.pos.col -= utf_head_off(line, - line + rp->rs_un.regsave.rs_u.pos.col - 1) + line + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; } } else { @@ -4236,7 +4805,7 @@ static bool regmatch( behind_pos = (((regbehind_T *)rp) - 1)->save_behind; if (rp->rs_no == NOBEHIND) { reg_restore(&(((regbehind_T *)rp) - 1)->save_after, - &backpos); + &backpos); status = RA_MATCH; } else { // We do want a proper match. Need to restore the @@ -4254,9 +4823,8 @@ static bool regmatch( break; case RS_STAR_LONG: - case RS_STAR_SHORT: - { - regstar_T *rst = ((regstar_T *)rp) - 1; + case RS_STAR_SHORT: { + regstar_T *rst = ((regstar_T *)rp) - 1; if (status == RA_MATCH) { regstack_pop(&scan); @@ -4265,18 +4833,20 @@ static bool regmatch( } // Tried once already, restore input pointers. - if (status != RA_BREAK) + if (status != RA_BREAK) { reg_restore(&rp->rs_un.regsave, &backpos); + } // Repeat until we found a position where it could match. - for (;; ) { + for (;;) { if (status != RA_BREAK) { // Tried first position already, advance. if (rp->rs_state == RS_STAR_LONG) { // Trying for longest match, but couldn't or // didn't match -- back up one char. 
- if (--rst->count < rst->minval) + if (--rst->count < rst->minval) { break; + } if (rex.input == rex.line) { // backup to last char of previous line if (rex.lnum == 0) { @@ -4300,14 +4870,17 @@ static bool regmatch( // Couldn't or didn't match: try advancing one // char. if (rst->count == rst->minval - || regrepeat(OPERAND(rp->rs_scan), 1L) == 0) + || regrepeat(OPERAND(rp->rs_scan), 1L) == 0) { break; - ++rst->count; + } + rst->count++; } - if (got_int) + if (got_int) { break; - } else + } + } else { status = RA_NOMATCH; + } // If it could match, try it. if (rst->nextb == NUL || *rex.input == rst->nextb @@ -4331,13 +4904,15 @@ static bool regmatch( // If we want to continue the inner loop or didn't pop a state // continue matching loop if (status == RA_CONT || rp == (regitem_T *) - ((char *)regstack.ga_data + regstack.ga_len) - 1) + ((char *)regstack.ga_data + regstack.ga_len) - 1) { break; + } } // May need to continue with the inner loop, starting at "scan". - if (status == RA_CONT) + if (status == RA_CONT) { continue; + } // If the regstack is empty or something failed we are done. if (GA_EMPTY(®stack) || status == RA_FAIL) { @@ -4351,18 +4926,18 @@ static bool regmatch( } return status == RA_MATCH; } - - } // End of loop until the regstack is empty. + } // End of loop until the regstack is empty. // NOTREACHED } /// Try match of "prog" with at rex.line["col"]. -/// @returns 0 for failure, or number of lines contained in the match. -static long regtry(bt_regprog_T *prog, - colnr_T col, - proftime_T *tm, // timeout limit or NULL - int *timed_out) // flag set on timeout or NULL +/// +/// @param tm timeout limit or NULL +/// @param timed_out flag set on timeout or NULL +/// +/// @return 0 for failure, or number of lines contained in the match. 
+static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_out) { rex.input = rex.line + col; rex.need_clear_subexpr = true; @@ -4416,9 +4991,10 @@ static long regtry(bt_regprog_T *prog, - reg_startzpos[i].col); } } else { - if (reg_startzp[i] != NULL && reg_endzp[i] != NULL) + if (reg_startzp[i] != NULL && reg_endzp[i] != NULL) { re_extmatch_out->matches[i] = vim_strnsave(reg_startzp[i], reg_endzp[i] - reg_startzp[i]); + } } } } @@ -4427,14 +5003,16 @@ static long regtry(bt_regprog_T *prog, /// Match a regexp against a string ("line" points to the string) or multiple /// lines (if "line" is NULL, use reg_getline()). -/// @return 0 for failure, or number of lines contained in the match. -static long bt_regexec_both(char_u *line, - colnr_T col, // column to start search - proftime_T *tm, // timeout limit or NULL - int *timed_out) // flag set on timeout or NULL +/// +/// @param col column to start search +/// @param tm timeout limit or NULL +/// @param timed_out flag set on timeout or NULL +/// +/// @return 0 for failure, or number of lines contained in the match. +static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *timed_out) { - bt_regprog_T *prog; - char_u *s; + bt_regprog_T *prog; + char_u *s; long retval = 0L; // Create "regstack" and "backpos" if they are not allocated yet. @@ -4473,8 +5051,9 @@ static long bt_regexec_both(char_u *line, } // Check validity of program. - if (prog_magic_wrong()) + if (prog_magic_wrong()) { goto theend; + } // If the start column is past the maximum column: no need to try. 
if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { @@ -4590,10 +5169,12 @@ theend: if (reg_tofreelen > 400) { XFREE_CLEAR(reg_tofree); } - if (regstack.ga_maxlen > REGSTACK_INITIAL) + if (regstack.ga_maxlen > REGSTACK_INITIAL) { ga_clear(®stack); - if (backpos.ga_maxlen > BACKPOS_INITIAL) + } + if (backpos.ga_maxlen > BACKPOS_INITIAL) { ga_clear(&backpos); + } if (retval > 0) { // Make sure the end is never before the start. Can happen when \zs @@ -4616,19 +5197,16 @@ theend: return retval; } -/* - * Match a regexp against a string. - * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). - * Uses curbuf for line count and 'iskeyword'. - * If "line_lbr" is true, consider a "\n" in "line" to be a line break. - * - * Returns 0 for failure, number of lines contained in the match otherwise. - */ -static int bt_regexec_nl( - regmatch_T *rmp, - char_u *line, // string to match against - colnr_T col, // column to start looking for match - bool line_lbr) +/// Match a regexp against a string. +/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). +/// Uses curbuf for line count and 'iskeyword'. +/// If "line_lbr" is true, consider a "\n" in "line" to be a line break. +/// +/// @param line string to match against +/// @param col column to start looking for match +/// +/// @return 0 for failure, number of lines contained in the match otherwise. +static int bt_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col, bool line_lbr) { rex.reg_match = rmp; rex.reg_mmatch = NULL; @@ -4658,8 +5236,7 @@ static int bt_regexec_nl( /// /// @return zero if there is no match and number of lines contained in the match /// otherwise. 
-static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, - linenr_T lnum, colnr_T col, +static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm, int *timed_out) { rex.reg_match = NULL; @@ -4683,10 +5260,12 @@ static int re_num_cmp(uint32_t val, char_u *scan) { uint32_t n = (uint32_t)OPERAND_MIN(scan); - if (OPERAND_CMP(scan) == '>') + if (OPERAND_CMP(scan) == '>') { return val > n; - if (OPERAND_CMP(scan) == '<') + } + if (OPERAND_CMP(scan) == '<') { return val < n; + } return val == n; } @@ -4697,21 +5276,22 @@ static int re_num_cmp(uint32_t val, char_u *scan) */ static void regdump(char_u *pattern, bt_regprog_T *r) { - char_u *s; + char_u *s; int op = EXACTLY; // Arbitrary non-END op. - char_u *next; - char_u *end = NULL; - FILE *f; + char_u *next; + char_u *end = NULL; + FILE *f; -#ifdef BT_REGEXP_LOG +# ifdef BT_REGEXP_LOG f = fopen("bt_regexp_log.log", "a"); -#else +# else f = stdout; -#endif - if (f == NULL) +# endif + if (f == NULL) { return; + } fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", - pattern); + pattern); s = r->program + 1; // Loop until we find the END that isn't before a referred next (an END @@ -4720,12 +5300,14 @@ static void regdump(char_u *pattern, bt_regprog_T *r) op = OP(s); fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); // Where, what. next = regnext(s); - if (next == NULL) // Next ptr. + if (next == NULL) { // Next ptr. fprintf(f, "(0)"); - else + } else { fprintf(f, "(%d)", (int)((s - r->program) + (next - s))); - if (end < next) + } + if (end < next) { end = next; + } if (op == BRACE_LIMITS) { // Two ints fprintf(f, " minval %" PRId64 ", maxval %" PRId64, @@ -4746,8 +5328,9 @@ static void regdump(char_u *pattern, bt_regprog_T *r) || op == EXACTLY) { // Literal string, where present. 
fprintf(f, "\nxxxxxxxxx\n"); - while (*s != NUL) + while (*s != NUL) { fprintf(f, "%c", *s++); + } fprintf(f, "\nxxxxxxxxx\n"); s++; } @@ -4755,19 +5338,22 @@ static void regdump(char_u *pattern, bt_regprog_T *r) } // Header fields of interest. - if (r->regstart != NUL) + if (r->regstart != NUL) { fprintf(f, "start `%s' 0x%x; ", r->regstart < 256 - ? (char *)transchar(r->regstart) - : "multibyte", r->regstart); - if (r->reganch) + ? (char *)transchar(r->regstart) + : "multibyte", r->regstart); + } + if (r->reganch) { fprintf(f, "anchored; "); - if (r->regmust != NULL) + } + if (r->regmust != NULL) { fprintf(f, "must have \"%s\"", r->regmust); + } fprintf(f, "\r\n"); -#ifdef BT_REGEXP_LOG +# ifdef BT_REGEXP_LOG fclose(f); -#endif +# endif } #endif // BT_REGEXP_DUMP @@ -4778,7 +5364,7 @@ static void regdump(char_u *pattern, bt_regprog_T *r) */ static char_u *regprop(char_u *op) { - char *p; + char *p; static char buf[50]; STRCPY(buf, ":"); @@ -5144,8 +5730,9 @@ static char_u *regprop(char_u *op) p = NULL; break; } - if (p != NULL) + if (p != NULL) { STRCAT(buf, p); + } return (char_u *)buf; } #endif // REGEXP_DEBUG diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 7ab4f4ed53..4a73941f47 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -1,8 +1,6 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -// uncrustify:off - /* * NFA regular expression implementation. * @@ -11,8 +9,8 @@ #include <assert.h> #include <inttypes.h> -#include <stdbool.h> #include <limits.h> +#include <stdbool.h> #include "nvim/ascii.h" #include "nvim/garray.h" @@ -39,13 +37,13 @@ # define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log" #endif -/* Added to NFA_ANY - NFA_NUPPER_IC to include a NL. */ +// Added to NFA_ANY - NFA_NUPPER_IC to include a NL. 
#define NFA_ADD_NL 31 enum { NFA_SPLIT = -1024, NFA_MATCH, - NFA_EMPTY, /* matches 0-length */ + NFA_EMPTY, // matches 0-length NFA_START_COLL, // [abc] start NFA_END_COLL, // [abc] end @@ -64,17 +62,17 @@ enum { NFA_QUEST, // greedy \? (postfix only) NFA_QUEST_NONGREEDY, // non-greedy \? (postfix only) - NFA_BOL, /* ^ Begin line */ - NFA_EOL, /* $ End line */ - NFA_BOW, /* \< Begin word */ - NFA_EOW, /* \> End word */ - NFA_BOF, /* \%^ Begin file */ - NFA_EOF, /* \%$ End file */ + NFA_BOL, // ^ Begin line + NFA_EOL, // $ End line + NFA_BOW, // \< Begin word + NFA_EOW, // \> End word + NFA_BOF, // \%^ Begin file + NFA_EOF, // \%$ End file NFA_NEWL, - NFA_ZSTART, /* Used for \zs */ - NFA_ZEND, /* Used for \ze */ - NFA_NOPEN, /* Start of subexpression marked with \%( */ - NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ + NFA_ZSTART, // Used for \zs + NFA_ZEND, // Used for \ze + NFA_NOPEN, // Start of subexpression marked with \%( + NFA_NCLOSE, // End of subexpr. marked with \%( ... \) NFA_START_INVISIBLE, NFA_START_INVISIBLE_FIRST, NFA_START_INVISIBLE_NEG, @@ -91,34 +89,34 @@ enum { // composing multibyte char NFA_END_COMPOSING, // End of a composing char in the NFA NFA_ANY_COMPOSING, // \%C: Any composing characters. - NFA_OPT_CHARS, /* \%[abc] */ - - /* The following are used only in the postfix form, not in the NFA */ - NFA_PREV_ATOM_NO_WIDTH, /* Used for \@= */ - NFA_PREV_ATOM_NO_WIDTH_NEG, /* Used for \@! */ - NFA_PREV_ATOM_JUST_BEFORE, /* Used for \@<= */ - NFA_PREV_ATOM_JUST_BEFORE_NEG, /* Used for \@<! 
*/ - NFA_PREV_ATOM_LIKE_PATTERN, /* Used for \@> */ - - NFA_BACKREF1, /* \1 */ - NFA_BACKREF2, /* \2 */ - NFA_BACKREF3, /* \3 */ - NFA_BACKREF4, /* \4 */ - NFA_BACKREF5, /* \5 */ - NFA_BACKREF6, /* \6 */ - NFA_BACKREF7, /* \7 */ - NFA_BACKREF8, /* \8 */ - NFA_BACKREF9, /* \9 */ - NFA_ZREF1, /* \z1 */ - NFA_ZREF2, /* \z2 */ - NFA_ZREF3, /* \z3 */ - NFA_ZREF4, /* \z4 */ - NFA_ZREF5, /* \z5 */ - NFA_ZREF6, /* \z6 */ - NFA_ZREF7, /* \z7 */ - NFA_ZREF8, /* \z8 */ - NFA_ZREF9, /* \z9 */ - NFA_SKIP, /* Skip characters */ + NFA_OPT_CHARS, // \%[abc] + + // The following are used only in the postfix form, not in the NFA + NFA_PREV_ATOM_NO_WIDTH, // Used for \@= + NFA_PREV_ATOM_NO_WIDTH_NEG, // Used for \@! + NFA_PREV_ATOM_JUST_BEFORE, // Used for \@<= + NFA_PREV_ATOM_JUST_BEFORE_NEG, // Used for \@<! + NFA_PREV_ATOM_LIKE_PATTERN, // Used for \@> + + NFA_BACKREF1, // \1 + NFA_BACKREF2, // \2 + NFA_BACKREF3, // \3 + NFA_BACKREF4, // \4 + NFA_BACKREF5, // \5 + NFA_BACKREF6, // \6 + NFA_BACKREF7, // \7 + NFA_BACKREF8, // \8 + NFA_BACKREF9, // \9 + NFA_ZREF1, // \z1 + NFA_ZREF2, // \z2 + NFA_ZREF3, // \z3 + NFA_ZREF4, // \z4 + NFA_ZREF5, // \z5 + NFA_ZREF6, // \z6 + NFA_ZREF7, // \z7 + NFA_ZREF8, // \z8 + NFA_ZREF9, // \z9 + NFA_SKIP, // Skip characters NFA_MOPEN, NFA_MOPEN1, @@ -164,58 +162,58 @@ enum { NFA_ZCLOSE8, NFA_ZCLOSE9, - /* NFA_FIRST_NL */ - NFA_ANY, /* Match any one character. 
*/ - NFA_IDENT, /* Match identifier char */ - NFA_SIDENT, /* Match identifier char but no digit */ - NFA_KWORD, /* Match keyword char */ - NFA_SKWORD, /* Match word char but no digit */ - NFA_FNAME, /* Match file name char */ - NFA_SFNAME, /* Match file name char but no digit */ - NFA_PRINT, /* Match printable char */ - NFA_SPRINT, /* Match printable char but no digit */ - NFA_WHITE, /* Match whitespace char */ - NFA_NWHITE, /* Match non-whitespace char */ - NFA_DIGIT, /* Match digit char */ - NFA_NDIGIT, /* Match non-digit char */ - NFA_HEX, /* Match hex char */ - NFA_NHEX, /* Match non-hex char */ - NFA_OCTAL, /* Match octal char */ - NFA_NOCTAL, /* Match non-octal char */ - NFA_WORD, /* Match word char */ - NFA_NWORD, /* Match non-word char */ - NFA_HEAD, /* Match head char */ - NFA_NHEAD, /* Match non-head char */ - NFA_ALPHA, /* Match alpha char */ - NFA_NALPHA, /* Match non-alpha char */ - NFA_LOWER, /* Match lowercase char */ - NFA_NLOWER, /* Match non-lowercase char */ - NFA_UPPER, /* Match uppercase char */ - NFA_NUPPER, /* Match non-uppercase char */ - NFA_LOWER_IC, /* Match [a-z] */ - NFA_NLOWER_IC, /* Match [^a-z] */ - NFA_UPPER_IC, /* Match [A-Z] */ - NFA_NUPPER_IC, /* Match [^A-Z] */ + // NFA_FIRST_NL + NFA_ANY, // Match any one character. 
+ NFA_IDENT, // Match identifier char + NFA_SIDENT, // Match identifier char but no digit + NFA_KWORD, // Match keyword char + NFA_SKWORD, // Match word char but no digit + NFA_FNAME, // Match file name char + NFA_SFNAME, // Match file name char but no digit + NFA_PRINT, // Match printable char + NFA_SPRINT, // Match printable char but no digit + NFA_WHITE, // Match whitespace char + NFA_NWHITE, // Match non-whitespace char + NFA_DIGIT, // Match digit char + NFA_NDIGIT, // Match non-digit char + NFA_HEX, // Match hex char + NFA_NHEX, // Match non-hex char + NFA_OCTAL, // Match octal char + NFA_NOCTAL, // Match non-octal char + NFA_WORD, // Match word char + NFA_NWORD, // Match non-word char + NFA_HEAD, // Match head char + NFA_NHEAD, // Match non-head char + NFA_ALPHA, // Match alpha char + NFA_NALPHA, // Match non-alpha char + NFA_LOWER, // Match lowercase char + NFA_NLOWER, // Match non-lowercase char + NFA_UPPER, // Match uppercase char + NFA_NUPPER, // Match non-uppercase char + NFA_LOWER_IC, // Match [a-z] + NFA_NLOWER_IC, // Match [^a-z] + NFA_UPPER_IC, // Match [A-Z] + NFA_NUPPER_IC, // Match [^A-Z] NFA_FIRST_NL = NFA_ANY + NFA_ADD_NL, NFA_LAST_NL = NFA_NUPPER_IC + NFA_ADD_NL, - NFA_CURSOR, /* Match cursor pos */ - NFA_LNUM, /* Match line number */ - NFA_LNUM_GT, /* Match > line number */ - NFA_LNUM_LT, /* Match < line number */ - NFA_COL, /* Match cursor column */ - NFA_COL_GT, /* Match > cursor column */ - NFA_COL_LT, /* Match < cursor column */ - NFA_VCOL, /* Match cursor virtual column */ - NFA_VCOL_GT, /* Match > cursor virtual column */ - NFA_VCOL_LT, /* Match < cursor virtual column */ - NFA_MARK, /* Match mark */ - NFA_MARK_GT, /* Match > mark */ - NFA_MARK_LT, /* Match < mark */ - NFA_VISUAL, /* Match Visual area */ - - /* Character classes [:alnum:] etc */ + NFA_CURSOR, // Match cursor pos + NFA_LNUM, // Match line number + NFA_LNUM_GT, // Match > line number + NFA_LNUM_LT, // Match < line number + NFA_COL, // Match cursor column + NFA_COL_GT, // 
Match > cursor column + NFA_COL_LT, // Match < cursor column + NFA_VCOL, // Match cursor virtual column + NFA_VCOL_GT, // Match > cursor virtual column + NFA_VCOL_LT, // Match < cursor virtual column + NFA_MARK, // Match mark + NFA_MARK_GT, // Match > mark + NFA_MARK_LT, // Match < mark + NFA_VISUAL, // Match Visual area + + // Character classes [:alnum:] etc NFA_CLASS_ALNUM, NFA_CLASS_ALPHA, NFA_CLASS_BLANK, @@ -237,9 +235,9 @@ enum { NFA_CLASS_FNAME, }; -/* Keep in sync with classchars. */ +// Keep in sync with classchars. static int nfa_classcodes[] = { - NFA_ANY, NFA_IDENT, NFA_SIDENT, NFA_KWORD,NFA_SKWORD, + NFA_ANY, NFA_IDENT, NFA_SIDENT, NFA_KWORD, NFA_SKWORD, NFA_FNAME, NFA_SFNAME, NFA_PRINT, NFA_SPRINT, NFA_WHITE, NFA_NWHITE, NFA_DIGIT, NFA_NDIGIT, NFA_HEX, NFA_NHEX, NFA_OCTAL, NFA_NOCTAL, @@ -248,11 +246,9 @@ static int nfa_classcodes[] = { NFA_UPPER, NFA_NUPPER }; -static char_u e_nul_found[] = N_( - "E865: (NFA) Regexp end encountered prematurely"); +static char_u e_nul_found[] = N_("E865: (NFA) Regexp end encountered prematurely"); static char_u e_misplaced[] = N_("E866: (NFA regexp) Misplaced %c"); -static char_u e_ill_char_class[] = N_( - "E877: (NFA regexp) Invalid character class: %" PRId64); +static char_u e_ill_char_class[] = N_("E877: (NFA regexp) Invalid character class: %" PRId64); static char_u e_value_too_large[] = N_("E951: \\% value too large"); // Since the out pointers in the list are always @@ -260,13 +256,13 @@ static char_u e_value_too_large[] = N_("E951: \\% value too large"); // as storage for the Ptrlists. typedef union Ptrlist Ptrlist; union Ptrlist { - Ptrlist *next; + Ptrlist *next; nfa_state_T *s; }; struct Frag { nfa_state_T *start; - Ptrlist *out; + Ptrlist *out; }; typedef struct Frag Frag_T; @@ -276,36 +272,36 @@ typedef struct { // When REG_MULTI is true list.multi is used, otherwise list.line. 
union { struct multipos { - linenr_T start_lnum; - linenr_T end_lnum; + linenr_T start_lnum; + linenr_T end_lnum; colnr_T start_col; colnr_T end_col; } multi[NSUBEXP]; struct linepos { - char_u *start; - char_u *end; + char_u *start; + char_u *end; } line[NSUBEXP]; } list; } regsub_T; typedef struct { - regsub_T norm; /* \( .. \) matches */ - regsub_T synt; /* \z( .. \) matches */ + regsub_T norm; // \( .. \) matches + regsub_T synt; // \z( .. \) matches } regsubs_T; -/* nfa_pim_T stores a Postponed Invisible Match. */ +// nfa_pim_T stores a Postponed Invisible Match. typedef struct nfa_pim_S nfa_pim_T; struct nfa_pim_S { - int result; /* NFA_PIM_*, see below */ - nfa_state_T *state; /* the invisible match start state */ - regsubs_T subs; /* submatch info, only party used */ + int result; // NFA_PIM_*, see below + nfa_state_T *state; // the invisible match start state + regsubs_T subs; // submatch info, only party used union { lpos_T pos; - char_u *ptr; - } end; /* where the match must end */ + char_u *ptr; + } end; // where the match must end }; -/* nfa_thread_T contains execution information of a NFA state */ +// nfa_thread_T contains execution information of a NFA state typedef struct { nfa_state_T *state; int count; @@ -316,7 +312,7 @@ typedef struct { // nfa_list_T contains the alternative NFA execution states. typedef struct { - nfa_thread_T *t; ///< allocated array of states + nfa_thread_T *t; ///< allocated array of states int n; ///< nr of states currently in "t" int len; ///< max nr of states in "t" int id; ///< ID of the list @@ -337,10 +333,10 @@ static bool wants_nfa; static int nstate; ///< Number of states in the NFA. Also used when executing. static int istate; ///< Index in the state vector, used in alloc_state() -/* If not NULL match must end at this position */ +// If not NULL match must end at this position static save_se_T *nfa_endp = NULL; -/* 0 for first call to nfa_regmatch(), 1 for recursive call. 
*/ +// 0 for first call to nfa_regmatch(), 1 for recursive call. static int nfa_ll_index = 0; #ifdef INCLUDE_GENERATED_DECLARATIONS @@ -349,35 +345,31 @@ static int nfa_ll_index = 0; // Helper functions used when doing re2post() ... regatom() parsing #define EMIT(c) \ - do { \ - if (post_ptr >= post_end) { \ - realloc_post_list(); \ - } \ - *post_ptr++ = c; \ - } while (0) - -/* - * Initialize internal variables before NFA compilation. - */ -static void -nfa_regcomp_start ( - char_u *expr, - int re_flags /* see vim_regcomp() */ -) + do { \ + if (post_ptr >= post_end) { \ + realloc_post_list(); \ + } \ + *post_ptr++ = c; \ + } while (0) + +/// Initialize internal variables before NFA compilation. +/// +/// @param re_flags @see vim_regcomp() +static void nfa_regcomp_start(char_u *expr, int re_flags) { size_t postfix_size; size_t nstate_max; nstate = 0; istate = 0; - /* A reasonable estimation for maximum size */ + // A reasonable estimation for maximum size nstate_max = (STRLEN(expr) + 1) * 25; // Some items blow up in size, such as [A-z]. Add more space for that. // When it is still not enough realloc_post_list() will be used. nstate_max += 1000; - /* Size for postfix representation of expr. */ + // Size for postfix representation of expr. postfix_size = sizeof(int) * nstate_max; post_start = (int *)xmalloc(postfix_size); @@ -387,7 +379,7 @@ nfa_regcomp_start ( rex.nfa_has_zend = false; rex.nfa_has_backref = false; - /* shared with BT engine */ + // shared with BT engine regcomp_start(expr, re_flags); } @@ -399,14 +391,15 @@ static int nfa_get_reganch(nfa_state_T *start, int depth) { nfa_state_T *p = start; - if (depth > 4) + if (depth > 4) { return 0; + } while (p != NULL) { switch (p->c) { case NFA_BOL: case NFA_BOF: - return 1; /* yes! */ + return 1; // yes! 
case NFA_ZSTART: case NFA_ZEND: @@ -442,7 +435,7 @@ static int nfa_get_reganch(nfa_state_T *start, int depth) && nfa_get_reganch(p->out1, depth + 1); default: - return 0; /* noooo */ + return 0; // noooo } } return 0; @@ -456,12 +449,13 @@ static int nfa_get_regstart(nfa_state_T *start, int depth) { nfa_state_T *p = start; - if (depth > 4) + if (depth > 4) { return 0; + } while (p != NULL) { switch (p->c) { - /* all kinds of zero-width matches */ + // all kinds of zero-width matches case NFA_BOL: case NFA_BOF: case NFA_BOW: @@ -507,19 +501,20 @@ static int nfa_get_regstart(nfa_state_T *start, int depth) p = p->out; break; - case NFA_SPLIT: - { + case NFA_SPLIT: { int c1 = nfa_get_regstart(p->out, depth + 1); int c2 = nfa_get_regstart(p->out1, depth + 1); - if (c1 == c2) - return c1; /* yes! */ + if (c1 == c2) { + return c1; // yes! + } return 0; } default: - if (p->c > 0) - return p->c; /* yes! */ + if (p->c > 0) { + return p->c; // yes! + } return 0; } } @@ -535,21 +530,23 @@ static char_u *nfa_get_match_text(nfa_state_T *start) { nfa_state_T *p = start; int len = 0; - char_u *ret; - char_u *s; + char_u *ret; + char_u *s; - if (p->c != NFA_MOPEN) - return NULL; /* just in case */ + if (p->c != NFA_MOPEN) { + return NULL; // just in case + } p = p->out; while (p->c > 0) { len += utf_char2len(p->c); p = p->out; } - if (p->c != NFA_MCLOSE || p->out->c != NFA_MATCH) + if (p->c != NFA_MCLOSE || p->out->c != NFA_MATCH) { return NULL; + } ret = xmalloc(len); - p = start->out->out; /* skip first char, it goes into regstart */ + p = start->out->out; // skip first char, it goes into regstart s = ret; while (p->c > 0) { s += utf_char2bytes(p->c, (char *)s); @@ -587,22 +584,23 @@ static void realloc_post_list(void) */ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) { -# define CLASS_not 0x80 -# define CLASS_af 0x40 -# define CLASS_AF 0x20 -# define CLASS_az 0x10 -# define CLASS_AZ 0x08 -# define CLASS_o7 0x04 -# define CLASS_o9 0x02 -# define 
CLASS_underscore 0x01 - - char_u *p; +#define CLASS_not 0x80 +#define CLASS_af 0x40 +#define CLASS_AF 0x20 +#define CLASS_az 0x10 +#define CLASS_AZ 0x08 +#define CLASS_o7 0x04 +#define CLASS_o9 0x02 +#define CLASS_underscore 0x01 + + char_u *p; int config = 0; bool newl = extra_newl == true; - if (*end != ']') + if (*end != ']') { return FAIL; + } p = start; if (*p == '^') { config |= CLASS_not; @@ -652,12 +650,14 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) } else if (*p == '\n') { newl = true; p++; - } else + } else { return FAIL; - } /* while (p < end) */ + } + } // while (p < end) - if (p != end) + if (p != end) { return FAIL; + } if (newl == true) { extra_newl = NFA_ADD_NL; @@ -769,17 +769,40 @@ static void nfa_emit_equi_class(int c) #define y_acute 0xfd #define y_diaeresis 0xff switch (c) { - case 'A': case A_grave: case A_acute: case A_circumflex: - case A_virguilla: case A_diaeresis: case A_ring: - case 0x100: case 0x102: case 0x104: case 0x1cd: - case 0x1de: case 0x1e0: case 0x1fa: case 0x200: - case 0x202: case 0x226: case 0x23a: case 0x1e00: - case 0x1ea0: case 0x1ea2: case 0x1ea4: case 0x1ea6: - case 0x1ea8: case 0x1eaa: case 0x1eac: case 0x1eae: - case 0x1eb0: case 0x1eb2: case 0x1eb4: case 0x1eb6: - EMIT2('A') EMIT2(A_grave) EMIT2(A_acute) - EMIT2(A_circumflex) EMIT2(A_virguilla) - EMIT2(A_diaeresis) EMIT2(A_ring) + case 'A': + case A_grave: + case A_acute: + case A_circumflex: + case A_virguilla: + case A_diaeresis: + case A_ring: + case 0x100: + case 0x102: + case 0x104: + case 0x1cd: + case 0x1de: + case 0x1e0: + case 0x1fa: + case 0x200: + case 0x202: + case 0x226: + case 0x23a: + case 0x1e00: + case 0x1ea0: + case 0x1ea2: + case 0x1ea4: + case 0x1ea6: + case 0x1ea8: + case 0x1eaa: + case 0x1eac: + case 0x1eae: + case 0x1eb0: + case 0x1eb2: + case 0x1eb4: + case 0x1eb6: + EMIT2('A') EMIT2(A_grave) EMIT2(A_acute) // NOLINT(whitespace/cast) + EMIT2(A_circumflex) EMIT2(A_virguilla) // NOLINT(whitespace/cast) + 
EMIT2(A_diaeresis) EMIT2(A_ring) // NOLINT(whitespace/cast) EMIT2(0x100) EMIT2(0x102) EMIT2(0x104) EMIT2(0x1cd) EMIT2(0x1de) EMIT2(0x1e0) EMIT2(0x1fa) EMIT2(0x200) EMIT2(0x202) @@ -790,39 +813,76 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1eb2) EMIT2(0x1eb6) EMIT2(0x1eb4) return; - case 'B': case 0x181: case 0x243: case 0x1e02: - case 0x1e04: case 0x1e06: + case 'B': + case 0x181: + case 0x243: + case 0x1e02: + case 0x1e04: + case 0x1e06: EMIT2('B') EMIT2(0x181) EMIT2(0x243) EMIT2(0x1e02) EMIT2(0x1e04) EMIT2(0x1e06) return; - case 'C': case C_cedilla: case 0x106: case 0x108: - case 0x10a: case 0x10c: case 0x187: case 0x23b: - case 0x1e08: case 0xa792: + case 'C': + case C_cedilla: + case 0x106: + case 0x108: + case 0x10a: + case 0x10c: + case 0x187: + case 0x23b: + case 0x1e08: + case 0xa792: EMIT2('C') EMIT2(C_cedilla) EMIT2(0x106) EMIT2(0x108) EMIT2(0x10a) EMIT2(0x10c) EMIT2(0x187) EMIT2(0x23b) EMIT2(0x1e08) EMIT2(0xa792) return; - case 'D': case 0x10e: case 0x110: case 0x18a: - case 0x1e0a: case 0x1e0c: case 0x1e0e: case 0x1e10: + case 'D': + case 0x10e: + case 0x110: + case 0x18a: + case 0x1e0a: + case 0x1e0c: + case 0x1e0e: + case 0x1e10: case 0x1e12: EMIT2('D') EMIT2(0x10e) EMIT2(0x110) EMIT2(0x18a) EMIT2(0x1e0a) EMIT2(0x1e0c) EMIT2(0x1e0e) EMIT2(0x1e10) EMIT2(0x1e12) return; - case 'E': case E_grave: case E_acute: case E_circumflex: - case E_diaeresis: case 0x112: case 0x114: case 0x116: - case 0x118: case 0x11a: case 0x204: case 0x206: - case 0x228: case 0x246: case 0x1e14: case 0x1e16: - case 0x1e18: case 0x1e1a: case 0x1e1c: case 0x1eb8: - case 0x1eba: case 0x1ebc: case 0x1ebe: case 0x1ec0: - case 0x1ec2: case 0x1ec4: case 0x1ec6: - EMIT2('E') EMIT2(E_grave) EMIT2(E_acute) - EMIT2(E_circumflex) EMIT2(E_diaeresis) + case 'E': + case E_grave: + case E_acute: + case E_circumflex: + case E_diaeresis: + case 0x112: + case 0x114: + case 0x116: + case 0x118: + case 0x11a: + case 0x204: + case 0x206: + case 0x228: + case 0x246: + case 0x1e14: + case 
0x1e16: + case 0x1e18: + case 0x1e1a: + case 0x1e1c: + case 0x1eb8: + case 0x1eba: + case 0x1ebc: + case 0x1ebe: + case 0x1ec0: + case 0x1ec2: + case 0x1ec4: + case 0x1ec6: + EMIT2('E') EMIT2(E_grave) EMIT2(E_acute) // NOLINT(whitespace/cast) + EMIT2(E_circumflex) EMIT2(E_diaeresis) // NOLINT(whitespace/cast) EMIT2(0x112) EMIT2(0x114) EMIT2(0x116) EMIT2(0x118) EMIT2(0x11a) EMIT2(0x204) EMIT2(0x206) EMIT2(0x228) EMIT2(0x246) @@ -833,34 +893,65 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ec6) return; - case 'F': case 0x191: case 0x1e1e: case 0xa798: + case 'F': + case 0x191: + case 0x1e1e: + case 0xa798: EMIT2('F') EMIT2(0x191) EMIT2(0x1e1e) EMIT2(0xa798) return; - case 'G': case 0x11c: case 0x11e: case 0x120: - case 0x122: case 0x193: case 0x1e4: case 0x1e6: - case 0x1f4: case 0x1e20: case 0xa7a0: + case 'G': + case 0x11c: + case 0x11e: + case 0x120: + case 0x122: + case 0x193: + case 0x1e4: + case 0x1e6: + case 0x1f4: + case 0x1e20: + case 0xa7a0: EMIT2('G') EMIT2(0x11c) EMIT2(0x11e) EMIT2(0x120) EMIT2(0x122) EMIT2(0x193) EMIT2(0x1e4) EMIT2(0x1e6) EMIT2(0x1f4) EMIT2(0x1e20) EMIT2(0xa7a0) return; - case 'H': case 0x124: case 0x126: case 0x21e: - case 0x1e22: case 0x1e24: case 0x1e26: case 0x1e28: - case 0x1e2a: case 0x2c67: + case 'H': + case 0x124: + case 0x126: + case 0x21e: + case 0x1e22: + case 0x1e24: + case 0x1e26: + case 0x1e28: + case 0x1e2a: + case 0x2c67: EMIT2('H') EMIT2(0x124) EMIT2(0x126) EMIT2(0x21e) EMIT2(0x1e22) EMIT2(0x1e24) EMIT2(0x1e26) EMIT2(0x1e28) EMIT2(0x1e2a) EMIT2(0x2c67) return; - case 'I': case I_grave: case I_acute: case I_circumflex: - case I_diaeresis: case 0x128: case 0x12a: case 0x12c: - case 0x12e: case 0x130: case 0x197: case 0x1cf: - case 0x208: case 0x20a: case 0x1e2c: case 0x1e2e: - case 0x1ec8: case 0x1eca: - EMIT2('I') EMIT2(I_grave) EMIT2(I_acute) - EMIT2(I_circumflex) EMIT2(I_diaeresis) + case 'I': + case I_grave: + case I_acute: + case I_circumflex: + case I_diaeresis: + case 0x128: + case 0x12a: + case 0x12c: + case 
0x12e: + case 0x130: + case 0x197: + case 0x1cf: + case 0x208: + case 0x20a: + case 0x1e2c: + case 0x1e2e: + case 0x1ec8: + case 0x1eca: + EMIT2('I') EMIT2(I_grave) EMIT2(I_acute) // NOLINT(whitespace/cast) + EMIT2(I_circumflex) EMIT2(I_diaeresis) // NOLINT(whitespace/cast) EMIT2(0x128) EMIT2(0x12a) EMIT2(0x12c) EMIT2(0x12e) EMIT2(0x130) EMIT2(0x197) EMIT2(0x1cf) EMIT2(0x208) EMIT2(0x20a) @@ -868,34 +959,62 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1eca) return; - case 'J': case 0x134: case 0x248: + case 'J': + case 0x134: + case 0x248: EMIT2('J') EMIT2(0x134) EMIT2(0x248) return; - case 'K': case 0x136: case 0x198: case 0x1e8: case 0x1e30: - case 0x1e32: case 0x1e34: case 0x2c69: case 0xa740: + case 'K': + case 0x136: + case 0x198: + case 0x1e8: + case 0x1e30: + case 0x1e32: + case 0x1e34: + case 0x2c69: + case 0xa740: EMIT2('K') EMIT2(0x136) EMIT2(0x198) EMIT2(0x1e8) EMIT2(0x1e30) EMIT2(0x1e32) EMIT2(0x1e34) EMIT2(0x2c69) EMIT2(0xa740) return; - case 'L': case 0x139: case 0x13b: case 0x13d: - case 0x13f: case 0x141: case 0x23d: case 0x1e36: - case 0x1e38: case 0x1e3a: case 0x1e3c: case 0x2c60: + case 'L': + case 0x139: + case 0x13b: + case 0x13d: + case 0x13f: + case 0x141: + case 0x23d: + case 0x1e36: + case 0x1e38: + case 0x1e3a: + case 0x1e3c: + case 0x2c60: EMIT2('L') EMIT2(0x139) EMIT2(0x13b) EMIT2(0x13d) EMIT2(0x13f) EMIT2(0x141) EMIT2(0x23d) EMIT2(0x1e36) EMIT2(0x1e38) EMIT2(0x1e3a) EMIT2(0x1e3c) EMIT2(0x2c60) return; - case 'M': case 0x1e3e: case 0x1e40: case 0x1e42: + case 'M': + case 0x1e3e: + case 0x1e40: + case 0x1e42: EMIT2('M') EMIT2(0x1e3e) EMIT2(0x1e40) EMIT2(0x1e42) return; - case 'N': case N_virguilla: - case 0x143: case 0x145: case 0x147: case 0x1f8: - case 0x1e44: case 0x1e46: case 0x1e48: case 0x1e4a: + case 'N': + case N_virguilla: + case 0x143: + case 0x145: + case 0x147: + case 0x1f8: + case 0x1e44: + case 0x1e46: + case 0x1e48: + case 0x1e4a: case 0xa7a4: EMIT2('N') EMIT2(N_virguilla) EMIT2(0x143) EMIT2(0x145) EMIT2(0x147) @@ 
-903,19 +1022,47 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1e48) EMIT2(0x1e4a) EMIT2(0xa7a4) return; - case 'O': case O_grave: case O_acute: case O_circumflex: - case O_virguilla: case O_diaeresis: case O_slash: - case 0x14c: case 0x14e: case 0x150: case 0x19f: - case 0x1a0: case 0x1d1: case 0x1ea: case 0x1ec: - case 0x1fe: case 0x20c: case 0x20e: case 0x22a: - case 0x22c: case 0x22e: case 0x230: case 0x1e4c: - case 0x1e4e: case 0x1e50: case 0x1e52: case 0x1ecc: - case 0x1ece: case 0x1ed0: case 0x1ed2: case 0x1ed4: - case 0x1ed6: case 0x1ed8: case 0x1eda: case 0x1edc: - case 0x1ede: case 0x1ee0: case 0x1ee2: - EMIT2('O') EMIT2(O_grave) EMIT2(O_acute) - EMIT2(O_circumflex) EMIT2(O_virguilla) - EMIT2(O_diaeresis) EMIT2(O_slash) + case 'O': + case O_grave: + case O_acute: + case O_circumflex: + case O_virguilla: + case O_diaeresis: + case O_slash: + case 0x14c: + case 0x14e: + case 0x150: + case 0x19f: + case 0x1a0: + case 0x1d1: + case 0x1ea: + case 0x1ec: + case 0x1fe: + case 0x20c: + case 0x20e: + case 0x22a: + case 0x22c: + case 0x22e: + case 0x230: + case 0x1e4c: + case 0x1e4e: + case 0x1e50: + case 0x1e52: + case 0x1ecc: + case 0x1ece: + case 0x1ed0: + case 0x1ed2: + case 0x1ed4: + case 0x1ed6: + case 0x1ed8: + case 0x1eda: + case 0x1edc: + case 0x1ede: + case 0x1ee0: + case 0x1ee2: + EMIT2('O') EMIT2(O_grave) EMIT2(O_acute) // NOLINT(whitespace/cast) + EMIT2(O_circumflex) EMIT2(O_virguilla) // NOLINT(whitespace/cast) + EMIT2(O_diaeresis) EMIT2(O_slash) // NOLINT(whitespace/cast) EMIT2(0x14c) EMIT2(0x14e) EMIT2(0x150) EMIT2(0x19f) EMIT2(0x1a0) EMIT2(0x1d1) EMIT2(0x1ea) EMIT2(0x1ec) EMIT2(0x1fe) @@ -929,51 +1076,109 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ee2) return; - case 'P': case 0x1a4: case 0x1e54: case 0x1e56: case 0x2c63: + case 'P': + case 0x1a4: + case 0x1e54: + case 0x1e56: + case 0x2c63: EMIT2('P') EMIT2(0x1a4) EMIT2(0x1e54) EMIT2(0x1e56) EMIT2(0x2c63) return; - case 'Q': case 0x24a: + case 'Q': + case 0x24a: EMIT2('Q') EMIT2(0x24a) 
return; - case 'R': case 0x154: case 0x156: case 0x158: case 0x210: - case 0x212: case 0x24c: case 0x1e58: case 0x1e5a: - case 0x1e5c: case 0x1e5e: case 0x2c64: case 0xa7a6: + case 'R': + case 0x154: + case 0x156: + case 0x158: + case 0x210: + case 0x212: + case 0x24c: + case 0x1e58: + case 0x1e5a: + case 0x1e5c: + case 0x1e5e: + case 0x2c64: + case 0xa7a6: EMIT2('R') EMIT2(0x154) EMIT2(0x156) EMIT2(0x158) EMIT2(0x210) EMIT2(0x212) EMIT2(0x24c) EMIT2(0x1e58) EMIT2(0x1e5a) EMIT2(0x1e5c) EMIT2(0x1e5e) EMIT2(0x2c64) EMIT2(0xa7a6) return; - case 'S': case 0x15a: case 0x15c: case 0x15e: case 0x160: - case 0x218: case 0x1e60: case 0x1e62: case 0x1e64: - case 0x1e66: case 0x1e68: case 0x2c7e: case 0xa7a8: + case 'S': + case 0x15a: + case 0x15c: + case 0x15e: + case 0x160: + case 0x218: + case 0x1e60: + case 0x1e62: + case 0x1e64: + case 0x1e66: + case 0x1e68: + case 0x2c7e: + case 0xa7a8: EMIT2('S') EMIT2(0x15a) EMIT2(0x15c) EMIT2(0x15e) EMIT2(0x160) EMIT2(0x218) EMIT2(0x1e60) EMIT2(0x1e62) EMIT2(0x1e64) EMIT2(0x1e66) EMIT2(0x1e68) EMIT2(0x2c7e) EMIT2(0xa7a8) return; - case 'T': case 0x162: case 0x164: case 0x166: case 0x1ac: - case 0x1ae: case 0x21a: case 0x23e: case 0x1e6a: case 0x1e6c: - case 0x1e6e: case 0x1e70: + case 'T': + case 0x162: + case 0x164: + case 0x166: + case 0x1ac: + case 0x1ae: + case 0x21a: + case 0x23e: + case 0x1e6a: + case 0x1e6c: + case 0x1e6e: + case 0x1e70: EMIT2('T') EMIT2(0x162) EMIT2(0x164) EMIT2(0x166) EMIT2(0x1ac) EMIT2(0x1ae) EMIT2(0x23e) EMIT2(0x21a) EMIT2(0x1e6a) EMIT2(0x1e6c) EMIT2(0x1e6e) EMIT2(0x1e70) return; - case 'U': case U_grave: case U_acute: case U_diaeresis: - case U_circumflex: case 0x168: case 0x16a: case 0x16c: - case 0x16e: case 0x170: case 0x172: case 0x1af: - case 0x1d3: case 0x1d5: case 0x1d7: case 0x1d9: - case 0x1db: case 0x214: case 0x216: case 0x244: - case 0x1e72: case 0x1e74: case 0x1e76: case 0x1e78: - case 0x1e7a: case 0x1ee4: case 0x1ee6: case 0x1ee8: - case 0x1eea: case 0x1eec: case 0x1eee: case 0x1ef0: - 
EMIT2('U') EMIT2(U_grave) EMIT2(U_acute) - EMIT2(U_diaeresis) EMIT2(U_circumflex) + case 'U': + case U_grave: + case U_acute: + case U_diaeresis: + case U_circumflex: + case 0x168: + case 0x16a: + case 0x16c: + case 0x16e: + case 0x170: + case 0x172: + case 0x1af: + case 0x1d3: + case 0x1d5: + case 0x1d7: + case 0x1d9: + case 0x1db: + case 0x214: + case 0x216: + case 0x244: + case 0x1e72: + case 0x1e74: + case 0x1e76: + case 0x1e78: + case 0x1e7a: + case 0x1ee4: + case 0x1ee6: + case 0x1ee8: + case 0x1eea: + case 0x1eec: + case 0x1eee: + case 0x1ef0: + EMIT2('U') EMIT2(U_grave) EMIT2(U_acute) // NOLINT(whitespace/cast) + EMIT2(U_diaeresis) EMIT2(U_circumflex) // NOLINT(whitespace/cast) EMIT2(0x168) EMIT2(0x16a) EMIT2(0x16c) EMIT2(0x16e) EMIT2(0x170) EMIT2(0x172) EMIT2(0x1af) EMIT2(0x1d3) @@ -986,23 +1191,42 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ef0) return; - case 'V': case 0x1b2: case 0x1e7c: case 0x1e7e: + case 'V': + case 0x1b2: + case 0x1e7c: + case 0x1e7e: EMIT2('V') EMIT2(0x1b2) EMIT2(0x1e7c) EMIT2(0x1e7e) return; - case 'W': case 0x174: case 0x1e80: case 0x1e82: case 0x1e84: - case 0x1e86: case 0x1e88: + case 'W': + case 0x174: + case 0x1e80: + case 0x1e82: + case 0x1e84: + case 0x1e86: + case 0x1e88: EMIT2('W') EMIT2(0x174) EMIT2(0x1e80) EMIT2(0x1e82) EMIT2(0x1e84) EMIT2(0x1e86) EMIT2(0x1e88) return; - case 'X': case 0x1e8a: case 0x1e8c: + case 'X': + case 0x1e8a: + case 0x1e8c: EMIT2('X') EMIT2(0x1e8a) EMIT2(0x1e8c) return; - case 'Y': case Y_acute: case 0x176: case 0x178: - case 0x1b3: case 0x232: case 0x24e: case 0x1e8e: - case 0x1ef2: case 0x1ef4: case 0x1ef6: case 0x1ef8: + case 'Y': + case Y_acute: + case 0x176: + case 0x178: + case 0x1b3: + case 0x232: + case 0x24e: + case 0x1e8e: + case 0x1ef2: + case 0x1ef4: + case 0x1ef6: + case 0x1ef8: EMIT2('Y') EMIT2(Y_acute) EMIT2(0x176) EMIT2(0x178) EMIT2(0x1b3) EMIT2(0x232) EMIT2(0x24e) EMIT2(0x1e8e) @@ -1010,26 +1234,56 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ef8) return; - case 'Z': 
case 0x179: case 0x17b: case 0x17d: - case 0x1b5: case 0x1e90: case 0x1e92: case 0x1e94: + case 'Z': + case 0x179: + case 0x17b: + case 0x17d: + case 0x1b5: + case 0x1e90: + case 0x1e92: + case 0x1e94: case 0x2c6b: EMIT2('Z') EMIT2(0x179) EMIT2(0x17b) EMIT2(0x17d) EMIT2(0x1b5) EMIT2(0x1e90) EMIT2(0x1e92) EMIT2(0x1e94) EMIT2(0x2c6b) return; - case 'a': case a_grave: case a_acute: case a_circumflex: - case a_virguilla: case a_diaeresis: case a_ring: - case 0x101: case 0x103: case 0x105: case 0x1ce: - case 0x1df: case 0x1e1: case 0x1fb: case 0x201: - case 0x203: case 0x227: case 0x1d8f: case 0x1e01: - case 0x1e9a: case 0x1ea1: case 0x1ea3: case 0x1ea5: - case 0x1ea7: case 0x1ea9: case 0x1eab: case 0x1ead: - case 0x1eaf: case 0x1eb1: case 0x1eb3: case 0x1eb5: - case 0x1eb7: case 0x2c65: - EMIT2('a') EMIT2(a_grave) EMIT2(a_acute) - EMIT2(a_circumflex) EMIT2(a_virguilla) - EMIT2(a_diaeresis) EMIT2(a_ring) + case 'a': + case a_grave: + case a_acute: + case a_circumflex: + case a_virguilla: + case a_diaeresis: + case a_ring: + case 0x101: + case 0x103: + case 0x105: + case 0x1ce: + case 0x1df: + case 0x1e1: + case 0x1fb: + case 0x201: + case 0x203: + case 0x227: + case 0x1d8f: + case 0x1e01: + case 0x1e9a: + case 0x1ea1: + case 0x1ea3: + case 0x1ea5: + case 0x1ea7: + case 0x1ea9: + case 0x1eab: + case 0x1ead: + case 0x1eaf: + case 0x1eb1: + case 0x1eb3: + case 0x1eb5: + case 0x1eb7: + case 0x2c65: + EMIT2('a') EMIT2(a_grave) EMIT2(a_acute) // NOLINT(whitespace/cast) + EMIT2(a_circumflex) EMIT2(a_virguilla) // NOLINT(whitespace/cast) + EMIT2(a_diaeresis) EMIT2(a_ring) // NOLINT(whitespace/cast) EMIT2(0x101) EMIT2(0x103) EMIT2(0x105) EMIT2(0x1ce) EMIT2(0x1df) EMIT2(0x1e1) EMIT2(0x1fb) EMIT2(0x201) EMIT2(0x203) @@ -1041,14 +1295,28 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1eb7) EMIT2(0x2c65) return; - case 'b': case 0x180: case 0x253: case 0x1d6c: case 0x1d80: - case 0x1e03: case 0x1e05: case 0x1e07: + case 'b': + case 0x180: + case 0x253: + case 0x1d6c: + case 
0x1d80: + case 0x1e03: + case 0x1e05: + case 0x1e07: EMIT2('b') EMIT2(0x180) EMIT2(0x253) EMIT2(0x1d6c) EMIT2(0x1d80) EMIT2(0x1e03) EMIT2(0x1e05) EMIT2(0x1e07) return; - case 'c': case c_cedilla: case 0x107: case 0x109: case 0x10b: - case 0x10d: case 0x188: case 0x23c: case 0x1e09: case 0xa793: + case 'c': + case c_cedilla: + case 0x107: + case 0x109: + case 0x10b: + case 0x10d: + case 0x188: + case 0x23c: + case 0x1e09: + case 0xa793: case 0xa794: EMIT2('c') EMIT2(c_cedilla) EMIT2(0x107) EMIT2(0x109) EMIT2(0x10b) @@ -1056,24 +1324,54 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1e09) EMIT2(0xa793) EMIT2(0xa794) return; - case 'd': case 0x10f: case 0x111: case 0x257: case 0x1d6d: - case 0x1d81: case 0x1d91: case 0x1e0b: case 0x1e0d: case 0x1e0f: - case 0x1e11: case 0x1e13: + case 'd': + case 0x10f: + case 0x111: + case 0x257: + case 0x1d6d: + case 0x1d81: + case 0x1d91: + case 0x1e0b: + case 0x1e0d: + case 0x1e0f: + case 0x1e11: + case 0x1e13: EMIT2('d') EMIT2(0x10f) EMIT2(0x111) EMIT2(0x257) EMIT2(0x1d6d) EMIT2(0x1d81) EMIT2(0x1d91) EMIT2(0x1e0b) EMIT2(0x1e0d) EMIT2(0x1e0f) EMIT2(0x1e11) EMIT2(0x1e13) return; - case 'e': case e_grave: case e_acute: case e_circumflex: - case e_diaeresis: case 0x113: case 0x115: case 0x117: - case 0x119: case 0x11b: case 0x205: case 0x207: - case 0x229: case 0x247: case 0x1d92: case 0x1e15: - case 0x1e17: case 0x1e19: case 0x1e1b: case 0x1e1d: - case 0x1eb9: case 0x1ebb: case 0x1ebd: case 0x1ebf: - case 0x1ec1: case 0x1ec3: case 0x1ec5: case 0x1ec7: - EMIT2('e') EMIT2(e_grave) EMIT2(e_acute) - EMIT2(e_circumflex) EMIT2(e_diaeresis) + case 'e': + case e_grave: + case e_acute: + case e_circumflex: + case e_diaeresis: + case 0x113: + case 0x115: + case 0x117: + case 0x119: + case 0x11b: + case 0x205: + case 0x207: + case 0x229: + case 0x247: + case 0x1d92: + case 0x1e15: + case 0x1e17: + case 0x1e19: + case 0x1e1b: + case 0x1e1d: + case 0x1eb9: + case 0x1ebb: + case 0x1ebd: + case 0x1ebf: + case 0x1ec1: + case 0x1ec3: + case 
0x1ec5: + case 0x1ec7: + EMIT2('e') EMIT2(e_grave) EMIT2(e_acute) // NOLINT(whitespace/cast) + EMIT2(e_circumflex) EMIT2(e_diaeresis) // NOLINT(whitespace/cast) EMIT2(0x113) EMIT2(0x115) EMIT2(0x117) EMIT2(0x119) EMIT2(0x11b) EMIT2(0x205) EMIT2(0x207) EMIT2(0x229) @@ -1084,37 +1382,72 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ec3) EMIT2(0x1ec5) EMIT2(0x1ec7) return; - case 'f': case 0x192: case 0x1d6e: case 0x1d82: - case 0x1e1f: case 0xa799: + case 'f': + case 0x192: + case 0x1d6e: + case 0x1d82: + case 0x1e1f: + case 0xa799: EMIT2('f') EMIT2(0x192) EMIT2(0x1d6e) EMIT2(0x1d82) EMIT2(0x1e1f) EMIT2(0xa799) return; - case 'g': case 0x11d: case 0x11f: case 0x121: case 0x123: - case 0x1e5: case 0x1e7: case 0x1f5: case 0x260: case 0x1d83: - case 0x1e21: case 0xa7a1: + case 'g': + case 0x11d: + case 0x11f: + case 0x121: + case 0x123: + case 0x1e5: + case 0x1e7: + case 0x1f5: + case 0x260: + case 0x1d83: + case 0x1e21: + case 0xa7a1: EMIT2('g') EMIT2(0x11d) EMIT2(0x11f) EMIT2(0x121) EMIT2(0x123) EMIT2(0x1e5) EMIT2(0x1e7) EMIT2(0x1f5) EMIT2(0x260) EMIT2(0x1d83) EMIT2(0x1e21) EMIT2(0xa7a1) return; - case 'h': case 0x125: case 0x127: case 0x21f: case 0x1e23: - case 0x1e25: case 0x1e27: case 0x1e29: case 0x1e2b: - case 0x1e96: case 0x2c68: case 0xa795: + case 'h': + case 0x125: + case 0x127: + case 0x21f: + case 0x1e23: + case 0x1e25: + case 0x1e27: + case 0x1e29: + case 0x1e2b: + case 0x1e96: + case 0x2c68: + case 0xa795: EMIT2('h') EMIT2(0x125) EMIT2(0x127) EMIT2(0x21f) EMIT2(0x1e23) EMIT2(0x1e25) EMIT2(0x1e27) EMIT2(0x1e29) EMIT2(0x1e2b) EMIT2(0x1e96) EMIT2(0x2c68) EMIT2(0xa795) return; - case 'i': case i_grave: case i_acute: case i_circumflex: - case i_diaeresis: case 0x129: case 0x12b: case 0x12d: - case 0x12f: case 0x1d0: case 0x209: case 0x20b: - case 0x268: case 0x1d96: case 0x1e2d: case 0x1e2f: - case 0x1ec9: case 0x1ecb: - EMIT2('i') EMIT2(i_grave) EMIT2(i_acute) - EMIT2(i_circumflex) EMIT2(i_diaeresis) + case 'i': + case i_grave: + case i_acute: + case 
i_circumflex: + case i_diaeresis: + case 0x129: + case 0x12b: + case 0x12d: + case 0x12f: + case 0x1d0: + case 0x209: + case 0x20b: + case 0x268: + case 0x1d96: + case 0x1e2d: + case 0x1e2f: + case 0x1ec9: + case 0x1ecb: + EMIT2('i') EMIT2(i_grave) EMIT2(i_acute) // NOLINT(whitespace/cast) + EMIT2(i_circumflex) EMIT2(i_diaeresis) // NOLINT(whitespace/cast) EMIT2(0x129) EMIT2(0x12b) EMIT2(0x12d) EMIT2(0x12f) EMIT2(0x1d0) EMIT2(0x209) EMIT2(0x20b) EMIT2(0x268) EMIT2(0x1d96) @@ -1122,34 +1455,69 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ecb) EMIT2(0x1ecb) return; - case 'j': case 0x135: case 0x1f0: case 0x249: + case 'j': + case 0x135: + case 0x1f0: + case 0x249: EMIT2('j') EMIT2(0x135) EMIT2(0x1f0) EMIT2(0x249) return; - case 'k': case 0x137: case 0x199: case 0x1e9: case 0x1d84: - case 0x1e31: case 0x1e33: case 0x1e35: case 0x2c6a: case 0xa741: + case 'k': + case 0x137: + case 0x199: + case 0x1e9: + case 0x1d84: + case 0x1e31: + case 0x1e33: + case 0x1e35: + case 0x2c6a: + case 0xa741: EMIT2('k') EMIT2(0x137) EMIT2(0x199) EMIT2(0x1e9) EMIT2(0x1d84) EMIT2(0x1e31) EMIT2(0x1e33) EMIT2(0x1e35) EMIT2(0x2c6a) EMIT2(0xa741) return; - case 'l': case 0x13a: case 0x13c: case 0x13e: case 0x140: - case 0x142: case 0x19a: case 0x1e37: case 0x1e39: case 0x1e3b: - case 0x1e3d: case 0x2c61: + case 'l': + case 0x13a: + case 0x13c: + case 0x13e: + case 0x140: + case 0x142: + case 0x19a: + case 0x1e37: + case 0x1e39: + case 0x1e3b: + case 0x1e3d: + case 0x2c61: EMIT2('l') EMIT2(0x13a) EMIT2(0x13c) EMIT2(0x13e) EMIT2(0x140) EMIT2(0x142) EMIT2(0x19a) EMIT2(0x1e37) EMIT2(0x1e39) EMIT2(0x1e3b) EMIT2(0x1e3d) EMIT2(0x2c61) return; - case 'm': case 0x1d6f: case 0x1e3f: case 0x1e41: case 0x1e43: + case 'm': + case 0x1d6f: + case 0x1e3f: + case 0x1e41: + case 0x1e43: EMIT2('m') EMIT2(0x1d6f) EMIT2(0x1e3f) EMIT2(0x1e41) EMIT2(0x1e43) return; - case 'n': case n_virguilla: case 0x144: case 0x146: case 0x148: - case 0x149: case 0x1f9: case 0x1d70: case 0x1d87: case 0x1e45: - case 0x1e47: 
case 0x1e49: case 0x1e4b: case 0xa7a5: + case 'n': + case n_virguilla: + case 0x144: + case 0x146: + case 0x148: + case 0x149: + case 0x1f9: + case 0x1d70: + case 0x1d87: + case 0x1e45: + case 0x1e47: + case 0x1e49: + case 0x1e4b: + case 0xa7a5: EMIT2('n') EMIT2(n_virguilla) EMIT2(0x144) EMIT2(0x146) EMIT2(0x148) EMIT2(0x149) EMIT2(0x1f9) EMIT2(0x1d70) @@ -1157,19 +1525,47 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1e49) EMIT2(0x1e4b) EMIT2(0xa7a5) return; - case 'o': case o_grave: case o_acute: case o_circumflex: - case o_virguilla: case o_diaeresis: case o_slash: - case 0x14d: case 0x14f: case 0x151: case 0x1a1: - case 0x1d2: case 0x1eb: case 0x1ed: case 0x1ff: - case 0x20d: case 0x20f: case 0x22b: case 0x22d: - case 0x22f: case 0x231: case 0x275: case 0x1e4d: - case 0x1e4f: case 0x1e51: case 0x1e53: case 0x1ecd: - case 0x1ecf: case 0x1ed1: case 0x1ed3: case 0x1ed5: - case 0x1ed7: case 0x1ed9: case 0x1edb: case 0x1edd: - case 0x1edf: case 0x1ee1: case 0x1ee3: - EMIT2('o') EMIT2(o_grave) EMIT2(o_acute) - EMIT2(o_circumflex) EMIT2(o_virguilla) - EMIT2(o_diaeresis) EMIT2(o_slash) + case 'o': + case o_grave: + case o_acute: + case o_circumflex: + case o_virguilla: + case o_diaeresis: + case o_slash: + case 0x14d: + case 0x14f: + case 0x151: + case 0x1a1: + case 0x1d2: + case 0x1eb: + case 0x1ed: + case 0x1ff: + case 0x20d: + case 0x20f: + case 0x22b: + case 0x22d: + case 0x22f: + case 0x231: + case 0x275: + case 0x1e4d: + case 0x1e4f: + case 0x1e51: + case 0x1e53: + case 0x1ecd: + case 0x1ecf: + case 0x1ed1: + case 0x1ed3: + case 0x1ed5: + case 0x1ed7: + case 0x1ed9: + case 0x1edb: + case 0x1edd: + case 0x1edf: + case 0x1ee1: + case 0x1ee3: + EMIT2('o') EMIT2(o_grave) EMIT2(o_acute) // NOLINT(whitespace/cast) + EMIT2(o_circumflex) EMIT2(o_virguilla) // NOLINT(whitespace/cast) + EMIT2(o_diaeresis) EMIT2(o_slash) // NOLINT(whitespace/cast) EMIT2(0x14d) EMIT2(0x14f) EMIT2(0x151) EMIT2(0x1a1) EMIT2(0x1d2) EMIT2(0x1eb) EMIT2(0x1ed) EMIT2(0x1ff) EMIT2(0x20d) @@ 
-1183,19 +1579,38 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ee3) return; - case 'p': case 0x1a5: case 0x1d71: case 0x1d7d: case 0x1d88: - case 0x1e55: case 0x1e57: + case 'p': + case 0x1a5: + case 0x1d71: + case 0x1d7d: + case 0x1d88: + case 0x1e55: + case 0x1e57: EMIT2('p') EMIT2(0x1a5) EMIT2(0x1d71) EMIT2(0x1d7d) EMIT2(0x1d88) EMIT2(0x1e55) EMIT2(0x1e57) return; - case 'q': case 0x24b: case 0x2a0: + case 'q': + case 0x24b: + case 0x2a0: EMIT2('q') EMIT2(0x24b) EMIT2(0x2a0) return; - case 'r': case 0x155: case 0x157: case 0x159: case 0x211: - case 0x213: case 0x24d: case 0x27d: case 0x1d72: case 0x1d73: - case 0x1d89: case 0x1e59: case 0x1e5b: case 0x1e5d: case 0x1e5f: + case 'r': + case 0x155: + case 0x157: + case 0x159: + case 0x211: + case 0x213: + case 0x24d: + case 0x27d: + case 0x1d72: + case 0x1d73: + case 0x1d89: + case 0x1e59: + case 0x1e5b: + case 0x1e5d: + case 0x1e5f: case 0xa7a7: EMIT2('r') EMIT2(0x155) EMIT2(0x157) EMIT2(0x159) EMIT2(0x211) EMIT2(0x213) EMIT2(0x24d) EMIT2(0x27d) @@ -1203,34 +1618,84 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1e5b) EMIT2(0x1e5d) EMIT2(0x1e5f) EMIT2(0xa7a7) return; - case 's': case 0x15b: case 0x15d: case 0x15f: case 0x161: - case 0x219: case 0x23f: case 0x1d74: case 0x1d8a: case 0x1e61: - case 0x1e63: case 0x1e65: case 0x1e67: case 0x1e69: case 0xa7a9: + case 's': + case 0x15b: + case 0x15d: + case 0x15f: + case 0x161: + case 0x219: + case 0x23f: + case 0x1d74: + case 0x1d8a: + case 0x1e61: + case 0x1e63: + case 0x1e65: + case 0x1e67: + case 0x1e69: + case 0xa7a9: EMIT2('s') EMIT2(0x15b) EMIT2(0x15d) EMIT2(0x15f) EMIT2(0x161) EMIT2(0x219) EMIT2(0x23f) EMIT2(0x1d74) EMIT2(0x1d8a) EMIT2(0x1e61) EMIT2(0x1e63) EMIT2(0x1e65) EMIT2(0x1e67) EMIT2(0x1e69) EMIT2(0xa7a9) return; - case 't': case 0x163: case 0x165: case 0x167: case 0x1ab: - case 0x1ad: case 0x21b: case 0x288: case 0x1d75: case 0x1e6b: - case 0x1e6d: case 0x1e6f: case 0x1e71: case 0x1e97: case 0x2c66: + case 't': + case 0x163: + case 0x165: + case 
0x167: + case 0x1ab: + case 0x1ad: + case 0x21b: + case 0x288: + case 0x1d75: + case 0x1e6b: + case 0x1e6d: + case 0x1e6f: + case 0x1e71: + case 0x1e97: + case 0x2c66: EMIT2('t') EMIT2(0x163) EMIT2(0x165) EMIT2(0x167) EMIT2(0x1ab) EMIT2(0x1ad) EMIT2(0x21b) EMIT2(0x288) EMIT2(0x1d75) EMIT2(0x1e6b) EMIT2(0x1e6d) EMIT2(0x1e6f) EMIT2(0x1e71) EMIT2(0x1e97) EMIT2(0x2c66) return; - case 'u': case u_grave: case u_acute: case u_circumflex: - case u_diaeresis: case 0x169: case 0x16b: case 0x16d: - case 0x16f: case 0x171: case 0x173: case 0x1b0: case 0x1d4: - case 0x1d6: case 0x1d8: case 0x1da: case 0x1dc: case 0x215: - case 0x217: case 0x289: case 0x1d7e: case 0x1d99: case 0x1e73: - case 0x1e75: case 0x1e77: case 0x1e79: case 0x1e7b: - case 0x1ee5: case 0x1ee7: case 0x1ee9: case 0x1eeb: - case 0x1eed: case 0x1eef: case 0x1ef1: - EMIT2('u') EMIT2(u_grave) EMIT2(u_acute) - EMIT2(u_circumflex) EMIT2(u_diaeresis) + case 'u': + case u_grave: + case u_acute: + case u_circumflex: + case u_diaeresis: + case 0x169: + case 0x16b: + case 0x16d: + case 0x16f: + case 0x171: + case 0x173: + case 0x1b0: + case 0x1d4: + case 0x1d6: + case 0x1d8: + case 0x1da: + case 0x1dc: + case 0x215: + case 0x217: + case 0x289: + case 0x1d7e: + case 0x1d99: + case 0x1e73: + case 0x1e75: + case 0x1e77: + case 0x1e79: + case 0x1e7b: + case 0x1ee5: + case 0x1ee7: + case 0x1ee9: + case 0x1eeb: + case 0x1eed: + case 0x1eef: + case 0x1ef1: + EMIT2('u') EMIT2(u_grave) EMIT2(u_acute) // NOLINT(whitespace/cast) + EMIT2(u_circumflex) EMIT2(u_diaeresis) // NOLINT(whitespace/cast) EMIT2(0x169) EMIT2(0x16b) EMIT2(0x16d) EMIT2(0x16f) EMIT2(0x171) EMIT2(0x173) EMIT2(0x1d6) EMIT2(0x1d8) @@ -1243,24 +1708,45 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1eed) EMIT2(0x1eef) EMIT2(0x1ef1) return; - case 'v': case 0x28b: case 0x1d8c: case 0x1e7d: case 0x1e7f: + case 'v': + case 0x28b: + case 0x1d8c: + case 0x1e7d: + case 0x1e7f: EMIT2('v') EMIT2(0x28b) EMIT2(0x1d8c) EMIT2(0x1e7d) EMIT2(0x1e7f) return; - case 'w': case 
0x175: case 0x1e81: case 0x1e83: case 0x1e85: - case 0x1e87: case 0x1e89: case 0x1e98: + case 'w': + case 0x175: + case 0x1e81: + case 0x1e83: + case 0x1e85: + case 0x1e87: + case 0x1e89: + case 0x1e98: EMIT2('w') EMIT2(0x175) EMIT2(0x1e81) EMIT2(0x1e83) EMIT2(0x1e85) EMIT2(0x1e87) EMIT2(0x1e89) EMIT2(0x1e98) return; - case 'x': case 0x1e8b: case 0x1e8d: + case 'x': + case 0x1e8b: + case 0x1e8d: EMIT2('x') EMIT2(0x1e8b) EMIT2(0x1e8d) return; - case 'y': case y_acute: case y_diaeresis: case 0x177: - case 0x1b4: case 0x233: case 0x24f: case 0x1e8f: - case 0x1e99: case 0x1ef3: case 0x1ef5: case 0x1ef7: + case 'y': + case y_acute: + case y_diaeresis: + case 0x177: + case 0x1b4: + case 0x233: + case 0x24f: + case 0x1e8f: + case 0x1e99: + case 0x1ef3: + case 0x1ef5: + case 0x1ef7: case 0x1ef9: EMIT2('y') EMIT2(y_acute) EMIT2(y_diaeresis) EMIT2(0x177) EMIT2(0x1b4) EMIT2(0x233) EMIT2(0x24f) @@ -1268,9 +1754,17 @@ static void nfa_emit_equi_class(int c) EMIT2(0x1ef5) EMIT2(0x1ef7) EMIT2(0x1ef9) return; - case 'z': case 0x17a: case 0x17c: case 0x17e: case 0x1b6: - case 0x1d76: case 0x1d8e: case 0x1e91: case 0x1e93: - case 0x1e95: case 0x2c6c: + case 'z': + case 0x17a: + case 0x17c: + case 0x17e: + case 0x1b6: + case 0x1d76: + case 0x1d8e: + case 0x1e91: + case 0x1e93: + case 0x1e95: + case 0x2c6c: EMIT2('z') EMIT2(0x17a) EMIT2(0x17c) EMIT2(0x17e) EMIT2(0x1b6) EMIT2(0x1d76) EMIT2(0x1d8e) EMIT2(0x1e91) EMIT2(0x1e93) EMIT2(0x1e95) EMIT2(0x2c6c) @@ -1311,9 +1805,9 @@ static int nfa_regatom(void) int equiclass; int collclass; int got_coll_char; - char_u *p; - char_u *endp; - char_u *old_regparse = regparse; + char_u *p; + char_u *endp; + char_u *old_regparse = regparse; int extra = 0; int emit_range; int negated; @@ -1344,14 +1838,15 @@ static int nfa_regatom(void) case Magic('_'): c = no_Magic(getchr()); - if (c == NUL) + if (c == NUL) { EMSG_RET_FAIL(_(e_nul_found)); + } - if (c == '^') { /* "\_^" is start-of-line */ + if (c == '^') { // "\_^" is start-of-line EMIT(NFA_BOL); 
break; } - if (c == '$') { /* "\_$" is end-of-line */ + if (c == '$') { // "\_$" is end-of-line EMIT(NFA_EOL); had_eol = true; break; @@ -1359,12 +1854,13 @@ static int nfa_regatom(void) extra = NFA_ADD_NL; - /* "\_[" is collection plus newline */ - if (c == '[') + // "\_[" is collection plus newline + if (c == '[') { goto collection; + } - // "\_x" is character class plus newline - FALLTHROUGH; + // "\_x" is character class plus newline + FALLTHROUGH; /* * Character classes. @@ -1454,9 +1950,8 @@ static int nfa_regatom(void) semsg(_(e_misplaced), (int64_t)no_Magic(c)); return FAIL; - case Magic('~'): - { - char_u *lp; + case Magic('~'): { + char_u *lp; // Previous substitute pattern. // Generated as "\%(pattern\)". @@ -1482,17 +1977,16 @@ static int nfa_regatom(void) case Magic('6'): case Magic('7'): case Magic('8'): - case Magic('9'): - { - int refnum = no_Magic(c) - '1'; + case Magic('9'): { + int refnum = no_Magic(c) - '1'; - if (!seen_endbrace(refnum + 1)) { - return FAIL; - } - EMIT(NFA_BACKREF1 + refnum); - rex.nfa_has_backref = true; + if (!seen_endbrace(refnum + 1)) { + return FAIL; } - break; + EMIT(NFA_BACKREF1 + refnum); + rex.nfa_has_backref = true; + } + break; case Magic('z'): c = no_Magic(getchr()); @@ -1548,28 +2042,35 @@ static int nfa_regatom(void) case Magic('%'): c = no_Magic(getchr()); switch (c) { - /* () without a back reference */ + // () without a back reference case '(': - if (nfa_reg(REG_NPAREN) == FAIL) + if (nfa_reg(REG_NPAREN) == FAIL) { return FAIL; + } EMIT(NFA_NOPEN); break; - case 'd': /* %d123 decimal */ - case 'o': /* %o123 octal */ - case 'x': /* %xab hex 2 */ - case 'u': /* %uabcd hex 4 */ - case 'U': /* %U1234abcd hex 8 */ + case 'd': // %d123 decimal + case 'o': // %o123 octal + case 'x': // %xab hex 2 + case 'u': // %uabcd hex 4 + case 'U': // %U1234abcd hex 8 { int64_t nr; switch (c) { - case 'd': nr = getdecchrs(); break; - case 'o': nr = getoctchrs(); break; - case 'x': nr = gethexchrs(2); break; - case 'u': nr = 
gethexchrs(4); break; - case 'U': nr = gethexchrs(8); break; - default: nr = -1; break; + case 'd': + nr = getdecchrs(); break; + case 'o': + nr = getoctchrs(); break; + case 'x': + nr = gethexchrs(2); break; + case 'u': + nr = gethexchrs(4); break; + case 'U': + nr = gethexchrs(8); break; + default: + nr = -1; break; } if (nr < 0 || nr > INT_MAX) { @@ -1604,18 +2105,19 @@ static int nfa_regatom(void) EMIT(NFA_ANY_COMPOSING); break; - case '[': - { + case '[': { int n; - /* \%[abc] */ - for (n = 0; (c = peekchr()) != ']'; ++n) { - if (c == NUL) + // \%[abc] + for (n = 0; (c = peekchr()) != ']'; n++) { + if (c == NUL) { EMSG2_RET_FAIL(_(e_missing_sb), - reg_magic == MAGIC_ALL); - /* recursive call! */ - if (nfa_regatom() == FAIL) + reg_magic == MAGIC_ALL); + } + // recursive call! + if (nfa_regatom() == FAIL) { return FAIL; + } } (void)getchr(); // get the ] if (n == 0) { @@ -1635,8 +2137,7 @@ static int nfa_regatom(void) break; } - default: - { + default: { int64_t n = 0; const int cmp = c; bool cur = false; @@ -1700,9 +2201,9 @@ static int nfa_regatom(void) EMIT((int)n); break; } else if (c == '\'' && n == 0) { - /* \%'m \%<'m \%>'m */ + // \%'m \%<'m \%>'m EMIT(cmp == '<' ? NFA_MARK_LT : - cmp == '>' ? NFA_MARK_GT : NFA_MARK); + cmp == '>' ? NFA_MARK_GT : NFA_MARK); EMIT(getchr()); break; } @@ -1740,8 +2241,9 @@ collection: EMIT(result - NFA_ADD_NL); EMIT(NFA_NEWL); EMIT(NFA_OR); - } else + } else { EMIT(result); + } regparse = endp; MB_PTR_ADV(regparse); return OK; @@ -1756,8 +2258,9 @@ collection: negated = true; MB_PTR_ADV(regparse); EMIT(NFA_START_NEG_COLL); - } else + } else { EMIT(NFA_START_COLL); + } if (*regparse == '-') { startc = '-'; EMIT(startc); @@ -1771,16 +2274,17 @@ collection: startc = -1; got_coll_char = false; if (*regparse == '[') { - /* Check for [: :], [= =], [. .] */ + // Check for [: :], [= =], [. .] 
equiclass = collclass = 0; charclass = get_char_class(&regparse); if (charclass == CLASS_NONE) { equiclass = get_equi_class(&regparse); - if (equiclass == 0) + if (equiclass == 0) { collclass = get_coll_element(&regparse); + } } - /* Character class like [:alpha:] */ + // Character class like [:alpha:] if (charclass != CLASS_NONE) { switch (charclass) { case CLASS_ALNUM: @@ -1846,12 +2350,12 @@ collection: EMIT(NFA_CONCAT); continue; } - /* Try equivalence class [=a=] and the like */ + // Try equivalence class [=a=] and the like if (equiclass != 0) { nfa_emit_equi_class(equiclass); continue; } - /* Try collating class like [. .] */ + // Try collating class like [. .] if (collclass != 0) { startc = collclass; // allow [.a.]-x as a range // Will emit the proper atom at the end of the @@ -1876,26 +2380,23 @@ collection: && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, regparse[1]) - != NULL) - ) - ) { + != NULL))) { MB_PTR_ADV(regparse); if (*regparse == 'n') { startc = (reg_string || emit_range || regparse[1] == '-') ? NL : NFA_NEWL; - } else if (*regparse == 'd' - || *regparse == 'o' - || *regparse == 'x' - || *regparse == 'u' - || *regparse == 'U' - ) { + } else if (*regparse == 'd' + || *regparse == 'o' + || *regparse == 'x' + || *regparse == 'u' + || *regparse == 'U') { // TODO(RE): This needs more testing startc = coll_get_char(); got_coll_char = true; MB_PTR_BACK(old_regparse, regparse); } else { - /* \r,\t,\e,\b */ + // \r,\t,\e,\b startc = backslash_trans(*regparse); } } @@ -1905,7 +2406,7 @@ collection: startc = utf_ptr2char((char *)regparse); } - /* Previous char was '-', so this char is end of range. */ + // Previous char was '-', so this char is end of range. 
if (emit_range) { int endc = startc; startc = oldstartc; @@ -1977,7 +2478,7 @@ collection: EMIT(NFA_CONCAT); } - /* skip the trailing ] */ + // skip the trailing ] regparse = endp; MB_PTR_ADV(regparse); @@ -1995,14 +2496,14 @@ collection: } return OK; - } /* if exists closing ] */ + } // if exists closing ] - if (reg_strict) + if (reg_strict) { EMSG_RET_FAIL(_(e_missingbracket)); + } FALLTHROUGH; - default: - { + default: { int plen; nfa_do_multibyte: @@ -2019,12 +2520,14 @@ nfa_do_multibyte: // building the postfix form, not the NFA itself; // a composing char could be: a, b, c, NFA_COMPOSING // where 'b' and 'c' are chars with codes > 256. */ - for (;; ) { + for (;;) { EMIT(c); - if (i > 0) + if (i > 0) { EMIT(NFA_CONCAT); - if ((i += utf_char2len(c)) >= plen) + } + if ((i += utf_char2len(c)) >= plen) { break; + } c = utf_ptr2char((char *)old_regparse + i); } EMIT(NFA_COMPOSING); @@ -2047,8 +2550,8 @@ nfa_do_multibyte: * times the atom can be matched. Example: "a*" matches any sequence of "a" * characters: "", "a", "aa", etc. * - * piece ::= atom - * or atom multi + * piece ::= atom + * or atom multi */ static int nfa_regpiece(void) { @@ -2068,16 +2571,17 @@ static int nfa_regpiece(void) // next. 
save_parse_state(&old_state); - /* store current pos in the postfix form, for \{m,n} involving 0s */ + // store current pos in the postfix form, for \{m,n} involving 0s my_post_start = (int)(post_ptr - post_start); ret = nfa_regatom(); - if (ret == FAIL) - return FAIL; /* cascaded error */ - + if (ret == FAIL) { + return FAIL; // cascaded error + } op = peekchr(); - if (re_multi_type(op) == NOT_MULTI) + if (re_multi_type(op) == NOT_MULTI) { return OK; + } skipchr(); switch (op) { @@ -2099,37 +2603,39 @@ static int nfa_regpiece(void) */ restore_parse_state(&old_state); curchr = -1; - if (nfa_regatom() == FAIL) + if (nfa_regatom() == FAIL) { return FAIL; + } EMIT(NFA_STAR); EMIT(NFA_CONCAT); - skipchr(); /* skip the \+ */ + skipchr(); // skip the \+ break; case Magic('@'): c2 = getdecchrs(); op = no_Magic(getchr()); i = 0; - switch(op) { + switch (op) { case '=': - /* \@= */ + // \@= i = NFA_PREV_ATOM_NO_WIDTH; break; case '!': - /* \@! */ + // \@! i = NFA_PREV_ATOM_NO_WIDTH_NEG; break; case '<': op = no_Magic(getchr()); - if (op == '=') - /* \@<= */ + if (op == '=') { + // \@<= i = NFA_PREV_ATOM_JUST_BEFORE; - else if (op == '!') - /* \@<! */ + } else if (op == '!') { + // \@<! 
i = NFA_PREV_ATOM_JUST_BEFORE_NEG; + } break; case '>': - /* \@> */ + // \@> i = NFA_PREV_ATOM_LIKE_PATTERN; break; } @@ -2139,8 +2645,9 @@ static int nfa_regpiece(void) } EMIT(i); if (i == NFA_PREV_ATOM_JUST_BEFORE - || i == NFA_PREV_ATOM_JUST_BEFORE_NEG) + || i == NFA_PREV_ATOM_JUST_BEFORE_NEG) { EMIT(c2); + } break; case Magic('?'): @@ -2161,26 +2668,28 @@ static int nfa_regpiece(void) skipchr(); greedy = false; } - if (!read_limits(&minval, &maxval)) + if (!read_limits(&minval, &maxval)) { EMSG_RET_FAIL(_("E870: (NFA regexp) Error reading repetition limits")); + } // <atom>{0,inf}, <atom>{0,} and <atom>{} are equivalent to // <atom>* if (minval == 0 && maxval == MAX_LIMIT) { - if (greedy) - /* \{}, \{0,} */ + if (greedy) { + // \{}, \{0,} EMIT(NFA_STAR); - else - /* \{-}, \{-0,} */ + } else { + // \{-}, \{-0,} EMIT(NFA_STAR_NONGREEDY); + } break; } - /* Special case: x{0} or x{-0} */ + // Special case: x{0} or x{-0} if (maxval == 0) { - /* Ignore result of previous call to nfa_regatom() */ + // Ignore result of previous call to nfa_regatom() post_ptr = post_start + my_post_start; - /* NFA_EMPTY is 0-length and works everywhere */ + // NFA_EMPTY is 0-length and works everywhere EMIT(NFA_EMPTY); return OK; } @@ -2199,35 +2708,40 @@ static int nfa_regpiece(void) return FAIL; } - /* Ignore previous call to nfa_regatom() */ + // Ignore previous call to nfa_regatom() post_ptr = post_start + my_post_start; - /* Save parse state after the repeated atom and the \{} */ + // Save parse state after the repeated atom and the \{} save_parse_state(&new_state); quest = (greedy == true ? 
NFA_QUEST : NFA_QUEST_NONGREEDY); for (i = 0; i < maxval; i++) { - /* Goto beginning of the repeated atom */ + // Goto beginning of the repeated atom restore_parse_state(&old_state); old_post_pos = (int)(post_ptr - post_start); - if (nfa_regatom() == FAIL) + if (nfa_regatom() == FAIL) { return FAIL; - /* after "minval" times, atoms are optional */ + } + // after "minval" times, atoms are optional if (i + 1 > minval) { if (maxval == MAX_LIMIT) { - if (greedy) + if (greedy) { EMIT(NFA_STAR); - else + } else { EMIT(NFA_STAR_NONGREEDY); - } else + } + } else { EMIT(quest); + } } - if (old_post_pos != my_post_start) + if (old_post_pos != my_post_start) { EMIT(NFA_CONCAT); - if (i + 1 > minval && maxval == MAX_LIMIT) + } + if (i + 1 > minval && maxval == MAX_LIMIT) { break; + } } - /* Go to just after the repeated atom and the \{} */ + // Go to just after the repeated atom and the \{} restore_parse_state(&new_state); curchr = -1; @@ -2236,7 +2750,7 @@ static int nfa_regpiece(void) default: break; - } /* end switch */ + } // end switch if (re_multi_type(peekchr()) != NOT_MULTI) { // Can't have a multi follow a multi. @@ -2251,10 +2765,10 @@ static int nfa_regpiece(void) * first piece, followed by a match for the second piece, etc. Example: * "f[0-9]b", first matches "f", then a digit and then "b". * - * concat ::= piece - * or piece piece - * or piece piece piece - * etc. + * concat ::= piece + * or piece piece + * or piece piece piece + * etc. */ static int nfa_regconcat(void) { @@ -2326,10 +2840,10 @@ static int nfa_regconcat(void) * "foobeep\&..." matches "foo" in "foobeep". * ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob" * - * branch ::= concat - * or concat \& concat - * or concat \& concat \& concat - * etc. + * branch ::= concat + * or concat \& concat + * or concat \& concat \& concat + * etc. 
*/ static int nfa_regbranch(void) { @@ -2337,9 +2851,10 @@ static int nfa_regbranch(void) old_post_pos = (int)(post_ptr - post_start); - /* First branch, possibly the only one */ - if (nfa_regconcat() == FAIL) + // First branch, possibly the only one + if (nfa_regconcat() == FAIL) { return FAIL; + } // Try next concats while (peekchr() == Magic('&')) { @@ -2351,71 +2866,76 @@ static int nfa_regbranch(void) EMIT(NFA_NOPEN); EMIT(NFA_PREV_ATOM_NO_WIDTH); old_post_pos = (int)(post_ptr - post_start); - if (nfa_regconcat() == FAIL) + if (nfa_regconcat() == FAIL) { return FAIL; - /* if concat is empty do emit a node */ - if (old_post_pos == (int)(post_ptr - post_start)) + } + // if concat is empty do emit a node + if (old_post_pos == (int)(post_ptr - post_start)) { EMIT(NFA_EMPTY); + } EMIT(NFA_CONCAT); } - /* if a branch is empty, emit one node for it */ - if (old_post_pos == (int)(post_ptr - post_start)) + // if a branch is empty, emit one node for it + if (old_post_pos == (int)(post_ptr - post_start)) { EMIT(NFA_EMPTY); + } return OK; } -/* - * Parse a pattern, one or more branches, separated by "\|". It matches - * anything that matches one of the branches. Example: "foo\|beep" matches - * "foo" and matches "beep". If more than one branch matches, the first one - * is used. - * - * pattern ::= branch - * or branch \| branch - * or branch \| branch \| branch - * etc. - */ -static int -nfa_reg ( - int paren /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */ -) +/// Parse a pattern, one or more branches, separated by "\|". It matches +/// anything that matches one of the branches. Example: "foo\|beep" matches +/// "foo" and matches "beep". If more than one branch matches, the first one +/// is used. +/// +/// pattern ::= branch +/// or branch \| branch +/// or branch \| branch \| branch +/// etc. 
+/// +/// @param paren REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN +static int nfa_reg(int paren) { int parno = 0; if (paren == REG_PAREN) { - if (regnpar >= NSUBEXP) /* Too many `(' */ + if (regnpar >= NSUBEXP) { // Too many `(' EMSG_RET_FAIL(_("E872: (NFA regexp) Too many '('")); + } parno = regnpar++; } else if (paren == REG_ZPAREN) { - /* Make a ZOPEN node. */ - if (regnzpar >= NSUBEXP) + // Make a ZOPEN node. + if (regnzpar >= NSUBEXP) { EMSG_RET_FAIL(_("E879: (NFA regexp) Too many \\z(")); + } parno = regnzpar++; } - if (nfa_regbranch() == FAIL) - return FAIL; /* cascaded error */ - + if (nfa_regbranch() == FAIL) { + return FAIL; // cascaded error + } while (peekchr() == Magic('|')) { skipchr(); - if (nfa_regbranch() == FAIL) - return FAIL; /* cascaded error */ + if (nfa_regbranch() == FAIL) { + return FAIL; // cascaded error + } EMIT(NFA_OR); } - /* Check for proper termination. */ + // Check for proper termination. if (paren != REG_NOPAREN && getchr() != Magic(')')) { - if (paren == REG_NPAREN) + if (paren == REG_NPAREN) { EMSG2_RET_FAIL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); - else + } else { EMSG2_RET_FAIL(_(e_unmatchedp), reg_magic == MAGIC_ALL); + } } else if (paren == REG_NOPAREN && peekchr() != NUL) { - if (peekchr() == Magic(')')) + if (peekchr() == Magic(')')) { EMSG2_RET_FAIL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); - else + } else { EMSG_RET_FAIL(_("E873: (NFA regexp) proper termination error")); + } } // Here we set the flag allowing back references to this set of // parentheses. 
@@ -2443,32 +2963,57 @@ static void nfa_set_code(int c) STRCPY(code, ""); switch (c) { - case NFA_MATCH: STRCPY(code, "NFA_MATCH "); break; - case NFA_SPLIT: STRCPY(code, "NFA_SPLIT "); break; - case NFA_CONCAT: STRCPY(code, "NFA_CONCAT "); break; - case NFA_NEWL: STRCPY(code, "NFA_NEWL "); break; - case NFA_ZSTART: STRCPY(code, "NFA_ZSTART"); break; - case NFA_ZEND: STRCPY(code, "NFA_ZEND"); break; - - case NFA_BACKREF1: STRCPY(code, "NFA_BACKREF1"); break; - case NFA_BACKREF2: STRCPY(code, "NFA_BACKREF2"); break; - case NFA_BACKREF3: STRCPY(code, "NFA_BACKREF3"); break; - case NFA_BACKREF4: STRCPY(code, "NFA_BACKREF4"); break; - case NFA_BACKREF5: STRCPY(code, "NFA_BACKREF5"); break; - case NFA_BACKREF6: STRCPY(code, "NFA_BACKREF6"); break; - case NFA_BACKREF7: STRCPY(code, "NFA_BACKREF7"); break; - case NFA_BACKREF8: STRCPY(code, "NFA_BACKREF8"); break; - case NFA_BACKREF9: STRCPY(code, "NFA_BACKREF9"); break; - case NFA_ZREF1: STRCPY(code, "NFA_ZREF1"); break; - case NFA_ZREF2: STRCPY(code, "NFA_ZREF2"); break; - case NFA_ZREF3: STRCPY(code, "NFA_ZREF3"); break; - case NFA_ZREF4: STRCPY(code, "NFA_ZREF4"); break; - case NFA_ZREF5: STRCPY(code, "NFA_ZREF5"); break; - case NFA_ZREF6: STRCPY(code, "NFA_ZREF6"); break; - case NFA_ZREF7: STRCPY(code, "NFA_ZREF7"); break; - case NFA_ZREF8: STRCPY(code, "NFA_ZREF8"); break; - case NFA_ZREF9: STRCPY(code, "NFA_ZREF9"); break; - case NFA_SKIP: STRCPY(code, "NFA_SKIP"); break; + case NFA_MATCH: + STRCPY(code, "NFA_MATCH "); break; + case NFA_SPLIT: + STRCPY(code, "NFA_SPLIT "); break; + case NFA_CONCAT: + STRCPY(code, "NFA_CONCAT "); break; + case NFA_NEWL: + STRCPY(code, "NFA_NEWL "); break; + case NFA_ZSTART: + STRCPY(code, "NFA_ZSTART"); break; + case NFA_ZEND: + STRCPY(code, "NFA_ZEND"); break; + + case NFA_BACKREF1: + STRCPY(code, "NFA_BACKREF1"); break; + case NFA_BACKREF2: + STRCPY(code, "NFA_BACKREF2"); break; + case NFA_BACKREF3: + STRCPY(code, "NFA_BACKREF3"); break; + case NFA_BACKREF4: + STRCPY(code, 
"NFA_BACKREF4"); break; + case NFA_BACKREF5: + STRCPY(code, "NFA_BACKREF5"); break; + case NFA_BACKREF6: + STRCPY(code, "NFA_BACKREF6"); break; + case NFA_BACKREF7: + STRCPY(code, "NFA_BACKREF7"); break; + case NFA_BACKREF8: + STRCPY(code, "NFA_BACKREF8"); break; + case NFA_BACKREF9: + STRCPY(code, "NFA_BACKREF9"); break; + case NFA_ZREF1: + STRCPY(code, "NFA_ZREF1"); break; + case NFA_ZREF2: + STRCPY(code, "NFA_ZREF2"); break; + case NFA_ZREF3: + STRCPY(code, "NFA_ZREF3"); break; + case NFA_ZREF4: + STRCPY(code, "NFA_ZREF4"); break; + case NFA_ZREF5: + STRCPY(code, "NFA_ZREF5"); break; + case NFA_ZREF6: + STRCPY(code, "NFA_ZREF6"); break; + case NFA_ZREF7: + STRCPY(code, "NFA_ZREF7"); break; + case NFA_ZREF8: + STRCPY(code, "NFA_ZREF8"); break; + case NFA_ZREF9: + STRCPY(code, "NFA_ZREF9"); break; + case NFA_SKIP: + STRCPY(code, "NFA_SKIP"); break; case NFA_PREV_ATOM_NO_WIDTH: STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break; @@ -2481,9 +3026,12 @@ static void nfa_set_code(int c) case NFA_PREV_ATOM_LIKE_PATTERN: STRCPY(code, "NFA_PREV_ATOM_LIKE_PATTERN"); break; - case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; - case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; - case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; + case NFA_NOPEN: + STRCPY(code, "NFA_NOPEN"); break; + case NFA_NCLOSE: + STRCPY(code, "NFA_NCLOSE"); break; + case NFA_START_INVISIBLE: + STRCPY(code, "NFA_START_INVISIBLE"); break; case NFA_START_INVISIBLE_FIRST: STRCPY(code, "NFA_START_INVISIBLE_FIRST"); break; case NFA_START_INVISIBLE_NEG: @@ -2498,14 +3046,21 @@ static void nfa_set_code(int c) STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break; case NFA_START_INVISIBLE_BEFORE_NEG_FIRST: STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG_FIRST"); break; - case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break; - case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; - case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); break; - case 
NFA_END_PATTERN: STRCPY(code, "NFA_END_PATTERN"); break; + case NFA_START_PATTERN: + STRCPY(code, "NFA_START_PATTERN"); break; + case NFA_END_INVISIBLE: + STRCPY(code, "NFA_END_INVISIBLE"); break; + case NFA_END_INVISIBLE_NEG: + STRCPY(code, "NFA_END_INVISIBLE_NEG"); break; + case NFA_END_PATTERN: + STRCPY(code, "NFA_END_PATTERN"); break; - case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; - case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; - case NFA_OPT_CHARS: STRCPY(code, "NFA_OPT_CHARS"); break; + case NFA_COMPOSING: + STRCPY(code, "NFA_COMPOSING"); break; + case NFA_END_COMPOSING: + STRCPY(code, "NFA_END_COMPOSING"); break; + case NFA_OPT_CHARS: + STRCPY(code, "NFA_OPT_CHARS"); break; case NFA_MOPEN: case NFA_MOPEN1: @@ -2559,94 +3114,178 @@ static void nfa_set_code(int c) STRCPY(code, "NFA_ZCLOSE(x)"); code[11] = c - NFA_ZCLOSE + '0'; break; - case NFA_EOL: STRCPY(code, "NFA_EOL "); break; - case NFA_BOL: STRCPY(code, "NFA_BOL "); break; - case NFA_EOW: STRCPY(code, "NFA_EOW "); break; - case NFA_BOW: STRCPY(code, "NFA_BOW "); break; - case NFA_EOF: STRCPY(code, "NFA_EOF "); break; - case NFA_BOF: STRCPY(code, "NFA_BOF "); break; - case NFA_LNUM: STRCPY(code, "NFA_LNUM "); break; - case NFA_LNUM_GT: STRCPY(code, "NFA_LNUM_GT "); break; - case NFA_LNUM_LT: STRCPY(code, "NFA_LNUM_LT "); break; - case NFA_COL: STRCPY(code, "NFA_COL "); break; - case NFA_COL_GT: STRCPY(code, "NFA_COL_GT "); break; - case NFA_COL_LT: STRCPY(code, "NFA_COL_LT "); break; - case NFA_VCOL: STRCPY(code, "NFA_VCOL "); break; - case NFA_VCOL_GT: STRCPY(code, "NFA_VCOL_GT "); break; - case NFA_VCOL_LT: STRCPY(code, "NFA_VCOL_LT "); break; - case NFA_MARK: STRCPY(code, "NFA_MARK "); break; - case NFA_MARK_GT: STRCPY(code, "NFA_MARK_GT "); break; - case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break; - case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break; - case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break; - case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING 
"); break; - - case NFA_STAR: STRCPY(code, "NFA_STAR "); break; - case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; - case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; - case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; - case NFA_EMPTY: STRCPY(code, "NFA_EMPTY"); break; - case NFA_OR: STRCPY(code, "NFA_OR"); break; - - case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break; - case NFA_END_COLL: STRCPY(code, "NFA_END_COLL"); break; - case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break; - case NFA_END_NEG_COLL: STRCPY(code, "NFA_END_NEG_COLL"); break; - case NFA_RANGE: STRCPY(code, "NFA_RANGE"); break; - case NFA_RANGE_MIN: STRCPY(code, "NFA_RANGE_MIN"); break; - case NFA_RANGE_MAX: STRCPY(code, "NFA_RANGE_MAX"); break; - - case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break; - case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break; - case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break; - case NFA_CLASS_CNTRL: STRCPY(code, "NFA_CLASS_CNTRL"); break; - case NFA_CLASS_DIGIT: STRCPY(code, "NFA_CLASS_DIGIT"); break; - case NFA_CLASS_GRAPH: STRCPY(code, "NFA_CLASS_GRAPH"); break; - case NFA_CLASS_LOWER: STRCPY(code, "NFA_CLASS_LOWER"); break; - case NFA_CLASS_PRINT: STRCPY(code, "NFA_CLASS_PRINT"); break; - case NFA_CLASS_PUNCT: STRCPY(code, "NFA_CLASS_PUNCT"); break; - case NFA_CLASS_SPACE: STRCPY(code, "NFA_CLASS_SPACE"); break; - case NFA_CLASS_UPPER: STRCPY(code, "NFA_CLASS_UPPER"); break; - case NFA_CLASS_XDIGIT: STRCPY(code, "NFA_CLASS_XDIGIT"); break; - case NFA_CLASS_TAB: STRCPY(code, "NFA_CLASS_TAB"); break; - case NFA_CLASS_RETURN: STRCPY(code, "NFA_CLASS_RETURN"); break; - case NFA_CLASS_BACKSPACE: STRCPY(code, "NFA_CLASS_BACKSPACE"); break; - case NFA_CLASS_ESCAPE: STRCPY(code, "NFA_CLASS_ESCAPE"); break; - case NFA_CLASS_IDENT: STRCPY(code, "NFA_CLASS_IDENT"); break; - case NFA_CLASS_KEYWORD: STRCPY(code, "NFA_CLASS_KEYWORD"); break; - case NFA_CLASS_FNAME: STRCPY(code, 
"NFA_CLASS_FNAME"); break; - - case NFA_ANY: STRCPY(code, "NFA_ANY"); break; - case NFA_IDENT: STRCPY(code, "NFA_IDENT"); break; - case NFA_SIDENT: STRCPY(code, "NFA_SIDENT"); break; - case NFA_KWORD: STRCPY(code, "NFA_KWORD"); break; - case NFA_SKWORD: STRCPY(code, "NFA_SKWORD"); break; - case NFA_FNAME: STRCPY(code, "NFA_FNAME"); break; - case NFA_SFNAME: STRCPY(code, "NFA_SFNAME"); break; - case NFA_PRINT: STRCPY(code, "NFA_PRINT"); break; - case NFA_SPRINT: STRCPY(code, "NFA_SPRINT"); break; - case NFA_WHITE: STRCPY(code, "NFA_WHITE"); break; - case NFA_NWHITE: STRCPY(code, "NFA_NWHITE"); break; - case NFA_DIGIT: STRCPY(code, "NFA_DIGIT"); break; - case NFA_NDIGIT: STRCPY(code, "NFA_NDIGIT"); break; - case NFA_HEX: STRCPY(code, "NFA_HEX"); break; - case NFA_NHEX: STRCPY(code, "NFA_NHEX"); break; - case NFA_OCTAL: STRCPY(code, "NFA_OCTAL"); break; - case NFA_NOCTAL: STRCPY(code, "NFA_NOCTAL"); break; - case NFA_WORD: STRCPY(code, "NFA_WORD"); break; - case NFA_NWORD: STRCPY(code, "NFA_NWORD"); break; - case NFA_HEAD: STRCPY(code, "NFA_HEAD"); break; - case NFA_NHEAD: STRCPY(code, "NFA_NHEAD"); break; - case NFA_ALPHA: STRCPY(code, "NFA_ALPHA"); break; - case NFA_NALPHA: STRCPY(code, "NFA_NALPHA"); break; - case NFA_LOWER: STRCPY(code, "NFA_LOWER"); break; - case NFA_NLOWER: STRCPY(code, "NFA_NLOWER"); break; - case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break; - case NFA_NUPPER: STRCPY(code, "NFA_NUPPER"); break; - case NFA_LOWER_IC: STRCPY(code, "NFA_LOWER_IC"); break; - case NFA_NLOWER_IC: STRCPY(code, "NFA_NLOWER_IC"); break; - case NFA_UPPER_IC: STRCPY(code, "NFA_UPPER_IC"); break; - case NFA_NUPPER_IC: STRCPY(code, "NFA_NUPPER_IC"); break; + case NFA_EOL: + STRCPY(code, "NFA_EOL "); break; + case NFA_BOL: + STRCPY(code, "NFA_BOL "); break; + case NFA_EOW: + STRCPY(code, "NFA_EOW "); break; + case NFA_BOW: + STRCPY(code, "NFA_BOW "); break; + case NFA_EOF: + STRCPY(code, "NFA_EOF "); break; + case NFA_BOF: + STRCPY(code, "NFA_BOF "); break; + case NFA_LNUM: + 
STRCPY(code, "NFA_LNUM "); break; + case NFA_LNUM_GT: + STRCPY(code, "NFA_LNUM_GT "); break; + case NFA_LNUM_LT: + STRCPY(code, "NFA_LNUM_LT "); break; + case NFA_COL: + STRCPY(code, "NFA_COL "); break; + case NFA_COL_GT: + STRCPY(code, "NFA_COL_GT "); break; + case NFA_COL_LT: + STRCPY(code, "NFA_COL_LT "); break; + case NFA_VCOL: + STRCPY(code, "NFA_VCOL "); break; + case NFA_VCOL_GT: + STRCPY(code, "NFA_VCOL_GT "); break; + case NFA_VCOL_LT: + STRCPY(code, "NFA_VCOL_LT "); break; + case NFA_MARK: + STRCPY(code, "NFA_MARK "); break; + case NFA_MARK_GT: + STRCPY(code, "NFA_MARK_GT "); break; + case NFA_MARK_LT: + STRCPY(code, "NFA_MARK_LT "); break; + case NFA_CURSOR: + STRCPY(code, "NFA_CURSOR "); break; + case NFA_VISUAL: + STRCPY(code, "NFA_VISUAL "); break; + case NFA_ANY_COMPOSING: + STRCPY(code, "NFA_ANY_COMPOSING "); break; + + case NFA_STAR: + STRCPY(code, "NFA_STAR "); break; + case NFA_STAR_NONGREEDY: + STRCPY(code, "NFA_STAR_NONGREEDY "); break; + case NFA_QUEST: + STRCPY(code, "NFA_QUEST"); break; + case NFA_QUEST_NONGREEDY: + STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; + case NFA_EMPTY: + STRCPY(code, "NFA_EMPTY"); break; + case NFA_OR: + STRCPY(code, "NFA_OR"); break; + + case NFA_START_COLL: + STRCPY(code, "NFA_START_COLL"); break; + case NFA_END_COLL: + STRCPY(code, "NFA_END_COLL"); break; + case NFA_START_NEG_COLL: + STRCPY(code, "NFA_START_NEG_COLL"); break; + case NFA_END_NEG_COLL: + STRCPY(code, "NFA_END_NEG_COLL"); break; + case NFA_RANGE: + STRCPY(code, "NFA_RANGE"); break; + case NFA_RANGE_MIN: + STRCPY(code, "NFA_RANGE_MIN"); break; + case NFA_RANGE_MAX: + STRCPY(code, "NFA_RANGE_MAX"); break; + + case NFA_CLASS_ALNUM: + STRCPY(code, "NFA_CLASS_ALNUM"); break; + case NFA_CLASS_ALPHA: + STRCPY(code, "NFA_CLASS_ALPHA"); break; + case NFA_CLASS_BLANK: + STRCPY(code, "NFA_CLASS_BLANK"); break; + case NFA_CLASS_CNTRL: + STRCPY(code, "NFA_CLASS_CNTRL"); break; + case NFA_CLASS_DIGIT: + STRCPY(code, "NFA_CLASS_DIGIT"); break; + case 
NFA_CLASS_GRAPH: + STRCPY(code, "NFA_CLASS_GRAPH"); break; + case NFA_CLASS_LOWER: + STRCPY(code, "NFA_CLASS_LOWER"); break; + case NFA_CLASS_PRINT: + STRCPY(code, "NFA_CLASS_PRINT"); break; + case NFA_CLASS_PUNCT: + STRCPY(code, "NFA_CLASS_PUNCT"); break; + case NFA_CLASS_SPACE: + STRCPY(code, "NFA_CLASS_SPACE"); break; + case NFA_CLASS_UPPER: + STRCPY(code, "NFA_CLASS_UPPER"); break; + case NFA_CLASS_XDIGIT: + STRCPY(code, "NFA_CLASS_XDIGIT"); break; + case NFA_CLASS_TAB: + STRCPY(code, "NFA_CLASS_TAB"); break; + case NFA_CLASS_RETURN: + STRCPY(code, "NFA_CLASS_RETURN"); break; + case NFA_CLASS_BACKSPACE: + STRCPY(code, "NFA_CLASS_BACKSPACE"); break; + case NFA_CLASS_ESCAPE: + STRCPY(code, "NFA_CLASS_ESCAPE"); break; + case NFA_CLASS_IDENT: + STRCPY(code, "NFA_CLASS_IDENT"); break; + case NFA_CLASS_KEYWORD: + STRCPY(code, "NFA_CLASS_KEYWORD"); break; + case NFA_CLASS_FNAME: + STRCPY(code, "NFA_CLASS_FNAME"); break; + + case NFA_ANY: + STRCPY(code, "NFA_ANY"); break; + case NFA_IDENT: + STRCPY(code, "NFA_IDENT"); break; + case NFA_SIDENT: + STRCPY(code, "NFA_SIDENT"); break; + case NFA_KWORD: + STRCPY(code, "NFA_KWORD"); break; + case NFA_SKWORD: + STRCPY(code, "NFA_SKWORD"); break; + case NFA_FNAME: + STRCPY(code, "NFA_FNAME"); break; + case NFA_SFNAME: + STRCPY(code, "NFA_SFNAME"); break; + case NFA_PRINT: + STRCPY(code, "NFA_PRINT"); break; + case NFA_SPRINT: + STRCPY(code, "NFA_SPRINT"); break; + case NFA_WHITE: + STRCPY(code, "NFA_WHITE"); break; + case NFA_NWHITE: + STRCPY(code, "NFA_NWHITE"); break; + case NFA_DIGIT: + STRCPY(code, "NFA_DIGIT"); break; + case NFA_NDIGIT: + STRCPY(code, "NFA_NDIGIT"); break; + case NFA_HEX: + STRCPY(code, "NFA_HEX"); break; + case NFA_NHEX: + STRCPY(code, "NFA_NHEX"); break; + case NFA_OCTAL: + STRCPY(code, "NFA_OCTAL"); break; + case NFA_NOCTAL: + STRCPY(code, "NFA_NOCTAL"); break; + case NFA_WORD: + STRCPY(code, "NFA_WORD"); break; + case NFA_NWORD: + STRCPY(code, "NFA_NWORD"); break; + case NFA_HEAD: + STRCPY(code, 
"NFA_HEAD"); break; + case NFA_NHEAD: + STRCPY(code, "NFA_NHEAD"); break; + case NFA_ALPHA: + STRCPY(code, "NFA_ALPHA"); break; + case NFA_NALPHA: + STRCPY(code, "NFA_NALPHA"); break; + case NFA_LOWER: + STRCPY(code, "NFA_LOWER"); break; + case NFA_NLOWER: + STRCPY(code, "NFA_NLOWER"); break; + case NFA_UPPER: + STRCPY(code, "NFA_UPPER"); break; + case NFA_NUPPER: + STRCPY(code, "NFA_NUPPER"); break; + case NFA_LOWER_IC: + STRCPY(code, "NFA_LOWER_IC"); break; + case NFA_NLOWER_IC: + STRCPY(code, "NFA_NLOWER_IC"); break; + case NFA_UPPER_IC: + STRCPY(code, "NFA_UPPER_IC"); break; + case NFA_NUPPER_IC: + STRCPY(code, "NFA_NUPPER_IC"); break; default: STRCPY(code, "CHAR(x)"); @@ -2659,8 +3298,8 @@ static void nfa_set_code(int c) } static FILE *log_fd; -static char_u e_log_open_failed[] = N_( - "Could not open temporary log file for writing, displaying on stderr... "); +static char_u e_log_open_failed[] = + N_("Could not open temporary log file for writing, displaying on stderr... "); /* * Print the postfix notation of the current regexp. 
@@ -2684,8 +3323,9 @@ static void nfa_postfix_dump(char_u *expr, int retval) fprintf(f, "%s, ", code); } fprintf(f, "\"\nPostfix notation (int): "); - for (p = post_start; *p && p < post_ptr; p++) + for (p = post_start; *p && p < post_ptr; p++) { fprintf(f, "%d ", *p); + } fprintf(f, "\n\n"); fclose(f); } @@ -2706,14 +3346,15 @@ static void nfa_print_state(FILE *debugf, nfa_state_T *state) static void nfa_print_state2(FILE *debugf, nfa_state_T *state, garray_T *indent) { - char_u *p; + char_u *p; - if (state == NULL) + if (state == NULL) { return; + } fprintf(debugf, "(%2d)", abs(state->id)); - /* Output indent */ + // Output indent p = (char_u *)indent->ga_data; if (indent->ga_len >= 3) { int last = indent->ga_len - 3; @@ -2722,39 +3363,42 @@ static void nfa_print_state2(FILE *debugf, nfa_state_T *state, garray_T *indent) STRNCPY(save, &p[last], 2); STRNCPY(&p[last], "+-", 2); fprintf(debugf, " %s", p); - STRNCPY(&p[last], save, 2); - } else + STRNCPY(&p[last], save, 2); // NOLINT(runtime/printf) + } else { fprintf(debugf, " %s", p); + } nfa_set_code(state->c); fprintf(debugf, "%s (%d) (id=%d) val=%d\n", - code, - state->c, - abs(state->id), - state->val); - if (state->id < 0) + code, + state->c, + abs(state->id), + state->val); + if (state->id < 0) { return; + } state->id = abs(state->id) * -1; - /* grow indent for state->out */ + // grow indent for state->out indent->ga_len -= 1; - if (state->out1) + if (state->out1) { ga_concat(indent, (char_u *)"| "); - else + } else { ga_concat(indent, (char_u *)" "); + } ga_append(indent, NUL); nfa_print_state2(debugf, state->out, indent); - /* replace last part of indent for state->out1 */ + // replace last part of indent for state->out1 indent->ga_len -= 3; ga_concat(indent, (char_u *)" "); ga_append(indent, NUL); nfa_print_state2(debugf, state->out1, indent); - /* shrink indent */ + // shrink indent indent->ga_len -= 3; ga_append(indent, NUL); } @@ -2769,13 +3413,16 @@ static void nfa_dump(nfa_regprog_T *prog) if (debugf 
!= NULL) { nfa_print_state(debugf, prog->start); - if (prog->reganch) + if (prog->reganch) { fprintf(debugf, "reganch: %d\n", prog->reganch); - if (prog->regstart != NUL) + } + if (prog->regstart != NUL) { fprintf(debugf, "regstart: %c (decimal: %d)\n", - prog->regstart, prog->regstart); - if (prog->match_text != NULL) + prog->regstart, prog->regstart); + } + if (prog->match_text != NULL) { fprintf(debugf, "match_text: \"%s\"\n", prog->match_text); + } fclose(debugf); } @@ -2788,13 +3435,14 @@ static void nfa_dump(nfa_regprog_T *prog) */ static int *re2post(void) { - if (nfa_reg(REG_NOPAREN) == FAIL) + if (nfa_reg(REG_NOPAREN) == FAIL) { return NULL; + } EMIT(NFA_MOPEN); return post_start; } -/* NB. Some of the code below is inspired by Russ's. */ +// NB. Some of the code below is inspired by Russ's. /* * Represents an NFA state plus zero or one or two arrows exiting. @@ -2803,7 +3451,7 @@ static int *re2post(void) * If c < 256, labeled arrow with character c to out. */ -static nfa_state_T *state_ptr; /* points to nfa_prog->state */ +static nfa_state_T *state_ptr; // points to nfa_prog->state /* * Allocate and initialize nfa_state_T. 
@@ -2812,8 +3460,9 @@ static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1) { nfa_state_T *s; - if (istate >= nstate) + if (istate >= nstate) { return NULL; + } s = &state_ptr[istate++]; @@ -2883,8 +3532,9 @@ static Ptrlist *append(Ptrlist *l1, Ptrlist *l2) Ptrlist *oldl1; oldl1 = l1; - while (l1->next) + while (l1->next) { l1 = l1->next; + } l1->next = l2; return oldl1; } @@ -2903,11 +3553,11 @@ static void st_error(int *postfix, int *end, int *p) df = fopen(NFA_REGEXP_ERROR_LOG, "a"); if (df) { fprintf(df, "Error popping the stack!\n"); -#ifdef REGEXP_DEBUG +# ifdef REGEXP_DEBUG fprintf(df, "Current regexp is \"%s\"\n", nfa_regengine.expr); -#endif +# endif fprintf(df, "Postfix form is: "); -#ifdef REGEXP_DEBUG +# ifdef REGEXP_DEBUG for (p2 = postfix; p2 < end; p2++) { nfa_set_code(*p2); fprintf(df, "%s, ", code); @@ -2918,7 +3568,7 @@ static void st_error(int *postfix, int *end, int *p) nfa_set_code(*p2); fprintf(df, "%s, ", code); } -#else +# else for (p2 = postfix; p2 < end; p2++) { fprintf(df, "%d, ", *p2); } @@ -2926,7 +3576,7 @@ static void st_error(int *postfix, int *end, int *p) for (p2 = postfix; p2 <= p; p2++) { fprintf(df, "%d, ", *p2); } -#endif +# endif fprintf(df, "\n--------------------------\n"); fclose(df); } @@ -2941,8 +3591,9 @@ static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end) { Frag_T *stackp = *p; - if (stackp >= stack_end) + if (stackp >= stack_end) { return; + } *stackp = s; *p = *p + 1; } @@ -2956,8 +3607,9 @@ static Frag_T st_pop(Frag_T **p, Frag_T *stack) *p = *p - 1; stackp = *p; - if (stackp < stack) + if (stackp < stack) { return empty; + } return **p; } @@ -2968,26 +3620,28 @@ static Frag_T st_pop(Frag_T **p, Frag_T *stack) static int nfa_max_width(nfa_state_T *startstate, int depth) { int l, r; - nfa_state_T *state = startstate; + nfa_state_T *state = startstate; int len = 0; - /* detect looping in a NFA_SPLIT */ - if (depth > 4) + // detect looping in a NFA_SPLIT + if (depth > 4) { return -1; + } 
while (state != NULL) { switch (state->c) { case NFA_END_INVISIBLE: case NFA_END_INVISIBLE_NEG: - /* the end, return what we have */ + // the end, return what we have return len; case NFA_SPLIT: - /* two alternatives, use the maximum */ + // two alternatives, use the maximum l = nfa_max_width(state->out, depth + 1); r = nfa_max_width(state->out1, depth + 1); - if (l < 0 || r < 0) + if (l < 0 || r < 0) { return -1; + } return len + (l > r ? l : r); case NFA_ANY: @@ -3006,8 +3660,8 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_WHITE: case NFA_HEX: case NFA_OCTAL: - /* ascii */ - ++len; + // ascii + len++; break; case NFA_IDENT: @@ -3045,7 +3699,7 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_START_INVISIBLE_NEG: case NFA_START_INVISIBLE_BEFORE: case NFA_START_INVISIBLE_BEFORE_NEG: - /* zero-width, out1 points to the END state */ + // zero-width, out1 points to the END state state = state->out1->out; continue; @@ -3069,7 +3723,7 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_ZREF9: case NFA_NEWL: case NFA_SKIP: - /* unknown width */ + // unknown width return -1; case NFA_BOL: @@ -3144,23 +3798,24 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_END_PATTERN: case NFA_COMPOSING: case NFA_END_COMPOSING: - /* zero-width */ + // zero-width break; default: - if (state->c < 0) - /* don't know what this is */ + if (state->c < 0) { + // don't know what this is return -1; + } // normal character len += utf_char2len(state->c); break; } - /* normal way to continue */ + // normal way to continue state = state->out; } - /* unrecognized, "cannot happen" */ + // unrecognized, "cannot happen" return -1; } @@ -3170,12 +3825,12 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) */ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) { - int *p; + int *p; int mopen; int mclose; - Frag_T *stack = NULL; - Frag_T *stackp = NULL; - Frag_T *stack_end = NULL; 
+ Frag_T *stack = NULL; + Frag_T *stackp = NULL; + Frag_T *stack_end = NULL; Frag_T e1; Frag_T e2; Frag_T e; @@ -3184,8 +3839,9 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) nfa_state_T *matchstate; nfa_state_T *ret = NULL; - if (postfix == NULL) + if (postfix == NULL) { return NULL; + } #define PUSH(s) st_push((s), &stackp, stack_end) #define POP() st_pop(&stackp, stack); \ @@ -3228,8 +3884,9 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) e2 = POP(); e1 = POP(); s = alloc_state(NFA_SPLIT, e1.start, e2.start); - if (s == NULL) + if (s == NULL) { goto theend; + } PUSH(frag(s, append(e1.out, e2.out))); break; @@ -3241,8 +3898,9 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } e = POP(); s = alloc_state(NFA_SPLIT, e.start, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } patch(e.out, s); PUSH(frag(s, list1(&s->out1))); break; @@ -3255,8 +3913,9 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } e = POP(); s = alloc_state(NFA_SPLIT, NULL, e.start); - if (s == NULL) + if (s == NULL) { goto theend; + } patch(e.out, s); PUSH(frag(s, list1(&s->out))); break; @@ -3269,8 +3928,9 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } e = POP(); s = alloc_state(NFA_SPLIT, e.start, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } PUSH(frag(s, append(e.out, list1(&s->out1)))); break; @@ -3282,8 +3942,9 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } e = POP(); s = alloc_state(NFA_SPLIT, NULL, e.start); - if (s == NULL) + if (s == NULL) { goto theend; + } PUSH(frag(s, append(e.out, list1(&s->out)))); break; @@ -3298,8 +3959,9 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } e = POP(); s = alloc_state(NFA_END_COLL, NULL, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } patch(e.out, s); e.start->out1 = s; PUSH(frag(e.start, list1(&s->out))); @@ -3329,13 +3991,13 
@@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; } s = alloc_state(NFA_EMPTY, NULL, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } PUSH(frag(s, list1(&s->out))); break; - case NFA_OPT_CHARS: - { + case NFA_OPT_CHARS: { int n; // \%[abc] implemented as: @@ -3354,16 +4016,18 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) nstate += n; break; } - s = NULL; /* avoid compiler warning */ - e1.out = NULL; /* stores list with out1's */ - s1 = NULL; /* previous NFA_SPLIT to connect to */ + s = NULL; // avoid compiler warning + e1.out = NULL; // stores list with out1's + s1 = NULL; // previous NFA_SPLIT to connect to while (n-- > 0) { - e = POP(); /* get character */ + e = POP(); // get character s = alloc_state(NFA_SPLIT, e.start, NULL); - if (s == NULL) + if (s == NULL) { goto theend; - if (e1.out == NULL) + } + if (e1.out == NULL) { e1 = e; + } patch(e.out, s1); append(e1.out, list1(&s->out1)); s1 = s; @@ -3376,8 +4040,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_PREV_ATOM_NO_WIDTH_NEG: case NFA_PREV_ATOM_JUST_BEFORE: case NFA_PREV_ATOM_JUST_BEFORE_NEG: - case NFA_PREV_ATOM_LIKE_PATTERN: - { + case NFA_PREV_ATOM_LIKE_PATTERN: { int before = (*p == NFA_PREV_ATOM_JUST_BEFORE || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); @@ -3404,15 +4067,15 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) start_state = NFA_START_INVISIBLE_BEFORE_NEG; end_state = NFA_END_INVISIBLE_NEG; break; - default: /* NFA_PREV_ATOM_LIKE_PATTERN: */ + default: // NFA_PREV_ATOM_LIKE_PATTERN: start_state = NFA_START_PATTERN; end_state = NFA_END_PATTERN; break; } - if (before) - n = *++p; /* get the count */ - + if (before) { + n = *++p; // get the count + } // The \@= operator: match the preceding atom with zero width. // The \@! operator: no match for the preceding atom. // The \@<= operator: match for the preceding atom. 
@@ -3426,14 +4089,16 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } e = POP(); s1 = alloc_state(end_state, NULL, NULL); - if (s1 == NULL) + if (s1 == NULL) { goto theend; + } s = alloc_state(start_state, e.start, s1); - if (s == NULL) + if (s == NULL) { goto theend; + } if (pattern) { - /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */ + // NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. skip = alloc_state(NFA_SKIP, NULL, NULL); if (skip == NULL) { goto theend; @@ -3463,7 +4128,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_COMPOSING: // char with composing char FALLTHROUGH; - case NFA_MOPEN: /* \( \) Submatch */ + case NFA_MOPEN: // \( \) Submatch case NFA_MOPEN1: case NFA_MOPEN2: case NFA_MOPEN3: @@ -3473,7 +4138,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_MOPEN7: case NFA_MOPEN8: case NFA_MOPEN9: - case NFA_ZOPEN: /* \z( \) Submatch */ + case NFA_ZOPEN: // \z( \) Submatch case NFA_ZOPEN1: case NFA_ZOPEN2: case NFA_ZOPEN3: @@ -3491,20 +4156,32 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) mopen = *p; switch (*p) { - case NFA_NOPEN: mclose = NFA_NCLOSE; break; - case NFA_ZOPEN: mclose = NFA_ZCLOSE; break; - case NFA_ZOPEN1: mclose = NFA_ZCLOSE1; break; - case NFA_ZOPEN2: mclose = NFA_ZCLOSE2; break; - case NFA_ZOPEN3: mclose = NFA_ZCLOSE3; break; - case NFA_ZOPEN4: mclose = NFA_ZCLOSE4; break; - case NFA_ZOPEN5: mclose = NFA_ZCLOSE5; break; - case NFA_ZOPEN6: mclose = NFA_ZCLOSE6; break; - case NFA_ZOPEN7: mclose = NFA_ZCLOSE7; break; - case NFA_ZOPEN8: mclose = NFA_ZCLOSE8; break; - case NFA_ZOPEN9: mclose = NFA_ZCLOSE9; break; - case NFA_COMPOSING: mclose = NFA_END_COMPOSING; break; + case NFA_NOPEN: + mclose = NFA_NCLOSE; break; + case NFA_ZOPEN: + mclose = NFA_ZCLOSE; break; + case NFA_ZOPEN1: + mclose = NFA_ZCLOSE1; break; + case NFA_ZOPEN2: + mclose = NFA_ZCLOSE2; break; + case NFA_ZOPEN3: + mclose 
= NFA_ZCLOSE3; break; + case NFA_ZOPEN4: + mclose = NFA_ZCLOSE4; break; + case NFA_ZOPEN5: + mclose = NFA_ZCLOSE5; break; + case NFA_ZOPEN6: + mclose = NFA_ZCLOSE6; break; + case NFA_ZOPEN7: + mclose = NFA_ZCLOSE7; break; + case NFA_ZOPEN8: + mclose = NFA_ZCLOSE8; break; + case NFA_ZOPEN9: + mclose = NFA_ZCLOSE9; break; + case NFA_COMPOSING: + mclose = NFA_END_COMPOSING; break; default: - /* NFA_MOPEN, NFA_MOPEN1 .. NFA_MOPEN9 */ + // NFA_MOPEN, NFA_MOPEN1 .. NFA_MOPEN9 mclose = *p + NSUBEXP; break; } @@ -3515,11 +4192,13 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) // empty groups of parenthesis, and empty mbyte chars if (stackp == stack) { s = alloc_state(mopen, NULL, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } s1 = alloc_state(mclose, NULL, NULL); - if (s1 == NULL) + if (s1 == NULL) { goto theend; + } patch(list1(&s->out), s1); PUSH(frag(s, list1(&s1->out))); break; @@ -3528,18 +4207,21 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) // At least one node was emitted before NFA_MOPEN, so // at least one node will be between NFA_MOPEN and NFA_MCLOSE e = POP(); - s = alloc_state(mopen, e.start, NULL); /* `(' */ - if (s == NULL) + s = alloc_state(mopen, e.start, NULL); // `(' + if (s == NULL) { goto theend; + } - s1 = alloc_state(mclose, NULL, NULL); /* `)' */ - if (s1 == NULL) + s1 = alloc_state(mclose, NULL, NULL); // `)' + if (s1 == NULL) { goto theend; + } patch(e.out, s1); - if (mopen == NFA_COMPOSING) - /* COMPOSING->out1 = END_COMPOSING */ + if (mopen == NFA_COMPOSING) { + // COMPOSING->out1 = END_COMPOSING patch(list1(&s->out1), s1); + } PUSH(frag(s, list1(&s1->out))); break; @@ -3567,11 +4249,13 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; } s = alloc_state(*p, NULL, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } s1 = alloc_state(NFA_SKIP, NULL, NULL); - if (s1 == NULL) + if (s1 == NULL) { goto theend; + } patch(list1(&s->out), s1); 
PUSH(frag(s, list1(&s1->out))); break; @@ -3587,17 +4271,17 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_COL_LT: case NFA_MARK: case NFA_MARK_GT: - case NFA_MARK_LT: - { - int n = *++p; /* lnum, col or mark name */ + case NFA_MARK_LT: { + int n = *++p; // lnum, col or mark name if (nfa_calc_size == true) { nstate += 1; break; } s = alloc_state(p[-1], NULL, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } s->val = n; PUSH(frag(s, list1(&s->out))); break; @@ -3612,18 +4296,17 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; } s = alloc_state(*p, NULL, NULL); - if (s == NULL) + if (s == NULL) { goto theend; + } PUSH(frag(s, list1(&s->out))); break; - - } /* switch(*p) */ - - } /* for(p = postfix; *p; ++p) */ + } // switch(*p) + } // for(p = postfix; *p; ++p) if (nfa_calc_size == true) { nstate++; - goto theend; /* Return value when counting size is ignored anyway */ + goto theend; // Return value when counting size is ignored anyway } e = POP(); @@ -3639,7 +4322,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) "Not enough space to store the whole NFA ")); } - matchstate = &state_ptr[istate++]; /* the match state */ + matchstate = &state_ptr[istate++]; // the match state matchstate->c = NFA_MATCH; matchstate->out = matchstate->out1 = NULL; matchstate->id = 0; @@ -3702,9 +4385,10 @@ static void nfa_postprocess(nfa_regprog_T *prog) directly = ch_follows < ch_invisible; } } - if (directly) - /* switch to the _FIRST state */ - ++prog->state[i].c; + if (directly) { + // switch to the _FIRST state + prog->state[i].c++; + } } } } @@ -3713,11 +4397,11 @@ static void nfa_postprocess(nfa_regprog_T *prog) // NFA execution code. ///////////////////////////////////////////////////////////////// -/* Values for done in nfa_pim_T. 
*/ -#define NFA_PIM_UNUSED 0 /* pim not used */ -#define NFA_PIM_TODO 1 /* pim not done yet */ -#define NFA_PIM_MATCH 2 /* pim executed, matches */ -#define NFA_PIM_NOMATCH 3 /* pim executed, no match */ +// Values for done in nfa_pim_T. +#define NFA_PIM_UNUSED 0 // pim not used +#define NFA_PIM_TODO 1 // pim not done yet +#define NFA_PIM_MATCH 2 // pim executed, matches +#define NFA_PIM_NOMATCH 3 // pim executed, no match #ifdef REGEXP_DEBUG @@ -3733,23 +4417,24 @@ static void log_subexpr(regsub_T *sub) { int j; - for (j = 0; j < sub->in_use; j++) - if (REG_MULTI) + for (j = 0; j < sub->in_use; j++) { + if (REG_MULTI) { fprintf(log_fd, "*** group %d, start: c=%d, l=%d, end: c=%d, l=%d\n", - j, - sub->list.multi[j].start_col, - (int)sub->list.multi[j].start_lnum, - sub->list.multi[j].end_col, - (int)sub->list.multi[j].end_lnum); - else { + j, + sub->list.multi[j].start_col, + (int)sub->list.multi[j].start_lnum, + sub->list.multi[j].end_col, + (int)sub->list.multi[j].end_lnum); + } else { char *s = (char *)sub->list.line[j].start; char *e = (char *)sub->list.line[j].end; fprintf(log_fd, "*** group %d, start: \"%s\", end: \"%s\"\n", - j, - s == NULL ? "NULL" : s, - e == NULL ? "NULL" : e); + j, + s == NULL ? "NULL" : s, + e == NULL ? "NULL" : e); } + } } static char *pim_info(const nfa_pim_T *pim) @@ -3806,15 +4491,16 @@ static void copy_sub(regsub_T *to, regsub_T *from) { to->in_use = from->in_use; if (from->in_use > 0) { - /* Copy the match start and end positions. */ - if (REG_MULTI) + // Copy the match start and end positions. 
+ if (REG_MULTI) { memmove(&to->list.multi[0], - &from->list.multi[0], - sizeof(struct multipos) * from->in_use); - else + &from->list.multi[0], + sizeof(struct multipos) * from->in_use); + } else { memmove(&to->list.line[0], - &from->list.line[0], - sizeof(struct linepos) * from->in_use); + &from->list.line[0], + sizeof(struct linepos) * from->in_use); + } } } @@ -3823,18 +4509,20 @@ static void copy_sub(regsub_T *to, regsub_T *from) */ static void copy_sub_off(regsub_T *to, regsub_T *from) { - if (to->in_use < from->in_use) + if (to->in_use < from->in_use) { to->in_use = from->in_use; + } if (from->in_use > 1) { - /* Copy the match start and end positions. */ - if (REG_MULTI) + // Copy the match start and end positions. + if (REG_MULTI) { memmove(&to->list.multi[1], - &from->list.multi[1], - sizeof(struct multipos) * (from->in_use - 1)); - else + &from->list.multi[1], + sizeof(struct multipos) * (from->in_use - 1)); + } else { memmove(&to->list.line[1], - &from->list.line[1], - sizeof(struct linepos) * (from->in_use - 1)); + &from->list.line[1], + sizeof(struct linepos) * (from->in_use - 1)); + } } } @@ -3845,13 +4533,14 @@ static void copy_ze_off(regsub_T *to, regsub_T *from) { if (rex.nfa_has_zend) { if (REG_MULTI) { - if (from->list.multi[0].end_lnum >= 0){ + if (from->list.multi[0].end_lnum >= 0) { to->list.multi[0].end_lnum = from->list.multi[0].end_lnum; to->list.multi[0].end_col = from->list.multi[0].end_col; } } else { - if (from->list.line[0].end != NULL) + if (from->list.line[0].end != NULL) { to->list.line[0].end = from->list.line[0].end; + } } } } @@ -3864,8 +4553,8 @@ static bool sub_equal(regsub_T *sub1, regsub_T *sub2) int todo; linenr_T s1; linenr_T s2; - char_u *sp1; - char_u *sp2; + char_u *sp1; + char_u *sp2; todo = sub1->in_use > sub2->in_use ? 
sub1->in_use : sub2->in_use; if (REG_MULTI) { @@ -3944,11 +4633,8 @@ static bool sub_equal(regsub_T *sub1, regsub_T *sub2) } #ifdef REGEXP_DEBUG -static void report_state(char *action, - regsub_T *sub, - nfa_state_T *state, - int lid, - nfa_pim_T *pim) { +static void report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid, nfa_pim_T *pim) +{ int col; if (sub->in_use <= 0) { @@ -3966,14 +4652,14 @@ static void report_state(char *action, #endif -// Return true if the same state is already in list "l" with the same -// positions as "subs". -static bool has_state_with_pos( - nfa_list_T *l, // runtime state list - nfa_state_T *state, // state to update - regsubs_T *subs, // pointers to subexpressions - nfa_pim_T *pim // postponed match or NULL -) +/// @param l runtime state list +/// @param state state to update +/// @param subs pointers to subexpressions +/// @param pim postponed match or NULL +/// +/// @return true if the same state is already in list "l" with the same +/// positions as "subs". +static bool has_state_with_pos(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim) FUNC_ATTR_NONNULL_ARG(1, 2, 3) { for (int i = 0; i < l->n; i++) { @@ -4048,7 +4734,7 @@ static bool match_follows(const nfa_state_T *startstate, int depth) case NFA_START_INVISIBLE_BEFORE_NEG: case NFA_START_INVISIBLE_BEFORE_NEG_FIRST: case NFA_COMPOSING: - /* skip ahead to next state */ + // skip ahead to next state state = state->out1->out; continue; @@ -4105,12 +4791,12 @@ static bool match_follows(const nfa_state_T *startstate, int depth) } -// Return true if "state" is already in list "l". -static bool state_in_list( - nfa_list_T *l, // runtime state list - nfa_state_T *state, // state to update - regsubs_T *subs // pointers to subexpressions -) +/// @param l runtime state list +/// @param state state to update +/// @param subs pointers to subexpressions +/// +/// @return true if "state" is already in list "l". 
+static bool state_in_list(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs) FUNC_ATTR_NONNULL_ALL { if (state->lastlist[nfa_ll_index] == l->id) { @@ -4124,15 +4810,18 @@ static bool state_in_list( // Offset used for "off" by addstate_here(). #define ADDSTATE_HERE_OFFSET 10 -// Add "state" and possibly what follows to state list ".". -// Returns "subs_arg", possibly copied into temp_subs. -// Returns NULL when recursiveness is too deep. -static regsubs_T *addstate( - nfa_list_T *l, // runtime state list - nfa_state_T *state, // state to update - regsubs_T *subs_arg, // pointers to subexpressions - nfa_pim_T *pim, // postponed look-behind match - int off_arg) // byte offset, when -1 go to next line +/// Add "state" and possibly what follows to state list ".". +/// +/// @param l runtime state list +/// @param state state to update +/// @param subs_arg pointers to subexpressions +/// @param pim postponed look-behind match +/// @param off_arg byte offset, when -1 go to next line +/// +/// @return "subs_arg", possibly copied into temp_subs. +/// NULL when recursiveness is too deep. +static regsubs_T *addstate(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs_arg, nfa_pim_T *pim, + int off_arg) FUNC_ATTR_NONNULL_ARG(1, 2) FUNC_ATTR_WARN_UNUSED_RESULT { int subidx; @@ -4141,13 +4830,13 @@ static regsubs_T *addstate( int listindex = 0; int k; int found = false; - nfa_thread_T *thread; - struct multipos save_multipos; + nfa_thread_T *thread; + struct multipos save_multipos; int save_in_use; - char_u *save_ptr; + char_u *save_ptr; int i; - regsub_T *sub; - regsubs_T *subs = subs_arg; + regsub_T *sub; + regsubs_T *subs = subs_arg; static regsubs_T temp_subs; #ifdef REGEXP_DEBUG int did_print = false; @@ -4259,12 +4948,13 @@ static regsubs_T *addstate( skip_add: #ifdef REGEXP_DEBUG nfa_set_code(state->c); - fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s pim: %s has_pim: %d found: %d\n", + fprintf(log_fd, + "> Not adding state %d to list %d. 
char %d: %s pim: %s has_pim: %d found: %d\n", abs(state->id), l->id, state->c, code, pim == NULL ? "NULL" : "yes", l->has_pim, found); #endif - depth--; - return subs; + depth--; + return subs; } } @@ -4301,13 +4991,13 @@ skip_add: l->len = newlen; } - /* add the state to the list */ + // add the state to the list state->lastlist[nfa_ll_index] = l->id; thread = &l->t[l->n++]; thread->state = state; - if (pim == NULL) + if (pim == NULL) { thread->pim.result = NFA_PIM_UNUSED; - else { + } else { copy_pim(&thread->pim, pim); l->has_pim = true; } @@ -4322,15 +5012,16 @@ skip_add: } #ifdef REGEXP_DEBUG - if (!did_print) + if (!did_print) { report_state("Processing", &subs->norm, state, l->id, pim); + } #endif switch (state->c) { case NFA_MATCH: break; case NFA_SPLIT: - /* order matters here */ + // order matters here subs = addstate(l, state->out, subs, pim, off_arg); subs = addstate(l, state->out1, subs, pim, off_arg); break; @@ -4373,7 +5064,7 @@ skip_add: sub = &subs->norm; } - /* avoid compiler warnings */ + // avoid compiler warnings save_ptr = NULL; memset(&save_multipos, 0, sizeof(save_multipos)); @@ -4429,11 +5120,12 @@ skip_add: if (save_in_use == -1) { if (REG_MULTI) { sub->list.multi[subidx] = save_multipos; - } - else + } else { sub->list.line[subidx].start = save_ptr; - } else + } + } else { sub->in_use = save_in_use; + } break; case NFA_MCLOSE: @@ -4480,8 +5172,9 @@ skip_add: // We don't fill in gaps here, there must have been an MOPEN that // has done that. 
save_in_use = sub->in_use; - if (sub->in_use <= subidx) + if (sub->in_use <= subidx) { sub->in_use = subidx + 1; + } if (REG_MULTI) { save_multipos = sub->list.multi[subidx]; if (off == -1) { @@ -4492,7 +5185,7 @@ skip_add: sub->list.multi[subidx].end_col = (colnr_T)(rex.input - rex.line + off); } - /* avoid compiler warnings */ + // avoid compiler warnings save_ptr = NULL; } else { save_ptr = sub->list.line[subidx].end; @@ -4514,9 +5207,9 @@ skip_add: if (REG_MULTI) { sub->list.multi[subidx] = save_multipos; - } - else + } else { sub->list.line[subidx].end = save_ptr; + } sub->in_use = save_in_use; break; } @@ -4524,19 +5217,17 @@ skip_add: return subs; } -/* - * Like addstate(), but the new state(s) are put at position "*ip". - * Used for zero-width matches, next state to use is the added one. - * This makes sure the order of states to be tried does not change, which - * matters for alternatives. - */ -static regsubs_T *addstate_here( - nfa_list_T *l, // runtime state list - nfa_state_T *state, // state to update - regsubs_T *subs, // pointers to subexpressions - nfa_pim_T *pim, // postponed look-behind match - int *ip -) +/// Like addstate(), but the new state(s) are put at position "*ip". +/// Used for zero-width matches, next state to use is the added one. +/// This makes sure the order of states to be tried does not change, which +/// matters for alternatives. 
+/// +/// @param l runtime state list +/// @param state state to update +/// @param subs pointers to subexpressions +/// @param pim postponed look-behind match +static regsubs_T *addstate_here(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, + int *ip) FUNC_ATTR_NONNULL_ARG(1, 2, 5) FUNC_ATTR_WARN_UNUSED_RESULT { int tlen = l->n; @@ -4578,25 +5269,25 @@ static regsubs_T *addstate_here( nfa_thread_T *const newl = xmalloc(newsize); l->len = newlen; memmove(&(newl[0]), - &(l->t[0]), - sizeof(nfa_thread_T) * listidx); + &(l->t[0]), + sizeof(nfa_thread_T) * listidx); memmove(&(newl[listidx]), - &(l->t[l->n - count]), - sizeof(nfa_thread_T) * count); + &(l->t[l->n - count]), + sizeof(nfa_thread_T) * count); memmove(&(newl[listidx + count]), - &(l->t[listidx + 1]), - sizeof(nfa_thread_T) * (l->n - count - listidx - 1)); + &(l->t[listidx + 1]), + sizeof(nfa_thread_T) * (l->n - count - listidx - 1)); xfree(l->t); l->t = newl; } else { // make space for new states, then move them from the // end to the current position memmove(&(l->t[listidx + count]), - &(l->t[listidx + 1]), - sizeof(nfa_thread_T) * (l->n - listidx - 1)); + &(l->t[listidx + 1]), + sizeof(nfa_thread_T) * (l->n - listidx - 1)); memmove(&(l->t[listidx]), - &(l->t[l->n - 1]), - sizeof(nfa_thread_T) * count); + &(l->t[l->n - 1]), + sizeof(nfa_thread_T) * count); } } --l->n; @@ -4622,8 +5313,9 @@ static int check_char_class(int class, int c) } break; case NFA_CLASS_BLANK: - if (c == ' ' || c == '\t') + if (c == ' ' || c == '\t') { return OK; + } break; case NFA_CLASS_CNTRL: if (c >= 1 && c <= 127 && iscntrl(c)) { @@ -4631,8 +5323,9 @@ static int check_char_class(int class, int c) } break; case NFA_CLASS_DIGIT: - if (ascii_isdigit(c)) + if (ascii_isdigit(c)) { return OK; + } break; case NFA_CLASS_GRAPH: if (c >= 1 && c <= 127 && isgraph(c)) { @@ -4645,8 +5338,9 @@ static int check_char_class(int class, int c) } break; case NFA_CLASS_PRINT: - if (vim_isprintc(c)) + if (vim_isprintc(c)) { return 
OK; + } break; case NFA_CLASS_PUNCT: if (c >= 1 && c < 128 && ispunct(c)) { @@ -4654,8 +5348,9 @@ static int check_char_class(int class, int c) } break; case NFA_CLASS_SPACE: - if ((c >= 9 && c <= 13) || (c == ' ')) + if ((c >= 9 && c <= 13) || (c == ' ')) { return OK; + } break; case NFA_CLASS_UPPER: if (mb_isupper(c)) { @@ -4663,20 +5358,24 @@ static int check_char_class(int class, int c) } break; case NFA_CLASS_XDIGIT: - if (ascii_isxdigit(c)) + if (ascii_isxdigit(c)) { return OK; + } break; case NFA_CLASS_TAB: - if (c == '\t') + if (c == '\t') { return OK; + } break; case NFA_CLASS_RETURN: - if (c == '\r') + if (c == '\r') { return OK; + } break; case NFA_CLASS_BACKSPACE: - if (c == '\b') + if (c == '\b') { return OK; + } break; case NFA_CLASS_ESCAPE: if (c == ESC) { @@ -4707,30 +5406,28 @@ static int check_char_class(int class, int c) return FAIL; } -/* - * Check for a match with subexpression "subidx". - * Return true if it matches. - */ -static int -match_backref ( - regsub_T *sub, /* pointers to subexpressions */ - int subidx, - int *bytelen /* out: length of match in bytes */ -) +/// Check for a match with subexpression "subidx". +/// +/// @param sub pointers to subexpressions +/// @param bytelen out: length of match in bytes +/// +/// @return true if it matches. 
+static int match_backref(regsub_T *sub, int subidx, int *bytelen) { int len; if (sub->in_use <= subidx) { retempty: - /* backref was not set, match an empty string */ + // backref was not set, match an empty string *bytelen = 0; return true; } if (REG_MULTI) { if (sub->list.multi[subidx].start_lnum < 0 - || sub->list.multi[subidx].end_lnum < 0) + || sub->list.multi[subidx].end_lnum < 0) { goto retempty; + } if (sub->list.multi[subidx].start_lnum == rex.lnum && sub->list.multi[subidx].end_lnum == rex.lnum) { len = sub->list.multi[subidx].end_col @@ -4751,8 +5448,9 @@ retempty: } } else { if (sub->list.line[subidx].start == NULL - || sub->list.line[subidx].end == NULL) + || sub->list.line[subidx].end == NULL) { goto retempty; + } len = (int)(sub->list.line[subidx].end - sub->list.line[subidx].start); if (cstrncmp(sub->list.line[subidx].start, rex.input, &len) == 0) { *bytelen = len; @@ -4763,22 +5461,18 @@ retempty: } - -/* - * Check for a match with \z subexpression "subidx". - * Return true if it matches. - */ -static int -match_zref ( - int subidx, - int *bytelen /* out: length of match in bytes */ -) +/// Check for a match with \z subexpression "subidx". +/// +/// @param bytelen out: length of match in bytes +/// +/// @return true if it matches. +static int match_zref(int subidx, int *bytelen) { int len; cleanup_zsubexpr(); if (re_extmatch_in == NULL || re_extmatch_in->matches[subidx] == NULL) { - /* backref was not set, match an empty string */ + // backref was not set, match an empty string *bytelen = 0; return true; } @@ -4799,11 +5493,11 @@ match_zref ( static void nfa_save_listids(nfa_regprog_T *prog, int *list) { int i; - nfa_state_T *p; + nfa_state_T *p; - /* Order in the list is reverse, it's a bit faster that way. */ + // Order in the list is reverse, it's a bit faster that way. 
p = &prog->state[0]; - for (i = prog->nstate; --i >= 0; ) { + for (i = prog->nstate; --i >= 0;) { list[i] = p->lastlist[1]; p->lastlist[1] = 0; ++p; @@ -4816,10 +5510,10 @@ static void nfa_save_listids(nfa_regprog_T *prog, int *list) static void nfa_restore_listids(nfa_regprog_T *prog, int *list) { int i; - nfa_state_T *p; + nfa_state_T *p; p = &prog->state[0]; - for (i = prog->nstate; --i >= 0; ) { + for (i = prog->nstate; --i >= 0;) { p->lastlist[1] = list[i]; ++p; } @@ -4827,8 +5521,12 @@ static void nfa_restore_listids(nfa_regprog_T *prog, int *list) static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos) { - if (op == 1) return pos > val; - if (op == 2) return pos < val; + if (op == 1) { + return pos > val; + } + if (op == 2) { + return pos < val; + } return val == pos; } @@ -4838,9 +5536,8 @@ static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos) * "pim" is NULL or contains info about a Postponed Invisible Match (start * position). */ -static int recursive_regmatch( - nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, - regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len) +static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, + regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len) FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7) { const int save_reginput_col = (int)(rex.input - rex.line); @@ -4849,7 +5546,7 @@ static int recursive_regmatch( const int save_nfa_listid = rex.nfa_listid; save_se_T *const save_nfa_endp = nfa_endp; save_se_T endpos; - save_se_T *endposp = NULL; + save_se_T *endposp = NULL; int need_restore = false; if (pim != NULL) { @@ -4919,8 +5616,9 @@ static int recursive_regmatch( } #ifdef REGEXP_DEBUG - if (log_fd != stderr) + if (log_fd != stderr) { fclose(log_fd); + } log_fd = NULL; #endif // Have to clear the lastlist field of the NFA nodes, so that @@ -4998,28 +5696,30 @@ static int failure_chance(nfa_state_T *state, int depth) int c = state->c; int l, r; - /* 
detect looping */ - if (depth > 4) + // detect looping + if (depth > 4) { return 1; + } switch (c) { case NFA_SPLIT: - if (state->out->c == NFA_SPLIT || state->out1->c == NFA_SPLIT) - /* avoid recursive stuff */ + if (state->out->c == NFA_SPLIT || state->out1->c == NFA_SPLIT) { + // avoid recursive stuff return 1; - /* two alternatives, use the lowest failure chance */ + } + // two alternatives, use the lowest failure chance l = failure_chance(state->out, depth + 1); r = failure_chance(state->out1, depth + 1); return l < r ? l : r; case NFA_ANY: - /* matches anything, unlikely to fail */ + // matches anything, unlikely to fail return 1; case NFA_MATCH: case NFA_MCLOSE: case NFA_ANY_COMPOSING: - /* empty match works always */ + // empty match works always return 0; case NFA_START_INVISIBLE: @@ -5031,7 +5731,7 @@ static int failure_chance(nfa_state_T *state, int depth) case NFA_START_INVISIBLE_BEFORE_NEG: case NFA_START_INVISIBLE_BEFORE_NEG_FIRST: case NFA_START_PATTERN: - /* recursive regmatch is expensive, use low failure chance */ + // recursive regmatch is expensive, use low failure chance return 5; case NFA_BOL: @@ -5106,7 +5806,7 @@ static int failure_chance(nfa_state_T *state, int depth) case NFA_ZREF7: case NFA_ZREF8: case NFA_ZREF9: - /* backreferences don't match in many places */ + // backreferences don't match in many places return 94; case NFA_LNUM_GT: @@ -5118,7 +5818,7 @@ static int failure_chance(nfa_state_T *state, int depth) case NFA_MARK_GT: case NFA_MARK_LT: case NFA_VISUAL: - /* before/after positions don't match very often */ + // before/after positions don't match very often return 85; case NFA_LNUM: @@ -5128,19 +5828,20 @@ static int failure_chance(nfa_state_T *state, int depth) case NFA_COL: case NFA_VCOL: case NFA_MARK: - /* specific positions rarely match */ + // specific positions rarely match return 98; case NFA_COMPOSING: return 95; default: - if (c > 0) - /* character match fails often */ + if (c > 0) { + // character match fails often 
return 95; + } } - /* something else, includes character classes */ + // something else, includes character classes return 50; } @@ -5236,8 +5937,7 @@ static int nfa_did_time_out(void) /// When there is a match "submatch" contains the positions. /// /// Note: Caller must ensure that: start != NULL. -static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, - regsubs_T *submatch, regsubs_T *m) +static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m) FUNC_ATTR_NONNULL_ARG(1, 2, 4) { int result = false; @@ -5246,9 +5946,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, nfa_thread_T *t; nfa_list_T list[2]; int listidx; - nfa_list_T *thislist; - nfa_list_T *nextlist; - int *listids = NULL; + nfa_list_T *thislist; + nfa_list_T *nextlist; + int *listids = NULL; int listids_len = 0; nfa_state_T *add_state; bool add_here; @@ -5257,7 +5957,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, int toplevel = start->c == NFA_MOPEN; regsubs_T *r; #ifdef NFA_REGEXP_DEBUG_LOG - FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); + FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); if (debug == NULL) { semsg("(NFA) COULD NOT OPEN %s!", NFA_REGEXP_DEBUG_LOG); @@ -5295,7 +5995,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, fprintf(log_fd, "**********************************\n"); nfa_set_code(start->c); fprintf(log_fd, " RUNNING nfa_regmatch() starting with state %d, code %s\n", - abs(start->id), code); + abs(start->id), code); fprintf(log_fd, "**********************************\n"); } else { emsg(_(e_log_open_failed)); @@ -5342,7 +6042,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, /* * Run for each character. 
*/ - for (;; ) { + for (;;) { int curc = utf_ptr2char((char *)rex.input); int clen = utfc_ptr2len((char *)rex.input); if (curc == NUL) { @@ -5350,7 +6050,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, go_to_nextline = false; } - /* swap lists */ + // swap lists thislist = &list[flag]; nextlist = &list[flag ^= 1]; nextlist->n = 0; // clear nextlist @@ -5377,8 +6077,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, { int i; - for (i = 0; i < thislist->n; i++) + for (i = 0; i < thislist->n; i++) { fprintf(log_fd, "%d ", abs(thislist->t[i].state->id)); + } } fprintf(log_fd, "\n"); #endif @@ -5389,8 +6090,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, /* * If the state lists are empty we can stop. */ - if (thislist->n == 0) + if (thislist->n == 0) { break; + } // compute nextlist for (listidx = 0; listidx < thislist->n; listidx++) { @@ -5439,7 +6141,6 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, add_count = 0; switch (t->state->c) { case NFA_MATCH: - { // If the match is not at the start of the line, ends before a // composing characters and rex.reg_icombine is not set, that // is not really a match. @@ -5464,7 +6165,6 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, clen = 0; } goto nextchar; - } case NFA_END_INVISIBLE: case NFA_END_INVISIBLE_NEG: @@ -5530,11 +6230,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_START_INVISIBLE_BEFORE_FIRST: case NFA_START_INVISIBLE_BEFORE_NEG: case NFA_START_INVISIBLE_BEFORE_NEG_FIRST: - { #ifdef REGEXP_DEBUG fprintf(log_fd, "Failure chance invisible: %d, what follows: %d\n", - failure_chance(t->state->out, 0), - failure_chance(t->state->out1->out, 0)); + failure_chance(t->state->out, 0), + failure_chance(t->state->out1->out, 0)); #endif // Do it directly if there already is a PIM or when // nfa_postprocess() detected it will work better. 
@@ -5610,11 +6309,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, goto theend; } } - } - break; + break; - case NFA_START_PATTERN: - { + case NFA_START_PATTERN: { nfa_state_T *skip = NULL; #ifdef REGEXP_DEBUG int skip_lid = 0; @@ -5628,13 +6325,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, skip_lid = nextlist->id; #endif } else if (state_in_list(nextlist, - t->state->out1->out->out, &t->subs)) { + t->state->out1->out->out, &t->subs)) { skip = t->state->out1->out->out; #ifdef REGEXP_DEBUG skip_lid = nextlist->id; #endif } else if (state_in_list(thislist, - t->state->out1->out->out, &t->subs)) { + t->state->out1->out->out, &t->subs)) { skip = t->state->out1->out->out; #ifdef REGEXP_DEBUG skip_lid = thislist->id; @@ -5643,10 +6340,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, if (skip != NULL) { #ifdef REGEXP_DEBUG nfa_set_code(skip->c); - fprintf( - log_fd, - "> Not trying to match pattern, output state %d is already in list %d. char %d: %s\n", - abs(skip->id), skip_lid, skip->c, code); + fprintf(log_fd, + "> Not trying to match pattern, output state %d is already in list %d. char %d: %s\n", // NOLINT(whitespace/line_length) + abs(skip->id), skip_lid, skip->c, code); #endif break; } @@ -5783,8 +6479,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } break; - case NFA_COMPOSING: - { + case NFA_COMPOSING: { int mc = curc; int len = 0; nfa_state_T *end; @@ -5835,17 +6530,20 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // composing chars are matched. 
result = OK; while (sta->c != NFA_END_COMPOSING) { - for (j = 0; j < ccount; ++j) - if (cchars[j] == sta->c) + for (j = 0; j < ccount; j++) { + if (cchars[j] == sta->c) { break; + } + } if (j == ccount) { result = FAIL; break; } sta = sta->out; } - } else + } else { result = FAIL; + } end = t->state->out1; // NFA_END_COMPOSING ADD_STATE_IF_MATCH(end); @@ -5868,11 +6566,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_START_COLL: - case NFA_START_NEG_COLL: - { + case NFA_START_NEG_COLL: { // What follows is a list of characters, until NFA_END_COLL. // One of them must match or none of them must match. - nfa_state_T *state; + nfa_state_T *state; int result_if_matched; int c1, c2; @@ -5884,7 +6581,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, state = t->state->out; result_if_matched = (t->state->c == NFA_START_COLL); - for (;; ) { + for (;;) { if (state->c == NFA_END_COLL) { result = !result_if_matched; break; @@ -5895,7 +6592,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, c2 = state->val; #ifdef REGEXP_DEBUG fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n", - curc, c1, c2); + curc, c1, c2); #endif if (curc >= c1 && curc <= c2) { result = result_if_matched; @@ -6210,45 +6907,43 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_VCOL: case NFA_VCOL_GT: - case NFA_VCOL_LT: - { - int op = t->state->c - NFA_VCOL; - colnr_T col = (colnr_T)(rex.input - rex.line); - - // Bail out quickly when there can't be a match, avoid the overhead of - // win_linetabsize() on long lines. - if (op != 1 && col > t->state->val * MB_MAXBYTES) { - break; - } + case NFA_VCOL_LT: { + int op = t->state->c - NFA_VCOL; + colnr_T col = (colnr_T)(rex.input - rex.line); - result = false; - win_T *wp = rex.reg_win == NULL ? 
curwin : rex.reg_win; - if (op == 1 && col - 1 > t->state->val && col > 100) { - long ts = wp->w_buffer->b_p_ts; - - // Guess that a character won't use more columns than 'tabstop', - // with a minimum of 4. - if (ts < 4) { - ts = 4; - } - result = col > t->state->val * ts; - } - if (!result) { - uintmax_t lts = win_linetabsize(wp, rex.line, col); - assert(t->state->val >= 0); - result = nfa_re_num_cmp((uintmax_t)t->state->val, op, lts + 1); - } - if (result) { - add_here = true; - add_state = t->state->out; + // Bail out quickly when there can't be a match, avoid the overhead of + // win_linetabsize() on long lines. + if (op != 1 && col > t->state->val * MB_MAXBYTES) { + break; + } + + result = false; + win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win; + if (op == 1 && col - 1 > t->state->val && col > 100) { + long ts = wp->w_buffer->b_p_ts; + + // Guess that a character won't use more columns than 'tabstop', + // with a minimum of 4. + if (ts < 4) { + ts = 4; } + result = col > t->state->val * ts; } - break; + if (!result) { + uintmax_t lts = win_linetabsize(wp, rex.line, col); + assert(t->state->val >= 0); + result = nfa_re_num_cmp((uintmax_t)t->state->val, op, lts + 1); + } + if (result) { + add_here = true; + add_state = t->state->out; + } + } + break; case NFA_MARK: case NFA_MARK_GT: - case NFA_MARK_LT: - { + case NFA_MARK_LT: { pos_T *pos; size_t col = REG_MULTI ? rex.input - rex.line : 0; @@ -6264,7 +6959,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // exists and mark is set in reg_buf. if (pos != NULL && pos->lnum > 0) { const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum - && pos->col == MAXCOL + && pos->col == MAXCOL ? 
(colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) : pos->col; @@ -6287,8 +6982,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_CURSOR: result = rex.reg_win != NULL - && (rex.lnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) - && ((colnr_T)(rex.input - rex.line) == rex.reg_win->w_cursor.col); + && (rex.lnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) + && ((colnr_T)(rex.input - rex.line) == rex.reg_win->w_cursor.col); if (result) { add_here = true; add_state = t->state->out; @@ -6358,10 +7053,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, nfa_pim_T *pim; nfa_pim_T pim_copy; - if (t->pim.result == NFA_PIM_UNUSED) + if (t->pim.result == NFA_PIM_UNUSED) { pim = NULL; - else + } else { pim = &t->pim; + } // Handle the postponed invisible match if the match might end // without advancing and before the end of the line. @@ -6394,10 +7090,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, result = (pim->result == NFA_PIM_MATCH); #ifdef REGEXP_DEBUG fprintf(log_fd, "\n"); - fprintf( - log_fd, - "Using previous recursive nfa_regmatch() result, result == %d\n", - pim->result); + fprintf(log_fd, + "Using previous recursive nfa_regmatch() result, result == %d\n", + pim->result); fprintf(log_fd, "MATCH = %s\n", result ? 
"OK" : "false"); fprintf(log_fd, "\n"); #endif @@ -6500,7 +7195,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, || utf_fold(c) != utf_fold(prog->regstart))) { #ifdef REGEXP_DEBUG fprintf(log_fd, - " Skipping start state, regstart does not match\n"); + " Skipping start state, regstart does not match\n"); #endif add = false; } @@ -6532,8 +7227,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, { int i; - for (i = 0; i < thislist->n; i++) + for (i = 0; i < thislist->n; i++) { fprintf(log_fd, "%d ", abs(thislist->t[i].state->id)); + } } fprintf(log_fd, "\n"); #endif @@ -6565,8 +7261,9 @@ nextchar: } #ifdef REGEXP_DEBUG - if (log_fd != stderr) + if (log_fd != stderr) { fclose(log_fd); + } log_fd = NULL; #endif @@ -6583,18 +7280,19 @@ theend: return nfa_match; } -// Try match of "prog" with at rex.line["col"]. -// Returns <= 0 for failure, number of lines contained in the match otherwise. -static long nfa_regtry(nfa_regprog_T *prog, - colnr_T col, - proftime_T *tm, // timeout limit or NULL - int *timed_out) // flag set on timeout or NULL +/// Try match of "prog" with at rex.line["col"]. +/// +/// @param tm timeout limit or NULL +/// @param timed_out flag set on timeout or NULL +/// +/// @return <= 0 for failure, number of lines contained in the match otherwise. 
+static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_out) { int i; regsubs_T subs, m; nfa_state_T *start = prog->start; #ifdef REGEXP_DEBUG - FILE *f; + FILE *f; #endif rex.input = rex.line + col; @@ -6606,10 +7304,10 @@ static long nfa_regtry(nfa_regprog_T *prog, f = fopen(NFA_REGEXP_RUN_LOG, "a"); if (f != NULL) { fprintf(f, - "\n\n\t=======================================================\n"); -#ifdef REGEXP_DEBUG + "\n\n\t=======================================================\n"); +# ifdef REGEXP_DEBUG fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); -#endif +# endif fprintf(f, "\tInput text is \"%s\" \n", rex.input); fprintf(f, "\t=======================================================\n\n"); nfa_print_state(f, start); @@ -6668,7 +7366,7 @@ static long nfa_regtry(nfa_regprog_T *prog, } } - /* Package any found \z(...\) matches for export. Default is none. */ + // Package any found \z(...\) matches for export. Default is none. unref_extmatch(re_extmatch_out); re_extmatch_out = NULL; @@ -6691,9 +7389,10 @@ static long nfa_regtry(nfa_regprog_T *prog, } else { struct linepos *lpos = &subs.synt.list.line[i]; - if (lpos->start != NULL && lpos->end != NULL) + if (lpos->start != NULL && lpos->end != NULL) { re_extmatch_out->matches[i] = vim_strnsave(lpos->start, lpos->end - lpos->start); + } } } } @@ -6711,10 +7410,9 @@ static long nfa_regtry(nfa_regprog_T *prog, /// /// @return <= 0 if there is no match and number of lines contained in the /// match otherwise. -static long nfa_regexec_both(char_u *line, colnr_T startcol, - proftime_T *tm, int *timed_out) +static long nfa_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm, int *timed_out) { - nfa_regprog_T *prog; + nfa_regprog_T *prog; long retval = 0L; colnr_T col = startcol; @@ -6729,7 +7427,7 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, rex.reg_endp = rex.reg_match->endp; } - /* Be paranoid... */ + // Be paranoid... 
if (prog == NULL || line == NULL) { iemsg(_(e_null)); goto theend; @@ -6759,8 +7457,9 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, nfa_regengine.expr = prog->pattern; #endif - if (prog->reganch && col > 0) + if (prog->reganch && col > 0) { return 0L; + } rex.need_clear_subexpr = true; // Clear the external match subpointers if necessary. @@ -6834,11 +7533,12 @@ theend: */ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) { - nfa_regprog_T *prog = NULL; - int *postfix; + nfa_regprog_T *prog = NULL; + int *postfix; - if (expr == NULL) + if (expr == NULL) { return NULL; + } #ifdef REGEXP_DEBUG nfa_regengine.expr = expr; @@ -6881,7 +7581,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) */ post2nfa(postfix, post_ptr, true); - /* allocate the regprog with space for the compiled regexp */ + // allocate the regprog with space for the compiled regexp size_t prog_size = sizeof(nfa_regprog_T) + sizeof(nfa_state_T) * (nstate - 1); prog = xmalloc(prog_size); state_ptr = prog->state; @@ -6912,7 +7612,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) nfa_postfix_dump(expr, OK); nfa_dump(prog); #endif - /* Remember whether this pattern has any \z specials in it. */ + // Remember whether this pattern has any \z specials in it. prog->reghasz = re_has_z; prog->pattern = vim_strsave(expr); #ifdef REGEXP_DEBUG @@ -6946,21 +7646,16 @@ static void nfa_regfree(regprog_T *prog) } } -/* - * Match a regexp against a string. - * "rmp->regprog" is a compiled regexp as returned by nfa_regcomp(). - * Uses curbuf for line count and 'iskeyword'. - * If "line_lbr" is true, consider a "\n" in "line" to be a line break. - * - * Returns <= 0 for failure, number of lines contained in the match otherwise. - */ -static int -nfa_regexec_nl ( - regmatch_T *rmp, - char_u *line, /* string to match against */ - colnr_T col, /* column to start looking for match */ - bool line_lbr -) +/// Match a regexp against a string. 
+/// "rmp->regprog" is a compiled regexp as returned by nfa_regcomp(). +/// Uses curbuf for line count and 'iskeyword'. +/// If "line_lbr" is true, consider a "\n" in "line" to be a line break. +/// +/// @param line string to match against +/// @param col column to start looking for match +/// +/// @return <= 0 for failure, number of lines contained in the match otherwise. +static int nfa_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col, bool line_lbr) { rex.reg_match = rmp; rex.reg_mmatch = NULL; @@ -7009,8 +7704,7 @@ nfa_regexec_nl ( /// /// @par /// FIXME if this behavior is not compatible. -static long nfa_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, - linenr_T lnum, colnr_T col, +static long nfa_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm, int *timed_out) { rex.reg_match = NULL; |