diff options
author | Lewis Russell <lewis6991@gmail.com> | 2022-11-07 10:21:44 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-07 10:21:44 +0000 |
commit | bdb98de2d16ce7185a0f53740e06511904fdd814 (patch) | |
tree | d7206b68750c35d0b31113d5d8ec94c2f3ad86eb | |
parent | e9c1cb71f8a4d6d7818dcb5f71ac78bee431309a (diff) | |
download | rneovim-bdb98de2d16ce7185a0f53740e06511904fdd814.tar.gz rneovim-bdb98de2d16ce7185a0f53740e06511904fdd814.tar.bz2 rneovim-bdb98de2d16ce7185a0f53740e06511904fdd814.zip |
refactor: more clint (#20910)
-rw-r--r-- | src/nvim/eval/executor.c | 4 | ||||
-rw-r--r-- | src/nvim/hardcopy.c | 10 | ||||
-rw-r--r-- | src/nvim/mbyte.c | 2 | ||||
-rw-r--r-- | src/nvim/memline.c | 14 | ||||
-rw-r--r-- | src/nvim/msgpack_rpc/helpers.c | 5 | ||||
-rw-r--r-- | src/nvim/normal.c | 2 | ||||
-rw-r--r-- | src/nvim/regexp.c | 269 | ||||
-rw-r--r-- | src/nvim/regexp_bt.c | 604 | ||||
-rw-r--r-- | src/nvim/regexp_defs.h | 70 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 414 | ||||
-rw-r--r-- | src/nvim/spellfile.c | 9 | ||||
-rw-r--r-- | src/nvim/strings.c | 20 | ||||
-rw-r--r-- | src/nvim/syntax.c | 19 | ||||
-rw-r--r-- | src/nvim/viml/parser/expressions.c | 34 |
14 files changed, 597 insertions, 879 deletions
diff --git a/src/nvim/eval/executor.c b/src/nvim/eval/executor.c index 0e0d0fe696..e253098df5 100644 --- a/src/nvim/eval/executor.c +++ b/src/nvim/eval/executor.c @@ -122,8 +122,8 @@ int eexe_mod_op(typval_T *const tv1, const typval_T *const tv2, const char *cons break; } const float_T f = (tv2->v_type == VAR_FLOAT - ? tv2->vval.v_float - : (float_T)tv_get_number(tv2)); + ? tv2->vval.v_float + : (float_T)tv_get_number(tv2)); switch (*op) { case '+': tv1->vval.v_float += f; break; diff --git a/src/nvim/hardcopy.c b/src/nvim/hardcopy.c index 7345e9cc35..50af6dafe7 100644 --- a/src/nvim/hardcopy.c +++ b/src/nvim/hardcopy.c @@ -729,7 +729,7 @@ void ex_hardcopy(exarg_T *eap) } assert(prtpos.bytes_printed <= SIZE_MAX / 100); - sprintf((char *)IObuff, _("Printing page %d (%zu%%)"), + sprintf((char *)IObuff, _("Printing page %d (%zu%%)"), // NOLINT(runtime/printf) page_count + 1 + side, prtpos.bytes_printed * 100 / bytes_to_print); if (!mch_print_begin_page((char_u *)IObuff)) { @@ -750,8 +750,7 @@ void ex_hardcopy(exarg_T *eap) prtpos.file_line); } - for (page_line = 0; page_line < settings.lines_per_page; - ++page_line) { + for (page_line = 0; page_line < settings.lines_per_page; page_line++) { prtpos.column = hardcopy_line(&settings, page_line, &prtpos); if (prtpos.column == 0) { @@ -2440,8 +2439,7 @@ bool mch_print_begin(prt_settings_T *psettings) prt_dsc_font_resource("DocumentNeededResources", &prt_ps_courier_font); } if (prt_out_mbyte) { - prt_dsc_font_resource((prt_use_courier ? NULL - : "DocumentNeededResources"), &prt_ps_mb_font); + prt_dsc_font_resource((prt_use_courier ? NULL : "DocumentNeededResources"), &prt_ps_mb_font); if (!prt_custom_cmap) { prt_dsc_resources(NULL, "cmap", prt_cmap); } @@ -2990,7 +2988,7 @@ int mch_print_text_out(char_u *const textp, size_t len) ga_append(&prt_ps_buffer, '\\'); break; default: - sprintf((char *)ch_buff, "%03o", (unsigned int)ch); + sprintf((char *)ch_buff, "%03o", (unsigned int)ch); // NOLINT(runtime/printf) ga_append(&prt_ps_buffer, (char)ch_buff[0]); ga_append(&prt_ps_buffer, (char)ch_buff[1]); ga_append(&prt_ps_buffer, (char)ch_buff[2]); diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 9e34c7e413..14691741d8 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1550,7 +1550,7 @@ void show_utf8(void) } clen = utf_ptr2len((char *)line + i); } - sprintf((char *)IObuff + rlen, "%02x ", + sprintf((char *)IObuff + rlen, "%02x ", // NOLINT(runtime/printf) (line[i] == NL) ? NUL : line[i]); // NUL is stored as NL clen--; rlen += (int)strlen(IObuff + rlen); diff --git a/src/nvim/memline.c b/src/nvim/memline.c index 225e2aeab1..f9900eb434 100644 --- a/src/nvim/memline.c +++ b/src/nvim/memline.c @@ -2135,13 +2135,13 @@ static int ml_append_int(buf_T *buf, linenr_T lnum, char_u *line, colnr_T len, b buf->b_ml.ml_stack_top = stack_idx + 1; // truncate stack if (lineadd) { - --(buf->b_ml.ml_stack_top); + (buf->b_ml.ml_stack_top)--; // fix line count for rest of blocks in the stack ml_lineadd(buf, lineadd); // fix stack itself buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high += lineadd; - ++(buf->b_ml.ml_stack_top); + (buf->b_ml.ml_stack_top)++; } // We are finished, break the loop here. @@ -2428,7 +2428,7 @@ static int ml_delete_int(buf_T *buf, linenr_T lnum, bool message) buf->b_ml.ml_stack[buf->b_ml.ml_stack_top].ip_high += buf->b_ml.ml_locked_lineadd; } - ++(buf->b_ml.ml_stack_top); + (buf->b_ml.ml_stack_top)++; break; } @@ -2698,11 +2698,11 @@ static bhdr_T *ml_find_line(buf_T *buf, linenr_T lnum, int action) && buf->b_ml.ml_locked_high >= lnum) { // remember to update pointer blocks and stack later if (action == ML_INSERT) { - ++(buf->b_ml.ml_locked_lineadd); - ++(buf->b_ml.ml_locked_high); + (buf->b_ml.ml_locked_lineadd)++; + (buf->b_ml.ml_locked_high)++; } else if (action == ML_DELETE) { - --(buf->b_ml.ml_locked_lineadd); - --(buf->b_ml.ml_locked_high); + (buf->b_ml.ml_locked_lineadd)--; + (buf->b_ml.ml_locked_high)--; } return buf->b_ml.ml_locked; } diff --git a/src/nvim/msgpack_rpc/helpers.c b/src/nvim/msgpack_rpc/helpers.c index ddca9afad0..86babd1c36 100644 --- a/src/nvim/msgpack_rpc/helpers.c +++ b/src/nvim/msgpack_rpc/helpers.c @@ -95,9 +95,8 @@ bool msgpack_rpc_to_object(const msgpack_object *const obj, Object *const arg) dest = conv(((String) { \ .size = obj->via.attr.size, \ .data = (obj->via.attr.ptr == NULL || obj->via.attr.size == 0 \ - ? xmemdupz("", 0) \ - : xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), \ - })); \ + ? xmemdupz("", 0) \ + : xmemdupz(obj->via.attr.ptr, obj->via.attr.size)), })); \ break; \ } STR_CASE(MSGPACK_OBJECT_STR, str, cur.mobj, *cur.aobj, STRING_OBJ) diff --git a/src/nvim/normal.c b/src/nvim/normal.c index d142af555a..d7e384334b 100644 --- a/src/nvim/normal.c +++ b/src/nvim/normal.c @@ -2266,7 +2266,7 @@ bool find_decl(char_u *ptr, size_t len, bool locally, bool thisblock, int flags_ // Put "\V" before the pattern to avoid that the special meaning of "." // and "~" causes trouble. assert(len <= INT_MAX); - sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s", + sprintf((char *)pat, vim_iswordp(ptr) ? "\\V\\<%.*s\\>" : "\\V%.*s", // NOLINT(runtime/printf) (int)len, ptr); old_pos = curwin->w_cursor; save_p_ws = p_ws; diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 7a96889f22..27b5d198ac 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -1,9 +1,7 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -/* - * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() - */ +// Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() // By default: do not create debugging logs or files related to regular // expressions, even when compiling with -DDEBUG. @@ -41,21 +39,17 @@ # define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log" #endif -/* - * Magic characters have a special meaning, they don't match literally. - * Magic characters are negative. This separates them from literal characters - * (possibly multi-byte). Only ASCII characters can be Magic. - */ +// Magic characters have a special meaning, they don't match literally. +// Magic characters are negative. This separates them from literal characters +// (possibly multi-byte). Only ASCII characters can be Magic. #define Magic(x) ((int)(x) - 256) #define un_Magic(x) ((x) + 256) #define is_Magic(x) ((x) < 0) -/* - * We should define ftpr as a pointer to a function returning a pointer to - * a function returning a pointer to a function ... - * This is impossible, so we declare a pointer to a function returning a - * pointer to a function returning void. This should work for all compilers. - */ +// We should define ftpr as a pointer to a function returning a pointer to +// a function returning a pointer to a function ... +// This is impossible, so we declare a pointer to a function returning a +// pointer to a function returning void. This should work for all compilers. typedef void (*(*fptr_T)(int *, int))(void); static int no_Magic(int x) @@ -143,28 +137,24 @@ static int re_multi_type(int c) static char *reg_prev_sub = NULL; -/* - * REGEXP_INRANGE contains all characters which are always special in a [] - * range after '\'. - * REGEXP_ABBR contains all characters which act as abbreviations after '\'. - * These are: - * \n - New line (NL). - * \r - Carriage Return (CR). - * \t - Tab (TAB). - * \e - Escape (ESC). - * \b - Backspace (Ctrl_H). - * \d - Character code in decimal, eg \d123 - * \o - Character code in octal, eg \o80 - * \x - Character code in hex, eg \x4a - * \u - Multibyte character code, eg \u20ac - * \U - Long multibyte character code, eg \U12345678 - */ +// REGEXP_INRANGE contains all characters which are always special in a [] +// range after '\'. +// REGEXP_ABBR contains all characters which act as abbreviations after '\'. +// These are: +// \n - New line (NL). +// \r - Carriage Return (CR). +// \t - Tab (TAB). +// \e - Escape (ESC). +// \b - Backspace (Ctrl_H). +// \d - Character code in decimal, eg \d123 +// \o - Character code in octal, eg \o80 +// \x - Character code in hex, eg \x4a +// \u - Multibyte character code, eg \u20ac +// \U - Long multibyte character code, eg \U12345678 static char REGEXP_INRANGE[] = "]^-n\\"; static char REGEXP_ABBR[] = "nrtebdoxuU"; -/* - * Translate '\x' to its control character, except "\n", which is Magic. - */ +// Translate '\x' to its control character, except "\n", which is Magic. static int backslash_trans(int c) { switch (c) { @@ -239,10 +229,8 @@ static int get_char_class(char **pp) return CLASS_NONE; } -/* - * Specific version of character class functions. - * Using a table to keep this fast. - */ +// Specific version of character class functions. +// Using a table to keep this fast. static int16_t class_tab[256]; #define RI_DIGIT 0x01 @@ -325,9 +313,7 @@ static int reg_string; // matching with a string instead of a buffer // line static int reg_strict; // "[abc" is illegal -/* - * META contains all characters that may be magic, except '^' and '$'. - */ +// META contains all characters that may be magic, except '^' and '$'. // uncrustify:off @@ -391,11 +377,9 @@ int re_multiline(const regprog_T *prog) return prog->regflags & RF_HASNL; } -/* - * Check for an equivalence class name "[=a=]". "pp" points to the '['. - * Returns a character representing the class. Zero means that no item was - * recognized. Otherwise "pp" is advanced to after the item. - */ +// Check for an equivalence class name "[=a=]". "pp" points to the '['. +// Returns a character representing the class. Zero means that no item was +// recognized. Otherwise "pp" is advanced to after the item. static int get_equi_class(char **pp) { int c; @@ -413,12 +397,10 @@ static int get_equi_class(char **pp) return 0; } -/* - * Check for a collating element "[.a.]". "pp" points to the '['. - * Returns a character. Zero means that no item was recognized. Otherwise - * "pp" is advanced to after the item. - * Currently only single characters are recognized! - */ +// Check for a collating element "[.a.]". "pp" points to the '['. +// Returns a character. Zero means that no item was recognized. Otherwise +// "pp" is advanced to after the item. +// Currently only single characters are recognized! static int get_coll_element(char **pp) { int c; @@ -562,9 +544,7 @@ static int prevchr_len; // byte length of previous char static int at_start; // True when on the first character static int prev_at_start; // True when on the second character -/* - * Start parsing at "str". - */ +// Start parsing at "str". static void initchr(char_u *str) { regparse = (char *)str; @@ -574,10 +554,8 @@ static void initchr(char_u *str) prev_at_start = false; } -/* - * Save the current parse state, so that it can be restored and parsing - * starts in the same state again. - */ +// Save the current parse state, so that it can be restored and parsing +// starts in the same state again. static void save_parse_state(parse_state_T *ps) { ps->regparse = (char_u *)regparse; @@ -591,9 +569,7 @@ static void save_parse_state(parse_state_T *ps) ps->regnpar = regnpar; } -/* - * Restore a previously saved parse state. - */ +// Restore a previously saved parse state. static void restore_parse_state(parse_state_T *ps) { regparse = (char *)ps->regparse; @@ -607,9 +583,7 @@ static void restore_parse_state(parse_state_T *ps) regnpar = ps->regnpar; } -/* - * Get the next character without advancing. - */ +// Get the next character without advancing. static int peekchr(void) { static int after_slash = false; @@ -736,9 +710,7 @@ static int peekchr(void) after_slash--; curchr = toggle_Magic(curchr); } else if (vim_strchr(REGEXP_ABBR, c)) { - /* - * Handle abbreviations, like "\t" for TAB -- webb - */ + // Handle abbreviations, like "\t" for TAB -- webb curchr = backslash_trans(c); } else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) { curchr = toggle_Magic(c); @@ -757,9 +729,7 @@ static int peekchr(void) return curchr; } -/* - * Eat one lexed character. Do this in a way that we can undo it. - */ +// Eat one lexed character. Do this in a way that we can undo it. static void skipchr(void) { // peekchr() eats a backslash, do the same here @@ -781,10 +751,8 @@ static void skipchr(void) nextchr = -1; } -/* - * Skip a character while keeping the value of prev_at_start for at_start. - * prevchr and prevprevchr are also kept. - */ +// Skip a character while keeping the value of prev_at_start for at_start. +// prevchr and prevprevchr are also kept. static void skipchr_keepstart(void) { int as = prev_at_start; @@ -797,10 +765,8 @@ static void skipchr_keepstart(void) prevprevchr = prpr; } -/* - * Get the next character from the pattern. We know about magic and such, so - * therefore we need a lexical analyzer. - */ +// Get the next character from the pattern. We know about magic and such, so +// therefore we need a lexical analyzer. static int getchr(void) { int chr = peekchr(); @@ -809,9 +775,7 @@ static int getchr(void) return chr; } -/* - * put character back. Works only once! - */ +// put character back. Works only once! static void ungetchr(void) { nextchr = curchr; @@ -825,15 +789,13 @@ static void ungetchr(void) regparse -= prevchr_len; } -/* - * Get and return the value of the hex string at the current position. - * Return -1 if there is no valid hex number. - * The position is updated: - * blahblah\%x20asdf - * before-^ ^-after - * The parameter controls the maximum number of input characters. This will be - * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. - */ +// Get and return the value of the hex string at the current position. +// Return -1 if there is no valid hex number. +// The position is updated: +// blahblah\%x20asdf +// before-^ ^-after +// The parameter controls the maximum number of input characters. This will be +// 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. static int64_t gethexchrs(int maxinputlen) { int64_t nr = 0; @@ -856,10 +818,8 @@ static int64_t gethexchrs(int maxinputlen) return nr; } -/* - * Get and return the value of the decimal string immediately after the - * current position. Return -1 for invalid. Consumes all digits. - */ +// Get and return the value of the decimal string immediately after the +// current position. Return -1 for invalid. Consumes all digits. static int64_t getdecchrs(void) { int64_t nr = 0; @@ -883,14 +843,12 @@ static int64_t getdecchrs(void) return nr; } -/* - * get and return the value of the octal string immediately after the current - * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle - * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't - * treat 8 or 9 as recognised characters. Position is updated: - * blahblah\%o210asdf - * before-^ ^-after - */ +// get and return the value of the octal string immediately after the current +// position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle +// numbers > 377 correctly (for example, 400 is treated as 40) and doesn't +// treat 8 or 9 as recognised characters. Position is updated: +// blahblah\%o210asdf +// before-^ ^-after static int64_t getoctchrs(void) { int64_t nr = 0; @@ -913,12 +871,10 @@ static int64_t getoctchrs(void) return nr; } -/* - * read_limits - Read two integers to be taken as a minimum and maximum. - * If the first character is '-', then the range is reversed. - * Should end with 'end'. If minval is missing, zero is default, if maxval is - * missing, a very big number is the default. - */ +// read_limits - Read two integers to be taken as a minimum and maximum. +// If the first character is '-', then the range is reversed. +// Should end with 'end'. If minval is missing, zero is default, if maxval is +// missing, a very big number is the default. static int read_limits(long *minval, long *maxval) { int reverse = false; @@ -950,10 +906,8 @@ static int read_limits(long *minval, long *maxval) EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL); } - /* - * Reverse the range if there was a '-', or make sure it is in the right - * order otherwise. - */ + // Reverse the range if there was a '-', or make sure it is in the right + // order otherwise. if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) { tmp = *minval; *minval = *maxval; @@ -963,13 +917,9 @@ static int read_limits(long *minval, long *maxval) return OK; } -/* - * vim_regexec and friends - */ +// vim_regexec and friends -/* - * Global work variables for vim_regexec(). - */ +// Global work variables for vim_regexec(). // Sometimes need to save a copy of a line. Since alloc()/free() is very // slow, we keep one allocated piece of memory and only re-allocate it when @@ -1052,9 +1002,7 @@ static bool reg_iswordc(int c) return vim_iswordc_buf(c, rex.reg_buf); } -/* - * Get pointer to the line "lnum", which is relative to "reg_firstlnum". - */ +// Get pointer to the line "lnum", which is relative to "reg_firstlnum". static char_u *reg_getline(linenr_T lnum) { // when looking behind for a match/no-match lnum is negative. But we @@ -1077,9 +1025,7 @@ static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos // true if using multi-line regexp. #define REG_MULTI (rex.reg_match == NULL) -/* - * Create a new extmatch and mark it as referenced once. - */ +// Create a new extmatch and mark it as referenced once. static reg_extmatch_T *make_extmatch(void) FUNC_ATTR_NONNULL_RET { @@ -1088,9 +1034,7 @@ static reg_extmatch_T *make_extmatch(void) return em; } -/* - * Add a reference to an extmatch. - */ +// Add a reference to an extmatch. reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) { if (em != NULL) { @@ -1099,10 +1043,8 @@ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) return em; } -/* - * Remove a reference to an extmatch. If there are no references left, free - * the info. - */ +// Remove a reference to an extmatch. If there are no references left, free +// the info. void unref_extmatch(reg_extmatch_T *em) { int i; @@ -1201,10 +1143,8 @@ static bool reg_match_visual(void) return true; } -/* - * Check the regexp program for its magic number. - * Return true if it's wrong. - */ +// Check the regexp program for its magic number. +// Return true if it's wrong. static int prog_magic_wrong(void) { regprog_T *prog; @@ -1222,11 +1162,9 @@ static int prog_magic_wrong(void) return false; } -/* - * Cleanup the subexpressions, if this wasn't done yet. - * This construction is used to clear the subexpressions only when they are - * used (to increase speed). - */ +// Cleanup the subexpressions, if this wasn't done yet. +// This construction is used to clear the subexpressions only when they are +// used (to increase speed). static void cleanup_subexpr(void) { if (rex.need_clear_subexpr) { @@ -1265,12 +1203,10 @@ static void reg_nextline(void) fast_breakcheck(); } -/* - * Check whether a backreference matches. - * Returns RA_FAIL, RA_NOMATCH or RA_MATCH. - * If "bytelen" is not NULL, it is set to the byte length of the match in the - * last line. - */ +// Check whether a backreference matches. +// Returns RA_FAIL, RA_NOMATCH or RA_MATCH. +// If "bytelen" is not NULL, it is set to the byte length of the match in the +// last line. static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen) { @@ -1449,9 +1385,9 @@ static int cstrncmp(char *s1, char *s2, int *n) c1 = mb_ptr2char_adv((const char_u **)&str1); c2 = mb_ptr2char_adv((const char_u **)&str2); - /* decompose the character if necessary, into 'base' characters - * because I don't care about Arabic, I will hard-code the Hebrew - * which I *do* care about! So sue me... */ + // decompose the character if necessary, into 'base' characters + // because I don't care about Arabic, I will hard-code the Hebrew + // which I *do* care about! So sue me... if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) { // decomposition necessary? mb_decompose(c1, &c11, &junk, &junk); @@ -1566,7 +1502,7 @@ char *regtilde(char *source, int magic, bool preview) int len; int prevlen; - for (p = newsub; *p; ++p) { + for (p = newsub; *p; p++) { if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) { if (reg_prev_sub != NULL) { // length = len(newsub) - 1 + len(prev_sub) + 1 @@ -1871,12 +1807,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des *s = CAR; } else if (*s == '\\' && s[1] != NUL) { s++; - /* Change NL to CR here too, so that this works: - * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: - * abc\ - * def - * Not when called from vim_regexec_nl(). - */ + // Change NL to CR here too, so that this works: + // :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: + // abc{backslash} + // def + // Not when called from vim_regexec_nl(). if (*s == NL && !rsm.sm_line_lbr) { *s = CAR; } @@ -2172,10 +2107,8 @@ char *reg_submatch(int no) if (rsm.sm_match == NULL) { ssize_t len; - /* - * First round: compute the length and allocate memory. - * Second round: copy the text. - */ + // First round: compute the length and allocate memory. + // Second round: copy the text. for (round = 1; round <= 2; round++) { lnum = rsm.sm_mmatch->startpos[no].lnum; if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) { @@ -2216,7 +2149,7 @@ char *reg_submatch(int no) len++; } if (round == 2) { - STRNCPY(retval + len, reg_getline_submatch(lnum), + STRNCPY(retval + len, reg_getline_submatch(lnum), // NOLINT(runtime/printf) rsm.sm_mmatch->endpos[no].col); } len += rsm.sm_mmatch->endpos[no].col; @@ -2327,12 +2260,10 @@ static char_u regname[][30] = { }; #endif -/* - * Compile a regular expression into internal code. - * Returns the program in allocated memory. - * Use vim_regfree() to free the memory. - * Returns NULL for an error. - */ +// Compile a regular expression into internal code. +// Returns the program in allocated memory. +// Use vim_regfree() to free the memory. +// Returns NULL for an error. regprog_T *vim_regcomp(char *expr_arg, int re_flags) { regprog_T *prog = NULL; @@ -2413,9 +2344,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags) return prog; } -/* - * Free a compiled regexp program, returned by vim_regcomp(). - */ +// Free a compiled regexp program, returned by vim_regcomp(). void vim_regfree(regprog_T *prog) { if (prog != NULL) { diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 6f63b38a90..7b5f4cd12a 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -1,137 +1,130 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -/* - * - * Backtracking regular expression implementation. - * - * This file is included in "regexp.c". - * - * NOTICE: - * - * This is NOT the original regular expression code as written by Henry - * Spencer. This code has been modified specifically for use with the VIM - * editor, and should not be used separately from Vim. If you want a good - * regular expression library, get the original code. The copyright notice - * that follows is from the original. - * - * END NOTICE - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions. Serious changes in - * regular-expression syntax might require a total rethink. - * - * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert - * Webb, Ciaran McCreesh and Bram Moolenaar. - * Named character class support added by Walter Briscoe (1998 Jul 01) - */ - -/* - * The "internal use only" fields in regexp_defs.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - * - * regstart char that must begin a match; NUL if none obvious; Can be a - * multi-byte character. - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * regmlen length of regmust string - * regflags RF_ values or'ed together - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection - * of lines that cannot possibly match. The regmust tests are costly enough - * that vim_regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup). Regmlen is - * supplied because the test in vim_regexec() needs it and vim_regcomp() is - * computing it anyway. - */ - -/* - * Structure for regexp "program". This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology). Each node is an opcode - * plus a "next" pointer, possibly plus an operand. "Next" pointers of - * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" - * pointer with a BRANCH on both ends of it is connecting two alternatives. - * (Here we have one of the subtle syntax dependencies: an individual BRANCH - * (as opposed to a collection of them) is never concatenated with anything - * because of operator precedence). The "next" pointer of a BRACES_COMPLEX - * node points to the node after the stuff to be repeated. - * The operand of some types of node is a literal string; for others, it is a - * node leading into a sub-FSM. In particular, the operand of a BRANCH node - * is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects to the - * thing following the set of BRANCHes.) - * - * pattern is coded like: - * - * +-----------------+ - * | V - * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END - * | ^ | ^ - * +------+ +----------+ - * - * - * +------------------+ - * V | - * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +---------------+ | - * +---------------------------------------------+ - * - * - * +----------------------+ - * V | - * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +-----------+ | - * +--------------------------------------------------+ - * - * - * +-------------------------+ - * V | - * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END - * | | ^ - * | +----------------+ - * +-----------------------------------------------+ - * - * - * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END - * | | ^ ^ - * | +----------------+ | - * +--------------------------------+ - * - * +---------+ - * | V - * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END - * | | | | ^ ^ - * | | | +-----+ | - * | | +----------------+ | - * | +---------------------------+ | - * +------------------------------------------------------+ - * - * They all start with a BRANCH for "\|" alternatives, even when there is only - * one alternative. - */ +// Backtracking regular expression implementation. +// +// This file is included in "regexp.c". +// +// NOTICE: +// +// This is NOT the original regular expression code as written by Henry +// Spencer. This code has been modified specifically for use with the VIM +// editor, and should not be used separately from Vim. If you want a good +// regular expression library, get the original code. The copyright notice +// that follows is from the original. +// +// END NOTICE +// +// Copyright (c) 1986 by University of Toronto. +// Written by Henry Spencer. Not derived from licensed software. +// +// Permission is granted to anyone to use this software for any +// purpose on any computer system, and to redistribute it freely, +// subject to the following restrictions: +// +// 1. The author is not responsible for the consequences of use of +// this software, no matter how awful, even if they arise +// from defects in it. +// +// 2. The origin of this software must not be misrepresented, either +// by explicit claim or by omission. +// +// 3. Altered versions must be plainly marked as such, and must not +// be misrepresented as being the original software. +// +// Beware that some of this code is subtly aware of the way operator +// precedence is structured in regular expressions. Serious changes in +// regular-expression syntax might require a total rethink. +// +// Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert +// Webb, Ciaran McCreesh and Bram Moolenaar. +// Named character class support added by Walter Briscoe (1998 Jul 01) + +// The "internal use only" fields in regexp_defs.h are present to pass info from +// compile to execute that permits the execute phase to run lots faster on +// simple cases. They are: +// +// regstart char that must begin a match; NUL if none obvious; Can be a +// multi-byte character. +// reganch is the match anchored (at beginning-of-line only)? +// regmust string (pointer into program) that match must include, or NULL +// regmlen length of regmust string +// regflags RF_ values or'ed together +// +// Regstart and reganch permit very fast decisions on suitable starting points +// for a match, cutting down the work a lot. Regmust permits fast rejection +// of lines that cannot possibly match. The regmust tests are costly enough +// that vim_regcomp() supplies a regmust only if the r.e. contains something +// potentially expensive (at present, the only such thing detected is * or + +// at the start of the r.e., which can involve a lot of backup). Regmlen is +// supplied because the test in vim_regexec() needs it and vim_regcomp() is +// computing it anyway. + +// Structure for regexp "program". This is essentially a linear encoding +// of a nondeterministic finite-state machine (aka syntax charts or +// "railroad normal form" in parsing technology). Each node is an opcode +// plus a "next" pointer, possibly plus an operand. "Next" pointers of +// all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" +// pointer with a BRANCH on both ends of it is connecting two alternatives. +// (Here we have one of the subtle syntax dependencies: an individual BRANCH +// (as opposed to a collection of them) is never concatenated with anything +// because of operator precedence). The "next" pointer of a BRACES_COMPLEX +// node points to the node after the stuff to be repeated. +// The operand of some types of node is a literal string; for others, it is a +// node leading into a sub-FSM. In particular, the operand of a BRANCH node +// is the first node of the branch. +// (NB this is *not* a tree structure: the tail of the branch connects to the +// thing following the set of BRANCHes.) +// +// pattern is coded like: +// +// +-----------------+ +// | V +// <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END +// | ^ | ^ +// +------+ +----------+ +// +// +// +------------------+ +// V | +// <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END +// | | ^ ^ +// | +---------------+ | +// +---------------------------------------------+ +// +// +// +----------------------+ +// V | +// <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END +// | | ^ ^ +// | +-----------+ | +// +--------------------------------------------------+ +// +// +// +-------------------------+ +// V | +// <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END +// | | ^ +// | +----------------+ +// +-----------------------------------------------+ +// +// +// <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END +// | | ^ ^ +// | +----------------+ | +// +--------------------------------+ +// +// +---------+ +// | V +// \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END +// | | | | ^ ^ +// | | | +-----+ | +// | | +----------------+ | +// | +---------------------------+ | +// +------------------------------------------------------+ +// +// They all start with a BRANCH for "\|" alternatives, even when there is only +// one alternative. #include <assert.h> #include <inttypes.h> @@ -141,9 +134,7 @@ #include "nvim/garray.h" #include "nvim/regexp.h" -/* - * The opcodes are: - */ +// The opcodes are: // definition number opnd? meaning #define END 0 // End of program or NOMATCH operand. @@ -240,9 +231,7 @@ #define RE_VISUAL 208 // Match Visual area #define RE_COMPOSING 209 // any composing characters -/* - * Flags to be passed up and down. - */ +// Flags to be passed up and down. #define HASWIDTH 0x1 // Known never to match null string. #define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand. #define SPSTART 0x4 // Starts with * or +. @@ -273,10 +262,8 @@ static int classcodes[] = { UPPER, NUPPER }; -/* - * When regcode is set to this value, code is not emitted and size is computed - * instead. - */ +// When regcode is set to this value, code is not emitted and size is computed +// instead. #define JUST_CALC_SIZE ((char_u *)-1) // Values for rs_state in regitem_T. @@ -297,11 +284,9 @@ typedef enum regstate_E { RS_STAR_SHORT, // STAR/PLUS/BRACE_SIMPLE shortest match } regstate_T; -/* - * Structure used to save the current input state, when it needs to be - * restored after trying a match. Used by reg_save() and reg_restore(). - * Also stores the length of "backpos". - */ +// Structure used to save the current input state, when it needs to be +// restored after trying a match. Used by reg_save() and reg_restore(). +// Also stores the length of "backpos". typedef struct { union { char_u *ptr; // rex.input pointer, for single-line regexp @@ -327,12 +312,10 @@ typedef struct regbehind_S { save_se_T save_end[NSUBEXP]; } regbehind_T; -/* - * When there are alternatives a regstate_T is put on the regstack to remember - * what we are doing. - * Before it may be another type of item, depending on rs_state, to remember - * more things. - */ +// When there are alternatives a regstate_T is put on the regstack to remember +// what we are doing. +// Before it may be another type of item, depending on rs_state, to remember +// more things. typedef struct regitem_S { regstate_T rs_state; // what we are doing, one of RS_ above int16_t rs_no; // submatch nr or BEHIND/NOBEHIND @@ -359,69 +342,63 @@ typedef struct backpos_S { regsave_T bp_pos; // last input position } backpos_T; -/* - * "regstack" and "backpos" are used by regmatch(). They are kept over calls - * to avoid invoking malloc() and free() often. - * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T - * or regbehind_T. - * "backpos_T" is a table with backpos_T for BACK - */ +// "regstack" and "backpos" are used by regmatch(). They are kept over calls +// to avoid invoking malloc() and free() often. +// "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T +// or regbehind_T. +// "backpos_T" is a table with backpos_T for BACK static garray_T regstack = GA_EMPTY_INIT_VALUE; static garray_T backpos = GA_EMPTY_INIT_VALUE; static regsave_T behind_pos; -/* - * Both for regstack and backpos tables we use the following strategy of - * allocation (to reduce malloc/free calls): - * - Initial size is fairly small. - * - When needed, the tables are grown bigger (8 times at first, double after - * that). - * - After executing the match we free the memory only if the array has grown. - * Thus the memory is kept allocated when it's at the initial size. - * This makes it fast while not keeping a lot of memory allocated. - * A three times speed increase was observed when using many simple patterns. - */ +// Both for regstack and backpos tables we use the following strategy of +// allocation (to reduce malloc/free calls): +// - Initial size is fairly small. +// - When needed, the tables are grown bigger (8 times at first, double after +// that). +// - After executing the match we free the memory only if the array has grown. +// Thus the memory is kept allocated when it's at the initial size. +// This makes it fast while not keeping a lot of memory allocated. +// A three times speed increase was observed when using many simple patterns. #define REGSTACK_INITIAL 2048 #define BACKPOS_INITIAL 64 -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '=', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX - * node, and defines the min and max limits to be used for that - * node. - * - * MOPEN,MCLOSE ...are numbered at compile time. - * ZOPEN,ZCLOSE ...ditto - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit bytes, high order first. The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ +// Opcode notes: +// +// BRANCH The set of branches constituting a single choice are hooked +// together with their "next" pointers, since precedence prevents +// anything being concatenated to any individual branch. The +// "next" pointer of the last BRANCH in a choice points to the +// thing following the whole choice. This is also where the +// final "next" pointer of each individual branch points; each +// branch starts with the operand node of a BRANCH node. +// +// BACK Normal "next" pointers all implicitly point forward; BACK +// exists to make loop structures possible. +// +// STAR,PLUS '=', and complex '*' and '+', are implemented as circular +// BRANCH structures using BACK. Simple cases (one character +// per match) are implemented with STAR and PLUS for speed +// and to minimize recursive plunges. +// +// BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX +// node, and defines the min and max limits to be used for that +// node. +// +// MOPEN,MCLOSE ...are numbered at compile time. +// ZOPEN,ZCLOSE ...ditto +/// +// +// +// A node is one char of opcode followed by two chars of "next" pointer. +// "Next" pointers are stored as two 8-bit bytes, high order first. The +// value is a positive offset from the opcode of the node containing it. +// An operand, if any, simply follows the node. (Note that much of the +// code generation knows about this implicit relationship.) +// +// Using two bytes for the "next" pointer is vast overkill for most things, +// but allows patterns to get big without disasters. #define OP(p) ((int)(*(p))) #define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) #define OPERAND(p) ((p) + 3) @@ -449,9 +426,7 @@ static int regnarrate = 0; # include "regexp_bt.c.generated.h" #endif -/* - * Setup to parse the regexp. Used once to get the length and once to do it. - */ +// Setup to parse the regexp. Used once to get the length and once to do it. static void regcomp_start(char_u *expr, int re_flags) // see vim_regcomp() { initchr(expr); @@ -484,9 +459,7 @@ static bool use_multibytecode(int c) || utf_iscomposing(c)); } -/* - * Emit (if appropriate) a byte of code - */ +// Emit (if appropriate) a byte of code static void regc(int b) { if (regcode == JUST_CALC_SIZE) { @@ -496,9 +469,7 @@ static void regc(int b) } } -/* - * Emit (if appropriate) a multi-byte character of code - */ +// Emit (if appropriate) a multi-byte character of code static void regmbc(int c) { if (regcode == JUST_CALC_SIZE) { @@ -508,11 +479,9 @@ static void regmbc(int c) } } -/* - * Produce the bytes for equivalence class "c". - * Currently only handles latin1, latin9 and utf-8. - * NOTE: When changing this function, also change nfa_emit_equi_class() - */ +// Produce the bytes for equivalence class "c". +// Currently only handles latin1, latin9 and utf-8. +// NOTE: When changing this function, also change nfa_emit_equi_class() static void reg_equi_class(int c) { { @@ -1481,10 +1450,8 @@ static void reg_equi_class(int c) regmbc(c); } -/* - * Emit a node. - * Return pointer to generated code. - */ +// Emit a node. +// Return pointer to generated code. static char_u *regnode(int op) { char_u *ret; @@ -1500,9 +1467,7 @@ static char_u *regnode(int op) return ret; } -/* - * Write a four bytes number at "p" and return pointer to the next char. - */ +// Write a four bytes number at "p" and return pointer to the next char. static char_u *re_put_uint32(char_u *p, uint32_t val) { *p++ = (char_u)((val >> 24) & 0377); @@ -1512,11 +1477,9 @@ static char_u *re_put_uint32(char_u *p, uint32_t val) return p; } -/* - * regnext - dig the "next" pointer out of a node - * Returns NULL when calculating size, when there is no next item and when - * there is an error. - */ +// regnext - dig the "next" pointer out of a node +// Returns NULL when calculating size, when there is no next item and when +// there is an error. static char_u *regnext(char_u *p) FUNC_ATTR_NONNULL_ALL { @@ -1573,9 +1536,7 @@ static void regtail(char_u *p, char_u *val) } } -/* - * Like regtail, on item after a BRANCH; nop if none. - */ +// Like regtail, on item after a BRANCH; nop if none. static void regoptail(char_u *p, char_u *val) { // When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" @@ -1587,11 +1548,9 @@ static void regoptail(char_u *p, char_u *val) regtail(OPERAND(p), val); } -/* - * Insert an operator in front of already-emitted operand - * - * Means relocating the operand. - */ +// Insert an operator in front of already-emitted operand +// +// Means relocating the operand. static void reginsert(int op, char_u *opnd) { char_u *src; @@ -1615,10 +1574,8 @@ static void reginsert(int op, char_u *opnd) *place = NUL; } -/* - * Insert an operator in front of already-emitted operand. - * Add a number to the operator. - */ +// Insert an operator in front of already-emitted operand. +// Add a number to the operator. static void reginsert_nr(int op, long val, char_u *opnd) { char_u *src; @@ -1644,12 +1601,10 @@ static void reginsert_nr(int op, long val, char_u *opnd) re_put_uint32(place, (uint32_t)val); } -/* - * Insert an operator in front of already-emitted operand. - * The operator has the given limit values as operands. Also set next pointer. - * - * Means relocating the operand. - */ +// Insert an operator in front of already-emitted operand. +// The operator has the given limit values as operands. Also set next pointer. +// +// Means relocating the operand. static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) { char_u *src; @@ -1704,13 +1659,11 @@ static int seen_endbrace(int refnum) return true; } -/* - * Parse the lowest level. - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Don't do this when one_exactly is set. - */ +// Parse the lowest level. +// +// Optimization: gobbles an entire sequence of ordinary characters so that +// it can turn them into a single node, which is smaller to store and +// faster to run. Don't do this when one_exactly is set. static char_u *regatom(int *flagp) { char_u *ret; @@ -2289,8 +2242,7 @@ collection: if (c_class != 0) { // produce equivalence class reg_equi_class(c_class); - } else if ((c_class = - get_coll_element(®parse)) != 0) { + } else if ((c_class = get_coll_element(®parse)) != 0) { // produce a collating element regmbc(c_class); } else { @@ -2466,7 +2418,7 @@ do_multibyte: for (len = 0; c != NUL && (len == 0 || (re_multi_type(peekchr()) == NOT_MULTI && !one_exactly - && !is_Magic(c))); ++len) { + && !is_Magic(c))); len++) { c = no_Magic(c); { regmbc(c); @@ -2500,15 +2452,13 @@ do_multibyte: return ret; } -/* - * Parse something followed by possible [*+=]. - * - * Note that the branching code sequences used for = and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. - */ +// Parse something followed by possible [*+=]. +// +// Note that the branching code sequences used for = and the general cases +// of * and + are somewhat optimized: they use the same NOTHING node as +// both the endmarker for their branch list and the body of the last branch. +// It might seem that this node could be dispensed with entirely, but the +// endmarker role is not redundant. static char_u *regpiece(int *flagp) { char_u *ret; @@ -2644,10 +2594,8 @@ static char_u *regpiece(int *flagp) return ret; } -/* - * Parse one alternative of an | or & operator. - * Implements the concatenation operator. - */ +// Parse one alternative of an | or & operator. +// Implements the concatenation operator. static char_u *regconcat(int *flagp) { char_u *first = NULL; @@ -2722,10 +2670,8 @@ static char_u *regconcat(int *flagp) return first; } -/* - * Parse one alternative of an | operator. - * Implements the & operator. - */ +// Parse one alternative of an | operator. +// Implements the & operator. static char_u *regbranch(int *flagp) { char_u *ret; @@ -2874,27 +2820,25 @@ static char_u *reg(int paren, int *flagp) return ret; } -/* - * bt_regcomp() - compile a regular expression into internal code for the - * traditional back track matcher. - * Returns the program in allocated space. Returns NULL for an error. - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) - * - * Whether upper/lower case is to be ignored is decided when executing the - * program, it does not matter here. - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - * "re_flags": RE_MAGIC and/or RE_STRING. - */ +// bt_regcomp() - compile a regular expression into internal code for the +// traditional back track matcher. +// Returns the program in allocated space. Returns NULL for an error. +// +// We can't allocate space until we know how big the compiled form will be, +// but we can't compile it (and thus know how big it is) until we've got a +// place to put the code. So we cheat: we compile it twice, once with code +// generation turned off and size counting turned on, and once "for real". +// This also means that we don't allocate space until we are sure that the +// thing really will compile successfully, and we never have to move the +// code and thus invalidate pointers into it. (Note that it has to be in +// one piece because free() must be able to free it all.) +// +// Whether upper/lower case is to be ignored is decided when executing the +// program, it does not matter here. +// +// Beware that the optimization-preparation code in here knows about some +// of the structure of the compiled regexp. +// "re_flags": RE_MAGIC and/or RE_STRING. static regprog_T *bt_regcomp(char_u *expr, int re_flags) { char_u *scan; @@ -2999,19 +2943,15 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) return (regprog_T *)r; } -/* - * Check if during the previous call to vim_regcomp the EOL item "$" has been - * found. This is messy, but it works fine. - */ +// Check if during the previous call to vim_regcomp the EOL item "$" has been +// found. This is messy, but it works fine. int vim_regcomp_had_eol(void) { return had_eol; } -/* - * Get a number after a backslash that is inside []. - * When nothing is recognized return a backslash. - */ +// Get a number after a backslash that is inside []. +// When nothing is recognized return a backslash. static int coll_get_char(void) { int64_t nr = -1; @@ -3037,9 +2977,7 @@ static int coll_get_char(void) return (int)nr; } -/* - * Free a compiled regexp program, returned by bt_regcomp(). - */ +// Free a compiled regexp program, returned by bt_regcomp(). static void bt_regfree(regprog_T *prog) { xfree(prog); @@ -3047,11 +2985,9 @@ static void bt_regfree(regprog_T *prog) #define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) -/* - * The arguments from BRACE_LIMITS are stored here. They are actually local - * to regmatch(), but they are here to reduce the amount of stack space used - * (it can be called recursively many times). - */ +// The arguments from BRACE_LIMITS are stored here. They are actually local +// to regmatch(), but they are here to reduce the amount of stack space used +// (it can be called recursively many times). static long bl_minval; static long bl_maxval; @@ -3108,13 +3044,11 @@ static bool reg_save_equal(const regsave_T *save) else /* NOLINT */ \ *(pp) = (savep)->se_u.ptr; } -/* - * Tentatively set the sub-expression start to the current position (after - * calling regmatch() they will have changed). Need to save the existing - * values for when there is no match. - * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), - * depending on REG_MULTI. - */ +// Tentatively set the sub-expression start to the current position (after +// calling regmatch() they will have changed). Need to save the existing +// values for when there is no match. +// Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), +// depending on REG_MULTI. static void save_se_multi(save_se_T *savep, lpos_T *posp) { savep->se_u.pos = *posp; @@ -3494,10 +3428,8 @@ do_class: return (int)count; } -/* - * Push an item onto the regstack. - * Returns pointer to new item. Returns NULL when out of memory. - */ +// Push an item onto the regstack. +// Returns pointer to new item. Returns NULL when out of memory. static regitem_T *regstack_push(regstate_T state, char_u *scan) { regitem_T *rp; @@ -3516,9 +3448,7 @@ static regitem_T *regstack_push(regstate_T state, char_u *scan) return rp; } -/* - * Pop an item from the regstack. - */ +// Pop an item from the regstack. static void regstack_pop(char_u **scan) { regitem_T *rp; @@ -4643,7 +4573,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) // Pop the state. Restore pointers when there is no match. if (status == RA_NOMATCH) { reg_restore(&rp->rs_un.regsave, &backpos); - --brace_count[rp->rs_no]; // decrement match count + brace_count[rp->rs_no]--; // decrement match count } regstack_pop(&scan); break; @@ -4653,7 +4583,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) if (status == RA_NOMATCH) { // There was no match, but we did find enough matches. reg_restore(&rp->rs_un.regsave, &backpos); - --brace_count[rp->rs_no]; + brace_count[rp->rs_no]--; // continue with the items after "\{}" status = RA_CONT; } @@ -5247,9 +5177,7 @@ static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T return bt_regexec_both(NULL, col, tm, timed_out); } -/* - * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. - */ +// Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. static int re_num_cmp(uint32_t val, char_u *scan) { uint32_t n = (uint32_t)OPERAND_MIN(scan); @@ -5265,9 +5193,7 @@ static int re_num_cmp(uint32_t val, char_u *scan) #ifdef BT_REGEXP_DUMP -/* - * regdump - dump a regexp onto stdout in vaguely comprehensible form - */ +// regdump - dump a regexp onto stdout in vaguely comprehensible form static void regdump(char_u *pattern, bt_regprog_T *r) { char_u *s; @@ -5353,9 +5279,7 @@ static void regdump(char_u *pattern, bt_regprog_T *r) #ifdef REGEXP_DEBUG -/* - * regprop - printable representation of opcode - */ +// regprop - printable representation of opcode static char_u *regprop(char_u *op) { char *p; diff --git a/src/nvim/regexp_defs.h b/src/nvim/regexp_defs.h index b24ed350e8..ee32b8d13a 100644 --- a/src/nvim/regexp_defs.h +++ b/src/nvim/regexp_defs.h @@ -1,13 +1,11 @@ -/* - * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE - * - * This is NOT the original regular expression code as written by Henry - * Spencer. This code has been modified specifically for use with Vim, and - * should not be used apart from compiling Vim. If you want a good regular - * expression library, get the original code. - * - * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE - */ +// NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE +// +// This is NOT the original regular expression code as written by Henry +// Spencer. This code has been modified specifically for use with Vim, and +// should not be used apart from compiling Vim. If you want a good regular +// expression library, get the original code. +// +// NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE #ifndef NVIM_REGEXP_DEFS_H #define NVIM_REGEXP_DEFS_H @@ -17,18 +15,14 @@ #include "nvim/pos.h" #include "nvim/types.h" -/* - * The number of sub-matches is limited to 10. - * The first one (index 0) is the whole match, referenced with "\0". - * The second one (index 1) is the first sub-match, referenced with "\1". - * This goes up to the tenth (index 9), referenced with "\9". - */ +// The number of sub-matches is limited to 10. +// The first one (index 0) is the whole match, referenced with "\0". +// The second one (index 1) is the first sub-match, referenced with "\1". +// This goes up to the tenth (index 9), referenced with "\9". #define NSUBEXP 10 -/* - * In the NFA engine: how many braces are allowed. - * TODO(RE): Use dynamic memory allocation instead of static, like here - */ +// In the NFA engine: how many braces are allowed. +// TODO(RE): Use dynamic memory allocation instead of static, like here #define NFA_MAX_BRACES 20 // In the NFA engine: how many states are allowed. @@ -61,11 +55,9 @@ typedef struct { #include "nvim/buffer_defs.h" -/* - * Structure returned by vim_regcomp() to pass on to vim_regexec(). - * This is the general structure. For the actual matcher, two specific - * structures are used. See code below. - */ +// Structure returned by vim_regcomp() to pass on to vim_regexec(). +// This is the general structure. For the actual matcher, two specific +// structures are used. See code below. struct regprog { regengine_T *engine; unsigned regflags; @@ -74,11 +66,9 @@ struct regprog { bool re_in_use; ///< prog is being executed }; -/* - * Structure used by the back track matcher. - * These fields are only to be used in regexp.c! - * See regexp.c for an explanation. - */ +// Structure used by the back track matcher. +// These fields are only to be used in regexp.c! +// See regexp.c for an explanation. typedef struct { // These four members implement regprog_T. regengine_T *engine; @@ -107,9 +97,7 @@ struct nfa_state { int val; }; -/* - * Structure used by the NFA matcher. - */ +// Structure used by the NFA matcher. typedef struct { // These four members implement regprog_T. regengine_T *engine; @@ -133,11 +121,9 @@ typedef struct { nfa_state_T state[1]; // actually longer.. } nfa_regprog_T; -/* - * Structure to be used for single-line matching. - * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". - * When there is no match, the pointer is NULL. - */ +// Structure to be used for single-line matching. +// Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". +// When there is no match, the pointer is NULL. typedef struct { regprog_T *regprog; char *startp[NSUBEXP]; @@ -145,11 +131,9 @@ typedef struct { bool rm_ic; } regmatch_T; -/* - * Structure used to store external references: "\z\(\)" to "\z\1". - * Use a reference count to avoid the need to copy this around. When it goes - * from 1 to zero the matches need to be freed. - */ +// Structure used to store external references: "\z\(\)" to "\z\1". +// Use a reference count to avoid the need to copy this around. When it goes +// from 1 to zero the matches need to be freed. struct reg_extmatch { int16_t refcnt; char_u *matches[NSUBEXP]; diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index d4d2ed28cc..c4102c40ec 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -1,11 +1,9 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -/* - * NFA regular expression implementation. - * - * This file is included in "regexp.c". - */ +// NFA regular expression implementation. +// +// This file is included in "regexp.c". #include <assert.h> #include <inttypes.h> @@ -383,10 +381,8 @@ static void nfa_regcomp_start(char_u *expr, int re_flags) regcomp_start(expr, re_flags); } -/* - * Figure out if the NFA state list starts with an anchor, must match at start - * of the line. - */ +// Figure out if the NFA state list starts with an anchor, must match at start +// of the line. static int nfa_get_reganch(nfa_state_T *start, int depth) { nfa_state_T *p = start; @@ -441,10 +437,8 @@ static int nfa_get_reganch(nfa_state_T *start, int depth) return 0; } -/* - * Figure out if the NFA state list starts with a character which must match - * at start of the match. - */ +// Figure out if the NFA state list starts with a character which must match +// at start of the match. static int nfa_get_regstart(nfa_state_T *start, int depth) { nfa_state_T *p = start; @@ -521,11 +515,9 @@ static int nfa_get_regstart(nfa_state_T *start, int depth) return 0; } -/* - * Figure out if the NFA state list contains just literal text and nothing - * else. If so return a string in allocated memory with what must match after - * regstart. Otherwise return NULL. - */ +// Figure out if the NFA state list contains just literal text and nothing +// else. If so return a string in allocated memory with what must match after +// regstart. Otherwise return NULL. static char_u *nfa_get_match_text(nfa_state_T *start) { nfa_state_T *p = start; @@ -557,10 +549,8 @@ static char_u *nfa_get_match_text(nfa_state_T *start) return ret; } -/* - * Allocate more space for post_start. Called when - * running above the estimated number of states. - */ +// Allocate more space for post_start. Called when +// running above the estimated number of states. static void realloc_post_list(void) { // For weird patterns the number of states can be very high. Increasing by @@ -572,16 +562,14 @@ static void realloc_post_list(void) post_start = new_start; } -/* - * Search between "start" and "end" and try to recognize a - * character class in expanded form. For example [0-9]. - * On success, return the id the character class to be emitted. - * On failure, return 0 (=FAIL) - * Start points to the first char of the range, while end should point - * to the closing brace. - * Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may - * need to be interpreted as [a-zA-Z]. - */ +// Search between "start" and "end" and try to recognize a +// character class in expanded form. For example [0-9]. +// On success, return the id the character class to be emitted. +// On failure, return 0 (=FAIL) +// Start points to the first char of the range, while end should point +// to the closing brace. +// Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may +// need to be interpreted as [a-zA-Z]. static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) { #define CLASS_not 0x80 @@ -700,14 +688,12 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) return FAIL; } -/* - * Produce the bytes for equivalence class "c". - * Currently only handles latin1, latin9 and utf-8. - * Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is - * equivalent to 'a OR b OR c' - * - * NOTE! When changing this function, also update reg_equi_class() - */ +// Produce the bytes for equivalence class "c". +// Currently only handles latin1, latin9 and utf-8. +// Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is +// equivalent to 'a OR b OR c' +// +// NOTE! When changing this function, also update reg_equi_class() static void nfa_emit_equi_class(int c) { #define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT); @@ -1778,26 +1764,22 @@ static void nfa_emit_equi_class(int c) #undef EMIT2 } -/* - * Code to parse regular expression. - * - * We try to reuse parsing functions in regexp.c to - * minimize surprise and keep the syntax consistent. - */ - -/* - * Parse the lowest level. - * - * An atom can be one of a long list of items. Many atoms match one character - * in the text. It is often an ordinary character or a character class. - * Braces can be used to make a pattern into an atom. The "\z(\)" construct - * is only for syntax highlighting. - * - * atom ::= ordinary-atom - * or \( pattern \) - * or \%( pattern \) - * or \z( pattern \) - */ +// Code to parse regular expression. +// +// We try to reuse parsing functions in regexp.c to +// minimize surprise and keep the syntax consistent. + +// Parse the lowest level. +// +// An atom can be one of a long list of items. Many atoms match one character +// in the text. It is often an ordinary character or a character class. +// Braces can be used to make a pattern into an atom. The "\z(\)" construct +// is only for syntax highlighting. +// +// atom ::= ordinary-atom +// or \( pattern \) +// or \%( pattern \) +// or \z( pattern \) static int nfa_regatom(void) { int c; @@ -1862,9 +1844,7 @@ static int nfa_regatom(void) // "\_x" is character class plus newline FALLTHROUGH; - /* - * Character classes. - */ + // Character classes. case Magic('.'): case Magic('i'): case Magic('I'): @@ -2228,24 +2208,20 @@ static int nfa_regatom(void) case Magic('['): collection: - /* - * [abc] uses NFA_START_COLL - NFA_END_COLL - * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL - * Each character is produced as a regular state, using - * NFA_CONCAT to bind them together. - * Besides normal characters there can be: - * - character classes NFA_CLASS_* - * - ranges, two characters followed by NFA_RANGE. - */ + // [abc] uses NFA_START_COLL - NFA_END_COLL + // [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL + // Each character is produced as a regular state, using + // NFA_CONCAT to bind them together. + // Besides normal characters there can be: + // - character classes NFA_CLASS_* + // - ranges, two characters followed by NFA_RANGE. p = (char_u *)regparse; endp = skip_anyof((char *)p); if (*endp == ']') { - /* - * Try to reverse engineer character classes. For example, - * recognize that [0-9] stands for \d and [A-Za-z_] for \h, - * and perform the necessary substitutions in the NFA. - */ + // Try to reverse engineer character classes. For example, + // recognize that [0-9] stands for \d and [A-Za-z_] for \h, + // and perform the necessary substitutions in the NFA. int result = nfa_recognize_char_class((char_u *)regparse, endp, extra == NFA_ADD_NL); if (result != FAIL) { if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL) { @@ -2259,10 +2235,8 @@ collection: MB_PTR_ADV(regparse); return OK; } - /* - * Failed to recognize a character class. Use the simple - * version that turns [abc] into 'a' OR 'b' OR 'c' - */ + // Failed to recognize a character class. Use the simple + // version that turns [abc] into 'a' OR 'b' OR 'c' startc = -1; negated = false; if (*regparse == '^') { // negated range @@ -2554,16 +2528,14 @@ nfa_do_multibyte: return OK; } -/* - * Parse something followed by possible [*+=]. - * - * A piece is an atom, possibly followed by a multi, an indication of how many - * times the atom can be matched. Example: "a*" matches any sequence of "a" - * characters: "", "a", "aa", etc. - * - * piece ::= atom - * or atom multi - */ +// Parse something followed by possible [*+=]. +// +// A piece is an atom, possibly followed by a multi, an indication of how many +// times the atom can be matched. Example: "a*" matches any sequence of "a" +// characters: "", "a", "aa", etc. +// +// piece ::= atom +// or atom multi static int nfa_regpiece(void) { int i; @@ -2601,17 +2573,15 @@ static int nfa_regpiece(void) break; case Magic('+'): - /* - * Trick: Normally, (a*)\+ would match the whole input "aaa". The - * first and only submatch would be "aaa". But the backtracking - * engine interprets the plus as "try matching one more time", and - * a* matches a second time at the end of the input, the empty - * string. - * The submatch will be the empty string. - * - * In order to be consistent with the old engine, we replace - * <atom>+ with <atom><atom>* - */ + // Trick: Normally, (a*)\+ would match the whole input "aaa". The + // first and only submatch would be "aaa". But the backtracking + // engine interprets the plus as "try matching one more time", and + // a* matches a second time at the end of the input, the empty + // string. + // The submatch will be the empty string. + // + // In order to be consistent with the old engine, we replace + // <atom>+ with <atom><atom>* restore_parse_state(&old_state); curchr = -1; if (nfa_regatom() == FAIL) { @@ -2770,16 +2740,14 @@ static int nfa_regpiece(void) return OK; } -/* - * Parse one or more pieces, concatenated. It matches a match for the - * first piece, followed by a match for the second piece, etc. Example: - * "f[0-9]b", first matches "f", then a digit and then "b". - * - * concat ::= piece - * or piece piece - * or piece piece piece - * etc. - */ +// Parse one or more pieces, concatenated. It matches a match for the +// first piece, followed by a match for the second piece, etc. Example: +// "f[0-9]b", first matches "f", then a digit and then "b". +// +// concat ::= piece +// or piece piece +// or piece piece piece +// etc. static int nfa_regconcat(void) { bool cont = true; @@ -2843,18 +2811,16 @@ static int nfa_regconcat(void) return OK; } -/* - * Parse a branch, one or more concats, separated by "\&". It matches the - * last concat, but only if all the preceding concats also match at the same - * position. Examples: - * "foobeep\&..." matches "foo" in "foobeep". - * ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob" - * - * branch ::= concat - * or concat \& concat - * or concat \& concat \& concat - * etc. - */ +// Parse a branch, one or more concats, separated by "\&". It matches the +// last concat, but only if all the preceding concats also match at the same +// position. Examples: +// "foobeep\&..." matches "foo" in "foobeep". +// ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob" +// +// branch ::= concat +// or concat \& concat +// or concat \& concat \& concat +// etc. static int nfa_regbranch(void) { int old_post_pos; @@ -3311,9 +3277,7 @@ static FILE *log_fd; static char_u e_log_open_failed[] = N_("Could not open temporary log file for writing, displaying on stderr... "); -/* - * Print the postfix notation of the current regexp. - */ +// Print the postfix notation of the current regexp. static void nfa_postfix_dump(char_u *expr, int retval) { int *p; @@ -3341,9 +3305,7 @@ static void nfa_postfix_dump(char_u *expr, int retval) } } -/* - * Print the NFA starting with a root node "state". - */ +// Print the NFA starting with a root node "state". static void nfa_print_state(FILE *debugf, nfa_state_T *state) { garray_T indent; @@ -3413,9 +3375,7 @@ static void nfa_print_state2(FILE *debugf, nfa_state_T *state, garray_T *indent) ga_append(indent, NUL); } -/* - * Print the NFA state machine. - */ +// Print the NFA state machine. static void nfa_dump(nfa_regprog_T *prog) { FILE *debugf = fopen(NFA_REGEXP_DUMP_LOG, "a"); @@ -3437,12 +3397,10 @@ static void nfa_dump(nfa_regprog_T *prog) fclose(debugf); } } -#endif /* REGEXP_DEBUG */ +#endif // REGEXP_DEBUG -/* - * Parse r.e. @expr and convert it into postfix form. - * Return the postfix string on success, NULL otherwise. - */ +// Parse r.e. @expr and convert it into postfix form. +// Return the postfix string on success, NULL otherwise. static int *re2post(void) { if (nfa_reg(REG_NOPAREN) == FAIL) { @@ -3454,18 +3412,14 @@ static int *re2post(void) // NB. Some of the code below is inspired by Russ's. -/* - * Represents an NFA state plus zero or one or two arrows exiting. - * if c == MATCH, no arrows out; matching state. - * If c == SPLIT, unlabeled arrows to out and out1 (if != NULL). - * If c < 256, labeled arrow with character c to out. - */ +// Represents an NFA state plus zero or one or two arrows exiting. +// if c == MATCH, no arrows out; matching state. +// If c == SPLIT, unlabeled arrows to out and out1 (if != NULL). +// If c < 256, labeled arrow with character c to out. static nfa_state_T *state_ptr; // points to nfa_prog->state -/* - * Allocate and initialize nfa_state_T. - */ +// Allocate and initialize nfa_state_T. static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1) { nfa_state_T *s; @@ -3488,16 +3442,12 @@ static nfa_state_T *alloc_state(int c, nfa_state_T *out, nfa_state_T *out1) return s; } -/* - * A partially built NFA without the matching state filled in. - * Frag_T.start points at the start state. - * Frag_T.out is a list of places that need to be set to the - * next state for this fragment. - */ +// A partially built NFA without the matching state filled in. +// Frag_T.start points at the start state. +// Frag_T.out is a list of places that need to be set to the +// next state for this fragment. -/* - * Initialize a Frag_T struct and return it. - */ +// Initialize a Frag_T struct and return it. static Frag_T frag(nfa_state_T *start, Ptrlist *out) { Frag_T n; @@ -3507,9 +3457,7 @@ static Frag_T frag(nfa_state_T *start, Ptrlist *out) return n; } -/* - * Create singleton list containing just outp. - */ +// Create singleton list containing just outp. static Ptrlist *list1(nfa_state_T **outp) { Ptrlist *l; @@ -3519,9 +3467,7 @@ static Ptrlist *list1(nfa_state_T **outp) return l; } -/* - * Patch the list of states at out to point to start. - */ +// Patch the list of states at out to point to start. static void patch(Ptrlist *l, nfa_state_T *s) { Ptrlist *next; @@ -3532,9 +3478,7 @@ static void patch(Ptrlist *l, nfa_state_T *s) } } -/* - * Join the two lists l1 and l2, returning the combination. - */ +// Join the two lists l1 and l2, returning the combination. static Ptrlist *append(Ptrlist *l1, Ptrlist *l2) { Ptrlist *oldl1; @@ -3547,9 +3491,7 @@ static Ptrlist *append(Ptrlist *l1, Ptrlist *l2) return oldl1; } -/* - * Stack used for transforming postfix form into NFA. - */ +// Stack used for transforming postfix form into NFA. static Frag_T empty; static void st_error(int *postfix, int *end, int *p) @@ -3592,9 +3534,7 @@ static void st_error(int *postfix, int *end, int *p) emsg(_("E874: (NFA) Could not pop the stack!")); } -/* - * Push an item onto the stack. - */ +// Push an item onto the stack. static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end) { Frag_T *stackp = *p; @@ -3606,9 +3546,7 @@ static void st_push(Frag_T s, Frag_T **p, Frag_T *stack_end) *p = *p + 1; } -/* - * Pop an item from the stack. - */ +// Pop an item from the stack. static Frag_T st_pop(Frag_T **p, Frag_T *stack) { Frag_T *stackp; @@ -3621,10 +3559,8 @@ static Frag_T st_pop(Frag_T **p, Frag_T *stack) return **p; } -/* - * Estimate the maximum byte length of anything matching "state". - * When unknown or unlimited return -1. - */ +// Estimate the maximum byte length of anything matching "state". +// When unknown or unlimited return -1. static int nfa_max_width(nfa_state_T *startstate, int depth) { int l, r; @@ -3827,10 +3763,8 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) return -1; } -/* - * Convert a postfix form into its equivalent NFA. - * Return the NFA start state on success, NULL otherwise. - */ +// Convert a postfix form into its equivalent NFA. +// Return the NFA start state on success, NULL otherwise. static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) { int *p; @@ -3866,7 +3800,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) stack_end = stack + (nstate + 1); } - for (p = postfix; p < end; ++p) { + for (p = postfix; p < end; p++) { switch (*p) { case NFA_CONCAT: // Concatenation. @@ -4350,15 +4284,13 @@ theend: #undef PUSH } -/* - * After building the NFA program, inspect it to add optimization hints. - */ +// After building the NFA program, inspect it to add optimization hints. static void nfa_postprocess(nfa_regprog_T *prog) { int i; int c; - for (i = 0; i < prog->nstate; ++i) { + for (i = 0; i < prog->nstate; i++) { c = prog->state[i].c; if (c == NFA_START_INVISIBLE || c == NFA_START_INVISIBLE_NEG @@ -4490,9 +4422,7 @@ static void clear_sub(regsub_T *sub) sub->in_use = 0; } -/* - * Copy the submatches from "from" to "to". - */ +// Copy the submatches from "from" to "to". static void copy_sub(regsub_T *to, regsub_T *from) { to->in_use = from->in_use; @@ -4508,9 +4438,7 @@ static void copy_sub(regsub_T *to, regsub_T *from) } } -/* - * Like copy_sub() but exclude the main match. - */ +// Like copy_sub() but exclude the main match. static void copy_sub_off(regsub_T *to, regsub_T *from) { if (to->in_use < from->in_use) { @@ -4528,9 +4456,7 @@ static void copy_sub_off(regsub_T *to, regsub_T *from) } } -/* - * Like copy_sub() but only do the end of the main match if \ze is present. - */ +// Like copy_sub() but only do the end of the main match if \ze is present. static void copy_ze_off(regsub_T *to, regsub_T *from) { if (rex.nfa_has_zend) { @@ -4954,7 +4880,7 @@ static regsubs_T *addstate(nfa_list_T *l, nfa_state_T *state, regsubs_T *subs_ar // When called from addstate_here() do insert before // existing states. if (add_here) { - for (k = 0; k < l->n && k < listindex; ++k) { + for (k = 0; k < l->n && k < listindex; k++) { if (l->t[k].state->id == state->id) { found = true; break; @@ -5094,7 +5020,7 @@ skip_add: save_in_use = -1; } else { save_in_use = sub->in_use; - for (i = sub->in_use; i < subidx; ++i) { + for (i = sub->in_use; i < subidx; i++) { sub->list.multi[i].start_lnum = -1; sub->list.multi[i].end_lnum = -1; } @@ -5115,7 +5041,7 @@ skip_add: save_in_use = -1; } else { save_in_use = sub->in_use; - for (i = sub->in_use; i < subidx; ++i) { + for (i = sub->in_use; i < subidx; i++) { sub->list.line[i].start = NULL; sub->list.line[i].end = NULL; } @@ -5314,9 +5240,7 @@ static regsubs_T *addstate_here(nfa_list_T *l, nfa_state_T *state, regsubs_T *su return r; } -/* - * Check character class "class" against current character c. - */ +// Check character class "class" against current character c. static int check_char_class(int class, int c) { switch (class) { @@ -5502,11 +5426,9 @@ static int match_zref(int subidx, int *bytelen) return false; } -/* - * Save list IDs for all NFA states of "prog" into "list". - * Also reset the IDs to zero. - * Only used for the recursive value lastlist[1]. - */ +// Save list IDs for all NFA states of "prog" into "list". +// Also reset the IDs to zero. +// Only used for the recursive value lastlist[1]. static void nfa_save_listids(nfa_regprog_T *prog, int *list) { int i; @@ -5521,9 +5443,7 @@ static void nfa_save_listids(nfa_regprog_T *prog, int *list) } } -/* - * Restore list IDs from "list" to all NFA states. - */ +// Restore list IDs from "list" to all NFA states. static void nfa_restore_listids(nfa_regprog_T *prog, int *list) { int i; @@ -5547,11 +5467,9 @@ static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos) return val == pos; } -/* - * Recursively call nfa_regmatch() - * "pim" is NULL or contains info about a Postponed Invisible Match (start - * position). - */ +// Recursively call nfa_regmatch() +// "pim" is NULL or contains info about a Postponed Invisible Match (start +// position). static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len) FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7) @@ -5691,12 +5609,10 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T return result; } -/* - * Estimate the chance of a match with "state" failing. - * empty match: 0 - * NFA_ANY: 1 - * specific character: 99 - */ +// Estimate the chance of a match with "state" failing. +// empty match: 0 +// NFA_ANY: 1 +// specific character: 99 static int failure_chance(nfa_state_T *state, int depth) { int c = state->c; @@ -5851,9 +5767,7 @@ static int failure_chance(nfa_state_T *state, int depth) return 50; } -/* - * Skip until the char "c" we know a match must start with. - */ +// Skip until the char "c" we know a match must start with. static int skip_to_start(int c, colnr_T *colp) { const char_u *const s = cstrchr(rex.line + *colp, c); @@ -5864,11 +5778,9 @@ static int skip_to_start(int c, colnr_T *colp) return OK; } -/* - * Check for a match with match_text. - * Called after skip_to_start() has found regstart. - * Returns zero for no match, 1 for a match. - */ +// Check for a match with match_text. +// Called after skip_to_start() has found regstart. +// Returns zero for no match, 1 for a match. static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) { #define PTR2LEN(x) utf_ptr2len(x) @@ -6038,9 +5950,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm add_off = clen; \ } - /* - * Run for each character. - */ + // Run for each character. for (;;) { int curc = utf_ptr2char((char *)rex.input); int clen = utfc_ptr2len((char *)rex.input); @@ -6086,9 +5996,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm #ifdef NFA_REGEXP_DEBUG_LOG fprintf(debug, "\n-------------------\n"); #endif - /* - * If the state lists are empty we can stop. - */ + // If the state lists are empty we can stop. if (thislist->n == 0) { break; } @@ -6131,10 +6039,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm } #endif - /* - * Handle the possible codes of the current state. - * The most important is NFA_MATCH. - */ + // Handle the possible codes of the current state. + // The most important is NFA_MATCH. add_state = NULL; add_here = false; add_count = 0; @@ -7525,10 +7431,8 @@ theend: return retval; } -/* - * Compile a regular expression into internal code for the NFA matcher. - * Returns the program in allocated space. Returns NULL for an error. - */ +// Compile a regular expression into internal code for the NFA matcher. +// Returns the program in allocated space. Returns NULL for an error. static regprog_T *nfa_regcomp(char_u *expr, int re_flags) { nfa_regprog_T *prog = NULL; @@ -7554,11 +7458,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) goto fail; // Cascaded (syntax?) error } - /* - * In order to build the NFA, we parse the input regexp twice: - * 1. first pass to count size (so we can allocate space) - * 2. second to emit code - */ + // In order to build the NFA, we parse the input regexp twice: + // 1. first pass to count size (so we can allocate space) + // 2. second to emit code #ifdef REGEXP_DEBUG { FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a"); @@ -7573,10 +7475,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) } #endif - /* - * PASS 1 - * Count number of NFA states in "nstate". Do not build the NFA. - */ + // PASS 1 + // Count number of NFA states in "nstate". Do not build the NFA. post2nfa(postfix, post_ptr, true); // allocate the regprog with space for the compiled regexp @@ -7585,10 +7485,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) state_ptr = prog->state; prog->re_in_use = false; - /* - * PASS 2 - * Build the NFA - */ + // PASS 2 + // Build the NFA prog->start = post2nfa(postfix, post_ptr, false); if (prog->start == NULL) { goto fail; @@ -7632,9 +7530,7 @@ fail: goto out; } -/* - * Free a compiled regexp program, returned by nfa_regcomp(). - */ +// Free a compiled regexp program, returned by nfa_regcomp(). static void nfa_regfree(regprog_T *prog) { if (prog != NULL) { diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c index 7837f242b5..3983192f18 100644 --- a/src/nvim/spellfile.c +++ b/src/nvim/spellfile.c @@ -2465,9 +2465,9 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) aff_entry->ae_cond = (char_u *)getroom_save(spin, (char_u *)items[4]); if (*items[0] == 'P') { - sprintf((char *)buf, "^%s", items[4]); + sprintf((char *)buf, "^%s", items[4]); // NOLINT(runtime/printf) } else { - sprintf((char *)buf, "%s$", items[4]); + sprintf((char *)buf, "%s$", items[4]); // NOLINT(runtime/printf) } aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING + RE_STRICT); if (aff_entry->ae_prog == NULL) { @@ -2514,8 +2514,7 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) onecap_copy((char_u *)items[4], buf, true); aff_entry->ae_cond = (char_u *)getroom_save(spin, buf); if (aff_entry->ae_cond != NULL) { - sprintf((char *)buf, "^%s", - aff_entry->ae_cond); + sprintf((char *)buf, "^%s", aff_entry->ae_cond); // NOLINT(runtime/printf) vim_regfree(aff_entry->ae_prog); aff_entry->ae_prog = vim_regcomp((char *)buf, RE_MAGIC + RE_STRING); } @@ -3614,7 +3613,7 @@ static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afff if (store_aff_word(spin, newword, ae->ae_flags, affile, &affile->af_suff, xht, use_condit & (xht == NULL - ? ~0 : ~CONDIT_SUF), + ? ~0 : ~CONDIT_SUF), use_flags, use_pfxlist, pfxlen) == FAIL) { retval = FAIL; } diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 4d1401293b..7086f47e33 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -1035,9 +1035,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t : va_arg(ap, long long)); // NOLINT (runtime/int) break; case 'z': - arg = (tvs - ? (ptrdiff_t)tv_nr(tvs, &arg_idx) - : va_arg(ap, ptrdiff_t)); + arg = (tvs ? (ptrdiff_t)tv_nr(tvs, &arg_idx) : va_arg(ap, ptrdiff_t)); break; } if (arg > 0) { @@ -1049,19 +1047,13 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t // unsigned switch (length_modifier) { case '\0': - uarg = (unsigned int)(tvs - ? tv_nr(tvs, &arg_idx) - : va_arg(ap, unsigned int)); + uarg = (unsigned int)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int)); break; case 'h': - uarg = (uint16_t)(tvs - ? tv_nr(tvs, &arg_idx) - : va_arg(ap, unsigned int)); + uarg = (uint16_t)(tvs ? tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned int)); break; case 'l': - uarg = (tvs - ? (unsigned long)tv_nr(tvs, &arg_idx) - : va_arg(ap, unsigned long)); + uarg = (tvs ? (unsigned long)tv_nr(tvs, &arg_idx) : va_arg(ap, unsigned long)); break; case '2': uarg = (uintmax_t)(unsigned long long)( // NOLINT (runtime/int) @@ -1071,9 +1063,7 @@ int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap, t : va_arg(ap, unsigned long long)); // NOLINT (runtime/int) break; case 'z': - uarg = (tvs - ? (size_t)tv_nr(tvs, &arg_idx) - : va_arg(ap, size_t)); + uarg = (tvs ? (size_t)tv_nr(tvs, &arg_idx) : va_arg(ap, size_t)); break; } arg_sign = (uarg != 0); diff --git a/src/nvim/syntax.c b/src/nvim/syntax.c index fb82df4fe9..7248d1240e 100644 --- a/src/nvim/syntax.c +++ b/src/nvim/syntax.c @@ -1655,13 +1655,12 @@ static int syn_current_attr(const bool syncing, const bool displaying, bool *con && (spp->sp_type == SPTYPE_MATCH || spp->sp_type == SPTYPE_START) && (current_next_list != NULL - ? in_id_list(NULL, current_next_list, - &spp->sp_syn, 0) - : (cur_si == NULL - ? !(spp->sp_flags & HL_CONTAINED) - : in_id_list(cur_si, - cur_si->si_cont_list, &spp->sp_syn, - spp->sp_flags & HL_CONTAINED)))) { + ? in_id_list(NULL, current_next_list, &spp->sp_syn, 0) + : (cur_si == NULL + ? !(spp->sp_flags & HL_CONTAINED) + : in_id_list(cur_si, + cur_si->si_cont_list, &spp->sp_syn, + spp->sp_flags & HL_CONTAINED)))) { // If we already tried matching in this line, and // there isn't a match before next_match_col, skip // this item. @@ -2788,9 +2787,9 @@ static keyentry_T *match_keyword(char *keyword, hashtab_T *ht, stateitem_T *cur_ if (current_next_list != 0 ? in_id_list(NULL, current_next_list, &kp->k_syn, 0) : (cur_si == NULL - ? !(kp->flags & HL_CONTAINED) - : in_id_list(cur_si, cur_si->si_cont_list, - &kp->k_syn, kp->flags & HL_CONTAINED))) { + ? !(kp->flags & HL_CONTAINED) + : in_id_list(cur_si, cur_si->si_cont_list, + &kp->k_syn, kp->flags & HL_CONTAINED))) { return kp; } } diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 8028950c07..77f85b5d2d 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -628,8 +628,8 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) GET_CCS(ret, pline); ret.data.cmp.inv = (schar == '<'); ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) - ? kExprCmpGreaterOrEqual - : kExprCmpGreater); + ? kExprCmpGreaterOrEqual + : kExprCmpGreater); break; } @@ -1963,8 +1963,8 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat && ((*kv_Z(ast_stack, 1))->type != kExprNodeConcatOrSubscript)))) - ? kELFlagAllowFloat - : 0)); + ? kELFlagAllowFloat + : 0)); LexExprToken cur_token = viml_pexpr_next_token(pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); @@ -2031,9 +2031,9 @@ viml_pexpr_parse_process_token: const bool node_is_key = ( is_concat_or_subscript && (cur_token.type == kExprLexPlainIdentifier - ? (!cur_token.data.var.autoload - && cur_token.data.var.scope == kExprVarScopeMissing) - : (cur_token.type == kExprLexNumber)) + ? (!cur_token.data.var.autoload + && cur_token.data.var.scope == kExprVarScopeMissing) + : (cur_token.type == kExprLexNumber)) && prev_token.type != kExprLexSpacing); if (is_concat_or_subscript && !node_is_key) { // Note: in Vim "d. a" (this is the reason behind `prev_token.type != @@ -2707,14 +2707,14 @@ viml_pexpr_parse_figure_brace_closing_error: break; case kExprLexPlainIdentifier: { const ExprVarScope scope = (cur_token.type == kExprLexInvalid - ? kExprVarScopeMissing - : cur_token.data.var.scope); + ? kExprVarScopeMissing + : cur_token.data.var.scope); if (want_node == kENodeValue) { want_node = kENodeOperator; NEW_NODE_WITH_CUR_POS(cur_node, (node_is_key - ? kExprNodePlainKey - : kExprNodePlainIdentifier)); + ? kExprNodePlainKey + : kExprNodePlainIdentifier)); cur_node->data.var.scope = scope; const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2); cur_node->data.var.ident = (pline.data + cur_token.start.col @@ -2732,8 +2732,8 @@ viml_pexpr_parse_figure_brace_closing_error: scope_shift), cur_token.len - scope_shift, (node_is_key - ? HL(IdentifierKey) - : HL(IdentifierName))); + ? HL(IdentifierKey) + : HL(IdentifierName))); } else { if (scope == kExprVarScopeMissing) { // uncrustify:off @@ -2902,15 +2902,15 @@ viml_pexpr_parse_no_paren_closing_error: {} // different error numbers: "E114: Missing quote" and // "E115: Missing quote". ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double - ? _("E114: Missing double quote: %.*s") - : _("E115: Missing single quote: %.*s"))); + ? _("E114: Missing double quote: %.*s") + : _("E115: Missing single quote: %.*s"))); } if (want_node == kENodeOperator) { OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, (is_double - ? kExprNodeDoubleQuotedString - : kExprNodeSingleQuotedString)); + ? kExprNodeDoubleQuotedString + : kExprNodeSingleQuotedString)); *top_node_p = cur_node; parse_quoted_string(pstate, cur_node, cur_token, &ast_stack, is_invalid); want_node = kENodeOperator; |