diff options
author | Lewis Russell <lewis6991@gmail.com> | 2022-11-07 10:21:44 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-07 10:21:44 +0000 |
commit | bdb98de2d16ce7185a0f53740e06511904fdd814 (patch) | |
tree | d7206b68750c35d0b31113d5d8ec94c2f3ad86eb /src/nvim/regexp.c | |
parent | e9c1cb71f8a4d6d7818dcb5f71ac78bee431309a (diff) | |
download | rneovim-bdb98de2d16ce7185a0f53740e06511904fdd814.tar.gz rneovim-bdb98de2d16ce7185a0f53740e06511904fdd814.tar.bz2 rneovim-bdb98de2d16ce7185a0f53740e06511904fdd814.zip |
refactor: more clint (#20910)
Diffstat (limited to 'src/nvim/regexp.c')
-rw-r--r-- | src/nvim/regexp.c | 269 |
1 files changed, 99 insertions, 170 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 7a96889f22..27b5d198ac 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -1,9 +1,7 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -/* - * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() - */ +// Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() // By default: do not create debugging logs or files related to regular // expressions, even when compiling with -DDEBUG. @@ -41,21 +39,17 @@ # define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log" #endif -/* - * Magic characters have a special meaning, they don't match literally. - * Magic characters are negative. This separates them from literal characters - * (possibly multi-byte). Only ASCII characters can be Magic. - */ +// Magic characters have a special meaning, they don't match literally. +// Magic characters are negative. This separates them from literal characters +// (possibly multi-byte). Only ASCII characters can be Magic. #define Magic(x) ((int)(x) - 256) #define un_Magic(x) ((x) + 256) #define is_Magic(x) ((x) < 0) -/* - * We should define ftpr as a pointer to a function returning a pointer to - * a function returning a pointer to a function ... - * This is impossible, so we declare a pointer to a function returning a - * pointer to a function returning void. This should work for all compilers. - */ +// We should define ftpr as a pointer to a function returning a pointer to +// a function returning a pointer to a function ... +// This is impossible, so we declare a pointer to a function returning a +// pointer to a function returning void. This should work for all compilers. typedef void (*(*fptr_T)(int *, int))(void); static int no_Magic(int x) @@ -143,28 +137,24 @@ static int re_multi_type(int c) static char *reg_prev_sub = NULL; -/* - * REGEXP_INRANGE contains all characters which are always special in a [] - * range after '\'. - * REGEXP_ABBR contains all characters which act as abbreviations after '\'. - * These are: - * \n - New line (NL). - * \r - Carriage Return (CR). - * \t - Tab (TAB). - * \e - Escape (ESC). - * \b - Backspace (Ctrl_H). - * \d - Character code in decimal, eg \d123 - * \o - Character code in octal, eg \o80 - * \x - Character code in hex, eg \x4a - * \u - Multibyte character code, eg \u20ac - * \U - Long multibyte character code, eg \U12345678 - */ +// REGEXP_INRANGE contains all characters which are always special in a [] +// range after '\'. +// REGEXP_ABBR contains all characters which act as abbreviations after '\'. +// These are: +// \n - New line (NL). +// \r - Carriage Return (CR). +// \t - Tab (TAB). +// \e - Escape (ESC). +// \b - Backspace (Ctrl_H). +// \d - Character code in decimal, eg \d123 +// \o - Character code in octal, eg \o80 +// \x - Character code in hex, eg \x4a +// \u - Multibyte character code, eg \u20ac +// \U - Long multibyte character code, eg \U12345678 static char REGEXP_INRANGE[] = "]^-n\\"; static char REGEXP_ABBR[] = "nrtebdoxuU"; -/* - * Translate '\x' to its control character, except "\n", which is Magic. - */ +// Translate '\x' to its control character, except "\n", which is Magic. static int backslash_trans(int c) { switch (c) { @@ -239,10 +229,8 @@ static int get_char_class(char **pp) return CLASS_NONE; } -/* - * Specific version of character class functions. - * Using a table to keep this fast. - */ +// Specific version of character class functions. +// Using a table to keep this fast. static int16_t class_tab[256]; #define RI_DIGIT 0x01 @@ -325,9 +313,7 @@ static int reg_string; // matching with a string instead of a buffer // line static int reg_strict; // "[abc" is illegal -/* - * META contains all characters that may be magic, except '^' and '$'. - */ +// META contains all characters that may be magic, except '^' and '$'. // uncrustify:off @@ -391,11 +377,9 @@ int re_multiline(const regprog_T *prog) return prog->regflags & RF_HASNL; } -/* - * Check for an equivalence class name "[=a=]". "pp" points to the '['. - * Returns a character representing the class. Zero means that no item was - * recognized. Otherwise "pp" is advanced to after the item. - */ +// Check for an equivalence class name "[=a=]". "pp" points to the '['. +// Returns a character representing the class. Zero means that no item was +// recognized. Otherwise "pp" is advanced to after the item. static int get_equi_class(char **pp) { int c; @@ -413,12 +397,10 @@ static int get_equi_class(char **pp) return 0; } -/* - * Check for a collating element "[.a.]". "pp" points to the '['. - * Returns a character. Zero means that no item was recognized. Otherwise - * "pp" is advanced to after the item. - * Currently only single characters are recognized! - */ +// Check for a collating element "[.a.]". "pp" points to the '['. +// Returns a character. Zero means that no item was recognized. Otherwise +// "pp" is advanced to after the item. +// Currently only single characters are recognized! static int get_coll_element(char **pp) { int c; @@ -562,9 +544,7 @@ static int prevchr_len; // byte length of previous char static int at_start; // True when on the first character static int prev_at_start; // True when on the second character -/* - * Start parsing at "str". - */ +// Start parsing at "str". static void initchr(char_u *str) { regparse = (char *)str; @@ -574,10 +554,8 @@ static void initchr(char_u *str) prev_at_start = false; } -/* - * Save the current parse state, so that it can be restored and parsing - * starts in the same state again. - */ +// Save the current parse state, so that it can be restored and parsing +// starts in the same state again. static void save_parse_state(parse_state_T *ps) { ps->regparse = (char_u *)regparse; @@ -591,9 +569,7 @@ static void save_parse_state(parse_state_T *ps) ps->regnpar = regnpar; } -/* - * Restore a previously saved parse state. - */ +// Restore a previously saved parse state. static void restore_parse_state(parse_state_T *ps) { regparse = (char *)ps->regparse; @@ -607,9 +583,7 @@ static void restore_parse_state(parse_state_T *ps) regnpar = ps->regnpar; } -/* - * Get the next character without advancing. - */ +// Get the next character without advancing. static int peekchr(void) { static int after_slash = false; @@ -736,9 +710,7 @@ static int peekchr(void) after_slash--; curchr = toggle_Magic(curchr); } else if (vim_strchr(REGEXP_ABBR, c)) { - /* - * Handle abbreviations, like "\t" for TAB -- webb - */ + // Handle abbreviations, like "\t" for TAB -- webb curchr = backslash_trans(c); } else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) { curchr = toggle_Magic(c); @@ -757,9 +729,7 @@ static int peekchr(void) return curchr; } -/* - * Eat one lexed character. Do this in a way that we can undo it. - */ +// Eat one lexed character. Do this in a way that we can undo it. static void skipchr(void) { // peekchr() eats a backslash, do the same here @@ -781,10 +751,8 @@ static void skipchr(void) nextchr = -1; } -/* - * Skip a character while keeping the value of prev_at_start for at_start. - * prevchr and prevprevchr are also kept. - */ +// Skip a character while keeping the value of prev_at_start for at_start. +// prevchr and prevprevchr are also kept. static void skipchr_keepstart(void) { int as = prev_at_start; @@ -797,10 +765,8 @@ static void skipchr_keepstart(void) prevprevchr = prpr; } -/* - * Get the next character from the pattern. We know about magic and such, so - * therefore we need a lexical analyzer. - */ +// Get the next character from the pattern. We know about magic and such, so +// therefore we need a lexical analyzer. static int getchr(void) { int chr = peekchr(); @@ -809,9 +775,7 @@ static int getchr(void) return chr; } -/* - * put character back. Works only once! - */ +// put character back. Works only once! static void ungetchr(void) { nextchr = curchr; @@ -825,15 +789,13 @@ static void ungetchr(void) regparse -= prevchr_len; } -/* - * Get and return the value of the hex string at the current position. - * Return -1 if there is no valid hex number. - * The position is updated: - * blahblah\%x20asdf - * before-^ ^-after - * The parameter controls the maximum number of input characters. This will be - * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. - */ +// Get and return the value of the hex string at the current position. +// Return -1 if there is no valid hex number. +// The position is updated: +// blahblah\%x20asdf +// before-^ ^-after +// The parameter controls the maximum number of input characters. This will be +// 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. static int64_t gethexchrs(int maxinputlen) { int64_t nr = 0; @@ -856,10 +818,8 @@ static int64_t gethexchrs(int maxinputlen) return nr; } -/* - * Get and return the value of the decimal string immediately after the - * current position. Return -1 for invalid. Consumes all digits. - */ +// Get and return the value of the decimal string immediately after the +// current position. Return -1 for invalid. Consumes all digits. static int64_t getdecchrs(void) { int64_t nr = 0; @@ -883,14 +843,12 @@ static int64_t getdecchrs(void) return nr; } -/* - * get and return the value of the octal string immediately after the current - * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle - * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't - * treat 8 or 9 as recognised characters. Position is updated: - * blahblah\%o210asdf - * before-^ ^-after - */ +// get and return the value of the octal string immediately after the current +// position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle +// numbers > 377 correctly (for example, 400 is treated as 40) and doesn't +// treat 8 or 9 as recognised characters. Position is updated: +// blahblah\%o210asdf +// before-^ ^-after static int64_t getoctchrs(void) { int64_t nr = 0; @@ -913,12 +871,10 @@ static int64_t getoctchrs(void) return nr; } -/* - * read_limits - Read two integers to be taken as a minimum and maximum. - * If the first character is '-', then the range is reversed. - * Should end with 'end'. If minval is missing, zero is default, if maxval is - * missing, a very big number is the default. - */ +// read_limits - Read two integers to be taken as a minimum and maximum. +// If the first character is '-', then the range is reversed. +// Should end with 'end'. If minval is missing, zero is default, if maxval is +// missing, a very big number is the default. static int read_limits(long *minval, long *maxval) { int reverse = false; @@ -950,10 +906,8 @@ static int read_limits(long *minval, long *maxval) EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL); } - /* - * Reverse the range if there was a '-', or make sure it is in the right - * order otherwise. - */ + // Reverse the range if there was a '-', or make sure it is in the right + // order otherwise. if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) { tmp = *minval; *minval = *maxval; @@ -963,13 +917,9 @@ static int read_limits(long *minval, long *maxval) return OK; } -/* - * vim_regexec and friends - */ +// vim_regexec and friends -/* - * Global work variables for vim_regexec(). - */ +// Global work variables for vim_regexec(). // Sometimes need to save a copy of a line. Since alloc()/free() is very // slow, we keep one allocated piece of memory and only re-allocate it when @@ -1052,9 +1002,7 @@ static bool reg_iswordc(int c) return vim_iswordc_buf(c, rex.reg_buf); } -/* - * Get pointer to the line "lnum", which is relative to "reg_firstlnum". - */ +// Get pointer to the line "lnum", which is relative to "reg_firstlnum". static char_u *reg_getline(linenr_T lnum) { // when looking behind for a match/no-match lnum is negative. But we @@ -1077,9 +1025,7 @@ static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos // true if using multi-line regexp. #define REG_MULTI (rex.reg_match == NULL) -/* - * Create a new extmatch and mark it as referenced once. - */ +// Create a new extmatch and mark it as referenced once. static reg_extmatch_T *make_extmatch(void) FUNC_ATTR_NONNULL_RET { @@ -1088,9 +1034,7 @@ static reg_extmatch_T *make_extmatch(void) return em; } -/* - * Add a reference to an extmatch. - */ +// Add a reference to an extmatch. reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) { if (em != NULL) { @@ -1099,10 +1043,8 @@ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) return em; } -/* - * Remove a reference to an extmatch. If there are no references left, free - * the info. - */ +// Remove a reference to an extmatch. If there are no references left, free +// the info. void unref_extmatch(reg_extmatch_T *em) { int i; @@ -1201,10 +1143,8 @@ static bool reg_match_visual(void) return true; } -/* - * Check the regexp program for its magic number. - * Return true if it's wrong. - */ +// Check the regexp program for its magic number. +// Return true if it's wrong. static int prog_magic_wrong(void) { regprog_T *prog; @@ -1222,11 +1162,9 @@ static int prog_magic_wrong(void) return false; } -/* - * Cleanup the subexpressions, if this wasn't done yet. - * This construction is used to clear the subexpressions only when they are - * used (to increase speed). - */ +// Cleanup the subexpressions, if this wasn't done yet. +// This construction is used to clear the subexpressions only when they are +// used (to increase speed). static void cleanup_subexpr(void) { if (rex.need_clear_subexpr) { @@ -1265,12 +1203,10 @@ static void reg_nextline(void) fast_breakcheck(); } -/* - * Check whether a backreference matches. - * Returns RA_FAIL, RA_NOMATCH or RA_MATCH. - * If "bytelen" is not NULL, it is set to the byte length of the match in the - * last line. - */ +// Check whether a backreference matches. +// Returns RA_FAIL, RA_NOMATCH or RA_MATCH. +// If "bytelen" is not NULL, it is set to the byte length of the match in the +// last line. static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen) { @@ -1449,9 +1385,9 @@ static int cstrncmp(char *s1, char *s2, int *n) c1 = mb_ptr2char_adv((const char_u **)&str1); c2 = mb_ptr2char_adv((const char_u **)&str2); - /* decompose the character if necessary, into 'base' characters - * because I don't care about Arabic, I will hard-code the Hebrew - * which I *do* care about! So sue me... */ + // decompose the character if necessary, into 'base' characters + // because I don't care about Arabic, I will hard-code the Hebrew + // which I *do* care about! So sue me... if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) { // decomposition necessary? mb_decompose(c1, &c11, &junk, &junk); @@ -1566,7 +1502,7 @@ char *regtilde(char *source, int magic, bool preview) int len; int prevlen; - for (p = newsub; *p; ++p) { + for (p = newsub; *p; p++) { if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) { if (reg_prev_sub != NULL) { // length = len(newsub) - 1 + len(prev_sub) + 1 @@ -1871,12 +1807,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des *s = CAR; } else if (*s == '\\' && s[1] != NUL) { s++; - /* Change NL to CR here too, so that this works: - * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: - * abc\ - * def - * Not when called from vim_regexec_nl(). - */ + // Change NL to CR here too, so that this works: + // :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: + // abc{backslash} + // def + // Not when called from vim_regexec_nl(). if (*s == NL && !rsm.sm_line_lbr) { *s = CAR; } @@ -2172,10 +2107,8 @@ char *reg_submatch(int no) if (rsm.sm_match == NULL) { ssize_t len; - /* - * First round: compute the length and allocate memory. - * Second round: copy the text. - */ + // First round: compute the length and allocate memory. + // Second round: copy the text. for (round = 1; round <= 2; round++) { lnum = rsm.sm_mmatch->startpos[no].lnum; if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) { @@ -2216,7 +2149,7 @@ char *reg_submatch(int no) len++; } if (round == 2) { - STRNCPY(retval + len, reg_getline_submatch(lnum), + STRNCPY(retval + len, reg_getline_submatch(lnum), // NOLINT(runtime/printf) rsm.sm_mmatch->endpos[no].col); } len += rsm.sm_mmatch->endpos[no].col; @@ -2327,12 +2260,10 @@ static char_u regname[][30] = { }; #endif -/* - * Compile a regular expression into internal code. - * Returns the program in allocated memory. - * Use vim_regfree() to free the memory. - * Returns NULL for an error. - */ +// Compile a regular expression into internal code. +// Returns the program in allocated memory. +// Use vim_regfree() to free the memory. +// Returns NULL for an error. regprog_T *vim_regcomp(char *expr_arg, int re_flags) { regprog_T *prog = NULL; @@ -2413,9 +2344,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags) return prog; } -/* - * Free a compiled regexp program, returned by vim_regcomp(). - */ +// Free a compiled regexp program, returned by vim_regcomp(). void vim_regfree(regprog_T *prog) { if (prog != NULL) { |