diff options
author | Josh Rahm <joshuarahm@gmail.com> | 2023-01-25 17:57:01 +0000 |
---|---|---|
committer | Josh Rahm <joshuarahm@gmail.com> | 2023-01-25 17:57:01 +0000 |
commit | 9837de570c5972f98e74848edc97c297a13136ea (patch) | |
tree | cc948611912d116a3f98a744e690d3d7b6e2f59a /src/nvim/regexp.c | |
parent | c367400b73d207833d51e09d663f969ffab37531 (diff) | |
parent | 3c48d3c83fc21dbc0841f9210f04bdb073d73cd1 (diff) | |
download | rneovim-9837de570c5972f98e74848edc97c297a13136ea.tar.gz rneovim-9837de570c5972f98e74848edc97c297a13136ea.tar.bz2 rneovim-9837de570c5972f98e74848edc97c297a13136ea.zip |
Merge remote-tracking branch 'upstream/master' into colorcolchar
Diffstat (limited to 'src/nvim/regexp.c')
-rw-r--r-- | src/nvim/regexp.c | 636 |
1 files changed, 314 insertions, 322 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index e87382ff7c..122f3e2020 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -1,9 +1,7 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -/* - * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() - */ +// Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() // By default: do not create debugging logs or files related to regular // expressions, even when compiling with -DDEBUG. @@ -15,21 +13,34 @@ #include <inttypes.h> #include <stdbool.h> #include <string.h> +#include <sys/types.h> #include "nvim/ascii.h" +#include "nvim/buffer_defs.h" #include "nvim/charset.h" #include "nvim/eval.h" +#include "nvim/eval/typval.h" +#include "nvim/eval/typval_defs.h" #include "nvim/eval/userfunc.h" #include "nvim/garray.h" +#include "nvim/gettext.h" +#include "nvim/globals.h" +#include "nvim/keycodes.h" +#include "nvim/macros.h" #include "nvim/mark.h" +#include "nvim/mbyte.h" #include "nvim/memline.h" #include "nvim/memory.h" #include "nvim/message.h" +#include "nvim/option_defs.h" #include "nvim/os/input.h" #include "nvim/plines.h" -#include "nvim/profile.h" +#include "nvim/pos.h" #include "nvim/regexp.h" +#include "nvim/regexp_defs.h" #include "nvim/strings.h" +#include "nvim/types.h" +#include "nvim/undo_defs.h" #include "nvim/vim.h" #ifdef REGEXP_DEBUG @@ -41,21 +52,17 @@ # define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log" #endif -/* - * Magic characters have a special meaning, they don't match literally. - * Magic characters are negative. This separates them from literal characters - * (possibly multi-byte). Only ASCII characters can be Magic. - */ +// Magic characters have a special meaning, they don't match literally. +// Magic characters are negative. This separates them from literal characters +// (possibly multi-byte). Only ASCII characters can be Magic. #define Magic(x) ((int)(x) - 256) #define un_Magic(x) ((x) + 256) #define is_Magic(x) ((x) < 0) -/* - * We should define ftpr as a pointer to a function returning a pointer to - * a function returning a pointer to a function ... - * This is impossible, so we declare a pointer to a function returning a - * pointer to a function returning void. This should work for all compilers. - */ +// We should define ftpr as a pointer to a function returning a pointer to +// a function returning a pointer to a function ... +// This is impossible, so we declare a pointer to a function returning a +// pointer to a function returning void. This should work for all compilers. typedef void (*(*fptr_T)(int *, int))(void); static int no_Magic(int x) @@ -80,7 +87,7 @@ static int toggle_Magic(int x) #define REGMAGIC 0234 // Utility definitions. -#define UCHARAT(p) ((int)(*(char_u *)(p))) +#define UCHARAT(p) ((int)(*(uint8_t *)(p))) // Used for an error (down from) vim_regcomp(): give the error message, set // rc_did_emsg and return NULL @@ -97,20 +104,24 @@ static int toggle_Magic(int x) #define MAX_LIMIT (32767L << 16L) -static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); -static char_u e_reverse_range[] = N_("E944: Reverse range in character class"); -static char_u e_large_class[] = N_("E945: Range too large in character class"); -static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); -static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); -static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); -static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); -static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here"); -static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); -static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); -static char_u e_recursive[] = N_("E956: Cannot use pattern recursively"); -static char_u e_regexp_number_after_dot_pos_search[] +static char e_missingbracket[] = N_("E769: Missing ] after %s["); +static char e_reverse_range[] = N_("E944: Reverse range in character class"); +static char e_large_class[] = N_("E945: Range too large in character class"); +static char e_unmatchedpp[] = N_("E53: Unmatched %s%%("); +static char e_unmatchedp[] = N_("E54: Unmatched %s("); +static char e_unmatchedpar[] = N_("E55: Unmatched %s)"); +static char e_z_not_allowed[] = N_("E66: \\z( not allowed here"); +static char e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here"); +static char e_missing_sb[] = N_("E69: Missing ] after %s%%["); +static char e_empty_sb[] = N_("E70: Empty %s%%[]"); +static char e_recursive[] = N_("E956: Cannot use pattern recursively"); +static char e_regexp_number_after_dot_pos_search_chr[] = N_("E1204: No Number allowed after .: '\\%%%c'"); -static char_u e_substitute_nesting_too_deep[] = N_("E1290: substitute nesting too deep"); +static char e_nfa_regexp_missing_value_in_chr[] + = N_("E1273: (NFA regexp) missing value in '\\%%%c'"); +static char e_atom_engine_must_be_at_start_of_pattern[] + = N_("E1281: Atom '\\%%#=%c' must be at the start of the pattern"); +static char e_substitute_nesting_too_deep[] = N_("E1290: substitute nesting too deep"); #define NOT_MULTI 0 #define MULTI_ONE 1 @@ -139,28 +150,24 @@ static int re_multi_type(int c) static char *reg_prev_sub = NULL; -/* - * REGEXP_INRANGE contains all characters which are always special in a [] - * range after '\'. - * REGEXP_ABBR contains all characters which act as abbreviations after '\'. - * These are: - * \n - New line (NL). - * \r - Carriage Return (CR). - * \t - Tab (TAB). - * \e - Escape (ESC). - * \b - Backspace (Ctrl_H). - * \d - Character code in decimal, eg \d123 - * \o - Character code in octal, eg \o80 - * \x - Character code in hex, eg \x4a - * \u - Multibyte character code, eg \u20ac - * \U - Long multibyte character code, eg \U12345678 - */ +// REGEXP_INRANGE contains all characters which are always special in a [] +// range after '\'. +// REGEXP_ABBR contains all characters which act as abbreviations after '\'. +// These are: +// \n - New line (NL). +// \r - Carriage Return (CR). +// \t - Tab (TAB). +// \e - Escape (ESC). +// \b - Backspace (Ctrl_H). +// \d - Character code in decimal, eg \d123 +// \o - Character code in octal, eg \o80 +// \x - Character code in hex, eg \x4a +// \u - Multibyte character code, eg \u20ac +// \U - Long multibyte character code, eg \U12345678 static char REGEXP_INRANGE[] = "]^-n\\"; static char REGEXP_ABBR[] = "nrtebdoxuU"; -/* - * Translate '\x' to its control character, except "\n", which is Magic. - */ +// Translate '\x' to its control character, except "\n", which is Magic. static int backslash_trans(int c) { switch (c) { @@ -181,8 +188,7 @@ static int backslash_trans(int c) /// recognized. Otherwise "pp" is advanced to after the item. static int get_char_class(char **pp) { - static const char *(class_names[]) = - { + static const char *(class_names[]) = { "alnum:]", #define CLASS_ALNUM 0 "alpha:]", @@ -227,7 +233,7 @@ static int get_char_class(char **pp) if ((*pp)[1] == ':') { for (i = 0; i < (int)ARRAY_SIZE(class_names); i++) { - if (STRNCMP(*pp + 2, class_names[i], strlen(class_names[i])) == 0) { + if (strncmp(*pp + 2, class_names[i], strlen(class_names[i])) == 0) { *pp += strlen(class_names[i]) + 2; return i; } @@ -236,11 +242,9 @@ static int get_char_class(char **pp) return CLASS_NONE; } -/* - * Specific version of character class functions. - * Using a table to keep this fast. - */ -static short class_tab[256]; +// Specific version of character class functions. +// Using a table to keep this fast. +static int16_t class_tab[256]; #define RI_DIGIT 0x01 #define RI_HEX 0x02 @@ -312,24 +316,18 @@ static int re_has_z; ///< \z item detected static unsigned regflags; ///< RF_ flags for prog static int had_eol; ///< true when EOL found by vim_regcomp() -static int reg_magic; // magicness of the pattern: -#define MAGIC_NONE 1 // "\V" very unmagic -#define MAGIC_OFF 2 // "\M" or 'magic' off -#define MAGIC_ON 3 // "\m" or 'magic' -#define MAGIC_ALL 4 // "\v" very magic +static magic_T reg_magic; ///< magicness of the pattern static int reg_string; // matching with a string instead of a buffer // line static int reg_strict; // "[abc" is illegal -/* - * META contains all characters that may be magic, except '^' and '$'. - */ +// META contains all characters that may be magic, except '^' and '$'. // uncrustify:off // META[] is used often enough to justify turning it into a table. -static char_u META_flags[] = { +static uint8_t META_flags[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // % & ( ) * + . @@ -363,7 +361,7 @@ static int nextchr; // used for ungetchr() #define REG_NPAREN 3 // \%(\) typedef struct { - char_u *regparse; + char *regparse; int prevchr_len; int curchr; int prevchr; @@ -388,21 +386,19 @@ int re_multiline(const regprog_T *prog) return prog->regflags & RF_HASNL; } -/* - * Check for an equivalence class name "[=a=]". "pp" points to the '['. - * Returns a character representing the class. Zero means that no item was - * recognized. Otherwise "pp" is advanced to after the item. - */ +// Check for an equivalence class name "[=a=]". "pp" points to the '['. +// Returns a character representing the class. Zero means that no item was +// recognized. Otherwise "pp" is advanced to after the item. static int get_equi_class(char **pp) { int c; int l = 1; - char_u *p = (char_u *)(*pp); + char *p = *pp; if (p[1] == '=' && p[2] != NUL) { - l = utfc_ptr2len((char *)p + 2); + l = utfc_ptr2len(p + 2); if (p[l + 2] == '=' && p[l + 3] == ']') { - c = utf_ptr2char((char *)p + 2); + c = utf_ptr2char(p + 2); *pp += l + 4; return c; } @@ -410,22 +406,20 @@ static int get_equi_class(char **pp) return 0; } -/* - * Check for a collating element "[.a.]". "pp" points to the '['. - * Returns a character. Zero means that no item was recognized. Otherwise - * "pp" is advanced to after the item. - * Currently only single characters are recognized! - */ +// Check for a collating element "[.a.]". "pp" points to the '['. +// Returns a character. Zero means that no item was recognized. Otherwise +// "pp" is advanced to after the item. +// Currently only single characters are recognized! static int get_coll_element(char **pp) { int c; int l = 1; - char_u *p = (char_u *)(*pp); + char *p = *pp; if (p[0] != NUL && p[1] == '.' && p[2] != NUL) { - l = utfc_ptr2len((char *)p + 2); + l = utfc_ptr2len(p + 2); if (p[l + 2] == '.' && p[l + 3] == ']') { - c = utf_ptr2char((char *)p + 2); + c = utf_ptr2char(p + 2); *pp += l + 4; return c; } @@ -443,7 +437,7 @@ static void get_cpo_flags(void) /// Skip over a "[]" range. /// "p" must point to the character after the '['. /// The returned pointer is on the matching ']', or the terminating NUL. -static char_u *skip_anyof(char *p) +static char *skip_anyof(char *p) { int l; @@ -462,9 +456,9 @@ static char_u *skip_anyof(char *p) MB_PTR_ADV(p); } } else if (*p == '\\' - && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL + && (vim_strchr(REGEXP_INRANGE, (uint8_t)p[1]) != NULL || (!reg_cpo_lit - && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) { + && vim_strchr(REGEXP_ABBR, (uint8_t)p[1]) != NULL))) { p += 2; } else if (*p == '[') { if (get_char_class(&p) == CLASS_NONE @@ -478,19 +472,41 @@ static char_u *skip_anyof(char *p) } } - return (char_u *)p; + return p; } /// Skip past regular expression. -/// Stop at end of "startp" or where "dirc" is found ('/', '?', etc). +/// Stop at end of "startp" or where "delim" is found ('/', '?', etc). /// Take care of characters with a backslash in front of it. /// Skip strings inside [ and ]. +char *skip_regexp(char *startp, int delim, int magic) +{ + return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL); +} + +/// Call skip_regexp() and when the delimiter does not match give an error and +/// return NULL. +char *skip_regexp_err(char *startp, int delim, int magic) +{ + char *p = skip_regexp(startp, delim, magic); + + if (*p != delim) { + semsg(_("E654: missing delimiter after search pattern: %s"), startp); + return NULL; + } + return p; +} + +/// skip_regexp() with extra arguments: /// When "newp" is not NULL and "dirc" is '?', make an allocated copy of the /// expression and change "\?" to "?". If "*newp" is not NULL the expression /// is changed in-place. -char *skip_regexp(char *startp, int dirc, int magic, char **newp) +/// If a "\?" is changed to "?" then "dropped" is incremented, unless NULL. +/// If "magic_val" is not NULL, returns the effective magicness of the pattern +char *skip_regexp_ex(char *startp, int dirc, int magic, char **newp, int *dropped, + magic_T *magic_val) { - int mymagic; + magic_T mymagic; char *p = startp; if (magic) { @@ -506,7 +522,7 @@ char *skip_regexp(char *startp, int dirc, int magic, char **newp) } if ((p[0] == '[' && mymagic >= MAGIC_ON) || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF)) { - p = (char *)skip_anyof(p + 1); + p = skip_anyof(p + 1); if (p[0] == NUL) { break; } @@ -517,6 +533,9 @@ char *skip_regexp(char *startp, int dirc, int magic, char **newp) *newp = xstrdup(startp); p = *newp + (p - startp); } + if (dropped != NULL) { + (*dropped)++; + } STRMOVE(p, p + 1); } else { p++; // skip next character @@ -528,6 +547,9 @@ char *skip_regexp(char *startp, int dirc, int magic, char **newp) } } } + if (magic_val != NULL) { + *magic_val = mymagic; + } return p; } @@ -536,25 +558,21 @@ static int prevchr_len; // byte length of previous char static int at_start; // True when on the first character static int prev_at_start; // True when on the second character -/* - * Start parsing at "str". - */ -static void initchr(char_u *str) +// Start parsing at "str". +static void initchr(char *str) { - regparse = (char *)str; + regparse = str; prevchr_len = 0; curchr = prevprevchr = prevchr = nextchr = -1; at_start = true; prev_at_start = false; } -/* - * Save the current parse state, so that it can be restored and parsing - * starts in the same state again. - */ +// Save the current parse state, so that it can be restored and parsing +// starts in the same state again. static void save_parse_state(parse_state_T *ps) { - ps->regparse = (char_u *)regparse; + ps->regparse = regparse; ps->prevchr_len = prevchr_len; ps->curchr = curchr; ps->prevchr = prevchr; @@ -565,12 +583,10 @@ static void save_parse_state(parse_state_T *ps) ps->regnpar = regnpar; } -/* - * Restore a previously saved parse state. - */ +// Restore a previously saved parse state. static void restore_parse_state(parse_state_T *ps) { - regparse = (char *)ps->regparse; + regparse = ps->regparse; prevchr_len = ps->prevchr_len; curchr = ps->curchr; prevchr = ps->prevchr; @@ -581,9 +597,7 @@ static void restore_parse_state(parse_state_T *ps) regnpar = ps->regnpar; } -/* - * Get the next character without advancing. - */ +// Get the next character without advancing. static int peekchr(void) { static int after_slash = false; @@ -663,7 +677,7 @@ static int peekchr(void) // '$' is only magic as the very last char and if it's in front of // either "\|", "\)", "\&", or "\n" if (reg_magic >= MAGIC_OFF) { - char_u *p = (char_u *)regparse + 1; + uint8_t *p = (uint8_t *)regparse + 1; bool is_magic_all = (reg_magic == MAGIC_ALL); // ignore \c \C \m \M \v \V and \Z after '$' @@ -710,9 +724,7 @@ static int peekchr(void) after_slash--; curchr = toggle_Magic(curchr); } else if (vim_strchr(REGEXP_ABBR, c)) { - /* - * Handle abbreviations, like "\t" for TAB -- webb - */ + // Handle abbreviations, like "\t" for TAB -- webb curchr = backslash_trans(c); } else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) { curchr = toggle_Magic(c); @@ -731,9 +743,7 @@ static int peekchr(void) return curchr; } -/* - * Eat one lexed character. Do this in a way that we can undo it. - */ +// Eat one lexed character. Do this in a way that we can undo it. static void skipchr(void) { // peekchr() eats a backslash, do the same here @@ -755,10 +765,8 @@ static void skipchr(void) nextchr = -1; } -/* - * Skip a character while keeping the value of prev_at_start for at_start. - * prevchr and prevprevchr are also kept. - */ +// Skip a character while keeping the value of prev_at_start for at_start. +// prevchr and prevprevchr are also kept. static void skipchr_keepstart(void) { int as = prev_at_start; @@ -771,10 +779,8 @@ static void skipchr_keepstart(void) prevprevchr = prpr; } -/* - * Get the next character from the pattern. We know about magic and such, so - * therefore we need a lexical analyzer. - */ +// Get the next character from the pattern. We know about magic and such, so +// therefore we need a lexical analyzer. static int getchr(void) { int chr = peekchr(); @@ -783,9 +789,7 @@ static int getchr(void) return chr; } -/* - * put character back. Works only once! - */ +// put character back. Works only once! static void ungetchr(void) { nextchr = curchr; @@ -799,15 +803,13 @@ static void ungetchr(void) regparse -= prevchr_len; } -/* - * Get and return the value of the hex string at the current position. - * Return -1 if there is no valid hex number. - * The position is updated: - * blahblah\%x20asdf - * before-^ ^-after - * The parameter controls the maximum number of input characters. This will be - * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. - */ +// Get and return the value of the hex string at the current position. +// Return -1 if there is no valid hex number. +// The position is updated: +// blahblah\%x20asdf +// before-^ ^-after +// The parameter controls the maximum number of input characters. This will be +// 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. static int64_t gethexchrs(int maxinputlen) { int64_t nr = 0; @@ -830,10 +832,8 @@ static int64_t gethexchrs(int maxinputlen) return nr; } -/* - * Get and return the value of the decimal string immediately after the - * current position. Return -1 for invalid. Consumes all digits. - */ +// Get and return the value of the decimal string immediately after the +// current position. Return -1 for invalid. Consumes all digits. static int64_t getdecchrs(void) { int64_t nr = 0; @@ -857,14 +857,12 @@ static int64_t getdecchrs(void) return nr; } -/* - * get and return the value of the octal string immediately after the current - * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle - * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't - * treat 8 or 9 as recognised characters. Position is updated: - * blahblah\%o210asdf - * before-^ ^-after - */ +// get and return the value of the octal string immediately after the current +// position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle +// numbers > 377 correctly (for example, 400 is treated as 40) and doesn't +// treat 8 or 9 as recognised characters. Position is updated: +// blahblah\%o210asdf +// before-^ ^-after static int64_t getoctchrs(void) { int64_t nr = 0; @@ -887,16 +885,14 @@ static int64_t getoctchrs(void) return nr; } -/* - * read_limits - Read two integers to be taken as a minimum and maximum. - * If the first character is '-', then the range is reversed. - * Should end with 'end'. If minval is missing, zero is default, if maxval is - * missing, a very big number is the default. - */ +// read_limits - Read two integers to be taken as a minimum and maximum. +// If the first character is '-', then the range is reversed. +// Should end with 'end'. If minval is missing, zero is default, if maxval is +// missing, a very big number is the default. static int read_limits(long *minval, long *maxval) { int reverse = false; - char_u *first_char; + char *first_char; long tmp; if (*regparse == '-') { @@ -904,7 +900,7 @@ static int read_limits(long *minval, long *maxval) regparse++; reverse = true; } - first_char = (char_u *)regparse; + first_char = regparse; *minval = getdigits_long(®parse, false, 0); if (*regparse == ',') { // There is a comma. if (ascii_isdigit(*++regparse)) { @@ -924,10 +920,8 @@ static int read_limits(long *minval, long *maxval) EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL); } - /* - * Reverse the range if there was a '-', or make sure it is in the right - * order otherwise. - */ + // Reverse the range if there was a '-', or make sure it is in the right + // order otherwise. if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) { tmp = *minval; *minval = *maxval; @@ -937,18 +931,14 @@ static int read_limits(long *minval, long *maxval) return OK; } -/* - * vim_regexec and friends - */ +// vim_regexec and friends -/* - * Global work variables for vim_regexec(). - */ +// Global work variables for vim_regexec(). // Sometimes need to save a copy of a line. Since alloc()/free() is very // slow, we keep one allocated piece of memory and only re-allocate it when // it's too small. It's freed in bt_regexec_both() when finished. -static char_u *reg_tofree = NULL; +static uint8_t *reg_tofree = NULL; static unsigned reg_tofreelen; // Structure used to store the execution state of the regex engine. @@ -969,10 +959,12 @@ static unsigned reg_tofreelen; typedef struct { regmatch_T *reg_match; regmmatch_T *reg_mmatch; - char_u **reg_startp; - char_u **reg_endp; + + uint8_t **reg_startp; + uint8_t **reg_endp; lpos_T *reg_startpos; lpos_T *reg_endpos; + win_T *reg_win; buf_T *reg_buf; linenr_T reg_firstlnum; @@ -981,8 +973,8 @@ typedef struct { // The current match-position is remembered with these variables: linenr_T lnum; ///< line number, relative to first line - char_u *line; ///< start of current line - char_u *input; ///< current input, points into "line" + uint8_t *line; ///< start of current line + uint8_t *input; ///< current input, points into "line" int need_clear_subexpr; ///< subexpressions still need to be cleared int need_clear_zsubexpr; ///< extmatch subexpressions still need to be @@ -1026,10 +1018,8 @@ static bool reg_iswordc(int c) return vim_iswordc_buf(c, rex.reg_buf); } -/* - * Get pointer to the line "lnum", which is relative to "reg_firstlnum". - */ -static char_u *reg_getline(linenr_T lnum) +// Get pointer to the line "lnum", which is relative to "reg_firstlnum". +static char *reg_getline(linenr_T lnum) { // when looking behind for a match/no-match lnum is negative. But we // can't go before line 1 @@ -1038,22 +1028,20 @@ static char_u *reg_getline(linenr_T lnum) } if (lnum > rex.reg_maxline) { // Must have matched the "\n" in the last line. - return (char_u *)""; + return ""; } - return (char_u *)ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false); + return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false); } -static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning -static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches +static uint8_t *reg_startzp[NSUBEXP]; // Workspace to mark beginning +static uint8_t *reg_endzp[NSUBEXP]; // and end of \z(...\) matches static lpos_T reg_startzpos[NSUBEXP]; // idem, beginning pos static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos // true if using multi-line regexp. #define REG_MULTI (rex.reg_match == NULL) -/* - * Create a new extmatch and mark it as referenced once. - */ +// Create a new extmatch and mark it as referenced once. static reg_extmatch_T *make_extmatch(void) FUNC_ATTR_NONNULL_RET { @@ -1062,9 +1050,7 @@ static reg_extmatch_T *make_extmatch(void) return em; } -/* - * Add a reference to an extmatch. - */ +// Add a reference to an extmatch. reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) { if (em != NULL) { @@ -1073,10 +1059,8 @@ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em) return em; } -/* - * Remove a reference to an extmatch. If there are no references left, free - * the info. - */ +// Remove a reference to an extmatch. If there are no references left, free +// the info. void unref_extmatch(reg_extmatch_T *em) { int i; @@ -1093,7 +1077,8 @@ void unref_extmatch(reg_extmatch_T *em) static int reg_prev_class(void) { if (rex.input > rex.line) { - return mb_get_class_tab(rex.input - 1 - utf_head_off((char *)rex.line, (char *)rex.input - 1), + return mb_get_class_tab((char *)rex.input - 1 - + utf_head_off((char *)rex.line, (char *)rex.input - 1), rex.reg_buf->b_chartab); } return -1; @@ -1111,8 +1096,8 @@ static bool reg_match_visual(void) colnr_T start2, end2; colnr_T curswant; - // Check if the buffer is the current buffer. - if (rex.reg_buf != curbuf || VIsual.lnum == 0) { + // Check if the buffer is the current buffer and not using a string. + if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI) { return false; } @@ -1162,10 +1147,10 @@ static bool reg_match_visual(void) } // getvvcol() flushes rex.line, need to get it again - rex.line = reg_getline(rex.lnum); + rex.line = (uint8_t *)reg_getline(rex.lnum); rex.input = rex.line + col; - unsigned int cols_u = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, rex.line, col); + unsigned int cols_u = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, (char *)rex.line, col); assert(cols_u <= MAXCOL); colnr_T cols = (colnr_T)cols_u; if (cols < start || cols > end - (*p_sel == 'e')) { @@ -1175,10 +1160,8 @@ static bool reg_match_visual(void) return true; } -/* - * Check the regexp program for its magic number. - * Return true if it's wrong. - */ +// Check the regexp program for its magic number. +// Return true if it's wrong. static int prog_magic_wrong(void) { regprog_T *prog; @@ -1196,62 +1179,62 @@ static int prog_magic_wrong(void) return false; } -/* - * Cleanup the subexpressions, if this wasn't done yet. - * This construction is used to clear the subexpressions only when they are - * used (to increase speed). - */ +// Cleanup the subexpressions, if this wasn't done yet. +// This construction is used to clear the subexpressions only when they are +// used (to increase speed). static void cleanup_subexpr(void) { - if (rex.need_clear_subexpr) { - if (REG_MULTI) { - // Use 0xff to set lnum to -1 - memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP); - memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP); - } else { - memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP); - memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP); - } - rex.need_clear_subexpr = false; + if (!rex.need_clear_subexpr) { + return; + } + + if (REG_MULTI) { + // Use 0xff to set lnum to -1 + memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP); + memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP); + } else { + memset(rex.reg_startp, 0, sizeof(char *) * NSUBEXP); + memset(rex.reg_endp, 0, sizeof(char *) * NSUBEXP); } + rex.need_clear_subexpr = false; } static void cleanup_zsubexpr(void) { - if (rex.need_clear_zsubexpr) { - if (REG_MULTI) { - // Use 0xff to set lnum to -1 - memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP); - memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP); - } else { - memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP); - memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP); - } - rex.need_clear_zsubexpr = false; + if (!rex.need_clear_zsubexpr) { + return; + } + + if (REG_MULTI) { + // Use 0xff to set lnum to -1 + memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP); + memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP); + } else { + memset(reg_startzp, 0, sizeof(char *) * NSUBEXP); + memset(reg_endzp, 0, sizeof(char *) * NSUBEXP); } + rex.need_clear_zsubexpr = false; } // Advance rex.lnum, rex.line and rex.input to the next line. static void reg_nextline(void) { - rex.line = reg_getline(++rex.lnum); + rex.line = (uint8_t *)reg_getline(++rex.lnum); rex.input = rex.line; fast_breakcheck(); } -/* - * Check whether a backreference matches. - * Returns RA_FAIL, RA_NOMATCH or RA_MATCH. - * If "bytelen" is not NULL, it is set to the byte length of the match in the - * last line. - */ +// Check whether a backreference matches. +// Returns RA_FAIL, RA_NOMATCH or RA_MATCH. +// If "bytelen" is not NULL, it is set to the byte length of the match in the +// last line. static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen) { linenr_T clnum = start_lnum; colnr_T ccol = start_col; int len; - char_u *p; + char *p; if (bytelen != NULL) { *bytelen = 0; @@ -1260,7 +1243,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e // Since getting one line may invalidate the other, need to make copy. // Slow! if (rex.line != reg_tofree) { - len = (int)STRLEN(rex.line); + len = (int)strlen((char *)rex.line); if (reg_tofree == NULL || len >= (int)reg_tofreelen) { len += 50; // get some extra xfree(reg_tofree); @@ -1279,10 +1262,10 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e if (clnum == end_lnum) { len = end_col - ccol; } else { - len = (int)STRLEN(p + ccol); + len = (int)strlen(p + ccol); } - if (cstrncmp((char *)p + ccol, (char *)rex.input, &len) != 0) { + if (cstrncmp(p + ccol, (char *)rex.input, &len) != 0) { return RA_NOMATCH; // doesn't match } if (bytelen != NULL) { @@ -1328,8 +1311,7 @@ typedef struct { } decomp_T; // 0xfb20 - 0xfb4f -static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] = -{ +static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] = { { 0x5e2, 0, 0 }, // 0xfb20 alt ayin { 0x5d0, 0, 0 }, // 0xfb21 alt alef { 0x5d3, 0, 0 }, // 0xfb22 alt dalet @@ -1403,7 +1385,7 @@ static int cstrncmp(char *s1, char *s2, int *n) int result; if (!rex.reg_ic) { - result = STRNCMP(s1, s2, *n); + result = strncmp(s1, s2, (size_t)(*n)); } else { assert(*n >= 0); result = mb_strnicmp(s1, s2, (size_t)(*n)); @@ -1421,12 +1403,12 @@ static int cstrncmp(char *s1, char *s2, int *n) str2 = s2; c1 = c2 = 0; while ((int)(str1 - s1) < *n) { - c1 = mb_ptr2char_adv((const char_u **)&str1); - c2 = mb_ptr2char_adv((const char_u **)&str2); + c1 = mb_ptr2char_adv((const char **)&str1); + c2 = mb_ptr2char_adv((const char **)&str2); - /* decompose the character if necessary, into 'base' characters - * because I don't care about Arabic, I will hard-code the Hebrew - * which I *do* care about! So sue me... */ + // decompose the character if necessary, into 'base' characters + // because I don't care about Arabic, I will hard-code the Hebrew + // which I *do* care about! So sue me... if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) { // decomposition necessary? mb_decompose(c1, &c11, &junk, &junk); @@ -1456,21 +1438,21 @@ static int cstrncmp(char *s1, char *s2, int *n) /// @param c character to find in @a s /// /// @return NULL if no match, otherwise pointer to the position in @a s -static inline char_u *cstrchr(const char_u *const s, const int c) +static inline char *cstrchr(const char *const s, const int c) FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE { if (!rex.reg_ic) { - return (char_u *)vim_strchr((char *)s, c); + return vim_strchr(s, c); } // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is // expected to be highly optimized. if (c > 0x80) { const int folded_c = utf_fold(c); - for (const char_u *p = s; *p != NUL; p += utfc_ptr2len((char *)p)) { - if (utf_fold(utf_ptr2char((char *)p)) == folded_c) { - return (char_u *)p; + for (const char *p = s; *p != NUL; p += utfc_ptr2len(p)) { + if (utf_fold(utf_ptr2char(p)) == folded_c) { + return (char *)p; } } return NULL; @@ -1482,11 +1464,11 @@ static inline char_u *cstrchr(const char_u *const s, const int c) } else if (ASCII_ISLOWER(c)) { cc = TOUPPER_ASC(c); } else { - return (char_u *)vim_strchr((char *)s, c); + return vim_strchr(s, c); } char tofind[] = { (char)c, (char)cc, NUL }; - return (char_u *)strpbrk((const char *)s, tofind); + return strpbrk(s, tofind); } //////////////////////////////////////////////////////////////// @@ -1541,7 +1523,7 @@ char *regtilde(char *source, int magic, bool preview) int len; int prevlen; - for (p = newsub; *p; ++p) { + for (p = newsub; *p; p++) { if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) { if (reg_prev_sub != NULL) { // length = len(newsub) - 1 + len(prev_sub) + 1 @@ -1658,8 +1640,7 @@ static void clear_submatch_list(staticList10_T *sl) /// references invalid! /// /// Returns the size of the replacement, including terminating NUL. -int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, int destlen, - int flags) +int vim_regsub(regmatch_T *rmp, char *source, typval_T *expr, char *dest, int destlen, int flags) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; @@ -1685,7 +1666,7 @@ int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, in return result; } -int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int destlen, +int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char *source, char *dest, int destlen, int flags) { regexec_T rex_save; @@ -1726,11 +1707,11 @@ void free_resub_eval_result(void) } #endif -static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags) +static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen, int flags) { - char_u *src; - char_u *dst; - char_u *s; + char *src; + char *dst; + char *s; int c; int cc; int no = -1; @@ -1806,14 +1787,14 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des funcexe.fe_argv_func = fill_submatch_list; funcexe.fe_evaluate = true; if (expr->v_type == VAR_FUNC) { - s = (char_u *)expr->vval.v_string; - call_func((char *)s, -1, &rettv, 1, argv, &funcexe); + s = expr->vval.v_string; + call_func(s, -1, &rettv, 1, argv, &funcexe); } else if (expr->v_type == VAR_PARTIAL) { partial_T *partial = expr->vval.v_partial; - s = (char_u *)partial_name(partial); + s = partial_name(partial); funcexe.fe_partial = partial; - call_func((char *)s, -1, &rettv, 1, argv, &funcexe); + call_func(s, -1, &rettv, 1, argv, &funcexe); } if (tv_list_len(&matchList.sl_list) > 0) { // fill_submatch_list() was called. @@ -1831,14 +1812,14 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des } tv_clear(&rettv); } else { - eval_result[nested] = eval_to_string((char *)source + 2, NULL, true); + eval_result[nested] = eval_to_string(source + 2, NULL, true); } nesting--; if (eval_result[nested] != NULL) { int had_backslash = false; - for (s = (char_u *)eval_result[nested]; *s != NUL; MB_PTR_ADV(s)) { + for (s = eval_result[nested]; *s != NUL; MB_PTR_ADV(s)) { // Change NL to CR, so that it becomes a line break, // unless called from vim_regexec_nl(). // Skip over a backslashed character. @@ -1846,12 +1827,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des *s = CAR; } else if (*s == '\\' && s[1] != NUL) { s++; - /* Change NL to CR here too, so that this works: - * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: - * abc\ - * def - * Not when called from vim_regexec_nl(). - */ + // Change NL to CR here too, so that this works: + // :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: + // abc{backslash} + // def + // Not when called from vim_regexec_nl(). if (*s == NL && !rsm.sm_line_lbr) { *s = CAR; } @@ -1860,9 +1840,9 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des } if (had_backslash && (flags & REGSUB_BACKSLASH)) { // Backslashes will be consumed, need to double them. - s = vim_strsave_escaped((char_u *)eval_result[nested], (char_u *)"\\"); + s = vim_strsave_escaped(eval_result[nested], "\\"); xfree(eval_result[nested]); - eval_result[nested] = (char *)s; + eval_result[nested] = s; } dst += strlen(eval_result[nested]); @@ -1874,7 +1854,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des } } } else { - while ((c = *src++) != NUL) { + while ((c = (uint8_t)(*src++)) != NUL) { if (c == '&' && (flags & REGSUB_MAGIC)) { no = 0; } else if (c == '\\' && *src != NUL) { @@ -1883,7 +1863,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des no = 0; } else if ('0' <= *src && *src <= '9') { no = *src++ - '0'; - } else if (vim_strchr("uUlLeE", *src)) { + } else if (vim_strchr("uUlLeE", (uint8_t)(*src))) { switch (*src++) { case 'u': func_one = (fptr_T)do_upper; @@ -1912,7 +1892,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des iemsg("vim_regsub_both(): not enough space"); return 0; } - *dst++ = (char_u)c; + *dst++ = (char)c; *dst++ = *src++; *dst++ = *src++; } else { @@ -1949,10 +1929,10 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des } dst++; } - c = *src++; + c = (uint8_t)(*src++); } } else { - c = utf_ptr2char((char *)src - 1); + c = utf_ptr2char(src - 1); } // Write to buffer, if copy is set. if (func_one != NULL) { @@ -1964,7 +1944,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des cc = c; } - int totlen = utfc_ptr2len((char *)src - 1); + int totlen = utfc_ptr2len(src - 1); int charlen = utf_char2len(cc); if (copy) { @@ -1972,10 +1952,10 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des iemsg("vim_regsub_both(): not enough space"); return 0; } - utf_char2bytes(cc, (char *)dst); + utf_char2bytes(cc, dst); } dst += charlen - 1; - int clen = utf_ptr2len((char *)src - 1); + int clen = utf_ptr2len(src - 1); // If the character length is shorter than "totlen", there // are composing characters; copy them as-is. @@ -2002,15 +1982,15 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des len = rex.reg_mmatch->endpos[no].col - rex.reg_mmatch->startpos[no].col; } else { - len = (int)STRLEN(s); + len = (int)strlen(s); } } } else { - s = (char_u *)rex.reg_match->startp[no]; + s = rex.reg_match->startp[no]; if (rex.reg_match->endp[no] == NULL) { s = NULL; } else { - len = (int)(rex.reg_match->endp[no] - (char *)s); + len = (int)(rex.reg_match->endp[no] - s); } } if (s != NULL) { @@ -2032,7 +2012,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des if (rex.reg_mmatch->endpos[no].lnum == clnum) { len = rex.reg_mmatch->endpos[no].col; } else { - len = (int)STRLEN(s); + len = (int)strlen(s); } } else { break; @@ -2058,7 +2038,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des } dst += 2; } else { - c = utf_ptr2char((char *)s); + c = utf_ptr2char(s); if (func_one != (fptr_T)NULL) { // Turbo C complains without the typecast @@ -2076,7 +2056,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des // Copy composing characters separately, one // at a time. - l = utf_ptr2len((char *)s) - 1; + l = utf_ptr2len(s) - 1; s += l; len -= l; @@ -2086,7 +2066,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des iemsg("vim_regsub_both(): not enough space"); return 0; } - utf_char2bytes(cc, (char *)dst); + utf_char2bytes(cc, dst); } dst += charlen - 1; } @@ -2122,7 +2102,7 @@ static char *reg_getline_submatch(linenr_T lnum) rex.reg_firstlnum = rsm.sm_firstlnum; rex.reg_maxline = rsm.sm_maxline; - s = (char *)reg_getline(lnum); + s = reg_getline(lnum); rex.reg_firstlnum = save_first; rex.reg_maxline = save_max; @@ -2147,10 +2127,8 @@ char *reg_submatch(int no) if (rsm.sm_match == NULL) { ssize_t len; - /* - * First round: compute the length and allocate memory. - * Second round: copy the text. - */ + // First round: compute the length and allocate memory. + // Second round: copy the text. for (round = 1; round <= 2; round++) { lnum = rsm.sm_mmatch->startpos[no].lnum; if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) { @@ -2166,7 +2144,7 @@ char *reg_submatch(int no) // Within one line: take form start to end col. len = rsm.sm_mmatch->endpos[no].col - rsm.sm_mmatch->startpos[no].col; if (round == 2) { - STRLCPY(retval, s, len + 1); + xstrlcpy(retval, s, (size_t)len + 1); } len++; } else { @@ -2191,8 +2169,9 @@ char *reg_submatch(int no) len++; } if (round == 2) { - STRNCPY(retval + len, reg_getline_submatch(lnum), - rsm.sm_mmatch->endpos[no].col); + strncpy(retval + len, // NOLINT(runtime/printf) + reg_getline_submatch(lnum), + (size_t)rsm.sm_mmatch->endpos[no].col); } len += rsm.sm_mmatch->endpos[no].col; if (round == 2) { @@ -2270,22 +2249,39 @@ list_T *reg_submatch_list(int no) return list; } +/// Initialize the values used for matching against multiple lines +/// +/// @param win window in which to search or NULL +/// @param buf buffer in which to search +/// @param lnum nr of line to start looking for match +static void init_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum) +{ + rex.reg_match = NULL; + rex.reg_mmatch = rmp; + rex.reg_buf = buf; + rex.reg_win = win; + rex.reg_firstlnum = lnum; + rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; + rex.reg_line_lbr = false; + rex.reg_ic = rmp->rmm_ic; + rex.reg_icombine = false; + rex.reg_maxcol = rmp->rmm_maxcol; +} + // XXX Do not allow headers generator to catch definitions from regexp_nfa.c #ifndef DO_NOT_DEFINE_EMPTY_ATTRIBUTES # include "nvim/regexp_bt.c" # include "nvim/regexp_nfa.c" #endif -static regengine_T bt_regengine = -{ +static regengine_T bt_regengine = { bt_regcomp, bt_regfree, bt_regexec_nl, bt_regexec_multi, }; -static regengine_T nfa_regengine = -{ +static regengine_T nfa_regengine = { nfa_regcomp, nfa_regfree, nfa_regexec_nl, @@ -2297,28 +2293,26 @@ static regengine_T nfa_regengine = static int regexp_engine = 0; #ifdef REGEXP_DEBUG -static char_u regname[][30] = { +static uint8_t regname[][30] = { "AUTOMATIC Regexp Engine", "BACKTRACKING Regexp Engine", "NFA Regexp Engine" }; #endif -/* - * Compile a regular expression into internal code. - * Returns the program in allocated memory. - * Use vim_regfree() to free the memory. - * Returns NULL for an error. - */ +// Compile a regular expression into internal code. +// Returns the program in allocated memory. +// Use vim_regfree() to free the memory. +// Returns NULL for an error. regprog_T *vim_regcomp(char *expr_arg, int re_flags) { regprog_T *prog = NULL; - char_u *expr = (char_u *)expr_arg; + char *expr = expr_arg; regexp_engine = (int)p_re; // Check for prefix "\%#=", that sets the regexp engine - if (STRNCMP(expr, "\\%#=", 4) == 0) { + if (strncmp(expr, "\\%#=", 4) == 0) { int newengine = expr[4] - '0'; if (newengine == AUTOMATIC_ENGINE @@ -2348,10 +2342,10 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags) // const int called_emsg_before = called_emsg; if (regexp_engine != BACKTRACKING_ENGINE) { - prog = nfa_regengine.regcomp(expr, + prog = nfa_regengine.regcomp((uint8_t *)expr, re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0)); } else { - prog = bt_regengine.regcomp(expr, re_flags); + prog = bt_regengine.regcomp((uint8_t *)expr, re_flags); } // Check for error compiling regexp with initial engine. @@ -2376,7 +2370,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags) if (regexp_engine == AUTOMATIC_ENGINE && called_emsg == called_emsg_before) { regexp_engine = BACKTRACKING_ENGINE; report_re_switch(expr); - prog = bt_regengine.regcomp(expr, re_flags); + prog = bt_regengine.regcomp((uint8_t *)expr, re_flags); } } @@ -2390,9 +2384,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags) return prog; } -/* - * Free a compiled regexp program, returned by vim_regcomp(). - */ +// Free a compiled regexp program, returned by vim_regcomp(). void vim_regfree(regprog_T *prog) { if (prog != NULL) { @@ -2411,12 +2403,12 @@ void free_regexp_stuff(void) #endif -static void report_re_switch(char_u *pat) +static void report_re_switch(char *pat) { if (p_verbose > 0) { verbose_enter(); msg_puts(_("Switching to backtracking RE engine for pattern: ")); - msg_puts((char *)pat); + msg_puts(pat); verbose_leave(); } } @@ -2433,7 +2425,7 @@ static void report_re_switch(char_u *pat) /// @param nl /// /// @return true if there is a match, false if not. -static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool nl) +static bool vim_regexec_string(regmatch_T *rmp, char *line, colnr_T col, bool nl) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; @@ -2456,7 +2448,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool rex.reg_startpos = NULL; rex.reg_endpos = NULL; - int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + int result = rmp->regprog->engine->regexec_nl(rmp, (uint8_t *)line, col, nl); rmp->regprog->re_in_use = false; // NFA engine aborted because it's very slow, use backtracking engine instead. @@ -2468,11 +2460,11 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool p_re = BACKTRACKING_ENGINE; vim_regfree(rmp->regprog); - report_re_switch((char_u *)pat); + report_re_switch(pat); rmp->regprog = vim_regcomp(pat, re_flags); if (rmp->regprog != NULL) { rmp->regprog->re_in_use = true; - result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + result = rmp->regprog->engine->regexec_nl(rmp, (uint8_t *)line, col, nl); rmp->regprog->re_in_use = false; } @@ -2490,7 +2482,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool // Note: "*prog" may be freed and changed. // Return true if there is a match, false if not. -bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T col) +bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char *line, colnr_T col) { regmatch_T regmatch = { .regprog = *prog, .rm_ic = ignore_case }; bool r = vim_regexec_string(®match, line, col, false); @@ -2502,13 +2494,13 @@ bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T // Return true if there is a match, false if not. bool vim_regexec(regmatch_T *rmp, char *line, colnr_T col) { - return vim_regexec_string(rmp, (char_u *)line, col, false); + return vim_regexec_string(rmp, line, col, false); } // Like vim_regexec(), but consider a "\n" in "line" to be a line break. // Note: "rmp->regprog" may be freed and changed. // Return true if there is a match, false if not. -bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) +bool vim_regexec_nl(regmatch_T *rmp, char *line, colnr_T col) { return vim_regexec_string(rmp, line, col, true); } @@ -2560,7 +2552,7 @@ long vim_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, p_re = BACKTRACKING_ENGINE; regprog_T *prev_prog = rmp->regprog; - report_re_switch((char_u *)pat); + report_re_switch(pat); // checking for \z misuse was already done when compiling for NFA, // allow all here reg_do_extmatch = REX_ALL; |