aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvim/regexp.c')
-rw-r--r--src/nvim/regexp.c757
1 files changed, 372 insertions, 385 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index b7ec4bf94e..122f3e2020 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -1,9 +1,7 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
-/*
- * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
- */
+// Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
// By default: do not create debugging logs or files related to regular
// expressions, even when compiling with -DDEBUG.
@@ -15,21 +13,34 @@
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
+#include <sys/types.h>
#include "nvim/ascii.h"
+#include "nvim/buffer_defs.h"
#include "nvim/charset.h"
#include "nvim/eval.h"
+#include "nvim/eval/typval.h"
+#include "nvim/eval/typval_defs.h"
#include "nvim/eval/userfunc.h"
#include "nvim/garray.h"
+#include "nvim/gettext.h"
+#include "nvim/globals.h"
+#include "nvim/keycodes.h"
+#include "nvim/macros.h"
#include "nvim/mark.h"
+#include "nvim/mbyte.h"
#include "nvim/memline.h"
#include "nvim/memory.h"
#include "nvim/message.h"
+#include "nvim/option_defs.h"
#include "nvim/os/input.h"
#include "nvim/plines.h"
-#include "nvim/profile.h"
+#include "nvim/pos.h"
#include "nvim/regexp.h"
+#include "nvim/regexp_defs.h"
#include "nvim/strings.h"
+#include "nvim/types.h"
+#include "nvim/undo_defs.h"
#include "nvim/vim.h"
#ifdef REGEXP_DEBUG
@@ -41,21 +52,17 @@
# define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log"
#endif
-/*
- * Magic characters have a special meaning, they don't match literally.
- * Magic characters are negative. This separates them from literal characters
- * (possibly multi-byte). Only ASCII characters can be Magic.
- */
+// Magic characters have a special meaning, they don't match literally.
+// Magic characters are negative. This separates them from literal characters
+// (possibly multi-byte). Only ASCII characters can be Magic.
#define Magic(x) ((int)(x) - 256)
#define un_Magic(x) ((x) + 256)
#define is_Magic(x) ((x) < 0)
-/*
- * We should define ftpr as a pointer to a function returning a pointer to
- * a function returning a pointer to a function ...
- * This is impossible, so we declare a pointer to a function returning a
- * pointer to a function returning void. This should work for all compilers.
- */
+// We should define ftpr as a pointer to a function returning a pointer to
+// a function returning a pointer to a function ...
+// This is impossible, so we declare a pointer to a function returning a
+// pointer to a function returning void. This should work for all compilers.
typedef void (*(*fptr_T)(int *, int))(void);
static int no_Magic(int x)
@@ -80,7 +87,7 @@ static int toggle_Magic(int x)
#define REGMAGIC 0234
// Utility definitions.
-#define UCHARAT(p) ((int)(*(char_u *)(p)))
+#define UCHARAT(p) ((int)(*(uint8_t *)(p)))
// Used for an error (down from) vim_regcomp(): give the error message, set
// rc_did_emsg and return NULL
@@ -97,20 +104,24 @@ static int toggle_Magic(int x)
#define MAX_LIMIT (32767L << 16L)
-static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
-static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
-static char_u e_large_class[] = N_("E945: Range too large in character class");
-static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
-static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
-static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
-static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
-static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
-static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
-static char_u e_empty_sb[] = N_("E70: Empty %s%%[]");
-static char_u e_recursive[] = N_("E956: Cannot use pattern recursively");
-static char_u e_regexp_number_after_dot_pos_search[]
+static char e_missingbracket[] = N_("E769: Missing ] after %s[");
+static char e_reverse_range[] = N_("E944: Reverse range in character class");
+static char e_large_class[] = N_("E945: Range too large in character class");
+static char e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
+static char e_unmatchedp[] = N_("E54: Unmatched %s(");
+static char e_unmatchedpar[] = N_("E55: Unmatched %s)");
+static char e_z_not_allowed[] = N_("E66: \\z( not allowed here");
+static char e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here");
+static char e_missing_sb[] = N_("E69: Missing ] after %s%%[");
+static char e_empty_sb[] = N_("E70: Empty %s%%[]");
+static char e_recursive[] = N_("E956: Cannot use pattern recursively");
+static char e_regexp_number_after_dot_pos_search_chr[]
= N_("E1204: No Number allowed after .: '\\%%%c'");
-static char_u e_substitute_nesting_too_deep[] = N_("E1290: substitute nesting too deep");
+static char e_nfa_regexp_missing_value_in_chr[]
+ = N_("E1273: (NFA regexp) missing value in '\\%%%c'");
+static char e_atom_engine_must_be_at_start_of_pattern[]
+ = N_("E1281: Atom '\\%%#=%c' must be at the start of the pattern");
+static char e_substitute_nesting_too_deep[] = N_("E1290: substitute nesting too deep");
#define NOT_MULTI 0
#define MULTI_ONE 1
@@ -137,30 +148,26 @@ static int re_multi_type(int c)
return NOT_MULTI;
}
-static char_u *reg_prev_sub = NULL;
-
-/*
- * REGEXP_INRANGE contains all characters which are always special in a []
- * range after '\'.
- * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
- * These are:
- * \n - New line (NL).
- * \r - Carriage Return (CR).
- * \t - Tab (TAB).
- * \e - Escape (ESC).
- * \b - Backspace (Ctrl_H).
- * \d - Character code in decimal, eg \d123
- * \o - Character code in octal, eg \o80
- * \x - Character code in hex, eg \x4a
- * \u - Multibyte character code, eg \u20ac
- * \U - Long multibyte character code, eg \U12345678
- */
+static char *reg_prev_sub = NULL;
+
+// REGEXP_INRANGE contains all characters which are always special in a []
+// range after '\'.
+// REGEXP_ABBR contains all characters which act as abbreviations after '\'.
+// These are:
+// \n - New line (NL).
+// \r - Carriage Return (CR).
+// \t - Tab (TAB).
+// \e - Escape (ESC).
+// \b - Backspace (Ctrl_H).
+// \d - Character code in decimal, eg \d123
+// \o - Character code in octal, eg \o80
+// \x - Character code in hex, eg \x4a
+// \u - Multibyte character code, eg \u20ac
+// \U - Long multibyte character code, eg \U12345678
static char REGEXP_INRANGE[] = "]^-n\\";
static char REGEXP_ABBR[] = "nrtebdoxuU";
-/*
- * Translate '\x' to its control character, except "\n", which is Magic.
- */
+// Translate '\x' to its control character, except "\n", which is Magic.
static int backslash_trans(int c)
{
switch (c) {
@@ -181,8 +188,7 @@ static int backslash_trans(int c)
/// recognized. Otherwise "pp" is advanced to after the item.
static int get_char_class(char **pp)
{
- static const char *(class_names[]) =
- {
+ static const char *(class_names[]) = {
"alnum:]",
#define CLASS_ALNUM 0
"alpha:]",
@@ -227,8 +233,8 @@ static int get_char_class(char **pp)
if ((*pp)[1] == ':') {
for (i = 0; i < (int)ARRAY_SIZE(class_names); i++) {
- if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0) {
- *pp += STRLEN(class_names[i]) + 2;
+ if (strncmp(*pp + 2, class_names[i], strlen(class_names[i])) == 0) {
+ *pp += strlen(class_names[i]) + 2;
return i;
}
}
@@ -236,11 +242,9 @@ static int get_char_class(char **pp)
return CLASS_NONE;
}
-/*
- * Specific version of character class functions.
- * Using a table to keep this fast.
- */
-static short class_tab[256];
+// Specific version of character class functions.
+// Using a table to keep this fast.
+static int16_t class_tab[256];
#define RI_DIGIT 0x01
#define RI_HEX 0x02
@@ -312,24 +316,18 @@ static int re_has_z; ///< \z item detected
static unsigned regflags; ///< RF_ flags for prog
static int had_eol; ///< true when EOL found by vim_regcomp()
-static int reg_magic; // magicness of the pattern:
-#define MAGIC_NONE 1 // "\V" very unmagic
-#define MAGIC_OFF 2 // "\M" or 'magic' off
-#define MAGIC_ON 3 // "\m" or 'magic'
-#define MAGIC_ALL 4 // "\v" very magic
+static magic_T reg_magic; ///< magicness of the pattern
static int reg_string; // matching with a string instead of a buffer
// line
static int reg_strict; // "[abc" is illegal
-/*
- * META contains all characters that may be magic, except '^' and '$'.
- */
+// META contains all characters that may be magic, except '^' and '$'.
// uncrustify:off
// META[] is used often enough to justify turning it into a table.
-static char_u META_flags[] = {
+static uint8_t META_flags[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// % & ( ) * + .
@@ -363,7 +361,7 @@ static int nextchr; // used for ungetchr()
#define REG_NPAREN 3 // \%(\)
typedef struct {
- char_u *regparse;
+ char *regparse;
int prevchr_len;
int curchr;
int prevchr;
@@ -388,21 +386,19 @@ int re_multiline(const regprog_T *prog)
return prog->regflags & RF_HASNL;
}
-/*
- * Check for an equivalence class name "[=a=]". "pp" points to the '['.
- * Returns a character representing the class. Zero means that no item was
- * recognized. Otherwise "pp" is advanced to after the item.
- */
+// Check for an equivalence class name "[=a=]". "pp" points to the '['.
+// Returns a character representing the class. Zero means that no item was
+// recognized. Otherwise "pp" is advanced to after the item.
static int get_equi_class(char **pp)
{
int c;
int l = 1;
- char_u *p = (char_u *)(*pp);
+ char *p = *pp;
if (p[1] == '=' && p[2] != NUL) {
- l = utfc_ptr2len((char *)p + 2);
+ l = utfc_ptr2len(p + 2);
if (p[l + 2] == '=' && p[l + 3] == ']') {
- c = utf_ptr2char((char *)p + 2);
+ c = utf_ptr2char(p + 2);
*pp += l + 4;
return c;
}
@@ -410,22 +406,20 @@ static int get_equi_class(char **pp)
return 0;
}
-/*
- * Check for a collating element "[.a.]". "pp" points to the '['.
- * Returns a character. Zero means that no item was recognized. Otherwise
- * "pp" is advanced to after the item.
- * Currently only single characters are recognized!
- */
+// Check for a collating element "[.a.]". "pp" points to the '['.
+// Returns a character. Zero means that no item was recognized. Otherwise
+// "pp" is advanced to after the item.
+// Currently only single characters are recognized!
static int get_coll_element(char **pp)
{
int c;
int l = 1;
- char_u *p = (char_u *)(*pp);
+ char *p = *pp;
if (p[0] != NUL && p[1] == '.' && p[2] != NUL) {
- l = utfc_ptr2len((char *)p + 2);
+ l = utfc_ptr2len(p + 2);
if (p[l + 2] == '.' && p[l + 3] == ']') {
- c = utf_ptr2char((char *)p + 2);
+ c = utf_ptr2char(p + 2);
*pp += l + 4;
return c;
}
@@ -443,7 +437,7 @@ static void get_cpo_flags(void)
/// Skip over a "[]" range.
/// "p" must point to the character after the '['.
/// The returned pointer is on the matching ']', or the terminating NUL.
-static char_u *skip_anyof(char *p)
+static char *skip_anyof(char *p)
{
int l;
@@ -462,9 +456,9 @@ static char_u *skip_anyof(char *p)
MB_PTR_ADV(p);
}
} else if (*p == '\\'
- && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
+ && (vim_strchr(REGEXP_INRANGE, (uint8_t)p[1]) != NULL
|| (!reg_cpo_lit
- && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) {
+ && vim_strchr(REGEXP_ABBR, (uint8_t)p[1]) != NULL))) {
p += 2;
} else if (*p == '[') {
if (get_char_class(&p) == CLASS_NONE
@@ -478,20 +472,42 @@ static char_u *skip_anyof(char *p)
}
}
- return (char_u *)p;
+ return p;
}
/// Skip past regular expression.
-/// Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
+/// Stop at end of "startp" or where "delim" is found ('/', '?', etc).
/// Take care of characters with a backslash in front of it.
/// Skip strings inside [ and ].
+char *skip_regexp(char *startp, int delim, int magic)
+{
+ return skip_regexp_ex(startp, delim, magic, NULL, NULL, NULL);
+}
+
+/// Call skip_regexp() and when the delimiter does not match give an error and
+/// return NULL.
+char *skip_regexp_err(char *startp, int delim, int magic)
+{
+ char *p = skip_regexp(startp, delim, magic);
+
+ if (*p != delim) {
+ semsg(_("E654: missing delimiter after search pattern: %s"), startp);
+ return NULL;
+ }
+ return p;
+}
+
+/// skip_regexp() with extra arguments:
/// When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
/// expression and change "\?" to "?". If "*newp" is not NULL the expression
/// is changed in-place.
-char_u *skip_regexp(char_u *startp, int dirc, int magic, char **newp)
+/// If a "\?" is changed to "?" then "dropped" is incremented, unless NULL.
+/// If "magic_val" is not NULL, returns the effective magicness of the pattern
+char *skip_regexp_ex(char *startp, int dirc, int magic, char **newp, int *dropped,
+ magic_T *magic_val)
{
- int mymagic;
- char_u *p = startp;
+ magic_T mymagic;
+ char *p = startp;
if (magic) {
mymagic = MAGIC_ON;
@@ -506,7 +522,7 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char **newp)
}
if ((p[0] == '[' && mymagic >= MAGIC_ON)
|| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF)) {
- p = skip_anyof((char *)p + 1);
+ p = skip_anyof(p + 1);
if (p[0] == NUL) {
break;
}
@@ -514,8 +530,11 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char **newp)
if (dirc == '?' && newp != NULL && p[1] == '?') {
// change "\?" to "?", make a copy first.
if (*newp == NULL) {
- *newp = (char *)vim_strsave(startp);
- p = (char_u *)(*newp) + (p - startp);
+ *newp = xstrdup(startp);
+ p = *newp + (p - startp);
+ }
+ if (dropped != NULL) {
+ (*dropped)++;
}
STRMOVE(p, p + 1);
} else {
@@ -528,6 +547,9 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char **newp)
}
}
}
+ if (magic_val != NULL) {
+ *magic_val = mymagic;
+ }
return p;
}
@@ -536,25 +558,21 @@ static int prevchr_len; // byte length of previous char
static int at_start; // True when on the first character
static int prev_at_start; // True when on the second character
-/*
- * Start parsing at "str".
- */
-static void initchr(char_u *str)
+// Start parsing at "str".
+static void initchr(char *str)
{
- regparse = (char *)str;
+ regparse = str;
prevchr_len = 0;
curchr = prevprevchr = prevchr = nextchr = -1;
at_start = true;
prev_at_start = false;
}
-/*
- * Save the current parse state, so that it can be restored and parsing
- * starts in the same state again.
- */
+// Save the current parse state, so that it can be restored and parsing
+// starts in the same state again.
static void save_parse_state(parse_state_T *ps)
{
- ps->regparse = (char_u *)regparse;
+ ps->regparse = regparse;
ps->prevchr_len = prevchr_len;
ps->curchr = curchr;
ps->prevchr = prevchr;
@@ -565,12 +583,10 @@ static void save_parse_state(parse_state_T *ps)
ps->regnpar = regnpar;
}
-/*
- * Restore a previously saved parse state.
- */
+// Restore a previously saved parse state.
static void restore_parse_state(parse_state_T *ps)
{
- regparse = (char *)ps->regparse;
+ regparse = ps->regparse;
prevchr_len = ps->prevchr_len;
curchr = ps->curchr;
prevchr = ps->prevchr;
@@ -581,9 +597,7 @@ static void restore_parse_state(parse_state_T *ps)
regnpar = ps->regnpar;
}
-/*
- * Get the next character without advancing.
- */
+// Get the next character without advancing.
static int peekchr(void)
{
static int after_slash = false;
@@ -663,7 +677,7 @@ static int peekchr(void)
// '$' is only magic as the very last char and if it's in front of
// either "\|", "\)", "\&", or "\n"
if (reg_magic >= MAGIC_OFF) {
- char_u *p = (char_u *)regparse + 1;
+ uint8_t *p = (uint8_t *)regparse + 1;
bool is_magic_all = (reg_magic == MAGIC_ALL);
// ignore \c \C \m \M \v \V and \Z after '$'
@@ -710,9 +724,7 @@ static int peekchr(void)
after_slash--;
curchr = toggle_Magic(curchr);
} else if (vim_strchr(REGEXP_ABBR, c)) {
- /*
- * Handle abbreviations, like "\t" for TAB -- webb
- */
+ // Handle abbreviations, like "\t" for TAB -- webb
curchr = backslash_trans(c);
} else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) {
curchr = toggle_Magic(c);
@@ -731,9 +743,7 @@ static int peekchr(void)
return curchr;
}
-/*
- * Eat one lexed character. Do this in a way that we can undo it.
- */
+// Eat one lexed character. Do this in a way that we can undo it.
static void skipchr(void)
{
// peekchr() eats a backslash, do the same here
@@ -755,10 +765,8 @@ static void skipchr(void)
nextchr = -1;
}
-/*
- * Skip a character while keeping the value of prev_at_start for at_start.
- * prevchr and prevprevchr are also kept.
- */
+// Skip a character while keeping the value of prev_at_start for at_start.
+// prevchr and prevprevchr are also kept.
static void skipchr_keepstart(void)
{
int as = prev_at_start;
@@ -771,10 +779,8 @@ static void skipchr_keepstart(void)
prevprevchr = prpr;
}
-/*
- * Get the next character from the pattern. We know about magic and such, so
- * therefore we need a lexical analyzer.
- */
+// Get the next character from the pattern. We know about magic and such, so
+// therefore we need a lexical analyzer.
static int getchr(void)
{
int chr = peekchr();
@@ -783,9 +789,7 @@ static int getchr(void)
return chr;
}
-/*
- * put character back. Works only once!
- */
+// put character back. Works only once!
static void ungetchr(void)
{
nextchr = curchr;
@@ -799,15 +803,13 @@ static void ungetchr(void)
regparse -= prevchr_len;
}
-/*
- * Get and return the value of the hex string at the current position.
- * Return -1 if there is no valid hex number.
- * The position is updated:
- * blahblah\%x20asdf
- * before-^ ^-after
- * The parameter controls the maximum number of input characters. This will be
- * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
- */
+// Get and return the value of the hex string at the current position.
+// Return -1 if there is no valid hex number.
+// The position is updated:
+// blahblah\%x20asdf
+// before-^ ^-after
+// The parameter controls the maximum number of input characters. This will be
+// 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
static int64_t gethexchrs(int maxinputlen)
{
int64_t nr = 0;
@@ -830,10 +832,8 @@ static int64_t gethexchrs(int maxinputlen)
return nr;
}
-/*
- * Get and return the value of the decimal string immediately after the
- * current position. Return -1 for invalid. Consumes all digits.
- */
+// Get and return the value of the decimal string immediately after the
+// current position. Return -1 for invalid. Consumes all digits.
static int64_t getdecchrs(void)
{
int64_t nr = 0;
@@ -857,14 +857,12 @@ static int64_t getdecchrs(void)
return nr;
}
-/*
- * get and return the value of the octal string immediately after the current
- * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
- * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
- * treat 8 or 9 as recognised characters. Position is updated:
- * blahblah\%o210asdf
- * before-^ ^-after
- */
+// get and return the value of the octal string immediately after the current
+// position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
+// numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
+// treat 8 or 9 as recognised characters. Position is updated:
+// blahblah\%o210asdf
+// before-^ ^-after
static int64_t getoctchrs(void)
{
int64_t nr = 0;
@@ -887,16 +885,14 @@ static int64_t getoctchrs(void)
return nr;
}
-/*
- * read_limits - Read two integers to be taken as a minimum and maximum.
- * If the first character is '-', then the range is reversed.
- * Should end with 'end'. If minval is missing, zero is default, if maxval is
- * missing, a very big number is the default.
- */
+// read_limits - Read two integers to be taken as a minimum and maximum.
+// If the first character is '-', then the range is reversed.
+// Should end with 'end'. If minval is missing, zero is default, if maxval is
+// missing, a very big number is the default.
static int read_limits(long *minval, long *maxval)
{
int reverse = false;
- char_u *first_char;
+ char *first_char;
long tmp;
if (*regparse == '-') {
@@ -904,7 +900,7 @@ static int read_limits(long *minval, long *maxval)
regparse++;
reverse = true;
}
- first_char = (char_u *)regparse;
+ first_char = regparse;
*minval = getdigits_long(&regparse, false, 0);
if (*regparse == ',') { // There is a comma.
if (ascii_isdigit(*++regparse)) {
@@ -924,10 +920,8 @@ static int read_limits(long *minval, long *maxval)
EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL);
}
- /*
- * Reverse the range if there was a '-', or make sure it is in the right
- * order otherwise.
- */
+ // Reverse the range if there was a '-', or make sure it is in the right
+ // order otherwise.
if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) {
tmp = *minval;
*minval = *maxval;
@@ -937,18 +931,14 @@ static int read_limits(long *minval, long *maxval)
return OK;
}
-/*
- * vim_regexec and friends
- */
+// vim_regexec and friends
-/*
- * Global work variables for vim_regexec().
- */
+// Global work variables for vim_regexec().
// Sometimes need to save a copy of a line. Since alloc()/free() is very
// slow, we keep one allocated piece of memory and only re-allocate it when
// it's too small. It's freed in bt_regexec_both() when finished.
-static char_u *reg_tofree = NULL;
+static uint8_t *reg_tofree = NULL;
static unsigned reg_tofreelen;
// Structure used to store the execution state of the regex engine.
@@ -969,10 +959,12 @@ static unsigned reg_tofreelen;
typedef struct {
regmatch_T *reg_match;
regmmatch_T *reg_mmatch;
- char_u **reg_startp;
- char_u **reg_endp;
+
+ uint8_t **reg_startp;
+ uint8_t **reg_endp;
lpos_T *reg_startpos;
lpos_T *reg_endpos;
+
win_T *reg_win;
buf_T *reg_buf;
linenr_T reg_firstlnum;
@@ -981,8 +973,8 @@ typedef struct {
// The current match-position is remembered with these variables:
linenr_T lnum; ///< line number, relative to first line
- char_u *line; ///< start of current line
- char_u *input; ///< current input, points into "line"
+ uint8_t *line; ///< start of current line
+ uint8_t *input; ///< current input, points into "line"
int need_clear_subexpr; ///< subexpressions still need to be cleared
int need_clear_zsubexpr; ///< extmatch subexpressions still need to be
@@ -1026,10 +1018,8 @@ static bool reg_iswordc(int c)
return vim_iswordc_buf(c, rex.reg_buf);
}
-/*
- * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
- */
-static char_u *reg_getline(linenr_T lnum)
+// Get pointer to the line "lnum", which is relative to "reg_firstlnum".
+static char *reg_getline(linenr_T lnum)
{
// when looking behind for a match/no-match lnum is negative. But we
// can't go before line 1
@@ -1038,22 +1028,20 @@ static char_u *reg_getline(linenr_T lnum)
}
if (lnum > rex.reg_maxline) {
// Must have matched the "\n" in the last line.
- return (char_u *)"";
+ return "";
}
return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false);
}
-static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning
-static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches
+static uint8_t *reg_startzp[NSUBEXP]; // Workspace to mark beginning
+static uint8_t *reg_endzp[NSUBEXP]; // and end of \z(...\) matches
static lpos_T reg_startzpos[NSUBEXP]; // idem, beginning pos
static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos
// true if using multi-line regexp.
#define REG_MULTI (rex.reg_match == NULL)
-/*
- * Create a new extmatch and mark it as referenced once.
- */
+// Create a new extmatch and mark it as referenced once.
static reg_extmatch_T *make_extmatch(void)
FUNC_ATTR_NONNULL_RET
{
@@ -1062,9 +1050,7 @@ static reg_extmatch_T *make_extmatch(void)
return em;
}
-/*
- * Add a reference to an extmatch.
- */
+// Add a reference to an extmatch.
reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
{
if (em != NULL) {
@@ -1073,10 +1059,8 @@ reg_extmatch_T *ref_extmatch(reg_extmatch_T *em)
return em;
}
-/*
- * Remove a reference to an extmatch. If there are no references left, free
- * the info.
- */
+// Remove a reference to an extmatch. If there are no references left, free
+// the info.
void unref_extmatch(reg_extmatch_T *em)
{
int i;
@@ -1093,7 +1077,8 @@ void unref_extmatch(reg_extmatch_T *em)
static int reg_prev_class(void)
{
if (rex.input > rex.line) {
- return mb_get_class_tab(rex.input - 1 - utf_head_off(rex.line, rex.input - 1),
+ return mb_get_class_tab((char *)rex.input - 1 -
+ utf_head_off((char *)rex.line, (char *)rex.input - 1),
rex.reg_buf->b_chartab);
}
return -1;
@@ -1111,8 +1096,8 @@ static bool reg_match_visual(void)
colnr_T start2, end2;
colnr_T curswant;
- // Check if the buffer is the current buffer.
- if (rex.reg_buf != curbuf || VIsual.lnum == 0) {
+ // Check if the buffer is the current buffer and not using a string.
+ if (rex.reg_buf != curbuf || VIsual.lnum == 0 || !REG_MULTI) {
return false;
}
@@ -1162,10 +1147,10 @@ static bool reg_match_visual(void)
}
// getvvcol() flushes rex.line, need to get it again
- rex.line = reg_getline(rex.lnum);
+ rex.line = (uint8_t *)reg_getline(rex.lnum);
rex.input = rex.line + col;
- unsigned int cols_u = win_linetabsize(wp, rex.line, col);
+ unsigned int cols_u = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, (char *)rex.line, col);
assert(cols_u <= MAXCOL);
colnr_T cols = (colnr_T)cols_u;
if (cols < start || cols > end - (*p_sel == 'e')) {
@@ -1175,10 +1160,8 @@ static bool reg_match_visual(void)
return true;
}
-/*
- * Check the regexp program for its magic number.
- * Return true if it's wrong.
- */
+// Check the regexp program for its magic number.
+// Return true if it's wrong.
static int prog_magic_wrong(void)
{
regprog_T *prog;
@@ -1196,62 +1179,62 @@ static int prog_magic_wrong(void)
return false;
}
-/*
- * Cleanup the subexpressions, if this wasn't done yet.
- * This construction is used to clear the subexpressions only when they are
- * used (to increase speed).
- */
+// Cleanup the subexpressions, if this wasn't done yet.
+// This construction is used to clear the subexpressions only when they are
+// used (to increase speed).
static void cleanup_subexpr(void)
{
- if (rex.need_clear_subexpr) {
- if (REG_MULTI) {
- // Use 0xff to set lnum to -1
- memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- } else {
- memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
- memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
- }
- rex.need_clear_subexpr = false;
+ if (!rex.need_clear_subexpr) {
+ return;
+ }
+
+ if (REG_MULTI) {
+ // Use 0xff to set lnum to -1
+ memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ } else {
+ memset(rex.reg_startp, 0, sizeof(char *) * NSUBEXP);
+ memset(rex.reg_endp, 0, sizeof(char *) * NSUBEXP);
}
+ rex.need_clear_subexpr = false;
}
static void cleanup_zsubexpr(void)
{
- if (rex.need_clear_zsubexpr) {
- if (REG_MULTI) {
- // Use 0xff to set lnum to -1
- memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- } else {
- memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
- memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
- }
- rex.need_clear_zsubexpr = false;
+ if (!rex.need_clear_zsubexpr) {
+ return;
+ }
+
+ if (REG_MULTI) {
+ // Use 0xff to set lnum to -1
+ memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ } else {
+ memset(reg_startzp, 0, sizeof(char *) * NSUBEXP);
+ memset(reg_endzp, 0, sizeof(char *) * NSUBEXP);
}
+ rex.need_clear_zsubexpr = false;
}
// Advance rex.lnum, rex.line and rex.input to the next line.
static void reg_nextline(void)
{
- rex.line = reg_getline(++rex.lnum);
+ rex.line = (uint8_t *)reg_getline(++rex.lnum);
rex.input = rex.line;
fast_breakcheck();
}
-/*
- * Check whether a backreference matches.
- * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
- * If "bytelen" is not NULL, it is set to the byte length of the match in the
- * last line.
- */
+// Check whether a backreference matches.
+// Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
+// If "bytelen" is not NULL, it is set to the byte length of the match in the
+// last line.
static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum,
colnr_T end_col, int *bytelen)
{
linenr_T clnum = start_lnum;
colnr_T ccol = start_col;
int len;
- char_u *p;
+ char *p;
if (bytelen != NULL) {
*bytelen = 0;
@@ -1260,7 +1243,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e
// Since getting one line may invalidate the other, need to make copy.
// Slow!
if (rex.line != reg_tofree) {
- len = (int)STRLEN(rex.line);
+ len = (int)strlen((char *)rex.line);
if (reg_tofree == NULL || len >= (int)reg_tofreelen) {
len += 50; // get some extra
xfree(reg_tofree);
@@ -1279,10 +1262,10 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e
if (clnum == end_lnum) {
len = end_col - ccol;
} else {
- len = (int)STRLEN(p + ccol);
+ len = (int)strlen(p + ccol);
}
- if (cstrncmp(p + ccol, rex.input, &len) != 0) {
+ if (cstrncmp(p + ccol, (char *)rex.input, &len) != 0) {
return RA_NOMATCH; // doesn't match
}
if (bytelen != NULL) {
@@ -1328,8 +1311,7 @@ typedef struct {
} decomp_T;
// 0xfb20 - 0xfb4f
-static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] =
-{
+static decomp_T decomp_table[0xfb4f - 0xfb20 + 1] = {
{ 0x5e2, 0, 0 }, // 0xfb20 alt ayin
{ 0x5d0, 0, 0 }, // 0xfb21 alt alef
{ 0x5d3, 0, 0 }, // 0xfb22 alt dalet
@@ -1395,23 +1377,23 @@ static void mb_decompose(int c, int *c1, int *c2, int *c3)
}
}
-// Compare two strings, ignore case if rex.reg_ic set.
-// Return 0 if strings match, non-zero otherwise.
-// Correct the length "*n" when composing characters are ignored.
-static int cstrncmp(char_u *s1, char_u *s2, int *n)
+/// Compare two strings, ignore case if rex.reg_ic set.
+/// Return 0 if strings match, non-zero otherwise.
+/// Correct the length "*n" when composing characters are ignored.
+static int cstrncmp(char *s1, char *s2, int *n)
{
int result;
if (!rex.reg_ic) {
- result = STRNCMP(s1, s2, *n);
+ result = strncmp(s1, s2, (size_t)(*n));
} else {
assert(*n >= 0);
- result = mb_strnicmp(s1, s2, (size_t)*n);
+ result = mb_strnicmp(s1, s2, (size_t)(*n));
}
// if it failed and it's utf8 and we want to combineignore:
if (result != 0 && rex.reg_icombine) {
- char_u *str1, *str2;
+ char *str1, *str2;
int c1, c2, c11, c12;
int junk;
@@ -1421,12 +1403,12 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n)
str2 = s2;
c1 = c2 = 0;
while ((int)(str1 - s1) < *n) {
- c1 = mb_ptr2char_adv((const char_u **)&str1);
- c2 = mb_ptr2char_adv((const char_u **)&str2);
+ c1 = mb_ptr2char_adv((const char **)&str1);
+ c2 = mb_ptr2char_adv((const char **)&str2);
- /* decompose the character if necessary, into 'base' characters
- * because I don't care about Arabic, I will hard-code the Hebrew
- * which I *do* care about! So sue me... */
+ // decompose the character if necessary, into 'base' characters
+ // because I don't care about Arabic, I will hard-code the Hebrew
+ // which I *do* care about! So sue me...
if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) {
// decomposition necessary?
mb_decompose(c1, &c11, &junk, &junk);
@@ -1456,21 +1438,21 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n)
/// @param c character to find in @a s
///
/// @return NULL if no match, otherwise pointer to the position in @a s
-static inline char_u *cstrchr(const char_u *const s, const int c)
+static inline char *cstrchr(const char *const s, const int c)
FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
FUNC_ATTR_ALWAYS_INLINE
{
if (!rex.reg_ic) {
- return (char_u *)vim_strchr((char *)s, c);
+ return vim_strchr(s, c);
}
// Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is
// expected to be highly optimized.
if (c > 0x80) {
const int folded_c = utf_fold(c);
- for (const char_u *p = s; *p != NUL; p += utfc_ptr2len((char *)p)) {
- if (utf_fold(utf_ptr2char((char *)p)) == folded_c) {
- return (char_u *)p;
+ for (const char *p = s; *p != NUL; p += utfc_ptr2len(p)) {
+ if (utf_fold(utf_ptr2char(p)) == folded_c) {
+ return (char *)p;
}
}
return NULL;
@@ -1482,11 +1464,11 @@ static inline char_u *cstrchr(const char_u *const s, const int c)
} else if (ASCII_ISLOWER(c)) {
cc = TOUPPER_ASC(c);
} else {
- return (char_u *)vim_strchr((char *)s, c);
+ return vim_strchr(s, c);
}
char tofind[] = { (char)c, (char)cc, NUL };
- return (char_u *)strpbrk((const char *)s, tofind);
+ return strpbrk(s, tofind);
}
////////////////////////////////////////////////////////////////
@@ -1523,32 +1505,30 @@ static fptr_T do_Lower(int *d, int c)
return (fptr_T)do_Lower;
}
-/*
- * regtilde(): Replace tildes in the pattern by the old pattern.
- *
- * Short explanation of the tilde: It stands for the previous replacement
- * pattern. If that previous pattern also contains a ~ we should go back a
- * step further... But we insert the previous pattern into the current one
- * and remember that.
- * This still does not handle the case where "magic" changes. So require the
- * user to keep his hands off of "magic".
- *
- * The tildes are parsed once before the first call to vim_regsub().
- */
-char_u *regtilde(char_u *source, int magic, bool preview)
-{
- char_u *newsub = source;
- char_u *tmpsub;
- char_u *p;
+/// regtilde(): Replace tildes in the pattern by the old pattern.
+///
+/// Short explanation of the tilde: It stands for the previous replacement
+/// pattern. If that previous pattern also contains a ~ we should go back a
+/// step further... But we insert the previous pattern into the current one
+/// and remember that.
+/// This still does not handle the case where "magic" changes. So require the
+/// user to keep his hands off of "magic".
+///
+/// The tildes are parsed once before the first call to vim_regsub().
+char *regtilde(char *source, int magic, bool preview)
+{
+ char *newsub = source;
+ char *tmpsub;
+ char *p;
int len;
int prevlen;
- for (p = newsub; *p; ++p) {
+ for (p = newsub; *p; p++) {
if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) {
if (reg_prev_sub != NULL) {
// length = len(newsub) - 1 + len(prev_sub) + 1
- prevlen = (int)STRLEN(reg_prev_sub);
- tmpsub = xmalloc(STRLEN(newsub) + (size_t)prevlen);
+ prevlen = (int)strlen(reg_prev_sub);
+ tmpsub = xmalloc(strlen(newsub) + (size_t)prevlen);
// copy prefix
len = (int)(p - newsub); // not including ~
memmove(tmpsub, newsub, (size_t)len);
@@ -1575,7 +1555,7 @@ char_u *regtilde(char_u *source, int magic, bool preview)
if (*p == '\\' && p[1]) { // skip escaped characters
p++;
}
- p += utfc_ptr2len((char *)p) - 1;
+ p += utfc_ptr2len(p) - 1;
}
}
@@ -1584,7 +1564,7 @@ char_u *regtilde(char_u *source, int magic, bool preview)
// Store a copy of newsub in reg_prev_sub. It is always allocated,
// because recursive calls may make the returned string invalid.
xfree(reg_prev_sub);
- reg_prev_sub = vim_strsave(newsub);
+ reg_prev_sub = xstrdup(newsub);
}
return newsub;
@@ -1607,12 +1587,12 @@ static regsubmatch_T rsm; // can only be used when can_f_submatch is true
/// Put the submatches in "argv[argskip]" which is a list passed into
/// call_func() by vim_regsub_both().
-static int fill_submatch_list(int argc FUNC_ATTR_UNUSED, typval_T *argv, int argskip, int argcount)
+static int fill_submatch_list(int argc FUNC_ATTR_UNUSED, typval_T *argv, int argskip, ufunc_T *fp)
FUNC_ATTR_NONNULL_ALL
{
typval_T *listarg = argv + argskip;
- if (argcount == argskip) {
+ if (!fp->uf_varargs && fp->uf_args.ga_len <= argskip) {
// called function doesn't take a submatches argument
return argskip;
}
@@ -1623,14 +1603,14 @@ static int fill_submatch_list(int argc FUNC_ATTR_UNUSED, typval_T *argv, int arg
// There are always 10 list items in staticList10_T.
listitem_T *li = tv_list_first(listarg->vval.v_list);
for (int i = 0; i < 10; i++) {
- char_u *s = rsm.sm_match->startp[i];
+ char *s = rsm.sm_match->startp[i];
if (s == NULL || rsm.sm_match->endp[i] == NULL) {
s = NULL;
} else {
- s = vim_strnsave(s, (size_t)(rsm.sm_match->endp[i] - s));
+ s = xstrnsave(s, (size_t)(rsm.sm_match->endp[i] - s));
}
TV_LIST_ITEM_TV(li)->v_type = VAR_STRING;
- TV_LIST_ITEM_TV(li)->vval.v_string = (char *)s;
+ TV_LIST_ITEM_TV(li)->vval.v_string = s;
li = TV_LIST_ITEM_NEXT(argv->vval.v_list, li);
}
return argskip + 1;
@@ -1660,8 +1640,7 @@ static void clear_submatch_list(staticList10_T *sl)
/// references invalid!
///
/// Returns the size of the replacement, including terminating NUL.
-int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, int destlen,
- int flags)
+int vim_regsub(regmatch_T *rmp, char *source, typval_T *expr, char *dest, int destlen, int flags)
{
regexec_T rex_save;
bool rex_in_use_save = rex_in_use;
@@ -1687,7 +1666,7 @@ int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, in
return result;
}
-int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int destlen,
+int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char *source, char *dest, int destlen,
int flags)
{
regexec_T rex_save;
@@ -1717,7 +1696,7 @@ int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *de
// When nesting more than a couple levels it's probably a mistake.
#define MAX_REGSUB_NESTING 4
-static char_u *eval_result[MAX_REGSUB_NESTING] = { NULL, NULL, NULL, NULL };
+static char *eval_result[MAX_REGSUB_NESTING] = { NULL, NULL, NULL, NULL };
#if defined(EXITFREE)
void free_resub_eval_result(void)
@@ -1728,11 +1707,11 @@ void free_resub_eval_result(void)
}
#endif
-static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags)
+static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen, int flags)
{
- char_u *src;
- char_u *dst;
- char_u *s;
+ char *src;
+ char *dst;
+ char *s;
int c;
int cc;
int no = -1;
@@ -1769,7 +1748,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
if (copy) {
if (eval_result[nested] != NULL) {
STRCPY(dest, eval_result[nested]);
- dst += STRLEN(eval_result[nested]);
+ dst += strlen(eval_result[nested]);
XFREE_CLEAR(eval_result[nested]);
}
} else {
@@ -1805,17 +1784,17 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
argv[0].v_type = VAR_LIST;
argv[0].vval.v_list = &matchList.sl_list;
funcexe_T funcexe = FUNCEXE_INIT;
- funcexe.argv_func = fill_submatch_list;
- funcexe.evaluate = true;
+ funcexe.fe_argv_func = fill_submatch_list;
+ funcexe.fe_evaluate = true;
if (expr->v_type == VAR_FUNC) {
- s = (char_u *)expr->vval.v_string;
- call_func((char *)s, -1, &rettv, 1, argv, &funcexe);
+ s = expr->vval.v_string;
+ call_func(s, -1, &rettv, 1, argv, &funcexe);
} else if (expr->v_type == VAR_PARTIAL) {
partial_T *partial = expr->vval.v_partial;
- s = (char_u *)partial_name(partial);
- funcexe.partial = partial;
- call_func((char *)s, -1, &rettv, 1, argv, &funcexe);
+ s = partial_name(partial);
+ funcexe.fe_partial = partial;
+ call_func(s, -1, &rettv, 1, argv, &funcexe);
}
if (tv_list_len(&matchList.sl_list) > 0) {
// fill_submatch_list() was called.
@@ -1826,14 +1805,14 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
eval_result[nested] = NULL;
} else {
char buf[NUMBUFLEN];
- eval_result[nested] = (char_u *)tv_get_string_buf_chk(&rettv, buf);
+ eval_result[nested] = (char *)tv_get_string_buf_chk(&rettv, buf);
if (eval_result[nested] != NULL) {
- eval_result[nested] = vim_strsave(eval_result[nested]);
+ eval_result[nested] = xstrdup(eval_result[nested]);
}
}
tv_clear(&rettv);
} else {
- eval_result[nested] = (char_u *)eval_to_string((char *)source + 2, NULL, true);
+ eval_result[nested] = eval_to_string(source + 2, NULL, true);
}
nesting--;
@@ -1848,12 +1827,11 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
*s = CAR;
} else if (*s == '\\' && s[1] != NUL) {
s++;
- /* Change NL to CR here too, so that this works:
- * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
- * abc\
- * def
- * Not when called from vim_regexec_nl().
- */
+ // Change NL to CR here too, so that this works:
+ // :s/abc\\\ndef/\="aaa\\\nbbb"/ on text:
+ // abc{backslash}
+ // def
+ // Not when called from vim_regexec_nl().
if (*s == NL && !rsm.sm_line_lbr) {
*s = CAR;
}
@@ -1862,12 +1840,12 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
}
if (had_backslash && (flags & REGSUB_BACKSLASH)) {
// Backslashes will be consumed, need to double them.
- s = vim_strsave_escaped(eval_result[nested], (char_u *)"\\");
+ s = vim_strsave_escaped(eval_result[nested], "\\");
xfree(eval_result[nested]);
eval_result[nested] = s;
}
- dst += STRLEN(eval_result[nested]);
+ dst += strlen(eval_result[nested]);
}
can_f_submatch = prev_can_f_submatch;
@@ -1876,7 +1854,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
}
}
} else {
- while ((c = *src++) != NUL) {
+ while ((c = (uint8_t)(*src++)) != NUL) {
if (c == '&' && (flags & REGSUB_MAGIC)) {
no = 0;
} else if (c == '\\' && *src != NUL) {
@@ -1885,7 +1863,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
no = 0;
} else if ('0' <= *src && *src <= '9') {
no = *src++ - '0';
- } else if (vim_strchr("uUlLeE", *src)) {
+ } else if (vim_strchr("uUlLeE", (uint8_t)(*src))) {
switch (*src++) {
case 'u':
func_one = (fptr_T)do_upper;
@@ -1914,7 +1892,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
iemsg("vim_regsub_both(): not enough space");
return 0;
}
- *dst++ = (char_u)c;
+ *dst++ = (char)c;
*dst++ = *src++;
*dst++ = *src++;
} else {
@@ -1951,10 +1929,10 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
}
dst++;
}
- c = *src++;
+ c = (uint8_t)(*src++);
}
} else {
- c = utf_ptr2char((char *)src - 1);
+ c = utf_ptr2char(src - 1);
}
// Write to buffer, if copy is set.
if (func_one != NULL) {
@@ -1966,7 +1944,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
cc = c;
}
- int totlen = utfc_ptr2len((char *)src - 1);
+ int totlen = utfc_ptr2len(src - 1);
int charlen = utf_char2len(cc);
if (copy) {
@@ -1974,10 +1952,10 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
iemsg("vim_regsub_both(): not enough space");
return 0;
}
- utf_char2bytes(cc, (char *)dst);
+ utf_char2bytes(cc, dst);
}
dst += charlen - 1;
- int clen = utf_ptr2len((char *)src - 1);
+ int clen = utf_ptr2len(src - 1);
// If the character length is shorter than "totlen", there
// are composing characters; copy them as-is.
@@ -2004,7 +1982,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
len = rex.reg_mmatch->endpos[no].col
- rex.reg_mmatch->startpos[no].col;
} else {
- len = (int)STRLEN(s);
+ len = (int)strlen(s);
}
}
} else {
@@ -2034,7 +2012,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
if (rex.reg_mmatch->endpos[no].lnum == clnum) {
len = rex.reg_mmatch->endpos[no].col;
} else {
- len = (int)STRLEN(s);
+ len = (int)strlen(s);
}
} else {
break;
@@ -2060,7 +2038,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
}
dst += 2;
} else {
- c = utf_ptr2char((char *)s);
+ c = utf_ptr2char(s);
if (func_one != (fptr_T)NULL) {
// Turbo C complains without the typecast
@@ -2078,7 +2056,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
// Copy composing characters separately, one
// at a time.
- l = utf_ptr2len((char *)s) - 1;
+ l = utf_ptr2len(s) - 1;
s += l;
len -= l;
@@ -2088,7 +2066,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int des
iemsg("vim_regsub_both(): not enough space");
return 0;
}
- utf_char2bytes(cc, (char *)dst);
+ utf_char2bytes(cc, dst);
}
dst += charlen - 1;
}
@@ -2112,14 +2090,12 @@ exit:
return (int)((dst - dest) + 1);
}
-/*
- * Call reg_getline() with the line numbers from the submatch. If a
- * substitute() was used the reg_maxline and other values have been
- * overwritten.
- */
-static char_u *reg_getline_submatch(linenr_T lnum)
+/// Call reg_getline() with the line numbers from the submatch. If a
+/// substitute() was used the reg_maxline and other values have been
+/// overwritten.
+static char *reg_getline_submatch(linenr_T lnum)
{
- char_u *s;
+ char *s;
linenr_T save_first = rex.reg_firstlnum;
linenr_T save_max = rex.reg_maxline;
@@ -2133,15 +2109,14 @@ static char_u *reg_getline_submatch(linenr_T lnum)
return s;
}
-/*
- * Used for the submatch() function: get the string from the n'th submatch in
- * allocated memory.
- * Returns NULL when not in a ":s" command and for a non-existing submatch.
- */
-char_u *reg_submatch(int no)
+/// Used for the submatch() function: get the string from the n'th submatch in
+/// allocated memory.
+///
+/// @return NULL when not in a ":s" command and for a non-existing submatch.
+char *reg_submatch(int no)
{
- char_u *retval = NULL;
- char_u *s;
+ char *retval = NULL;
+ char *s;
int round;
linenr_T lnum;
@@ -2152,10 +2127,8 @@ char_u *reg_submatch(int no)
if (rsm.sm_match == NULL) {
ssize_t len;
- /*
- * First round: compute the length and allocate memory.
- * Second round: copy the text.
- */
+ // First round: compute the length and allocate memory.
+ // Second round: copy the text.
for (round = 1; round <= 2; round++) {
lnum = rsm.sm_mmatch->startpos[no].lnum;
if (lnum < 0 || rsm.sm_mmatch->endpos[no].lnum < 0) {
@@ -2171,13 +2144,13 @@ char_u *reg_submatch(int no)
// Within one line: take form start to end col.
len = rsm.sm_mmatch->endpos[no].col - rsm.sm_mmatch->startpos[no].col;
if (round == 2) {
- STRLCPY(retval, s, len + 1);
+ xstrlcpy(retval, s, (size_t)len + 1);
}
len++;
} else {
// Multiple lines: take start line from start col, middle
// lines completely and end line up to end col.
- len = (ssize_t)STRLEN(s);
+ len = (ssize_t)strlen(s);
if (round == 2) {
STRCPY(retval, s);
retval[len] = '\n';
@@ -2189,15 +2162,16 @@ char_u *reg_submatch(int no)
if (round == 2) {
STRCPY(retval + len, s);
}
- len += (ssize_t)STRLEN(s);
+ len += (ssize_t)strlen(s);
if (round == 2) {
retval[len] = '\n';
}
len++;
}
if (round == 2) {
- STRNCPY(retval + len, reg_getline_submatch(lnum),
- rsm.sm_mmatch->endpos[no].col);
+ strncpy(retval + len, // NOLINT(runtime/printf)
+ reg_getline_submatch(lnum),
+ (size_t)rsm.sm_mmatch->endpos[no].col);
}
len += rsm.sm_mmatch->endpos[no].col;
if (round == 2) {
@@ -2215,7 +2189,7 @@ char_u *reg_submatch(int no)
if (s == NULL || rsm.sm_match->endp[no] == NULL) {
retval = NULL;
} else {
- retval = vim_strnsave(s, (size_t)(rsm.sm_match->endp[no] - s));
+ retval = xstrnsave(s, (size_t)(rsm.sm_match->endp[no] - s));
}
}
@@ -2250,16 +2224,16 @@ list_T *reg_submatch_list(int no)
list = tv_list_alloc(elnum - slnum + 1);
- s = (const char *)reg_getline_submatch(slnum) + scol;
+ s = reg_getline_submatch(slnum) + scol;
if (slnum == elnum) {
tv_list_append_string(list, s, ecol - scol);
} else {
tv_list_append_string(list, s, -1);
for (int i = 1; i < elnum - slnum; i++) {
- s = (const char *)reg_getline_submatch(slnum + i);
+ s = reg_getline_submatch(slnum + i);
tv_list_append_string(list, s, -1);
}
- s = (const char *)reg_getline_submatch(elnum);
+ s = reg_getline_submatch(elnum);
tv_list_append_string(list, s, ecol);
}
} else {
@@ -2275,22 +2249,39 @@ list_T *reg_submatch_list(int no)
return list;
}
+/// Initialize the values used for matching against multiple lines
+///
+/// @param win window in which to search or NULL
+/// @param buf buffer in which to search
+/// @param lnum nr of line to start looking for match
+static void init_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum)
+{
+ rex.reg_match = NULL;
+ rex.reg_mmatch = rmp;
+ rex.reg_buf = buf;
+ rex.reg_win = win;
+ rex.reg_firstlnum = lnum;
+ rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
+ rex.reg_line_lbr = false;
+ rex.reg_ic = rmp->rmm_ic;
+ rex.reg_icombine = false;
+ rex.reg_maxcol = rmp->rmm_maxcol;
+}
+
// XXX Do not allow headers generator to catch definitions from regexp_nfa.c
#ifndef DO_NOT_DEFINE_EMPTY_ATTRIBUTES
# include "nvim/regexp_bt.c"
# include "nvim/regexp_nfa.c"
#endif
-static regengine_T bt_regengine =
-{
+static regengine_T bt_regengine = {
bt_regcomp,
bt_regfree,
bt_regexec_nl,
bt_regexec_multi,
};
-static regengine_T nfa_regengine =
-{
+static regengine_T nfa_regengine = {
nfa_regcomp,
nfa_regfree,
nfa_regexec_nl,
@@ -2302,28 +2293,26 @@ static regengine_T nfa_regengine =
static int regexp_engine = 0;
#ifdef REGEXP_DEBUG
-static char_u regname[][30] = {
+static uint8_t regname[][30] = {
"AUTOMATIC Regexp Engine",
"BACKTRACKING Regexp Engine",
"NFA Regexp Engine"
};
#endif
-/*
- * Compile a regular expression into internal code.
- * Returns the program in allocated memory.
- * Use vim_regfree() to free the memory.
- * Returns NULL for an error.
- */
+// Compile a regular expression into internal code.
+// Returns the program in allocated memory.
+// Use vim_regfree() to free the memory.
+// Returns NULL for an error.
regprog_T *vim_regcomp(char *expr_arg, int re_flags)
{
regprog_T *prog = NULL;
- char_u *expr = (char_u *)expr_arg;
+ char *expr = expr_arg;
regexp_engine = (int)p_re;
// Check for prefix "\%#=", that sets the regexp engine
- if (STRNCMP(expr, "\\%#=", 4) == 0) {
+ if (strncmp(expr, "\\%#=", 4) == 0) {
int newengine = expr[4] - '0';
if (newengine == AUTOMATIC_ENGINE
@@ -2353,10 +2342,10 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
//
const int called_emsg_before = called_emsg;
if (regexp_engine != BACKTRACKING_ENGINE) {
- prog = nfa_regengine.regcomp(expr,
+ prog = nfa_regengine.regcomp((uint8_t *)expr,
re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
} else {
- prog = bt_regengine.regcomp(expr, re_flags);
+ prog = bt_regengine.regcomp((uint8_t *)expr, re_flags);
}
// Check for error compiling regexp with initial engine.
@@ -2381,7 +2370,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
if (regexp_engine == AUTOMATIC_ENGINE && called_emsg == called_emsg_before) {
regexp_engine = BACKTRACKING_ENGINE;
report_re_switch(expr);
- prog = bt_regengine.regcomp(expr, re_flags);
+ prog = bt_regengine.regcomp((uint8_t *)expr, re_flags);
}
}
@@ -2395,9 +2384,7 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
return prog;
}
-/*
- * Free a compiled regexp program, returned by vim_regcomp().
- */
+// Free a compiled regexp program, returned by vim_regcomp().
void vim_regfree(regprog_T *prog)
{
if (prog != NULL) {
@@ -2416,12 +2403,12 @@ void free_regexp_stuff(void)
#endif
-static void report_re_switch(char_u *pat)
+static void report_re_switch(char *pat)
{
if (p_verbose > 0) {
verbose_enter();
msg_puts(_("Switching to backtracking RE engine for pattern: "));
- msg_puts((char *)pat);
+ msg_puts(pat);
verbose_leave();
}
}
@@ -2438,7 +2425,7 @@ static void report_re_switch(char_u *pat)
/// @param nl
///
/// @return true if there is a match, false if not.
-static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)
+static bool vim_regexec_string(regmatch_T *rmp, char *line, colnr_T col, bool nl)
{
regexec_T rex_save;
bool rex_in_use_save = rex_in_use;
@@ -2461,7 +2448,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool
rex.reg_startpos = NULL;
rex.reg_endpos = NULL;
- int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
+ int result = rmp->regprog->engine->regexec_nl(rmp, (uint8_t *)line, col, nl);
rmp->regprog->re_in_use = false;
// NFA engine aborted because it's very slow, use backtracking engine instead.
@@ -2469,15 +2456,15 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool
&& result == NFA_TOO_EXPENSIVE) {
int save_p_re = (int)p_re;
int re_flags = (int)rmp->regprog->re_flags;
- char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
+ char *pat = xstrdup(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
report_re_switch(pat);
- rmp->regprog = vim_regcomp((char *)pat, re_flags);
+ rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL) {
rmp->regprog->re_in_use = true;
- result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
+ result = rmp->regprog->engine->regexec_nl(rmp, (uint8_t *)line, col, nl);
rmp->regprog->re_in_use = false;
}
@@ -2495,7 +2482,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool
// Note: "*prog" may be freed and changed.
// Return true if there is a match, false if not.
-bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T col)
+bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char *line, colnr_T col)
{
regmatch_T regmatch = { .regprog = *prog, .rm_ic = ignore_case };
bool r = vim_regexec_string(&regmatch, line, col, false);
@@ -2507,13 +2494,13 @@ bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T
// Return true if there is a match, false if not.
bool vim_regexec(regmatch_T *rmp, char *line, colnr_T col)
{
- return vim_regexec_string(rmp, (char_u *)line, col, false);
+ return vim_regexec_string(rmp, line, col, false);
}
// Like vim_regexec(), but consider a "\n" in "line" to be a line break.
// Note: "rmp->regprog" may be freed and changed.
// Return true if there is a match, false if not.
-bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
+bool vim_regexec_nl(regmatch_T *rmp, char *line, colnr_T col)
{
return vim_regexec_string(rmp, line, col, true);
}
@@ -2560,7 +2547,7 @@ long vim_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum,
&& result == NFA_TOO_EXPENSIVE) {
int save_p_re = (int)p_re;
int re_flags = (int)rmp->regprog->re_flags;
- char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
+ char *pat = xstrdup(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
regprog_T *prev_prog = rmp->regprog;
@@ -2569,7 +2556,7 @@ long vim_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum,
// checking for \z misuse was already done when compiling for NFA,
// allow all here
reg_do_extmatch = REX_ALL;
- rmp->regprog = vim_regcomp((char *)pat, re_flags);
+ rmp->regprog = vim_regcomp(pat, re_flags);
reg_do_extmatch = 0;
if (rmp->regprog == NULL) {