aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvim/regexp.c')
-rw-r--r--src/nvim/regexp.c177
1 files changed, 90 insertions, 87 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index e396e54ced..122f3e2020 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -87,7 +87,7 @@ static int toggle_Magic(int x)
#define REGMAGIC 0234
// Utility definitions.
-#define UCHARAT(p) ((int)(*(char_u *)(p)))
+#define UCHARAT(p) ((int)(*(uint8_t *)(p)))
// Used for an error (down from) vim_regcomp(): give the error message, set
// rc_did_emsg and return NULL
@@ -327,7 +327,7 @@ static int reg_strict; // "[abc" is illegal
// uncrustify:off
// META[] is used often enough to justify turning it into a table.
-static char_u META_flags[] = {
+static uint8_t META_flags[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// % & ( ) * + .
@@ -361,7 +361,7 @@ static int nextchr; // used for ungetchr()
#define REG_NPAREN 3 // \%(\)
typedef struct {
- char_u *regparse;
+ char *regparse;
int prevchr_len;
int curchr;
int prevchr;
@@ -393,12 +393,12 @@ static int get_equi_class(char **pp)
{
int c;
int l = 1;
- char_u *p = (char_u *)(*pp);
+ char *p = *pp;
if (p[1] == '=' && p[2] != NUL) {
- l = utfc_ptr2len((char *)p + 2);
+ l = utfc_ptr2len(p + 2);
if (p[l + 2] == '=' && p[l + 3] == ']') {
- c = utf_ptr2char((char *)p + 2);
+ c = utf_ptr2char(p + 2);
*pp += l + 4;
return c;
}
@@ -414,12 +414,12 @@ static int get_coll_element(char **pp)
{
int c;
int l = 1;
- char_u *p = (char_u *)(*pp);
+ char *p = *pp;
if (p[0] != NUL && p[1] == '.' && p[2] != NUL) {
- l = utfc_ptr2len((char *)p + 2);
+ l = utfc_ptr2len(p + 2);
if (p[l + 2] == '.' && p[l + 3] == ']') {
- c = utf_ptr2char((char *)p + 2);
+ c = utf_ptr2char(p + 2);
*pp += l + 4;
return c;
}
@@ -437,7 +437,7 @@ static void get_cpo_flags(void)
/// Skip over a "[]" range.
/// "p" must point to the character after the '['.
/// The returned pointer is on the matching ']', or the terminating NUL.
-static char_u *skip_anyof(char *p)
+static char *skip_anyof(char *p)
{
int l;
@@ -456,9 +456,9 @@ static char_u *skip_anyof(char *p)
MB_PTR_ADV(p);
}
} else if (*p == '\\'
- && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
+ && (vim_strchr(REGEXP_INRANGE, (uint8_t)p[1]) != NULL
|| (!reg_cpo_lit
- && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) {
+ && vim_strchr(REGEXP_ABBR, (uint8_t)p[1]) != NULL))) {
p += 2;
} else if (*p == '[') {
if (get_char_class(&p) == CLASS_NONE
@@ -472,7 +472,7 @@ static char_u *skip_anyof(char *p)
}
}
- return (char_u *)p;
+ return p;
}
/// Skip past regular expression.
@@ -522,7 +522,7 @@ char *skip_regexp_ex(char *startp, int dirc, int magic, char **newp, int *droppe
}
if ((p[0] == '[' && mymagic >= MAGIC_ON)
|| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF)) {
- p = (char *)skip_anyof(p + 1);
+ p = skip_anyof(p + 1);
if (p[0] == NUL) {
break;
}
@@ -559,9 +559,9 @@ static int at_start; // True when on the first character
static int prev_at_start; // True when on the second character
// Start parsing at "str".
-static void initchr(char_u *str)
+static void initchr(char *str)
{
- regparse = (char *)str;
+ regparse = str;
prevchr_len = 0;
curchr = prevprevchr = prevchr = nextchr = -1;
at_start = true;
@@ -572,7 +572,7 @@ static void initchr(char_u *str)
// starts in the same state again.
static void save_parse_state(parse_state_T *ps)
{
- ps->regparse = (char_u *)regparse;
+ ps->regparse = regparse;
ps->prevchr_len = prevchr_len;
ps->curchr = curchr;
ps->prevchr = prevchr;
@@ -586,7 +586,7 @@ static void save_parse_state(parse_state_T *ps)
// Restore a previously saved parse state.
static void restore_parse_state(parse_state_T *ps)
{
- regparse = (char *)ps->regparse;
+ regparse = ps->regparse;
prevchr_len = ps->prevchr_len;
curchr = ps->curchr;
prevchr = ps->prevchr;
@@ -677,7 +677,7 @@ static int peekchr(void)
// '$' is only magic as the very last char and if it's in front of
// either "\|", "\)", "\&", or "\n"
if (reg_magic >= MAGIC_OFF) {
- char_u *p = (char_u *)regparse + 1;
+ uint8_t *p = (uint8_t *)regparse + 1;
bool is_magic_all = (reg_magic == MAGIC_ALL);
// ignore \c \C \m \M \v \V and \Z after '$'
@@ -892,7 +892,7 @@ static int64_t getoctchrs(void)
static int read_limits(long *minval, long *maxval)
{
int reverse = false;
- char_u *first_char;
+ char *first_char;
long tmp;
if (*regparse == '-') {
@@ -900,7 +900,7 @@ static int read_limits(long *minval, long *maxval)
regparse++;
reverse = true;
}
- first_char = (char_u *)regparse;
+ first_char = regparse;
*minval = getdigits_long(&regparse, false, 0);
if (*regparse == ',') { // There is a comma.
if (ascii_isdigit(*++regparse)) {
@@ -938,7 +938,7 @@ static int read_limits(long *minval, long *maxval)
// Sometimes need to save a copy of a line. Since alloc()/free() is very
// slow, we keep one allocated piece of memory and only re-allocate it when
// it's too small. It's freed in bt_regexec_both() when finished.
-static char_u *reg_tofree = NULL;
+static uint8_t *reg_tofree = NULL;
static unsigned reg_tofreelen;
// Structure used to store the execution state of the regex engine.
@@ -960,8 +960,8 @@ typedef struct {
regmatch_T *reg_match;
regmmatch_T *reg_mmatch;
- char_u **reg_startp;
- char_u **reg_endp;
+ uint8_t **reg_startp;
+ uint8_t **reg_endp;
lpos_T *reg_startpos;
lpos_T *reg_endpos;
@@ -973,8 +973,8 @@ typedef struct {
// The current match-position is remembered with these variables:
linenr_T lnum; ///< line number, relative to first line
- char_u *line; ///< start of current line
- char_u *input; ///< current input, points into "line"
+ uint8_t *line; ///< start of current line
+ uint8_t *input; ///< current input, points into "line"
int need_clear_subexpr; ///< subexpressions still need to be cleared
int need_clear_zsubexpr; ///< extmatch subexpressions still need to be
@@ -1019,7 +1019,7 @@ static bool reg_iswordc(int c)
}
// Get pointer to the line "lnum", which is relative to "reg_firstlnum".
-static char_u *reg_getline(linenr_T lnum)
+static char *reg_getline(linenr_T lnum)
{
// when looking behind for a match/no-match lnum is negative. But we
// can't go before line 1
@@ -1028,13 +1028,13 @@ static char_u *reg_getline(linenr_T lnum)
}
if (lnum > rex.reg_maxline) {
// Must have matched the "\n" in the last line.
- return (char_u *)"";
+ return "";
}
- return (char_u *)ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false);
+ return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false);
}
-static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning
-static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches
+static uint8_t *reg_startzp[NSUBEXP]; // Workspace to mark beginning
+static uint8_t *reg_endzp[NSUBEXP]; // and end of \z(...\) matches
static lpos_T reg_startzpos[NSUBEXP]; // idem, beginning pos
static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos
@@ -1147,7 +1147,7 @@ static bool reg_match_visual(void)
}
// getvvcol() flushes rex.line, need to get it again
- rex.line = reg_getline(rex.lnum);
+ rex.line = (uint8_t *)reg_getline(rex.lnum);
rex.input = rex.line + col;
unsigned int cols_u = win_linetabsize(wp, rex.reg_firstlnum + rex.lnum, (char *)rex.line, col);
@@ -1184,38 +1184,42 @@ static int prog_magic_wrong(void)
// used (to increase speed).
static void cleanup_subexpr(void)
{
- if (rex.need_clear_subexpr) {
- if (REG_MULTI) {
- // Use 0xff to set lnum to -1
- memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- } else {
- memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP);
- memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP);
- }
- rex.need_clear_subexpr = false;
+ if (!rex.need_clear_subexpr) {
+ return;
+ }
+
+ if (REG_MULTI) {
+ // Use 0xff to set lnum to -1
+ memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ memset(rex.reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ } else {
+ memset(rex.reg_startp, 0, sizeof(char *) * NSUBEXP);
+ memset(rex.reg_endp, 0, sizeof(char *) * NSUBEXP);
}
+ rex.need_clear_subexpr = false;
}
static void cleanup_zsubexpr(void)
{
- if (rex.need_clear_zsubexpr) {
- if (REG_MULTI) {
- // Use 0xff to set lnum to -1
- memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
- } else {
- memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
- memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
- }
- rex.need_clear_zsubexpr = false;
+ if (!rex.need_clear_zsubexpr) {
+ return;
+ }
+
+ if (REG_MULTI) {
+ // Use 0xff to set lnum to -1
+ memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
+ } else {
+ memset(reg_startzp, 0, sizeof(char *) * NSUBEXP);
+ memset(reg_endzp, 0, sizeof(char *) * NSUBEXP);
}
+ rex.need_clear_zsubexpr = false;
}
// Advance rex.lnum, rex.line and rex.input to the next line.
static void reg_nextline(void)
{
- rex.line = reg_getline(++rex.lnum);
+ rex.line = (uint8_t *)reg_getline(++rex.lnum);
rex.input = rex.line;
fast_breakcheck();
}
@@ -1252,7 +1256,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e
}
// Get the line to compare with.
- p = (char *)reg_getline(clnum);
+ p = reg_getline(clnum);
assert(p);
if (clnum == end_lnum) {
@@ -1399,8 +1403,8 @@ static int cstrncmp(char *s1, char *s2, int *n)
str2 = s2;
c1 = c2 = 0;
while ((int)(str1 - s1) < *n) {
- c1 = mb_ptr2char_adv((const char_u **)&str1);
- c2 = mb_ptr2char_adv((const char_u **)&str2);
+ c1 = mb_ptr2char_adv((const char **)&str1);
+ c2 = mb_ptr2char_adv((const char **)&str2);
// decompose the character if necessary, into 'base' characters
// because I don't care about Arabic, I will hard-code the Hebrew
@@ -1434,21 +1438,21 @@ static int cstrncmp(char *s1, char *s2, int *n)
/// @param c character to find in @a s
///
/// @return NULL if no match, otherwise pointer to the position in @a s
-static inline char_u *cstrchr(const char_u *const s, const int c)
+static inline char *cstrchr(const char *const s, const int c)
FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
FUNC_ATTR_ALWAYS_INLINE
{
if (!rex.reg_ic) {
- return (char_u *)vim_strchr((char *)s, c);
+ return vim_strchr(s, c);
}
// Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is
// expected to be highly optimized.
if (c > 0x80) {
const int folded_c = utf_fold(c);
- for (const char_u *p = s; *p != NUL; p += utfc_ptr2len((char *)p)) {
- if (utf_fold(utf_ptr2char((char *)p)) == folded_c) {
- return (char_u *)p;
+ for (const char *p = s; *p != NUL; p += utfc_ptr2len(p)) {
+ if (utf_fold(utf_ptr2char(p)) == folded_c) {
+ return (char *)p;
}
}
return NULL;
@@ -1460,11 +1464,11 @@ static inline char_u *cstrchr(const char_u *const s, const int c)
} else if (ASCII_ISLOWER(c)) {
cc = TOUPPER_ASC(c);
} else {
- return (char_u *)vim_strchr((char *)s, c);
+ return vim_strchr(s, c);
}
char tofind[] = { (char)c, (char)cc, NUL };
- return (char_u *)strpbrk((const char *)s, tofind);
+ return strpbrk(s, tofind);
}
////////////////////////////////////////////////////////////////
@@ -1636,8 +1640,7 @@ static void clear_submatch_list(staticList10_T *sl)
/// references invalid!
///
/// Returns the size of the replacement, including terminating NUL.
-int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, int destlen,
- int flags)
+int vim_regsub(regmatch_T *rmp, char *source, typval_T *expr, char *dest, int destlen, int flags)
{
regexec_T rex_save;
bool rex_in_use_save = rex_in_use;
@@ -1653,7 +1656,7 @@ int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, in
rex.reg_maxline = 0;
rex.reg_buf = curbuf;
rex.reg_line_lbr = true;
- int result = vim_regsub_both((char *)source, expr, (char *)dest, destlen, flags);
+ int result = vim_regsub_both(source, expr, dest, destlen, flags);
rex_in_use = rex_in_use_save;
if (rex_in_use) {
@@ -1663,7 +1666,7 @@ int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, in
return result;
}
-int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int destlen,
+int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char *source, char *dest, int destlen,
int flags)
{
regexec_T rex_save;
@@ -1681,7 +1684,7 @@ int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *de
rex.reg_firstlnum = lnum;
rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
rex.reg_line_lbr = false;
- int result = vim_regsub_both((char *)source, NULL, (char *)dest, destlen, flags);
+ int result = vim_regsub_both(source, NULL, dest, destlen, flags);
rex_in_use = rex_in_use_save;
if (rex_in_use) {
@@ -1860,7 +1863,7 @@ static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen
no = 0;
} else if ('0' <= *src && *src <= '9') {
no = *src++ - '0';
- } else if (vim_strchr("uUlLeE", *src)) {
+ } else if (vim_strchr("uUlLeE", (uint8_t)(*src))) {
switch (*src++) {
case 'u':
func_one = (fptr_T)do_upper;
@@ -1974,7 +1977,7 @@ static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen
if (clnum < 0 || rex.reg_mmatch->endpos[no].lnum < 0) {
s = NULL;
} else {
- s = (char *)reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
+ s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
if (rex.reg_mmatch->endpos[no].lnum == clnum) {
len = rex.reg_mmatch->endpos[no].col
- rex.reg_mmatch->startpos[no].col;
@@ -2005,7 +2008,7 @@ static int vim_regsub_both(char *source, typval_T *expr, char *dest, int destlen
*dst = CAR;
}
dst++;
- s = (char *)reg_getline(++clnum);
+ s = reg_getline(++clnum);
if (rex.reg_mmatch->endpos[no].lnum == clnum) {
len = rex.reg_mmatch->endpos[no].col;
} else {
@@ -2099,7 +2102,7 @@ static char *reg_getline_submatch(linenr_T lnum)
rex.reg_firstlnum = rsm.sm_firstlnum;
rex.reg_maxline = rsm.sm_maxline;
- s = (char *)reg_getline(lnum);
+ s = reg_getline(lnum);
rex.reg_firstlnum = save_first;
rex.reg_maxline = save_max;
@@ -2290,7 +2293,7 @@ static regengine_T nfa_regengine = {
static int regexp_engine = 0;
#ifdef REGEXP_DEBUG
-static char_u regname[][30] = {
+static uint8_t regname[][30] = {
"AUTOMATIC Regexp Engine",
"BACKTRACKING Regexp Engine",
"NFA Regexp Engine"
@@ -2339,10 +2342,10 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
//
const int called_emsg_before = called_emsg;
if (regexp_engine != BACKTRACKING_ENGINE) {
- prog = nfa_regengine.regcomp((char_u *)expr,
+ prog = nfa_regengine.regcomp((uint8_t *)expr,
re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
} else {
- prog = bt_regengine.regcomp((char_u *)expr, re_flags);
+ prog = bt_regengine.regcomp((uint8_t *)expr, re_flags);
}
// Check for error compiling regexp with initial engine.
@@ -2366,8 +2369,8 @@ regprog_T *vim_regcomp(char *expr_arg, int re_flags)
// But don't try if an error message was given.
if (regexp_engine == AUTOMATIC_ENGINE && called_emsg == called_emsg_before) {
regexp_engine = BACKTRACKING_ENGINE;
- report_re_switch((char_u *)expr);
- prog = bt_regengine.regcomp((char_u *)expr, re_flags);
+ report_re_switch(expr);
+ prog = bt_regengine.regcomp((uint8_t *)expr, re_flags);
}
}
@@ -2400,12 +2403,12 @@ void free_regexp_stuff(void)
#endif
-static void report_re_switch(char_u *pat)
+static void report_re_switch(char *pat)
{
if (p_verbose > 0) {
verbose_enter();
msg_puts(_("Switching to backtracking RE engine for pattern: "));
- msg_puts((char *)pat);
+ msg_puts(pat);
verbose_leave();
}
}
@@ -2422,7 +2425,7 @@ static void report_re_switch(char_u *pat)
/// @param nl
///
/// @return true if there is a match, false if not.
-static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)
+static bool vim_regexec_string(regmatch_T *rmp, char *line, colnr_T col, bool nl)
{
regexec_T rex_save;
bool rex_in_use_save = rex_in_use;
@@ -2445,7 +2448,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool
rex.reg_startpos = NULL;
rex.reg_endpos = NULL;
- int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
+ int result = rmp->regprog->engine->regexec_nl(rmp, (uint8_t *)line, col, nl);
rmp->regprog->re_in_use = false;
// NFA engine aborted because it's very slow, use backtracking engine instead.
@@ -2457,11 +2460,11 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
- report_re_switch((char_u *)pat);
+ report_re_switch(pat);
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL) {
rmp->regprog->re_in_use = true;
- result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
+ result = rmp->regprog->engine->regexec_nl(rmp, (uint8_t *)line, col, nl);
rmp->regprog->re_in_use = false;
}
@@ -2479,7 +2482,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool
// Note: "*prog" may be freed and changed.
// Return true if there is a match, false if not.
-bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T col)
+bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char *line, colnr_T col)
{
regmatch_T regmatch = { .regprog = *prog, .rm_ic = ignore_case };
bool r = vim_regexec_string(&regmatch, line, col, false);
@@ -2491,13 +2494,13 @@ bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T
// Return true if there is a match, false if not.
bool vim_regexec(regmatch_T *rmp, char *line, colnr_T col)
{
- return vim_regexec_string(rmp, (char_u *)line, col, false);
+ return vim_regexec_string(rmp, line, col, false);
}
// Like vim_regexec(), but consider a "\n" in "line" to be a line break.
// Note: "rmp->regprog" may be freed and changed.
// Return true if there is a match, false if not.
-bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
+bool vim_regexec_nl(regmatch_T *rmp, char *line, colnr_T col)
{
return vim_regexec_string(rmp, line, col, true);
}
@@ -2549,7 +2552,7 @@ long vim_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum,
p_re = BACKTRACKING_ENGINE;
regprog_T *prev_prog = rmp->regprog;
- report_re_switch((char_u *)pat);
+ report_re_switch(pat);
// checking for \z misuse was already done when compiling for NFA,
// allow all here
reg_do_extmatch = REX_ALL;