diff options
-rw-r--r-- | runtime/doc/pattern.txt | 3 | ||||
-rw-r--r-- | runtime/filetype.vim | 3 | ||||
-rw-r--r-- | src/nvim/buffer.c | 8 | ||||
-rw-r--r-- | src/nvim/eval/funcs.c | 3 | ||||
-rw-r--r-- | src/nvim/fileio.c | 20 | ||||
-rw-r--r-- | src/nvim/options.lua | 6 | ||||
-rw-r--r-- | src/nvim/os/fs.c | 26 | ||||
-rw-r--r-- | src/nvim/quickfix.c | 24 | ||||
-rw-r--r-- | src/nvim/regexp.c | 847 | ||||
-rw-r--r-- | src/nvim/regexp_defs.h | 7 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 1139 | ||||
-rw-r--r-- | src/nvim/screen.c | 11 | ||||
-rw-r--r-- | src/nvim/testdir/check.vim | 11 | ||||
-rw-r--r-- | src/nvim/testdir/runtest.vim | 5 | ||||
-rw-r--r-- | src/nvim/testdir/test_backup.vim | 16 | ||||
-rw-r--r-- | src/nvim/testdir/test_diffmode.vim | 29 | ||||
-rw-r--r-- | src/nvim/testdir/test_filetype.vim | 1 | ||||
-rw-r--r-- | src/nvim/testdir/test_quickfix.vim | 27 | ||||
-rw-r--r-- | src/nvim/testdir/test_regexp_utf8.vim | 182 | ||||
-rw-r--r-- | src/nvim/testdir/test_spell.vim | 41 | ||||
-rw-r--r-- | src/nvim/testdir/test_tabline.vim | 25 | ||||
-rw-r--r-- | test/functional/legacy/memory_usage_spec.lua | 12 | ||||
-rw-r--r-- | test/functional/terminal/tui_spec.lua | 2 | ||||
-rw-r--r-- | test/functional/ui/diff_spec.lua | 77 | ||||
-rw-r--r-- | test/functional/ui/spell_spec.lua | 31 |
25 files changed, 1572 insertions, 984 deletions
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index adfab07758..7129c6cd58 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1111,6 +1111,9 @@ x A single character, with no special meaning, matches itself *[:tab:]* [:tab:] the <Tab> character *[:escape:]* [:escape:] the <Esc> character *[:backspace:]* [:backspace:] the <BS> character +*[:ident:]* [:ident:] identifier character (same as "\i") +*[:keyword:]* [:keyword:] keyword character (same as "\k") +*[:fname:]* [:fname:] file name character (same as "\f") The brackets in character class expressions are additional to the brackets delimiting a collection. For example, the following is a plausible pattern for a Unix filename: "[-./[:alnum:]_~]\+" That is, diff --git a/runtime/filetype.vim b/runtime/filetype.vim index 6807bef3eb..c0d656107c 100644 --- a/runtime/filetype.vim +++ b/runtime/filetype.vim @@ -84,6 +84,9 @@ au BufNewFile,BufRead *.gpr setf ada " AHDL au BufNewFile,BufRead *.tdf setf ahdl +" AIDL +au BufNewFile,BufRead *.aidl setf aidl + " AMPL au BufNewFile,BufRead *.run setf ampl diff --git a/src/nvim/buffer.c b/src/nvim/buffer.c index 86067aceac..b3bbdce9d9 100644 --- a/src/nvim/buffer.c +++ b/src/nvim/buffer.c @@ -3646,13 +3646,19 @@ int build_stl_str_hl( } } if (n == curitem && group_start_userhl == group_end_userhl) { + // empty group out_p = t; group_len = 0; - // do not use the highlighting from the removed group for (n = groupitems[groupdepth] + 1; n < curitem; n++) { + // do not use the highlighting from the removed group if (items[n].type == Highlight) { items[n].type = Empty; } + // adjust the start position of TabPage to the next + // item position + if (items[n].type == TabPage) { + items[n].start = out_p; + } } } } diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index e350d09935..b97bd6b10d 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -9519,7 +9519,7 @@ static void f_split(typval_T *argvars, typval_T *rettv, FunPtr fptr) 
tv_list_alloc_ret(rettv, kListLenMayKnow); if (typeerr) { - return; + goto theend; } regmatch_T regmatch = { @@ -9563,6 +9563,7 @@ static void f_split(typval_T *argvars, typval_T *rettv, FunPtr fptr) vim_regfree(regmatch.regprog); } +theend: p_cpo = save_cpo; } diff --git a/src/nvim/fileio.c b/src/nvim/fileio.c index 20f0cdccc3..f922591d0b 100644 --- a/src/nvim/fileio.c +++ b/src/nvim/fileio.c @@ -300,6 +300,7 @@ readfile( int skip_read = false; context_sha256_T sha_ctx; int read_undo_file = false; + int split = 0; // number of split lines linenr_T linecnt; int error = FALSE; /* errors encountered */ int ff_error = EOL_UNKNOWN; /* file format with errors */ @@ -1013,8 +1014,21 @@ retry: */ { if (!skip_read) { - size = 0x10000L; /* use buffer >= 64K */ + // Use buffer >= 64K. Add linerest to double the size if the + // line gets very long, to avoid a lot of copying. But don't + // read more than 1 Mbyte at a time, so we can be interrupted. + size = 0x10000L + linerest; + if (size > 0x100000L) { + size = 0x100000L; + } + } + // Protect against the argument of lalloc() going negative. 
+ if (size < 0 || size + linerest + 1 < 0 || linerest >= MAXCOL) { + split++; + *ptr = NL; // split line by inserting a NL + size = 1; + } else if (!skip_read) { for (; size >= 10; size /= 2) { new_buffer = verbose_try_malloc((size_t)size + (size_t)linerest + 1); if (new_buffer) { @@ -1862,6 +1876,10 @@ failed: STRCAT(IObuff, _("[CR missing]")); c = TRUE; } + if (split) { + STRCAT(IObuff, _("[long lines split]")); + c = true; + } if (notconverted) { STRCAT(IObuff, _("[NOT converted]")); c = TRUE; diff --git a/src/nvim/options.lua b/src/nvim/options.lua index e7c1a3fe88..60a38dc2e3 100644 --- a/src/nvim/options.lua +++ b/src/nvim/options.lua @@ -1271,7 +1271,11 @@ return { deny_duplicates=true, vi_def=true, varname='p_isi', - defaults={if_true={vi="@,48-57,_,192-255"}} + defaults={ + condition='WIN32', + if_true={vi="@,48-57,_,128-167,224-235"}, + if_false={vi="@,48-57,_,192-255"} + } }, { full_name='iskeyword', abbreviation='isk', diff --git a/src/nvim/os/fs.c b/src/nvim/os/fs.c index 873b611151..a3bef3389c 100644 --- a/src/nvim/os/fs.c +++ b/src/nvim/os/fs.c @@ -743,7 +743,9 @@ static int os_stat(const char *name, uv_stat_t *statbuf) } uv_fs_t request; int result = uv_fs_stat(&fs_loop, &request, name, NULL); - *statbuf = request.statbuf; + if (result == kLibuvSuccess) { + *statbuf = request.statbuf; + } uv_fs_req_cleanup(&request); return result; } @@ -1009,6 +1011,7 @@ int os_remove(const char *path) bool os_fileinfo(const char *path, FileInfo *file_info) FUNC_ATTR_NONNULL_ARG(2) { + memset(file_info, 0, sizeof(*file_info)); return os_stat(path, &(file_info->stat)) == kLibuvSuccess; } @@ -1020,14 +1023,17 @@ bool os_fileinfo(const char *path, FileInfo *file_info) bool os_fileinfo_link(const char *path, FileInfo *file_info) FUNC_ATTR_NONNULL_ARG(2) { + memset(file_info, 0, sizeof(*file_info)); if (path == NULL) { return false; } uv_fs_t request; - int result = uv_fs_lstat(&fs_loop, &request, path, NULL); - file_info->stat = request.statbuf; + bool ok = 
uv_fs_lstat(&fs_loop, &request, path, NULL) == kLibuvSuccess; + if (ok) { + file_info->stat = request.statbuf; + } uv_fs_req_cleanup(&request); - return (result == kLibuvSuccess); + return ok; } /// Get the file information for a given file descriptor @@ -1039,10 +1045,16 @@ bool os_fileinfo_fd(int file_descriptor, FileInfo *file_info) FUNC_ATTR_NONNULL_ALL { uv_fs_t request; - int result = uv_fs_fstat(&fs_loop, &request, file_descriptor, NULL); - file_info->stat = request.statbuf; + memset(file_info, 0, sizeof(*file_info)); + bool ok = uv_fs_fstat(&fs_loop, + &request, + file_descriptor, + NULL) == kLibuvSuccess; + if (ok) { + file_info->stat = request.statbuf; + } uv_fs_req_cleanup(&request); - return (result == kLibuvSuccess); + return ok; } /// Compare the inodes of two FileInfos diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c index 484168e798..ddce1e922d 100644 --- a/src/nvim/quickfix.c +++ b/src/nvim/quickfix.c @@ -3757,13 +3757,13 @@ static int qf_buf_add_line(buf_T *buf, linenr_T lnum, const qfline_T *qfp, buf_T *errbuf; if (qfp->qf_module != NULL) { - STRCPY(IObuff, qfp->qf_module); + STRLCPY(IObuff, qfp->qf_module, IOSIZE - 1); len = (int)STRLEN(IObuff); } else if (qfp->qf_fnum != 0 && (errbuf = buflist_findnr(qfp->qf_fnum)) != NULL && errbuf->b_fname != NULL) { if (qfp->qf_type == 1) { // :helpgrep - STRLCPY(IObuff, path_tail(errbuf->b_fname), sizeof(IObuff)); + STRLCPY(IObuff, path_tail(errbuf->b_fname), IOSIZE - 1); } else { // shorten the file name if not done already if (errbuf->b_sfname == NULL @@ -3773,33 +3773,37 @@ static int qf_buf_add_line(buf_T *buf, linenr_T lnum, const qfline_T *qfp, } shorten_buf_fname(errbuf, dirname, false); } - STRLCPY(IObuff, errbuf->b_fname, sizeof(IObuff)); + STRLCPY(IObuff, errbuf->b_fname, IOSIZE - 1); } len = (int)STRLEN(IObuff); } else { len = 0; } - IObuff[len++] = '|'; - + if (len < IOSIZE - 1) { + IObuff[len++] = '|'; + } if (qfp->qf_lnum > 0) { - snprintf((char *)IObuff + len, sizeof(IObuff), "%" 
PRId64, + snprintf((char *)IObuff + len, (size_t)(IOSIZE - len), "%" PRId64, (int64_t)qfp->qf_lnum); len += (int)STRLEN(IObuff + len); if (qfp->qf_col > 0) { - snprintf((char *)IObuff + len, sizeof(IObuff), " col %d", qfp->qf_col); + snprintf((char *)IObuff + len, (size_t)(IOSIZE - len), " col %d", + qfp->qf_col); len += (int)STRLEN(IObuff + len); } - snprintf((char *)IObuff + len, sizeof(IObuff), "%s", + snprintf((char *)IObuff + len, (size_t)(IOSIZE - len), "%s", (char *)qf_types(qfp->qf_type, qfp->qf_nr)); len += (int)STRLEN(IObuff + len); } else if (qfp->qf_pattern != NULL) { qf_fmt_text(qfp->qf_pattern, IObuff + len, IOSIZE - len); len += (int)STRLEN(IObuff + len); } - IObuff[len++] = '|'; - IObuff[len++] = ' '; + if (len < IOSIZE - 2) { + IObuff[len++] = '|'; + IObuff[len++] = ' '; + } // Remove newlines and leading whitespace from the text. // For an unrecognized line keep the indent, the compiler may diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 34553fcec4..9705896e9b 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -40,11 +40,11 @@ * Named character class support added by Walter Briscoe (1998 Jul 01) */ -/* Uncomment the first if you do not want to see debugging logs or files - * related to regular expressions, even when compiling with -DDEBUG. - * Uncomment the second to get the regexp debugging. */ -/* #undef REGEXP_DEBUG */ -/* #define REGEXP_DEBUG */ +// By default: do not create debugging logs or files related to regular +// expressions, even when compiling with -DDEBUG. +// Uncomment the second line to get the regexp debugging. 
+// #undef REGEXP_DEBUG +// #define REGEXP_DEBUG #include <assert.h> #include <inttypes.h> @@ -301,8 +301,8 @@ typedef struct { */ typedef struct { union { - char_u *ptr; /* reginput pointer, for single-line regexp */ - lpos_T pos; /* reginput pos, for multi-line regexp */ + char_u *ptr; ///< rex.input pointer, for single-line regexp + lpos_T pos; ///< rex.input pos, for multi-line regexp } rs_u; int rs_len; } regsave_T; @@ -355,7 +355,7 @@ typedef struct regitem_S { union { save_se_T sesave; regsave_T regsave; - } rs_un; // room for saving reginput + } rs_un; ///< room for saving rex.input } regitem_T; @@ -490,6 +490,8 @@ static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); static char_u e_z1_not_allowed[] = N_("E67: \\z1 - \\z9 not allowed here"); static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); +static char_u e_recursive[] = N_("E956: Cannot use pattern recursively"); + #define NOT_MULTI 0 #define MULTI_ONE 1 #define MULTI_MULT 2 @@ -600,6 +602,12 @@ static int get_char_class(char_u **pp) #define CLASS_BACKSPACE 14 "escape:]", #define CLASS_ESCAPE 15 + "ident:]", +#define CLASS_IDENT 16 + "keyword:]", +#define CLASS_KEYWORD 17 + "fname:]", +#define CLASS_FNAME 18 }; #define CLASS_NONE 99 int i; @@ -633,7 +641,7 @@ static short class_tab[256]; static void init_class_tab(void) { int i; - static int done = FALSE; + static int done = false; if (done) return; @@ -658,7 +666,7 @@ static void init_class_tab(void) } class_tab[' '] |= RI_WHITE; class_tab['\t'] |= RI_WHITE; - done = TRUE; + done = true; } # define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT)) @@ -678,26 +686,24 @@ static void init_class_tab(void) #define RF_ICOMBINE 8 /* ignore combining characters */ #define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */ -/* - * Global work variables for vim_regcomp(). - */ - -static char_u *regparse; /* Input-scan pointer. 
*/ -static int prevchr_len; /* byte length of previous char */ -static int num_complex_braces; /* Complex \{...} count */ -static int regnpar; /* () count. */ -static int regnzpar; /* \z() count. */ -static int re_has_z; /* \z item detected */ -static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */ -static long regsize; /* Code size. */ -static int reg_toolong; /* TRUE when offset out of range */ -static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */ -static unsigned regflags; /* RF_ flags for prog */ -static long brace_min[10]; /* Minimums for complex brace repeats */ -static long brace_max[10]; /* Maximums for complex brace repeats */ -static int brace_count[10]; /* Current counts for complex brace repeats */ -static int had_eol; /* TRUE when EOL found by vim_regcomp() */ -static int one_exactly = FALSE; /* only do one char for EXACTLY */ +// Global work variables for vim_regcomp(). + +static char_u *regparse; ///< Input-scan pointer. +static int prevchr_len; ///< byte length of previous char +static int num_complex_braces; ///< Complex \{...} count +static int regnpar; ///< () count. +static int regnzpar; ///< \z() count. +static int re_has_z; ///< \z item detected +static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE +static long regsize; ///< Code size. 
+static int reg_toolong; ///< true when offset out of range +static char_u had_endbrace[NSUBEXP]; ///< flags, true if end of () found +static unsigned regflags; ///< RF_ flags for prog +static long brace_min[10]; ///< Minimums for complex brace repeats +static long brace_max[10]; ///< Maximums for complex brace repeats +static int brace_count[10]; ///< Current counts for complex brace repeats +static int had_eol; ///< true when EOL found by vim_regcomp() +static int one_exactly = false; ///< only do one char for EXACTLY static int reg_magic; /* magicness of the pattern: */ #define MAGIC_NONE 1 /* "\V" very unmagic */ @@ -754,10 +760,9 @@ static int nextchr; /* used for ungetchr() */ static regengine_T bt_regengine; static regengine_T nfa_regengine; -/* - * Return TRUE if compiled regular expression "prog" can match a line break. - */ -int re_multiline(regprog_T *prog) +// Return true if compiled regular expression "prog" can match a line break. +int re_multiline(const regprog_T *prog) + FUNC_ATTR_NONNULL_ALL { return prog->regflags & RF_HASNL; } @@ -1211,7 +1216,7 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char_u **newp) return p; } -/// Return TRUE if the back reference is legal. We must have seen the close +/// Return true if the back reference is legal. We must have seen the close /// brace. /// TODO(vim): Should also check that we don't refer to something repeated /// (+*=): what instance of the repetition should we match? @@ -1234,7 +1239,7 @@ static int seen_endbrace(int refnum) return false; } } - return TRUE; + return true; } /* @@ -1281,6 +1286,7 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) /* Allocate space. */ bt_regprog_T *r = xmalloc(sizeof(bt_regprog_T) + regsize); + r->re_in_use = false; /* * Second pass: emit code. 
@@ -1394,9 +1400,9 @@ regcomp_start ( regnzpar = 1; re_has_z = 0; regsize = 0L; - reg_toolong = FALSE; + reg_toolong = false; regflags = 0; - had_eol = FALSE; + had_eol = false; } /* @@ -1408,7 +1414,7 @@ int vim_regcomp_had_eol(void) return had_eol; } -// variables for parsing reginput +// variables used for parsing static int at_start; // True when on the first character static int prev_at_start; // True when on the second character @@ -1506,12 +1512,11 @@ reg ( EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ /* NOTREACHED */ } - /* - * Here we set the flag allowing back references to this set of - * parentheses. - */ - if (paren == REG_PAREN) - had_endbrace[parno] = TRUE; /* have seen the close paren */ + // Here we set the flag allowing back references to this set of + // parentheses. + if (paren == REG_PAREN) { + had_endbrace[parno] = true; // have seen the close paren + } return ret; } @@ -1565,7 +1570,7 @@ static char_u *regconcat(int *flagp) char_u *chain = NULL; char_u *latest; int flags; - int cont = TRUE; + int cont = true; *flagp = WORST; /* Tentatively. 
*/ @@ -1575,7 +1580,7 @@ static char_u *regconcat(int *flagp) case Magic('|'): case Magic('&'): case Magic(')'): - cont = FALSE; + cont = false; break; case Magic('Z'): regflags |= RF_ICOMBINE; @@ -1802,7 +1807,7 @@ static char_u *regatom(int *flagp) case Magic('$'): ret = regnode(EOL); - had_eol = TRUE; + had_eol = true; break; case Magic('<'): @@ -1821,7 +1826,7 @@ static char_u *regatom(int *flagp) } if (c == '$') { /* "\_$" is end-of-line */ ret = regnode(EOL); - had_eol = TRUE; + had_eol = true; break; } @@ -2069,11 +2074,12 @@ static char_u *regatom(int *flagp) } ungetchr(); - one_exactly = TRUE; + one_exactly = true; lastnode = regatom(flagp); - one_exactly = FALSE; - if (lastnode == NULL) + one_exactly = false; + if (lastnode == NULL) { return NULL; + } } if (ret == NULL) EMSG2_RET_NULL(_(e_empty_sb), @@ -2417,6 +2423,27 @@ collection: case CLASS_ESCAPE: regc(ESC); break; + case CLASS_IDENT: + for (cu = 1; cu <= 255; cu++) { + if (vim_isIDc(cu)) { + regmbc(cu); + } + } + break; + case CLASS_KEYWORD: + for (cu = 1; cu <= 255; cu++) { + if (reg_iswordc(cu)) { + regmbc(cu); + } + } + break; + case CLASS_FNAME: + for (cu = 1; cu <= 255; cu++) { + if (vim_isfilec(cu)) { + regmbc(cu); + } + } + break; } } else { // produce a multibyte character, including any @@ -2514,15 +2541,13 @@ static bool re_mult_next(char *what) return true; } -/* - * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for - * character "c". - */ -static int use_multibytecode(int c) +// Return true if MULTIBYTECODE should be used instead of EXACTLY for +// character "c". +static bool use_multibytecode(int c) { - return has_mbyte && (*mb_char2len)(c) > 1 + return utf_char2len(c) > 1 && (re_multi_type(peekchr()) != NOT_MULTI - || (enc_utf8 && utf_iscomposing(c))); + || utf_iscomposing(c)); } /* @@ -2667,39 +2692,38 @@ static char_u *re_put_uint32(char_u *p, uint32_t val) return p; } -/* - * Set the next-pointer at the end of a node chain. 
- */ +// Set the next-pointer at the end of a node chain. static void regtail(char_u *p, char_u *val) { - char_u *scan; - char_u *temp; int offset; - if (p == JUST_CALC_SIZE) + if (p == JUST_CALC_SIZE) { return; + } - /* Find last node. */ - scan = p; + // Find last node. + char_u *scan = p; for (;; ) { - temp = regnext(scan); - if (temp == NULL) + char_u *temp = regnext(scan); + if (temp == NULL) { break; + } scan = temp; } - if (OP(scan) == BACK) + if (OP(scan) == BACK) { offset = (int)(scan - val); - else + } else { offset = (int)(val - scan); - /* When the offset uses more than 16 bits it can no longer fit in the two - * bytes available. Use a global flag to avoid having to check return - * values in too many places. */ - if (offset > 0xffff) - reg_toolong = TRUE; - else { - *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377); - *(scan + 2) = (char_u) (offset & 0377); + } + // When the offset uses more than 16 bits it can no longer fit in the two + // bytes available. Use a global flag to avoid having to check return + // values in too many places. 
+ if (offset > 0xffff) { + reg_toolong = true; + } else { + *(scan + 1) = (char_u)(((unsigned)offset >> 8) & 0377); + *(scan + 2) = (char_u)(offset & 0377); } } @@ -2728,8 +2752,8 @@ static void initchr(char_u *str) regparse = str; prevchr_len = 0; curchr = prevprevchr = prevchr = nextchr = -1; - at_start = TRUE; - prev_at_start = FALSE; + at_start = true; + prev_at_start = false; } /* @@ -2771,7 +2795,7 @@ static void restore_parse_state(parse_state_T *ps) */ static int peekchr(void) { - static int after_slash = FALSE; + static int after_slash = false; if (curchr != -1) { return curchr; @@ -2837,8 +2861,8 @@ static int peekchr(void) || (no_Magic(prevchr) == '(' && prevprevchr == Magic('%')))) { curchr = Magic('^'); - at_start = TRUE; - prev_at_start = FALSE; + at_start = true; + prev_at_start = false; } break; case '$': @@ -2889,12 +2913,12 @@ static int peekchr(void) */ curchr = -1; prev_at_start = at_start; - at_start = FALSE; /* be able to say "/\*ptr" */ - ++regparse; - ++after_slash; + at_start = false; // be able to say "/\*ptr" + regparse++; + after_slash++; peekchr(); - --regparse; - --after_slash; + regparse--; + after_slash--; curchr = toggle_Magic(curchr); } else if (vim_strchr(REGEXP_ABBR, c)) { /* @@ -2936,7 +2960,7 @@ static void skipchr(void) } regparse += prevchr_len; prev_at_start = at_start; - at_start = FALSE; + at_start = false; prevprevchr = prevchr; prevchr = curchr; curchr = nextchr; /* use previously unget char, or -1 */ @@ -2980,7 +3004,7 @@ static void ungetchr(void) curchr = prevchr; prevchr = prevprevchr; at_start = prev_at_start; - prev_at_start = FALSE; + prev_at_start = false; /* Backup regparse, so that it's at the same position as before the * getchr(). */ @@ -3101,14 +3125,14 @@ static int coll_get_char(void) */ static int read_limits(long *minval, long *maxval) { - int reverse = FALSE; + int reverse = false; char_u *first_char; long tmp; if (*regparse == '-') { // Starts with '-', so reverse the range later. 
regparse++; - reverse = TRUE; + reverse = true; } first_char = regparse; *minval = getdigits_long(&regparse, false, 0); @@ -3153,17 +3177,6 @@ static int read_limits(long *minval, long *maxval) * Global work variables for vim_regexec(). */ -/* The current match-position is remembered with these variables: */ -static linenr_T reglnum; /* line number, relative to first line */ -static char_u *regline; /* start of current line */ -static char_u *reginput; /* current input, points into "regline" */ - -static int need_clear_subexpr; /* subexpressions still need to be - * cleared */ -static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions - * still need to be cleared */ - - /* Save the sub-expressions before attempting a match. */ #define save_se(savep, posp, pp) \ REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)) @@ -3214,18 +3227,42 @@ typedef struct { linenr_T reg_maxline; bool reg_line_lbr; // "\n" in string is line break + // The current match-position is remembered with these variables: + linenr_T lnum; ///< line number, relative to first line + char_u *line; ///< start of current line + char_u *input; ///< current input, points into "regline" + + int need_clear_subexpr; ///< subexpressions still need to be cleared + int need_clear_zsubexpr; ///< extmatch subexpressions still need to be + ///< cleared + + // Internal copy of 'ignorecase'. It is set at each call to vim_regexec(). // Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern // contains '\c' or '\C' the value is overruled. bool reg_ic; - // Similar to rex.reg_ic, but only for 'combining' characters. Set with \Z + // Similar to "reg_ic", but only for 'combining' characters. Set with \Z // flag in the regexp. Defaults to false, always. bool reg_icombine; // Copy of "rmm_maxcol": maximum column to search for a match. Zero when // there is no maximum. colnr_T reg_maxcol; + + // State for the NFA engine regexec. 
+ int nfa_has_zend; ///< NFA regexp \ze operator encountered. + int nfa_has_backref; ///< NFA regexp \1 .. \9 encountered. + int nfa_nsubexpr; ///< Number of sub expressions actually being used + ///< during execution. 1 if only the whole match + ///< (subexpr 0) is used. + // listid is global, so that it increases on recursive calls to + // nfa_regmatch(), which means we don't have to clear the lastlist field of + // all the states. + int nfa_listid; + int nfa_alt_listid; + + int nfa_has_zsubexpr; ///< NFA regexp has \z( ), set zsubexpr. } regexec_T; static regexec_T rex; @@ -3266,6 +3303,13 @@ void free_regexp_stuff(void) #endif +// Return true if character 'c' is included in 'iskeyword' option for +// "reg_buf" buffer. +static bool reg_iswordc(int c) +{ + return vim_iswordc_buf(c, rex.reg_buf); +} + /* * Get pointer to the line "lnum", which is relative to "reg_firstlnum". */ @@ -3290,7 +3334,7 @@ static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */ static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */ static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ -// TRUE if using multi-line regexp. +// true if using multi-line regexp. #define REG_MULTI (rex.reg_match == NULL) /* @@ -3491,13 +3535,13 @@ static long bt_regexec_both(char_u *line, } } - regline = line; - reglnum = 0; - reg_toolong = FALSE; + rex.line = line; + rex.lnum = 0; + reg_toolong = false; /* Simplest case: Anchored match need be tried only once. */ if (prog->reganch) { - int c = utf_ptr2char(regline + col); + int c = utf_ptr2char(rex.line + col); if (prog->regstart == NUL || prog->regstart == c || (rex.reg_ic @@ -3514,12 +3558,12 @@ static long bt_regexec_both(char_u *line, while (!got_int) { if (prog->regstart != NUL) { // Skip until the char we know it must start with. 
- s = cstrchr(regline + col, prog->regstart); + s = cstrchr(rex.line + col, prog->regstart); if (s == NULL) { retval = 0; break; } - col = (int)(s - regline); + col = (int)(s - rex.line); } // Check for maximum column to try. @@ -3533,18 +3577,16 @@ static long bt_regexec_both(char_u *line, break; } - /* if not currently on the first line, get it again */ - if (reglnum != 0) { - reglnum = 0; - regline = reg_getline((linenr_T)0); + // if not currently on the first line, get it again + if (rex.lnum != 0) { + rex.lnum = 0; + rex.line = reg_getline((linenr_T)0); } - if (regline[col] == NUL) + if (rex.line[col] == NUL) { break; - if (has_mbyte) - col += (*mb_ptr2len)(regline + col); - else - ++col; - /* Check for timeout once in a twenty times to avoid overhead. */ + } + col += (*mb_ptr2len)(rex.line + col); + // Check for timeout once in a twenty times to avoid overhead. if (tm != NULL && ++tm_count == 20) { tm_count = 0; if (profile_passed_limit(*tm)) { @@ -3608,18 +3650,17 @@ void unref_extmatch(reg_extmatch_T *em) } } -/// Try match of "prog" with at regline["col"]. +/// Try match of "prog" with at rex.line["col"]. /// @returns 0 for failure, or number of lines contained in the match. static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, // timeout limit or NULL int *timed_out) // flag set on timeout or NULL { - reginput = regline + col; - need_clear_subexpr = TRUE; - /* Clear the external match subpointers if necessary. */ - if (prog->reghasz == REX_SET) - need_clear_zsubexpr = TRUE; + rex.input = rex.line + col; + rex.need_clear_subexpr = true; + // Clear the external match subpointers if necessary. 
+ rex.need_clear_zsubexpr = (prog->reghasz == REX_SET); if (regmatch(prog->program + 1, tm, timed_out) == 0) { return 0; @@ -3632,18 +3673,18 @@ static long regtry(bt_regprog_T *prog, rex.reg_startpos[0].col = col; } if (rex.reg_endpos[0].lnum < 0) { - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = (int)(reginput - regline); + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = (int)(rex.input - rex.line); } else { // Use line number of "\ze". - reglnum = rex.reg_endpos[0].lnum; + rex.lnum = rex.reg_endpos[0].lnum; } } else { if (rex.reg_startp[0] == NULL) { - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; } if (rex.reg_endp[0] == NULL) { - rex.reg_endp[0] = reginput; + rex.reg_endp[0] = rex.input; } } /* Package any found \z(...\) matches for export. Default is none. */ @@ -3675,23 +3716,24 @@ static long regtry(bt_regprog_T *prog, } } } - return 1 + reglnum; + return 1 + rex.lnum; } // Get class of previous character. static int reg_prev_class(void) { - if (reginput > regline) { - return mb_get_class_tab(reginput - 1 - utf_head_off(regline, reginput - 1), - rex.reg_buf->b_chartab); + if (rex.input > rex.line) { + return mb_get_class_tab( + rex.input - 1 - utf_head_off(rex.line, rex.input - 1), + rex.reg_buf->b_chartab); } return -1; } -// Return TRUE if the current reginput position matches the Visual area. -static int reg_match_visual(void) +// Return true if the current rex.input position matches the Visual area. 
+static bool reg_match_visual(void) { pos_T top, bot; linenr_T lnum; @@ -3725,16 +3767,17 @@ static int reg_match_visual(void) } mode = curbuf->b_visual.vi_mode; } - lnum = reglnum + rex.reg_firstlnum; + lnum = rex.lnum + rex.reg_firstlnum; if (lnum < top.lnum || lnum > bot.lnum) { return false; } if (mode == 'v') { - col = (colnr_T)(reginput - regline); + col = (colnr_T)(rex.input - rex.line); if ((lnum == top.lnum && col < top.col) - || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) - return FALSE; + || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) { + return false; + } } else if (mode == Ctrl_V) { getvvcol(wp, &top, &start, NULL, &end); getvvcol(wp, &bot, &start2, NULL, &end2); @@ -3744,17 +3787,18 @@ static int reg_match_visual(void) end = end2; if (top.col == MAXCOL || bot.col == MAXCOL) end = MAXCOL; - unsigned int cols_u = win_linetabsize(wp, regline, - (colnr_T)(reginput - regline)); + unsigned int cols_u = win_linetabsize(wp, rex.line, + (colnr_T)(rex.input - rex.line)); assert(cols_u <= MAXCOL); colnr_T cols = (colnr_T)cols_u; - if (cols < start || cols > end - (*p_sel == 'e')) - return FALSE; + if (cols < start || cols > end - (*p_sel == 'e')) { + return false; + } } - return TRUE; + return true; } -#define ADVANCE_REGINPUT() MB_PTR_ADV(reginput) +#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) /* * The arguments from BRACE_LIMITS are stored here. They are actually local @@ -3773,11 +3817,11 @@ static long bl_maxval; /// (that don't need to know whether the rest of the match failed) by a nested /// loop. /// -/// Returns TRUE when there is a match. Leaves reginput and reglnum just after -/// the last matched character. -/// Returns FALSE when there is no match. Leaves reginput and reglnum in an +/// Returns true when there is a match. Leaves rex.input and rex.lnum +/// just after the last matched character. +/// Returns false when there is no match. Leaves rex.input and rex.lnum in an /// undefined state! 
-static int regmatch( +static bool regmatch( char_u *scan, // Current node. proftime_T *tm, // timeout limit or NULL int *timed_out // flag set on timeout or NULL @@ -3860,38 +3904,40 @@ static int regmatch( op = OP(scan); // Check for character class with NL added. if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI - && *reginput == NUL && reglnum <= rex.reg_maxline) { + && *rex.input == NUL && rex.lnum <= rex.reg_maxline) { reg_nextline(); - } else if (rex.reg_line_lbr && WITH_NL(op) && *reginput == '\n') { + } else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n') { ADVANCE_REGINPUT(); } else { if (WITH_NL(op)) { op -= ADD_NL; } - c = utf_ptr2char(reginput); + c = utf_ptr2char(rex.input); switch (op) { case BOL: - if (reginput != regline) + if (rex.input != rex.line) { status = RA_NOMATCH; + } break; case EOL: - if (c != NUL) + if (c != NUL) { status = RA_NOMATCH; + } break; case RE_BOF: // We're not at the beginning of the file when below the first // line where we started, not at the start of the line or we // didn't start at the first line of the buffer. - if (reglnum != 0 || reginput != regline + if (rex.lnum != 0 || rex.input != rex.line || (REG_MULTI && rex.reg_firstlnum > 1)) { status = RA_NOMATCH; } break; case RE_EOF: - if (reglnum != rex.reg_maxline || c != NUL) { + if (rex.lnum != rex.reg_maxline || c != NUL) { status = RA_NOMATCH; } break; @@ -3900,8 +3946,9 @@ static int regmatch( // Check if the buffer is in a window and compare the // rex.reg_win->w_cursor position to the match position. 
if (rex.reg_win == NULL - || (reglnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) - || ((colnr_T)(reginput - regline) != rex.reg_win->w_cursor.col)) { + || (rex.lnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) + || ((colnr_T)(rex.input - rex.line) != + rex.reg_win->w_cursor.col)) { status = RA_NOMATCH; } break; @@ -3916,13 +3963,13 @@ static int regmatch( pos = getmark_buf(rex.reg_buf, mark, false); if (pos == NULL // mark doesn't exist || pos->lnum <= 0 // mark isn't set in reg_buf - || (pos->lnum == reglnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(reginput - regline) + || (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos->col == (colnr_T)(rex.input - rex.line) ? (cmp == '<' || cmp == '>') - : (pos->col < (colnr_T)(reginput - regline) + : (pos->col < (colnr_T)(rex.input - rex.line) ? cmp != '>' : cmp != '<')) - : (pos->lnum < reglnum + rex.reg_firstlnum + : (pos->lnum < rex.lnum + rex.reg_firstlnum ? cmp != '>' : cmp != '<'))) { status = RA_NOMATCH; @@ -3936,79 +3983,70 @@ static int regmatch( break; case RE_LNUM: - assert(reglnum + rex.reg_firstlnum >= 0 - && (uintmax_t)(reglnum + rex.reg_firstlnum) <= UINT32_MAX); + assert(rex.lnum + rex.reg_firstlnum >= 0 + && (uintmax_t)(rex.lnum + rex.reg_firstlnum) <= UINT32_MAX); if (!REG_MULTI - || !re_num_cmp((uint32_t)(reglnum + rex.reg_firstlnum), scan)) { + || !re_num_cmp((uint32_t)(rex.lnum + rex.reg_firstlnum), scan)) { status = RA_NOMATCH; } break; case RE_COL: - assert(reginput - regline + 1 >= 0 - && (uintmax_t)(reginput - regline + 1) <= UINT32_MAX); - if (!re_num_cmp((uint32_t)(reginput - regline + 1), scan)) + assert(rex.input - rex.line + 1 >= 0 + && (uintmax_t)(rex.input - rex.line + 1) <= UINT32_MAX); + if (!re_num_cmp((uint32_t)(rex.input - rex.line + 1), scan)) { status = RA_NOMATCH; + } break; case RE_VCOL: if (!re_num_cmp(win_linetabsize(rex.reg_win == NULL ? 
curwin : rex.reg_win, - regline, - (colnr_T)(reginput - regline)) + 1, + rex.line, + (colnr_T)(rex.input - rex.line)) + 1, scan)) { status = RA_NOMATCH; } break; - case BOW: /* \<word; reginput points to w */ - if (c == NUL) /* Can't match at end of line */ + case BOW: // \<word; rex.input points to w + if (c == NUL) { // Can't match at end of line status = RA_NOMATCH; - else if (has_mbyte) { - int this_class; - + } else { // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + const int this_class = + mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); if (this_class <= 1) { status = RA_NOMATCH; // Not on a word at all. } else if (reg_prev_class() == this_class) { status = RA_NOMATCH; // Previous char is in same word. } - } else { - if (!vim_iswordc_buf(c, rex.reg_buf) - || (reginput > regline - && vim_iswordc_buf(reginput[-1], rex.reg_buf))) { - status = RA_NOMATCH; - } } break; - case EOW: /* word\>; reginput points after d */ - if (reginput == regline) /* Can't match at start of line */ + case EOW: // word\>; rex.input points after d + if (rex.input == rex.line) { // Can't match at start of line status = RA_NOMATCH; - else if (has_mbyte) { + } else { int this_class, prev_class; // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); prev_class = reg_prev_class(); if (this_class == prev_class - || prev_class == 0 || prev_class == 1) - status = RA_NOMATCH; - } else { - if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) - || (reginput[0] != NUL && vim_iswordc_buf(c, rex.reg_buf))) { + || prev_class == 0 || prev_class == 1) { status = RA_NOMATCH; } } - break; /* Matched with EOW */ + break; // Matched with EOW case ANY: - /* ANY does not match new lines. */ - if (c == NUL) + // ANY does not match new lines. 
+ if (c == NUL) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case IDENT: @@ -4019,14 +4057,15 @@ static int regmatch( break; case SIDENT: - if (ascii_isdigit(*reginput) || !vim_isIDc(c)) + if (ascii_isdigit(*rex.input) || !vim_isIDc(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case KWORD: - if (!vim_iswordp_buf(reginput, rex.reg_buf)) { + if (!vim_iswordp_buf(rex.input, rex.reg_buf)) { status = RA_NOMATCH; } else { ADVANCE_REGINPUT(); @@ -4034,8 +4073,8 @@ static int regmatch( break; case SKWORD: - if (ascii_isdigit(*reginput) - || !vim_iswordp_buf(reginput, rex.reg_buf)) { + if (ascii_isdigit(*rex.input) + || !vim_iswordp_buf(rex.input, rex.reg_buf)) { status = RA_NOMATCH; } else { ADVANCE_REGINPUT(); @@ -4043,31 +4082,35 @@ static int regmatch( break; case FNAME: - if (!vim_isfilec(c)) + if (!vim_isfilec(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case SFNAME: - if (ascii_isdigit(*reginput) || !vim_isfilec(c)) + if (ascii_isdigit(*rex.input) || !vim_isfilec(c)) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case PRINT: - if (!vim_isprintc(PTR2CHAR(reginput))) + if (!vim_isprintc(PTR2CHAR(rex.input))) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case SPRINT: - if (ascii_isdigit(*reginput) || !vim_isprintc(PTR2CHAR(reginput))) + if (ascii_isdigit(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input))) { status = RA_NOMATCH; - else + } else { ADVANCE_REGINPUT(); + } break; case WHITE: @@ -4203,10 +4246,10 @@ static int regmatch( opnd = OPERAND(scan); // Inline the first byte, for speed. 
- if (*opnd != *reginput + if (*opnd != *rex.input && (!rex.reg_ic || (!enc_utf8 - && mb_tolower(*opnd) != mb_tolower(*reginput)))) { + && mb_tolower(*opnd) != mb_tolower(*rex.input)))) { status = RA_NOMATCH; } else if (*opnd == NUL) { // match empty string always works; happens when "~" is @@ -4217,14 +4260,14 @@ static int regmatch( } else { // Need to match first byte again for multi-byte. len = (int)STRLEN(opnd); - if (cstrncmp(opnd, reginput, &len) != 0) { + if (cstrncmp(opnd, rex.input, &len) != 0) { status = RA_NOMATCH; } } // Check for following composing character, unless %C // follows (skips over all composing chars). if (status != RA_NOMATCH && enc_utf8 - && UTF_COMPOSINGLIKE(reginput, reginput + len) + && UTF_COMPOSINGLIKE(rex.input, rex.input + len) && !rex.reg_icombine && OP(next) != RE_COMPOSING) { // raaron: This code makes a composing character get @@ -4233,7 +4276,7 @@ static int regmatch( status = RA_NOMATCH; } if (status != RA_NOMATCH) { - reginput += len; + rex.input += len; } } } @@ -4250,54 +4293,52 @@ static int regmatch( break; case MULTIBYTECODE: - if (has_mbyte) { + { int i, len; - char_u *opnd; - int opndc = 0, inpc; - opnd = OPERAND(scan); + const char_u *opnd = OPERAND(scan); // Safety check (just in case 'encoding' was changed since // compiling the program). if ((len = (*mb_ptr2len)(opnd)) < 2) { status = RA_NOMATCH; break; } - if (enc_utf8) { - opndc = utf_ptr2char(opnd); - } - if (enc_utf8 && utf_iscomposing(opndc)) { - /* When only a composing char is given match at any - * position where that composing char appears. */ + const int opndc = utf_ptr2char(opnd); + if (utf_iscomposing(opndc)) { + // When only a composing char is given match at any + // position where that composing char appears. 
status = RA_NOMATCH; - for (i = 0; reginput[i] != NUL; i += utf_ptr2len(reginput + i)) { - inpc = utf_ptr2char(reginput + i); + for (i = 0; rex.input[i] != NUL; + i += utf_ptr2len(rex.input + i)) { + const int inpc = utf_ptr2char(rex.input + i); if (!utf_iscomposing(inpc)) { if (i > 0) { break; } } else if (opndc == inpc) { // Include all following composing chars. - len = i + utfc_ptr2len(reginput + i); + len = i + utfc_ptr2len(rex.input + i); status = RA_MATCH; break; } } - } else - for (i = 0; i < len; ++i) - if (opnd[i] != reginput[i]) { + } else { + for (i = 0; i < len; i++) { + if (opnd[i] != rex.input[i]) { status = RA_NOMATCH; break; } - reginput += len; - } else - status = RA_NOMATCH; + } + } + rex.input += len; + } break; case RE_COMPOSING: if (enc_utf8) { // Skip composing characters. - while (utf_iscomposing(utf_ptr2char(reginput))) { - MB_CPTR_ADV(reginput); + while (utf_iscomposing(utf_ptr2char(rex.input))) { + MB_CPTR_ADV(rex.input); } } break; @@ -4460,7 +4501,7 @@ static int regmatch( } else { // Compare current input with back-ref in the same line. len = (int)(rex.reg_endp[no] - rex.reg_startp[no]); - if (cstrncmp(rex.reg_startp[no], reginput, &len) != 0) { + if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0) { status = RA_NOMATCH; } } @@ -4469,12 +4510,12 @@ static int regmatch( // Backref was not set: Match an empty string. len = 0; } else { - if (rex.reg_startpos[no].lnum == reglnum - && rex.reg_endpos[no].lnum == reglnum) { + if (rex.reg_startpos[no].lnum == rex.lnum + && rex.reg_endpos[no].lnum == rex.lnum) { // Compare back-ref within the current line. len = rex.reg_endpos[no].col - rex.reg_startpos[no].col; - if (cstrncmp(regline + rex.reg_startpos[no].col, - reginput, &len) != 0) { + if (cstrncmp(rex.line + rex.reg_startpos[no].col, + rex.input, &len) != 0) { status = RA_NOMATCH; } } else { @@ -4491,8 +4532,8 @@ static int regmatch( } } - /* Matched the backref, skip over it. 
*/ - reginput += len; + // Matched the backref, skip over it. + rex.input += len; } break; @@ -4506,20 +4547,18 @@ static int regmatch( case ZREF + 8: case ZREF + 9: { - int len; - cleanup_zsubexpr(); no = op - ZREF; if (re_extmatch_in != NULL && re_extmatch_in->matches[no] != NULL) { - len = (int)STRLEN(re_extmatch_in->matches[no]); - if (cstrncmp(re_extmatch_in->matches[no], - reginput, &len) != 0) + int len = (int)STRLEN(re_extmatch_in->matches[no]); + if (cstrncmp(re_extmatch_in->matches[no], rex.input, &len) != 0) { status = RA_NOMATCH; - else - reginput += len; + } else { + rex.input += len; + } } else { - /* Backref was not set: Match an empty string. */ + // Backref was not set: Match an empty string. } } break; @@ -4725,15 +4764,17 @@ static int regmatch( case BHPOS: if (REG_MULTI) { - if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline) - || behind_pos.rs_u.pos.lnum != reglnum) + if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line) + || behind_pos.rs_u.pos.lnum != rex.lnum) { status = RA_NOMATCH; - } else if (behind_pos.rs_u.ptr != reginput) + } + } else if (behind_pos.rs_u.ptr != rex.input) { status = RA_NOMATCH; + } break; case NEWL: - if ((c != NUL || !REG_MULTI || reglnum > rex.reg_maxline + if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) && (c != '\n' || !rex.reg_line_lbr)) { status = RA_NOMATCH; } else if (rex.reg_line_lbr) { @@ -4946,7 +4987,7 @@ static int regmatch( if (limit > 0 && ((rp->rs_un.regsave.rs_u.pos.lnum < behind_pos.rs_u.pos.lnum - ? (colnr_T)STRLEN(regline) + ? 
(colnr_T)STRLEN(rex.line) : behind_pos.rs_u.pos.col) - rp->rs_un.regsave.rs_u.pos.col >= limit)) no = FAIL; @@ -4960,7 +5001,7 @@ static int regmatch( else { reg_restore(&rp->rs_un.regsave, &backpos); rp->rs_un.regsave.rs_u.pos.col = - (colnr_T)STRLEN(regline); + (colnr_T)STRLEN(rex.line); } } else { const char_u *const line = @@ -4972,10 +5013,10 @@ static int regmatch( + 1; } } else { - if (rp->rs_un.regsave.rs_u.ptr == regline) { + if (rp->rs_un.regsave.rs_u.ptr == rex.line) { no = FAIL; } else { - MB_PTR_BACK(regline, rp->rs_un.regsave.rs_u.ptr); + MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr); if (limit > 0 && (long)(behind_pos.rs_u.ptr - rp->rs_un.regsave.rs_u.ptr) > limit) { @@ -5039,18 +5080,18 @@ static int regmatch( * didn't match -- back up one char. */ if (--rst->count < rst->minval) break; - if (reginput == regline) { + if (rex.input == rex.line) { // backup to last char of previous line - reglnum--; - regline = reg_getline(reglnum); + rex.lnum--; + rex.line = reg_getline(rex.lnum); // Just in case regrepeat() didn't count right. - if (regline == NULL) { + if (rex.line == NULL) { break; } - reginput = regline + STRLEN(regline); + rex.input = rex.line + STRLEN(rex.line); fast_breakcheck(); } else { - MB_PTR_BACK(regline, reginput); + MB_PTR_BACK(rex.line, rex.input); } } else { /* Range is backwards, use shortest match first. @@ -5067,9 +5108,9 @@ static int regmatch( } else status = RA_NOMATCH; - /* If it could match, try it. */ - if (rst->nextb == NUL || *reginput == rst->nextb - || *reginput == rst->nextb_ic) { + // If it could match, try it. + if (rst->nextb == NUL || *rex.input == rst->nextb + || *rex.input == rst->nextb_ic) { reg_save(&rp->rs_un.regsave, &backpos); scan = regnext(rp->rs_scan); status = RA_CONT; @@ -5156,7 +5197,7 @@ static void regstack_pop(char_u **scan) /* * regrepeat - repeatedly match something simple, return how many. - * Advances reginput (and reglnum) to just after the matched chars. 
+ * Advances rex.input (and rex.lnum) to just after the matched chars. */ static int regrepeat ( @@ -5165,12 +5206,11 @@ regrepeat ( ) { long count = 0; - char_u *scan; char_u *opnd; int mask; int testval = 0; - scan = reginput; /* Make local copy of reginput for speed. */ + char_u *scan = rex.input; // Make local copy of rex.input for speed. opnd = OPERAND(p); switch (OP(p)) { case ANY: @@ -5182,15 +5222,16 @@ regrepeat ( count++; MB_PTR_ADV(scan); } - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr || count == maxcount) { break; } count++; // count the line-break reg_nextline(); - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } break; @@ -5204,14 +5245,15 @@ regrepeat ( if (vim_isIDc(PTR2CHAR(scan)) && (testval || !ascii_isdigit(*scan))) { MB_PTR_ADV(scan); } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { scan++; } else { @@ -5232,12 +5274,12 @@ regrepeat ( && (testval || !ascii_isdigit(*scan))) { MB_PTR_ADV(scan); } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) { break; } @@ -5260,12 +5302,12 @@ regrepeat ( if (vim_isfilec(PTR2CHAR(scan)) && (testval || !ascii_isdigit(*scan))) { MB_PTR_ADV(scan); } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan 
= rex.input; if (got_int) { break; } @@ -5286,12 +5328,12 @@ regrepeat ( case SPRINT + ADD_NL: while (count < maxcount) { if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) { break; } @@ -5314,14 +5356,15 @@ do_class: while (count < maxcount) { int l; if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) { if (testval != 0) break; @@ -5467,12 +5510,12 @@ do_class: while (count < maxcount) { int len; if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) { break; } reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) { break; } @@ -5494,7 +5537,7 @@ do_class: case NEWL: while (count < maxcount - && ((*scan == NUL && reglnum <= rex.reg_maxline && !rex.reg_line_lbr + && ((*scan == NUL && rex.lnum <= rex.reg_maxline && !rex.reg_line_lbr && REG_MULTI) || (*scan == '\n' && rex.reg_line_lbr))) { count++; if (rex.reg_line_lbr) { @@ -5502,9 +5545,10 @@ do_class: } else { reg_nextline(); } - scan = reginput; - if (got_int) + scan = rex.input; + if (got_int) { break; + } } break; @@ -5516,7 +5560,7 @@ do_class: break; } - reginput = scan; + rex.input = scan; return (int)count; } @@ -5546,7 +5590,7 @@ static char_u *regnext(char_u *p) /* * Check the regexp program for its magic number. - * Return TRUE if it's wrong. + * Return true if it's wrong. 
*/ static int prog_magic_wrong(void) { @@ -5560,9 +5604,9 @@ static int prog_magic_wrong(void) if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC) { EMSG(_(e_re_corr)); - return TRUE; + return true; } - return FALSE; + return false; } /* @@ -5572,7 +5616,7 @@ static int prog_magic_wrong(void) */ static void cleanup_subexpr(void) { - if (need_clear_subexpr) { + if (rex.need_clear_subexpr) { if (REG_MULTI) { // Use 0xff to set lnum to -1 memset(rex.reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP); @@ -5581,13 +5625,13 @@ static void cleanup_subexpr(void) memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP); memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP); } - need_clear_subexpr = FALSE; + rex.need_clear_subexpr = false; } } static void cleanup_zsubexpr(void) { - if (need_clear_zsubexpr) { + if (rex.need_clear_zsubexpr) { if (REG_MULTI) { /* Use 0xff to set lnum to -1 */ memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP); @@ -5596,23 +5640,20 @@ static void cleanup_zsubexpr(void) memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP); memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP); } - need_clear_zsubexpr = FALSE; + rex.need_clear_zsubexpr = false; } } -/* - * Save the current subexpr to "bp", so that they can be restored - * later by restore_subexpr(). - */ +// Save the current subexpr to "bp", so that they can be restored +// later by restore_subexpr(). static void save_subexpr(regbehind_T *bp) + FUNC_ATTR_NONNULL_ALL { - int i; - - // When "need_clear_subexpr" is set we don't need to save the values, only + // When "rex.need_clear_subexpr" is set we don't need to save the values, only // remember that this flag needs to be set again when restoring. 
- bp->save_need_clear_subexpr = need_clear_subexpr; - if (!need_clear_subexpr) { - for (i = 0; i < NSUBEXP; ++i) { + bp->save_need_clear_subexpr = rex.need_clear_subexpr; + if (!rex.need_clear_subexpr) { + for (int i = 0; i < NSUBEXP; i++) { if (REG_MULTI) { bp->save_start[i].se_u.pos = rex.reg_startpos[i]; bp->save_end[i].se_u.pos = rex.reg_endpos[i]; @@ -5624,17 +5665,14 @@ static void save_subexpr(regbehind_T *bp) } } -/* - * Restore the subexpr from "bp". - */ +// Restore the subexpr from "bp". static void restore_subexpr(regbehind_T *bp) + FUNC_ATTR_NONNULL_ALL { - int i; - - /* Only need to restore saved values when they are not to be cleared. */ - need_clear_subexpr = bp->save_need_clear_subexpr; - if (!need_clear_subexpr) { - for (i = 0; i < NSUBEXP; ++i) { + // Only need to restore saved values when they are not to be cleared. + rex.need_clear_subexpr = bp->save_need_clear_subexpr; + if (!rex.need_clear_subexpr) { + for (int i = 0; i < NSUBEXP; i++) { if (REG_MULTI) { rex.reg_startpos[i] = bp->save_start[i].se_u.pos; rex.reg_endpos[i] = bp->save_end[i].se_u.pos; @@ -5646,56 +5684,54 @@ static void restore_subexpr(regbehind_T *bp) } } -/* - * Advance reglnum, regline and reginput to the next line. - */ +// Advance rex.lnum, rex.line and rex.input to the next line. static void reg_nextline(void) { - regline = reg_getline(++reglnum); - reginput = regline; + rex.line = reg_getline(++rex.lnum); + rex.input = rex.line; fast_breakcheck(); } -/* - * Save the input line and position in a regsave_T. - */ +// Save the input line and position in a regsave_T. 
static void reg_save(regsave_T *save, garray_T *gap) + FUNC_ATTR_NONNULL_ALL { if (REG_MULTI) { - save->rs_u.pos.col = (colnr_T)(reginput - regline); - save->rs_u.pos.lnum = reglnum; - } else - save->rs_u.ptr = reginput; + save->rs_u.pos.col = (colnr_T)(rex.input - rex.line); + save->rs_u.pos.lnum = rex.lnum; + } else { + save->rs_u.ptr = rex.input; + } save->rs_len = gap->ga_len; } -/* - * Restore the input line and position from a regsave_T. - */ +// Restore the input line and position from a regsave_T. static void reg_restore(regsave_T *save, garray_T *gap) + FUNC_ATTR_NONNULL_ALL { if (REG_MULTI) { - if (reglnum != save->rs_u.pos.lnum) { - /* only call reg_getline() when the line number changed to save - * a bit of time */ - reglnum = save->rs_u.pos.lnum; - regline = reg_getline(reglnum); + if (rex.lnum != save->rs_u.pos.lnum) { + // only call reg_getline() when the line number changed to save + // a bit of time + rex.lnum = save->rs_u.pos.lnum; + rex.line = reg_getline(rex.lnum); } - reginput = regline + save->rs_u.pos.col; - } else - reginput = save->rs_u.ptr; + rex.input = rex.line + save->rs_u.pos.col; + } else { + rex.input = save->rs_u.ptr; + } gap->ga_len = save->rs_len; } -/* - * Return TRUE if current position is equal to saved position. - */ -static int reg_save_equal(regsave_T *save) +// Return true if current position is equal to saved position. 
+static bool reg_save_equal(const regsave_T *save) + FUNC_ATTR_NONNULL_ALL { - if (REG_MULTI) - return reglnum == save->rs_u.pos.lnum - && reginput == regline + save->rs_u.pos.col; - return reginput == save->rs_u.ptr; + if (REG_MULTI) { + return rex.lnum == save->rs_u.pos.lnum + && rex.input == rex.line + save->rs_u.pos.col; + } + return rex.input == save->rs_u.ptr; } /* @@ -5708,14 +5744,14 @@ static int reg_save_equal(regsave_T *save) static void save_se_multi(save_se_T *savep, lpos_T *posp) { savep->se_u.pos = *posp; - posp->lnum = reglnum; - posp->col = (colnr_T)(reginput - regline); + posp->lnum = rex.lnum; + posp->col = (colnr_T)(rex.input - rex.line); } static void save_se_one(save_se_T *savep, char_u **pp) { savep->se_u.ptr = *pp; - *pp = reginput; + *pp = rex.input; } /* @@ -5750,17 +5786,17 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e for (;; ) { /* Since getting one line may invalidate the other, need to make copy. * Slow! */ - if (regline != reg_tofree) { - len = (int)STRLEN(regline); + if (rex.line != reg_tofree) { + len = (int)STRLEN(rex.line); if (reg_tofree == NULL || len >= (int)reg_tofreelen) { len += 50; /* get some extra */ xfree(reg_tofree); reg_tofree = xmalloc(len); reg_tofreelen = len; } - STRCPY(reg_tofree, regline); - reginput = reg_tofree + (reginput - regline); - regline = reg_tofree; + STRCPY(reg_tofree, rex.line); + rex.input = reg_tofree + (rex.input - rex.line); + rex.line = reg_tofree; } /* Get the line to compare with. */ @@ -5772,14 +5808,16 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e else len = (int)STRLEN(p + ccol); - if (cstrncmp(p + ccol, reginput, &len) != 0) - return RA_NOMATCH; /* doesn't match */ - if (bytelen != NULL) + if (cstrncmp(p + ccol, rex.input, &len) != 0) { + return RA_NOMATCH; // doesn't match + } + if (bytelen != NULL) { *bytelen += len; + } if (clnum == end_lnum) { break; // match and at end! 
} - if (reglnum >= rex.reg_maxline) { + if (rex.lnum >= rex.reg_maxline) { return RA_NOMATCH; // text too short } @@ -5793,8 +5831,8 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e return RA_FAIL; } - /* found a match! Note that regline may now point to a copy of the line, - * that should not matter. */ + // found a match! Note that rex.line may now point to a copy of the line, + // that should not matter. return RA_MATCH; } @@ -6477,7 +6515,7 @@ char_u *regtilde(char_u *source, int magic) return newsub; } -static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */ +static bool can_f_submatch = false; // true when submatch() can be used // These pointers are used for reg_submatch(). Needed for when the // substitution string is an expression that contains a call to substitute() @@ -6534,11 +6572,11 @@ static void clear_submatch_list(staticList10_T *sl) /// vim_regsub() - perform substitutions after a vim_regexec() or /// vim_regexec_multi() match. /// -/// If "copy" is TRUE really copy into "dest". -/// If "copy" is FALSE nothing is copied, this is just to find out the length +/// If "copy" is true really copy into "dest". +/// If "copy" is false nothing is copied, this is just to find out the length /// of the result. /// -/// If "backslash" is TRUE, a backslash will be removed later, need to double +/// If "backslash" is true, a backslash will be removed later, need to double /// them to keep them, and insert a backslash before a CR to avoid it being /// replaced with a line break later. /// @@ -6630,8 +6668,8 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, if (expr != NULL || (source[0] == '\\' && source[1] == '=')) { // To make sure that the length doesn't change between checking the // length and copying the string, and to speed up things, the - // resulting string is saved from the call with "copy" == FALSE to the - // call with "copy" == TRUE. 
+ // resulting string is saved from the call with "copy" == false to the + // call with "copy" == true. if (copy) { if (eval_result != NULL) { STRCPY(dest, eval_result); @@ -6639,7 +6677,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, XFREE_CLEAR(eval_result); } } else { - int prev_can_f_submatch = can_f_submatch; + const bool prev_can_f_submatch = can_f_submatch; regsubmatch_T rsm_save; xfree(eval_result); @@ -6700,7 +6738,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } if (eval_result != NULL) { - int had_backslash = FALSE; + int had_backslash = false; for (s = eval_result; *s != NUL; MB_PTR_ADV(s)) { // Change NL to CR, so that it becomes a line break, @@ -6778,22 +6816,24 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } if (c == '\\' && *src != NUL) { - /* Check for abbreviations -- webb */ + // Check for abbreviations -- webb switch (*src) { case 'r': c = CAR; ++src; break; case 'n': c = NL; ++src; break; case 't': c = TAB; ++src; break; - /* Oh no! \e already has meaning in subst pat :-( */ - /* case 'e': c = ESC; ++src; break; */ + // Oh no! \e already has meaning in subst pat :-( + // case 'e': c = ESC; ++src; break; case 'b': c = Ctrl_H; ++src; break; - /* If "backslash" is TRUE the backslash will be removed - * later. Used to insert a literal CR. */ - default: if (backslash) { - if (copy) + // If "backslash" is true the backslash will be removed + // later. Used to insert a literal CR. 
+ default: + if (backslash) { + if (copy) { *dst = '\\'; - ++dst; - } + } + dst++; + } c = *src++; } } else { @@ -7163,8 +7203,10 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) regexp_engine = AUTOMATIC_ENGINE; } } +#ifdef REGEXP_DEBUG bt_regengine.expr = expr; nfa_regengine.expr = expr; +#endif // reg_iswordc() uses rex.reg_buf rex.reg_buf = curbuf; @@ -7245,24 +7287,33 @@ static void report_re_switch(char_u *pat) /// @param col the column to start looking for match /// @param nl /// -/// @return TRUE if there is a match, FALSE if not. -static int vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, - bool nl) +/// @return true if there is a match, false if not. +static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, + bool nl) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; + // Cannot use the same prog recursively, it contains state. + if (rmp->regprog->re_in_use) { + EMSG(_(e_recursive)); + return false; + } + rmp->regprog->re_in_use = true; + if (rex_in_use) { // Being called recursively, save the state. rex_save = rex; } rex_in_use = true; + rex.reg_startp = NULL; rex.reg_endp = NULL; rex.reg_startpos = NULL; rex.reg_endpos = NULL; int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + rmp->regprog->re_in_use = false; // NFA engine aborted because it's very slow, use backtracking engine instead. if (rmp->regprog->re_engine == AUTOMATIC_ENGINE @@ -7276,7 +7327,9 @@ static int vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, report_re_switch(pat); rmp->regprog = vim_regcomp(pat, re_flags); if (rmp->regprog != NULL) { + rmp->regprog->re_in_use = true; result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + rmp->regprog->re_in_use = false; } xfree(pat); @@ -7292,27 +7345,27 @@ static int vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, } // Note: "*prog" may be freed and changed. -// Return TRUE if there is a match, FALSE if not. 
-int vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, +// Return true if there is a match, false if not. +bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T col) { regmatch_T regmatch = { .regprog = *prog, .rm_ic = ignore_case }; - int r = vim_regexec_string(&regmatch, line, col, false); + bool r = vim_regexec_string(&regmatch, line, col, false); *prog = regmatch.regprog; return r; } // Note: "rmp->regprog" may be freed and changed. -// Return TRUE if there is a match, FALSE if not. -int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col) +// Return true if there is a match, false if not. +bool vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col) { return vim_regexec_string(rmp, line, col, false); } // Like vim_regexec(), but consider a "\n" in "line" to be a line break. // Note: "rmp->regprog" may be freed and changed. -// Return TRUE if there is a match, FALSE if not. -int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) +// Return true if there is a match, false if not. +bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) { return vim_regexec_string(rmp, line, col, true); } @@ -7337,6 +7390,13 @@ long vim_regexec_multi( regexec_T rex_save; bool rex_in_use_save = rex_in_use; + // Cannot use the same prog recursively, it contains state. + if (rmp->regprog->re_in_use) { + EMSG(_(e_recursive)); + return false; + } + rmp->regprog->re_in_use = true; + if (rex_in_use) { // Being called recursively, save the state. rex_save = rex; @@ -7345,6 +7405,7 @@ long vim_regexec_multi( int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm, timed_out); + rmp->regprog->re_in_use = false; // NFA engine aborted because it's very slow, use backtracking engine instead. 
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE @@ -7363,8 +7424,10 @@ long vim_regexec_multi( reg_do_extmatch = 0; if (rmp->regprog != NULL) { + rmp->regprog->re_in_use = true; result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm, timed_out); + rmp->regprog->re_in_use = false; } xfree(pat); diff --git a/src/nvim/regexp_defs.h b/src/nvim/regexp_defs.h index 116bfee91e..a729a91555 100644 --- a/src/nvim/regexp_defs.h +++ b/src/nvim/regexp_defs.h @@ -72,6 +72,7 @@ struct regprog { unsigned regflags; unsigned re_engine; ///< Automatic, backtracking or NFA engine. unsigned re_flags; ///< Second argument for vim_regcomp(). + bool re_in_use; ///< prog is being executed }; /* @@ -84,7 +85,8 @@ typedef struct { regengine_T *engine; unsigned regflags; unsigned re_engine; - unsigned re_flags; ///< Second argument for vim_regcomp(). + unsigned re_flags; + bool re_in_use; int regstart; char_u reganch; @@ -114,7 +116,8 @@ typedef struct { regengine_T *engine; unsigned regflags; unsigned re_engine; - unsigned re_flags; ///< Second argument for vim_regcomp(). + unsigned re_flags; + bool re_in_use; nfa_state_T *start; // points into state[] diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 387732fdee..506c4e87db 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -230,7 +230,10 @@ enum { NFA_CLASS_TAB, NFA_CLASS_RETURN, NFA_CLASS_BACKSPACE, - NFA_CLASS_ESCAPE + NFA_CLASS_ESCAPE, + NFA_CLASS_IDENT, + NFA_CLASS_KEYWORD, + NFA_CLASS_FNAME, }; /* Keep in sync with classchars. */ @@ -267,9 +270,9 @@ struct Frag { typedef struct Frag Frag_T; typedef struct { - int in_use; /* number of subexpr with useful info */ + int in_use; ///< number of subexpr with useful info - /* When REG_MULTI is TRUE list.multi is used, otherwise list.line. */ + // When REG_MULTI is true list.multi is used, otherwise list.line. 
union { struct multipos { linenr_T start_lnum; @@ -310,48 +313,27 @@ typedef struct { regsubs_T subs; /* submatch info, only party used */ } nfa_thread_T; -/* nfa_list_T contains the alternative NFA execution states. */ +// nfa_list_T contains the alternative NFA execution states. typedef struct { - nfa_thread_T *t; /* allocated array of states */ - int n; /* nr of states currently in "t" */ - int len; /* max nr of states in "t" */ - int id; /* ID of the list */ - int has_pim; /* TRUE when any state has a PIM */ + nfa_thread_T *t; ///< allocated array of states + int n; ///< nr of states currently in "t" + int len; ///< max nr of states in "t" + int id; ///< ID of the list + int has_pim; ///< true when any state has a PIM } nfa_list_T; -/// re_flags passed to nfa_regcomp(). -static int nfa_re_flags; - -/* NFA regexp \ze operator encountered. */ -static int nfa_has_zend; - -/* NFA regexp \1 .. \9 encountered. */ -static int nfa_has_backref; - -/* NFA regexp has \z( ), set zsubexpr. */ -static int nfa_has_zsubexpr; - -/* Number of sub expressions actually being used during execution. 1 if only - * the whole match (subexpr 0) is used. */ -static int nfa_nsubexpr; - -static int *post_start; /* holds the postfix form of r.e. */ +// Variables only used in nfa_regcomp() and descendants. +static int nfa_re_flags; ///< re_flags passed to nfa_regcomp(). +static int *post_start; ///< holds the postfix form of r.e. static int *post_end; static int *post_ptr; -static int nstate; /* Number of states in the NFA. Also used when - * executing. */ -static int istate; /* Index in the state vector, used in alloc_state() */ +static int nstate; ///< Number of states in the NFA. Also used when executing. 
+static int istate; ///< Index in the state vector, used in alloc_state() /* If not NULL match must end at this position */ static save_se_T *nfa_endp = NULL; -/* listid is global, so that it increases on recursive calls to - * nfa_regmatch(), which means we don't have to clear the lastlist field of - * all the states. */ -static int nfa_listid; -static int nfa_alt_listid; - /* 0 for first call to nfa_regmatch(), 1 for recursive call. */ static int nfa_ll_index = 0; @@ -395,8 +377,8 @@ nfa_regcomp_start ( post_start = (int *)xmalloc(postfix_size); post_ptr = post_start; post_end = post_start + nstate_max; - nfa_has_zend = FALSE; - nfa_has_backref = FALSE; + rex.nfa_has_zend = false; + rex.nfa_has_backref = false; /* shared with BT engine */ regcomp_start(expr, re_flags); @@ -605,12 +587,10 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) # define CLASS_o9 0x02 # define CLASS_underscore 0x01 - int newl = FALSE; char_u *p; int config = 0; - if (extra_newl == TRUE) - newl = TRUE; + bool newl = extra_newl == true; if (*end != ']') return FAIL; @@ -655,13 +635,13 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) } p += 3; } else if (p + 1 < end && *p == '\\' && *(p + 1) == 'n') { - newl = TRUE; + newl = true; p += 2; } else if (*p == '_') { config |= CLASS_underscore; p++; } else if (*p == '\n') { - newl = TRUE; + newl = true; p++; } else return FAIL; @@ -670,8 +650,9 @@ static int nfa_recognize_char_class(char_u *start, char_u *end, int extra_newl) if (p != end) return FAIL; - if (newl == TRUE) + if (newl == true) { extra_newl = NFA_ADD_NL; + } switch (config) { case CLASS_o9: @@ -1188,7 +1169,7 @@ static int nfa_regatom(void) case Magic('$'): EMIT(NFA_EOL); - had_eol = TRUE; + had_eol = true; break; case Magic('<'): @@ -1210,7 +1191,7 @@ static int nfa_regatom(void) } if (c == '$') { /* "\_$" is end-of-line */ EMIT(NFA_EOL); - had_eol = TRUE; + had_eol = true; break; } @@ -1257,7 +1238,7 @@ static int 
nfa_regatom(void) if (p == NULL) { if (extra == NFA_ADD_NL) { EMSGN(_(e_ill_char_class), c); - rc_did_emsg = TRUE; + rc_did_emsg = true; return FAIL; } IEMSGN("INTERNAL: Unknown character class char: %" PRId64, c); @@ -1346,7 +1327,7 @@ static int nfa_regatom(void) return FAIL; } EMIT(NFA_BACKREF1 + refnum); - nfa_has_backref = true; + rex.nfa_has_backref = true; } break; @@ -1361,7 +1342,7 @@ static int nfa_regatom(void) break; case 'e': EMIT(NFA_ZEND); - nfa_has_zend = true; + rex.nfa_has_zend = true; if (!re_mult_next("\\zs")) { return false; } @@ -1380,8 +1361,8 @@ static int nfa_regatom(void) EMSG_RET_FAIL(_(e_z1_not_allowed)); } EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); - /* No need to set nfa_has_backref, the sub-matches don't - * change when \z1 .. \z9 matches or not. */ + // No need to set rex.nfa_has_backref, the sub-matches don't + // change when \z1 .. \z9 matches or not. re_has_z = REX_USE; break; case '(': @@ -1598,12 +1579,12 @@ collection: EMIT(NFA_CONCAT); MB_PTR_ADV(regparse); } - /* Emit the OR branches for each character in the [] */ - emit_range = FALSE; + // Emit the OR branches for each character in the [] + emit_range = false; while (regparse < endp) { oldstartc = startc; startc = -1; - got_coll_char = FALSE; + got_coll_char = false; if (*regparse == '[') { /* Check for [: :], [= =], [. .] */ equiclass = collclass = 0; @@ -1665,6 +1646,15 @@ collection: case CLASS_ESCAPE: EMIT(NFA_CLASS_ESCAPE); break; + case CLASS_IDENT: + EMIT(NFA_CLASS_IDENT); + break; + case CLASS_KEYWORD: + EMIT(NFA_CLASS_KEYWORD); + break; + case CLASS_FNAME: + EMIT(NFA_CLASS_FNAME); + break; } EMIT(NFA_CONCAT); continue; @@ -1684,7 +1674,7 @@ collection: /* Try a range like 'a-x' or '\t-z'. Also allows '-' as a * start character. 
*/ if (*regparse == '-' && oldstartc != -1) { - emit_range = TRUE; + emit_range = true; startc = oldstartc; MB_PTR_ADV(regparse); continue; // reading the end of the range @@ -1764,7 +1754,7 @@ collection: EMIT(NFA_CONCAT); } } - emit_range = FALSE; + emit_range = false; startc = -1; } else { /* This char (startc) is not part of a range. Just @@ -1781,10 +1771,11 @@ collection: if (!negated) extra = NFA_ADD_NL; } else { - if (got_coll_char == TRUE && startc == 0) + if (got_coll_char == true && startc == 0) { EMIT(0x0a); - else + } else { EMIT(startc); + } EMIT(NFA_CONCAT); } } @@ -1802,13 +1793,14 @@ collection: regparse = endp; MB_PTR_ADV(regparse); - /* Mark end of the collection. */ - if (negated == TRUE) + // Mark end of the collection. + if (negated == true) { EMIT(NFA_END_NEG_COLL); - else + } else { EMIT(NFA_END_COLL); + } - /* \_[] also matches \n but it's not negated */ + // \_[] also matches \n but it's not negated if (extra == NFA_ADD_NL) { EMIT(reg_string ? NL : NFA_NEWL); EMIT(NFA_OR); @@ -1877,7 +1869,7 @@ static int nfa_regpiece(void) int op; int ret; long minval, maxval; - int greedy = TRUE; /* Braces are prefixed with '-' ? */ + bool greedy = true; // Braces are prefixed with '-' ? parse_state_T old_state; parse_state_T new_state; int64_t c2; @@ -1977,11 +1969,11 @@ static int nfa_regpiece(void) * parenthesis have the same id */ - greedy = TRUE; + greedy = true; c2 = peekchr(); if (c2 == '-' || c2 == Magic('-')) { skipchr(); - greedy = FALSE; + greedy = false; } if (!read_limits(&minval, &maxval)) EMSG_RET_FAIL(_("E870: (NFA regexp) Error reading repetition limits")); @@ -2019,7 +2011,7 @@ static int nfa_regpiece(void) /* Save parse state after the repeated atom and the \{} */ save_parse_state(&new_state); - quest = (greedy == TRUE ? NFA_QUEST : NFA_QUEST_NONGREEDY); + quest = (greedy == true ? 
NFA_QUEST : NFA_QUEST_NONGREEDY); for (i = 0; i < maxval; i++) { /* Goto beginning of the repeated atom */ restore_parse_state(&old_state); @@ -2073,8 +2065,8 @@ static int nfa_regpiece(void) */ static int nfa_regconcat(void) { - int cont = TRUE; - int first = TRUE; + bool cont = true; + bool first = true; while (cont) { switch (peekchr()) { @@ -2082,7 +2074,7 @@ static int nfa_regconcat(void) case Magic('|'): case Magic('&'): case Magic(')'): - cont = FALSE; + cont = false; break; case Magic('Z'): @@ -2119,12 +2111,14 @@ static int nfa_regconcat(void) break; default: - if (nfa_regpiece() == FAIL) + if (nfa_regpiece() == FAIL) { return FAIL; - if (first == FALSE) + } + if (first == false) { EMIT(NFA_CONCAT); - else - first = FALSE; + } else { + first = false; + } break; } } @@ -2230,15 +2224,14 @@ nfa_reg ( else EMSG_RET_FAIL(_("E873: (NFA regexp) proper termination error")); } - /* - * Here we set the flag allowing back references to this set of - * parentheses. - */ + // Here we set the flag allowing back references to this set of + // parentheses. 
if (paren == REG_PAREN) { - had_endbrace[parno] = TRUE; /* have seen the close paren */ + had_endbrace[parno] = true; // have seen the close paren EMIT(NFA_MOPEN + parno); - } else if (paren == REG_ZPAREN) + } else if (paren == REG_ZPAREN) { EMIT(NFA_ZOPEN + parno); + } return OK; } @@ -2248,10 +2241,10 @@ static char_u code[50]; static void nfa_set_code(int c) { - int addnl = FALSE; + int addnl = false; if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL) { - addnl = TRUE; + addnl = true; c -= NFA_ADD_NL; } @@ -2426,6 +2419,9 @@ static void nfa_set_code(int c) case NFA_CLASS_RETURN: STRCPY(code, "NFA_CLASS_RETURN"); break; case NFA_CLASS_BACKSPACE: STRCPY(code, "NFA_CLASS_BACKSPACE"); break; case NFA_CLASS_ESCAPE: STRCPY(code, "NFA_CLASS_ESCAPE"); break; + case NFA_CLASS_IDENT: STRCPY(code, "NFA_CLASS_IDENT"); break; + case NFA_CLASS_KEYWORD: STRCPY(code, "NFA_CLASS_KEYWORD"); break; + case NFA_CLASS_FNAME: STRCPY(code, "NFA_CLASS_FNAME"); break; case NFA_ANY: STRCPY(code, "NFA_ANY"); break; case NFA_IDENT: STRCPY(code, "NFA_IDENT"); break; @@ -2464,9 +2460,9 @@ static void nfa_set_code(int c) code[5] = c; } - if (addnl == TRUE) + if (addnl == true) { STRCAT(code, " + NEWLINE "); - + } } static FILE *log_fd; @@ -2848,11 +2844,8 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_UPPER_IC: case NFA_NUPPER_IC: case NFA_ANY_COMPOSING: - /* possibly non-ascii */ - if (has_mbyte) - len += 3; - else - ++len; + // possibly non-ascii + len += 3; break; case NFA_START_INVISIBLE: @@ -3019,12 +3012,12 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) for (p = postfix; p < end; ++p) { switch (*p) { case NFA_CONCAT: - /* Concatenation. - * Pay attention: this operator does not exist in the r.e. itself - * (it is implicit, really). It is added when r.e. is translated - * to postfix form in re2post(). */ - if (nfa_calc_size == TRUE) { - /* nstate += 0; */ + // Concatenation. + // Pay attention: this operator does not exist in the r.e. 
itself + // (it is implicit, really). It is added when r.e. is translated + // to postfix form in re2post(). + if (nfa_calc_size == true) { + // nstate += 0; break; } e2 = POP(); @@ -3034,8 +3027,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_OR: - /* Alternation */ - if (nfa_calc_size == TRUE) { + // Alternation + if (nfa_calc_size == true) { nstate++; break; } @@ -3048,8 +3041,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_STAR: - /* Zero or more, prefer more */ - if (nfa_calc_size == TRUE) { + // Zero or more, prefer more + if (nfa_calc_size == true) { nstate++; break; } @@ -3062,8 +3055,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_STAR_NONGREEDY: - /* Zero or more, prefer zero */ - if (nfa_calc_size == TRUE) { + // Zero or more, prefer zero + if (nfa_calc_size == true) { nstate++; break; } @@ -3076,8 +3069,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_QUEST: - /* one or zero atoms=> greedy match */ - if (nfa_calc_size == TRUE) { + // one or zero atoms=> greedy match + if (nfa_calc_size == true) { nstate++; break; } @@ -3089,8 +3082,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_QUEST_NONGREEDY: - /* zero or one atoms => non-greedy match */ - if (nfa_calc_size == TRUE) { + // zero or one atoms => non-greedy match + if (nfa_calc_size == true) { nstate++; break; } @@ -3106,7 +3099,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) /* On the stack is the sequence starting with NFA_START_COLL or * NFA_START_NEG_COLL and all possible characters. Patch it to * add the output to the start. 
*/ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate++; break; } @@ -3120,10 +3113,10 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_RANGE: - /* Before this are two characters, the low and high end of a - * range. Turn them into two states with MIN and MAX. */ - if (nfa_calc_size == TRUE) { - /* nstate += 0; */ + // Before this are two characters, the low and high end of a + // range. Turn them into two states with MIN and MAX. + if (nfa_calc_size == true) { + // nstate += 0; break; } e2 = POP(); @@ -3137,8 +3130,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) break; case NFA_EMPTY: - /* 0-length, used in a repetition with max/min count of 0 */ - if (nfa_calc_size == TRUE) { + // 0-length, used in a repetition with max/min count of 0 + if (nfa_calc_size == true) { nstate++; break; } @@ -3152,20 +3145,19 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) { int n; - /* \%[abc] implemented as: - * NFA_SPLIT - * +-CHAR(a) - * | +-NFA_SPLIT - * | +-CHAR(b) - * | | +-NFA_SPLIT - * | | +-CHAR(c) - * | | | +-next - * | | +- next - * | +- next - * +- next - */ - n = *++p; /* get number of characters */ - if (nfa_calc_size == TRUE) { + // \%[abc] implemented as: + // NFA_SPLIT + // +-CHAR(a) + // | +-NFA_SPLIT + // | +-CHAR(b) + // | | +-NFA_SPLIT + // | | +-CHAR(c) + // | | | +-next + // | | +- next + // | +- next + // +- next + n = *++p; // get number of characters + if (nfa_calc_size == true) { nstate += n; break; } @@ -3235,7 +3227,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) * Surrounds the preceding atom with START_INVISIBLE and * END_INVISIBLE, similarly to MOPEN. */ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate += pattern ? 
4 : 2; break; } @@ -3297,8 +3289,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_ZOPEN7: case NFA_ZOPEN8: case NFA_ZOPEN9: - case NFA_NOPEN: /* \%( \) "Invisible Submatch" */ - if (nfa_calc_size == TRUE) { + case NFA_NOPEN: // \%( \) "Invisible Submatch" + if (nfa_calc_size == true) { nstate += 2; break; } @@ -3376,7 +3368,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_ZREF7: case NFA_ZREF8: case NFA_ZREF9: - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate += 2; break; } @@ -3405,7 +3397,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) { int n = *++p; /* lnum, col or mark name */ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate += 1; break; } @@ -3420,8 +3412,8 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) case NFA_ZSTART: case NFA_ZEND: default: - /* Operands */ - if (nfa_calc_size == TRUE) { + // Operands + if (nfa_calc_size == true) { nstate++; break; } @@ -3435,7 +3427,7 @@ static nfa_state_T *post2nfa(int *postfix, int *end, int nfa_calc_size) } /* for(p = postfix; *p; ++p) */ - if (nfa_calc_size == TRUE) { + if (nfa_calc_size == true) { nstate++; goto theend; /* Return value when counting size is ignored anyway */ } @@ -3489,11 +3481,11 @@ static void nfa_postprocess(nfa_regprog_T *prog) || c == NFA_START_INVISIBLE_BEFORE_NEG) { int directly; - /* Do it directly when what follows is possibly the end of the - * match. */ - if (match_follows(prog->state[i].out1->out, 0)) - directly = TRUE; - else { + // Do it directly when what follows is possibly the end of the + // match. 
+ if (match_follows(prog->state[i].out1->out, 0)) { + directly = true; + } else { int ch_invisible = failure_chance(prog->state[i].out, 0); int ch_follows = failure_chance(prog->state[i].out1->out, 0); @@ -3505,10 +3497,11 @@ static void nfa_postprocess(nfa_regprog_T *prog) * unbounded, always prefer what follows then, * unless what follows will always match. * Otherwise strongly prefer what follows. */ - if (prog->state[i].val <= 0 && ch_follows > 0) - directly = FALSE; - else + if (prog->state[i].val <= 0 && ch_follows > 0) { + directly = false; + } else { directly = ch_follows * 10 < ch_invisible; + } } else { /* normal invisible, first do the one with the * highest failure chance */ @@ -3537,8 +3530,9 @@ static void nfa_postprocess(nfa_regprog_T *prog) static void log_subsexpr(regsubs_T *subs) { log_subexpr(&subs->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { log_subexpr(&subs->synt); + } } static void log_subexpr(regsub_T *sub) @@ -3564,15 +3558,17 @@ static void log_subexpr(regsub_T *sub) } } -static char *pim_info(nfa_pim_T *pim) +static char *pim_info(const nfa_pim_T *pim) { static char buf[30]; - if (pim == NULL || pim->result == NFA_PIM_UNUSED) + if (pim == NULL || pim->result == NFA_PIM_UNUSED) { buf[0] = NUL; - else { - sprintf(buf, " PIM col %d", REG_MULTI ? (int)pim->end.pos.col - : (int)(pim->end.ptr - reginput)); + } else { + snprintf(buf, sizeof(buf), " PIM col %d", + REG_MULTI + ? 
(int)pim->end.pos.col + : (int)(pim->end.ptr - rex.input)); } return buf; } @@ -3591,19 +3587,21 @@ static void copy_pim(nfa_pim_T *to, nfa_pim_T *from) to->result = from->result; to->state = from->state; copy_sub(&to->subs.norm, &from->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&to->subs.synt, &from->subs.synt); + } to->end = from->end; } static void clear_sub(regsub_T *sub) { - if (REG_MULTI) - /* Use 0xff to set lnum to -1 */ + if (REG_MULTI) { + // Use 0xff to set lnum to -1 memset(sub->list.multi, 0xff, - sizeof(struct multipos) * nfa_nsubexpr); - else - memset(sub->list.line, 0, sizeof(struct linepos) * nfa_nsubexpr); + sizeof(struct multipos) * rex.nfa_nsubexpr); + } else { + memset(sub->list.line, 0, sizeof(struct linepos) * rex.nfa_nsubexpr); + } sub->in_use = 0; } @@ -3651,7 +3649,7 @@ static void copy_sub_off(regsub_T *to, regsub_T *from) */ static void copy_ze_off(regsub_T *to, regsub_T *from) { - if (nfa_has_zend) { + if (rex.nfa_has_zend) { if (REG_MULTI) { if (from->list.multi[0].end_lnum >= 0){ to->list.multi[0].end_lnum = from->list.multi[0].end_lnum; @@ -3664,9 +3662,9 @@ static void copy_ze_off(regsub_T *to, regsub_T *from) } } -// Return TRUE if "sub1" and "sub2" have the same start positions. +// Return true if "sub1" and "sub2" have the same start positions. // When using back-references also check the end position. -static int sub_equal(regsub_T *sub1, regsub_T *sub2) +static bool sub_equal(regsub_T *sub1, regsub_T *sub2) { int i; int todo; @@ -3677,22 +3675,25 @@ static int sub_equal(regsub_T *sub1, regsub_T *sub2) todo = sub1->in_use > sub2->in_use ? 
sub1->in_use : sub2->in_use; if (REG_MULTI) { - for (i = 0; i < todo; ++i) { - if (i < sub1->in_use) + for (i = 0; i < todo; i++) { + if (i < sub1->in_use) { s1 = sub1->list.multi[i].start_lnum; - else + } else { s1 = -1; - if (i < sub2->in_use) + } + if (i < sub2->in_use) { s2 = sub2->list.multi[i].start_lnum; - else + } else { s2 = -1; - if (s1 != s2) - return FALSE; + } + if (s1 != s2) { + return false; + } if (s1 != -1 && sub1->list.multi[i].start_col - != sub2->list.multi[i].start_col) - return FALSE; - - if (nfa_has_backref) { + != sub2->list.multi[i].start_col) { + return false; + } + if (rex.nfa_has_backref) { if (i < sub1->in_use) { s1 = sub1->list.multi[i].end_lnum; } else { @@ -3704,28 +3705,30 @@ static int sub_equal(regsub_T *sub1, regsub_T *sub2) s2 = -1; } if (s1 != s2) { - return FALSE; + return false; } if (s1 != -1 && sub1->list.multi[i].end_col != sub2->list.multi[i].end_col) { - return FALSE; + return false; } } } } else { - for (i = 0; i < todo; ++i) { - if (i < sub1->in_use) + for (i = 0; i < todo; i++) { + if (i < sub1->in_use) { sp1 = sub1->list.line[i].start; - else + } else { sp1 = NULL; - if (i < sub2->in_use) + } + if (i < sub2->in_use) { sp2 = sub2->list.line[i].start; - else + } else { sp2 = NULL; - if (sp1 != sp2) - return FALSE; - - if (nfa_has_backref) { + } + if (sp1 != sp2) { + return false; + } + if (rex.nfa_has_backref) { if (i < sub1->in_use) { sp1 = sub1->list.line[i].end; } else { @@ -3737,13 +3740,13 @@ static int sub_equal(regsub_T *sub1, regsub_T *sub2) sp2 = NULL; } if (sp1 != sp2) { - return FALSE; + return false; } } } } - return TRUE; + return true; } #ifdef REGEXP_DEBUG @@ -3754,83 +3757,81 @@ static void report_state(char *action, nfa_pim_T *pim) { int col; - if (sub->in_use <= 0) + if (sub->in_use <= 0) { col = -1; - else if (REG_MULTI) + } else if (REG_MULTI) { col = sub->list.multi[0].start_col; - else - col = (int)(sub->list.line[0].start - regline); + } else { + col = (int)(sub->list.line[0].start - rex.line); + 
} nfa_set_code(state->c); fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)%s\n", - action, abs(state->id), lid, state->c, code, col, - pim_info(pim)); + action, abs(state->id), lid, state->c, code, col, + pim_info(pim)); } #endif -/* - * Return TRUE if the same state is already in list "l" with the same - * positions as "subs". - */ -static int -has_state_with_pos ( - nfa_list_T *l, /* runtime state list */ - nfa_state_T *state, /* state to update */ - regsubs_T *subs, /* pointers to subexpressions */ - nfa_pim_T *pim /* postponed match or NULL */ +// Return true if the same state is already in list "l" with the same +// positions as "subs". +static bool has_state_with_pos( + nfa_list_T *l, // runtime state list + nfa_state_T *state, // state to update + regsubs_T *subs, // pointers to subexpressions + nfa_pim_T *pim // postponed match or NULL ) + FUNC_ATTR_NONNULL_ARG(1, 2, 3) { - nfa_thread_T *thread; - int i; - - for (i = 0; i < l->n; ++i) { - thread = &l->t[i]; + for (int i = 0; i < l->n; i++) { + nfa_thread_T *thread = &l->t[i]; if (thread->state->id == state->id && sub_equal(&thread->subs.norm, &subs->norm) - && (!nfa_has_zsubexpr + && (!rex.nfa_has_zsubexpr || sub_equal(&thread->subs.synt, &subs->synt)) - && pim_equal(&thread->pim, pim)) - return TRUE; + && pim_equal(&thread->pim, pim)) { + return true; + } } - return FALSE; + return false; } -/* - * Return TRUE if "one" and "two" are equal. That includes when both are not - * set. - */ -static int pim_equal(nfa_pim_T *one, nfa_pim_T *two) +// Return true if "one" and "two" are equal. That includes when both are not +// set. 
+static bool pim_equal(const nfa_pim_T *one, const nfa_pim_T *two) { - int one_unused = (one == NULL || one->result == NFA_PIM_UNUSED); - int two_unused = (two == NULL || two->result == NFA_PIM_UNUSED); + const bool one_unused = (one == NULL || one->result == NFA_PIM_UNUSED); + const bool two_unused = (two == NULL || two->result == NFA_PIM_UNUSED); - if (one_unused) - /* one is unused: equal when two is also unused */ + if (one_unused) { + // one is unused: equal when two is also unused return two_unused; - if (two_unused) - /* one is used and two is not: not equal */ - return FALSE; - /* compare the state id */ - if (one->state->id != two->state->id) - return FALSE; - /* compare the position */ - if (REG_MULTI) + } + if (two_unused) { + // one is used and two is not: not equal + return false; + } + // compare the state id + if (one->state->id != two->state->id) { + return false; + } + // compare the position + if (REG_MULTI) { return one->end.pos.lnum == two->end.pos.lnum && one->end.pos.col == two->end.pos.col; + } return one->end.ptr == two->end.ptr; } -/* - * Return TRUE if "state" leads to a NFA_MATCH without advancing the input. - */ -static int match_follows(nfa_state_T *startstate, int depth) +// Return true if "state" leads to a NFA_MATCH without advancing the input. 
+static bool match_follows(const nfa_state_T *startstate, int depth) + FUNC_ATTR_NONNULL_ALL { - nfa_state_T *state = startstate; - - /* avoid too much recursion */ - if (depth > 10) - return FALSE; + const nfa_state_T *state = startstate; + // avoid too much recursion + if (depth > 10) { + return false; + } while (state != NULL) { switch (state->c) { case NFA_MATCH: @@ -3838,7 +3839,7 @@ static int match_follows(nfa_state_T *startstate, int depth) case NFA_END_INVISIBLE: case NFA_END_INVISIBLE_NEG: case NFA_END_PATTERN: - return TRUE; + return true; case NFA_SPLIT: return match_follows(state->out, depth + 1) @@ -3892,39 +3893,38 @@ static int match_follows(nfa_state_T *startstate, int depth) case NFA_START_COLL: case NFA_START_NEG_COLL: case NFA_NEWL: - /* state will advance input */ - return FALSE; + // state will advance input + return false; default: - if (state->c > 0) - /* state will advance input */ - return FALSE; - - /* Others: zero-width or possibly zero-width, might still find - * a match at the same position, keep looking. */ + if (state->c > 0) { + // state will advance input + return false; + } + // Others: zero-width or possibly zero-width, might still find + // a match at the same position, keep looking. break; } state = state->out; } - return FALSE; + return false; } -/* - * Return TRUE if "state" is already in list "l". - */ -static int -state_in_list ( - nfa_list_T *l, /* runtime state list */ - nfa_state_T *state, /* state to update */ - regsubs_T *subs /* pointers to subexpressions */ +// Return true if "state" is already in list "l". 
+static bool state_in_list( + nfa_list_T *l, // runtime state list + nfa_state_T *state, // state to update + regsubs_T *subs // pointers to subexpressions ) + FUNC_ATTR_NONNULL_ALL { if (state->lastlist[nfa_ll_index] == l->id) { - if (!nfa_has_backref || has_state_with_pos(l, state, subs, NULL)) - return TRUE; + if (!rex.nfa_has_backref || has_state_with_pos(l, state, subs, NULL)) { + return true; + } } - return FALSE; + return false; } // Offset used for "off" by addstate_here(). @@ -3943,10 +3943,10 @@ static regsubs_T *addstate( { int subidx; int off = off_arg; - int add_here = FALSE; + int add_here = false; int listindex = 0; int k; - int found = FALSE; + int found = false; nfa_thread_T *thread; struct multipos save_multipos; int save_in_use; @@ -3956,7 +3956,7 @@ static regsubs_T *addstate( regsubs_T *subs = subs_arg; static regsubs_T temp_subs; #ifdef REGEXP_DEBUG - int did_print = FALSE; + int did_print = false; #endif static int depth = 0; @@ -4005,15 +4005,16 @@ static regsubs_T *addstate( case NFA_BOL: case NFA_BOF: - /* "^" won't match past end-of-line, don't bother trying. - * Except when at the end of the line, or when we are going to the - * next line for a look-behind match. */ - if (reginput > regline - && *reginput != NUL + // "^" won't match past end-of-line, don't bother trying. + // Except when at the end of the line, or when we are going to the + // next line for a look-behind match. + if (rex.input > rex.line + && *rex.input != NUL && (nfa_endp == NULL || !REG_MULTI - || reglnum == nfa_endp->se_u.pos.lnum)) + || rex.lnum == nfa_endp->se_u.pos.lnum)) { goto skip_add; + } FALLTHROUGH; case NFA_MOPEN1: @@ -4047,7 +4048,7 @@ static regsubs_T *addstate( * unless it is an MOPEN that is used for a backreference or * when there is a PIM. For NFA_MATCH check the position, * lower position is preferred. 
*/ - if (!nfa_has_backref && pim == NULL && !l->has_pim + if (!rex.nfa_has_backref && pim == NULL && !l->has_pim && state->c != NFA_MATCH) { /* When called from addstate_here() do insert before @@ -4055,7 +4056,7 @@ static regsubs_T *addstate( if (add_here) { for (k = 0; k < l->n && k < listindex; ++k) { if (l->t[k].state->id == state->id) { - found = TRUE; + found = true; break; } } @@ -4092,11 +4093,12 @@ skip_add: return NULL; } if (subs != &temp_subs) { - /* "subs" may point into the current array, need to make a - * copy before it becomes invalid. */ + // "subs" may point into the current array, need to make a + // copy before it becomes invalid. copy_sub(&temp_subs.norm, &subs->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&temp_subs.synt, &subs->synt); + } subs = &temp_subs; } @@ -4113,14 +4115,15 @@ skip_add: thread->pim.result = NFA_PIM_UNUSED; else { copy_pim(&thread->pim, pim); - l->has_pim = TRUE; + l->has_pim = true; } copy_sub(&thread->subs.norm, &subs->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&thread->subs.synt, &subs->synt); + } #ifdef REGEXP_DEBUG report_state("Adding", &thread->subs.norm, state, l->id, pim); - did_print = TRUE; + did_print = true; #endif } @@ -4195,13 +4198,12 @@ skip_add: sub->in_use = subidx + 1; } if (off == -1) { - sub->list.multi[subidx].start_lnum = reglnum + 1; + sub->list.multi[subidx].start_lnum = rex.lnum + 1; sub->list.multi[subidx].start_col = 0; } else { - - sub->list.multi[subidx].start_lnum = reglnum; + sub->list.multi[subidx].start_lnum = rex.lnum; sub->list.multi[subidx].start_col = - (colnr_T)(reginput - regline + off); + (colnr_T)(rex.input - rex.line + off); } sub->list.multi[subidx].end_lnum = -1; } else { @@ -4216,7 +4218,7 @@ skip_add: } sub->in_use = subidx + 1; } - sub->list.line[subidx].start = reginput + off; + sub->list.line[subidx].start = rex.input + off; } subs = addstate(l, state->out, subs, pim, off_arg); @@ -4241,9 +4243,10 @@ skip_add: break; 
case NFA_MCLOSE: - if (nfa_has_zend && (REG_MULTI - ? subs->norm.list.multi[0].end_lnum >= 0 - : subs->norm.list.line[0].end != NULL)) { + if (rex.nfa_has_zend + && (REG_MULTI + ? subs->norm.list.multi[0].end_lnum >= 0 + : subs->norm.list.line[0].end != NULL)) { // Do not overwrite the position set by \ze. subs = addstate(l, state->out, subs, pim, off_arg); break; @@ -4288,18 +4291,18 @@ skip_add: if (REG_MULTI) { save_multipos = sub->list.multi[subidx]; if (off == -1) { - sub->list.multi[subidx].end_lnum = reglnum + 1; + sub->list.multi[subidx].end_lnum = rex.lnum + 1; sub->list.multi[subidx].end_col = 0; } else { - sub->list.multi[subidx].end_lnum = reglnum; + sub->list.multi[subidx].end_lnum = rex.lnum; sub->list.multi[subidx].end_col = - (colnr_T)(reginput - regline + off); + (colnr_T)(rex.input - rex.line + off); } /* avoid compiler warnings */ save_ptr = NULL; } else { save_ptr = sub->list.line[subidx].end; - sub->list.line[subidx].end = reginput + off; + sub->list.line[subidx].end = rex.input + off; // avoid compiler warnings memset(&save_multipos, 0, sizeof(save_multipos)); } @@ -4486,6 +4489,21 @@ static int check_char_class(int class, int c) return OK; } break; + case NFA_CLASS_IDENT: + if (vim_isIDc(c)) { + return OK; + } + break; + case NFA_CLASS_KEYWORD: + if (reg_iswordc(c)) { + return OK; + } + break; + case NFA_CLASS_FNAME: + if (vim_isfilec(c)) { + return OK; + } + break; default: // should not be here :P @@ -4497,7 +4515,7 @@ static int check_char_class(int class, int c) /* * Check for a match with subexpression "subidx". - * Return TRUE if it matches. + * Return true if it matches. 
*/ static int match_backref ( @@ -4512,49 +4530,49 @@ match_backref ( retempty: /* backref was not set, match an empty string */ *bytelen = 0; - return TRUE; + return true; } if (REG_MULTI) { if (sub->list.multi[subidx].start_lnum < 0 || sub->list.multi[subidx].end_lnum < 0) goto retempty; - if (sub->list.multi[subidx].start_lnum == reglnum - && sub->list.multi[subidx].end_lnum == reglnum) { + if (sub->list.multi[subidx].start_lnum == rex.lnum + && sub->list.multi[subidx].end_lnum == rex.lnum) { len = sub->list.multi[subidx].end_col - sub->list.multi[subidx].start_col; - if (cstrncmp(regline + sub->list.multi[subidx].start_col, - reginput, &len) == 0) { + if (cstrncmp(rex.line + sub->list.multi[subidx].start_col, + rex.input, &len) == 0) { *bytelen = len; - return TRUE; + return true; } } else { - if (match_with_backref( - sub->list.multi[subidx].start_lnum, - sub->list.multi[subidx].start_col, - sub->list.multi[subidx].end_lnum, - sub->list.multi[subidx].end_col, - bytelen) == RA_MATCH) - return TRUE; + if (match_with_backref(sub->list.multi[subidx].start_lnum, + sub->list.multi[subidx].start_col, + sub->list.multi[subidx].end_lnum, + sub->list.multi[subidx].end_col, + bytelen) == RA_MATCH) { + return true; + } } } else { if (sub->list.line[subidx].start == NULL || sub->list.line[subidx].end == NULL) goto retempty; len = (int)(sub->list.line[subidx].end - sub->list.line[subidx].start); - if (cstrncmp(sub->list.line[subidx].start, reginput, &len) == 0) { + if (cstrncmp(sub->list.line[subidx].start, rex.input, &len) == 0) { *bytelen = len; - return TRUE; + return true; } } - return FALSE; + return false; } /* * Check for a match with \z subexpression "subidx". - * Return TRUE if it matches. + * Return true if it matches. 
*/ static int match_zref ( @@ -4568,15 +4586,15 @@ match_zref ( if (re_extmatch_in == NULL || re_extmatch_in->matches[subidx] == NULL) { /* backref was not set, match an empty string */ *bytelen = 0; - return TRUE; + return true; } len = (int)STRLEN(re_extmatch_in->matches[subidx]); - if (cstrncmp(re_extmatch_in->matches[subidx], reginput, &len) == 0) { + if (cstrncmp(re_extmatch_in->matches[subidx], rex.input, &len) == 0) { *bytelen = len; - return TRUE; + return true; } - return FALSE; + return false; } /* @@ -4629,74 +4647,79 @@ static bool nfa_re_num_cmp(uintmax_t val, int op, uintmax_t pos) static int recursive_regmatch( nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids, int *listids_len) + FUNC_ATTR_NONNULL_ARG(1, 3, 5, 6, 7) { - int save_reginput_col = (int)(reginput - regline); - int save_reglnum = reglnum; - int save_nfa_match = nfa_match; - int save_nfa_listid = nfa_listid; - save_se_T *save_nfa_endp = nfa_endp; + const int save_reginput_col = (int)(rex.input - rex.line); + const int save_reglnum = rex.lnum; + const int save_nfa_match = nfa_match; + const int save_nfa_listid = rex.nfa_listid; + save_se_T *const save_nfa_endp = nfa_endp; save_se_T endpos; save_se_T *endposp = NULL; - int result; - int need_restore = FALSE; + int need_restore = false; if (pim != NULL) { - /* start at the position where the postponed match was */ - if (REG_MULTI) - reginput = regline + pim->end.pos.col; - else - reginput = pim->end.ptr; + // start at the position where the postponed match was + if (REG_MULTI) { + rex.input = rex.line + pim->end.pos.col; + } else { + rex.input = pim->end.ptr; + } } if (state->c == NFA_START_INVISIBLE_BEFORE || state->c == NFA_START_INVISIBLE_BEFORE_FIRST || state->c == NFA_START_INVISIBLE_BEFORE_NEG || state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST) { - /* The recursive match must end at the current position. When "pim" is - * not NULL it specifies the current position. 
*/ + // The recursive match must end at the current position. When "pim" is + // not NULL it specifies the current position. endposp = &endpos; if (REG_MULTI) { if (pim == NULL) { - endpos.se_u.pos.col = (int)(reginput - regline); - endpos.se_u.pos.lnum = reglnum; - } else + endpos.se_u.pos.col = (int)(rex.input - rex.line); + endpos.se_u.pos.lnum = rex.lnum; + } else { endpos.se_u.pos = pim->end.pos; + } } else { - if (pim == NULL) - endpos.se_u.ptr = reginput; - else + if (pim == NULL) { + endpos.se_u.ptr = rex.input; + } else { endpos.se_u.ptr = pim->end.ptr; + } } - /* Go back the specified number of bytes, or as far as the - * start of the previous line, to try matching "\@<=" or - * not matching "\@<!". This is very inefficient, limit the number of - * bytes if possible. */ + // Go back the specified number of bytes, or as far as the + // start of the previous line, to try matching "\@<=" or + // not matching "\@<!". This is very inefficient, limit the number of + // bytes if possible. if (state->val <= 0) { if (REG_MULTI) { - regline = reg_getline(--reglnum); - if (regline == NULL) - /* can't go before the first line */ - regline = reg_getline(++reglnum); + rex.line = reg_getline(--rex.lnum); + if (rex.line == NULL) { + // can't go before the first line + rex.line = reg_getline(++rex.lnum); + } } - reginput = regline; + rex.input = rex.line; } else { - if (REG_MULTI && (int)(reginput - regline) < state->val) { - /* Not enough bytes in this line, go to end of - * previous line. */ - regline = reg_getline(--reglnum); - if (regline == NULL) { - /* can't go before the first line */ - regline = reg_getline(++reglnum); - reginput = regline; - } else - reginput = regline + STRLEN(regline); + if (REG_MULTI && (int)(rex.input - rex.line) < state->val) { + // Not enough bytes in this line, go to end of + // previous line. 
+ rex.line = reg_getline(--rex.lnum); + if (rex.line == NULL) { + // can't go before the first line + rex.line = reg_getline(++rex.lnum); + rex.input = rex.line; + } else { + rex.input = rex.line + STRLEN(rex.line); + } } - if ((int)(reginput - regline) >= state->val) { - reginput -= state->val; - reginput -= utf_head_off(regline, reginput); + if ((int)(rex.input - rex.line) >= state->val) { + rex.input -= state->val; + rex.input -= utf_head_off(rex.line, rex.input); } else { - reginput = regline; + rex.input = rex.line; } } } @@ -4706,48 +4729,50 @@ static int recursive_regmatch( fclose(log_fd); log_fd = NULL; #endif - /* Have to clear the lastlist field of the NFA nodes, so that - * nfa_regmatch() and addstate() can run properly after recursion. */ + // Have to clear the lastlist field of the NFA nodes, so that + // nfa_regmatch() and addstate() can run properly after recursion. if (nfa_ll_index == 1) { - /* Already calling nfa_regmatch() recursively. Save the lastlist[1] - * values and clear them. */ - if (*listids == NULL || *listids_len < nstate) { + // Already calling nfa_regmatch() recursively. Save the lastlist[1] + // values and clear them. + if (*listids == NULL || *listids_len < prog->nstate) { xfree(*listids); - *listids = xmalloc(sizeof(**listids) * nstate); - *listids_len = nstate; + *listids = xmalloc(sizeof(**listids) * prog->nstate); + *listids_len = prog->nstate; } nfa_save_listids(prog, *listids); - need_restore = TRUE; - /* any value of nfa_listid will do */ + need_restore = true; + // any value of rex.nfa_listid will do } else { - /* First recursive nfa_regmatch() call, switch to the second lastlist - * entry. Make sure nfa_listid is different from a previous recursive - * call, because some states may still have this ID. */ - ++nfa_ll_index; - if (nfa_listid <= nfa_alt_listid) - nfa_listid = nfa_alt_listid; + // First recursive nfa_regmatch() call, switch to the second lastlist + // entry. 
Make sure rex.nfa_listid is different from a previous + // recursive call, because some states may still have this ID. + nfa_ll_index++; + if (rex.nfa_listid <= rex.nfa_alt_listid) { + rex.nfa_listid = rex.nfa_alt_listid; + } } - /* Call nfa_regmatch() to check if the current concat matches at this - * position. The concat ends with the node NFA_END_INVISIBLE */ + // Call nfa_regmatch() to check if the current concat matches at this + // position. The concat ends with the node NFA_END_INVISIBLE nfa_endp = endposp; - result = nfa_regmatch(prog, state->out, submatch, m); + const int result = nfa_regmatch(prog, state->out, submatch, m); - if (need_restore) + if (need_restore) { nfa_restore_listids(prog, *listids); - else { - --nfa_ll_index; - nfa_alt_listid = nfa_listid; + } else { + nfa_ll_index--; + rex.nfa_alt_listid = rex.nfa_listid; } - /* restore position in input text */ - reglnum = save_reglnum; - if (REG_MULTI) - regline = reg_getline(reglnum); - reginput = regline + save_reginput_col; + // restore position in input text + rex.lnum = save_reglnum; + if (REG_MULTI) { + rex.line = reg_getline(rex.lnum); + } + rex.input = rex.line + save_reginput_col; if (result != NFA_TOO_EXPENSIVE) { nfa_match = save_nfa_match; - nfa_listid = save_nfa_listid; + rex.nfa_listid = save_nfa_listid; } nfa_endp = save_nfa_endp; @@ -4756,7 +4781,7 @@ static int recursive_regmatch( if (log_fd != NULL) { fprintf(log_fd, "****************************\n"); fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n"); - fprintf(log_fd, "MATCH = %s\n", !result ? "FALSE" : "OK"); + fprintf(log_fd, "MATCH = %s\n", !result ? 
"false" : "OK"); fprintf(log_fd, "****************************\n"); } else { EMSG(_(e_log_open_failed)); @@ -4930,11 +4955,11 @@ static int failure_chance(nfa_state_T *state, int depth) */ static int skip_to_start(int c, colnr_T *colp) { - const char_u *const s = cstrchr(regline + *colp, c); + const char_u *const s = cstrchr(rex.line + *colp, c); if (s == NULL) { return FAIL; } - *colp = (int)(s - regline); + *colp = (int)(s - rex.line); return OK; } @@ -4948,12 +4973,12 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) #define PTR2LEN(x) utf_ptr2len(x) colnr_T col = startcol; - int regstart_len = PTR2LEN(regline + startcol); + int regstart_len = PTR2LEN(rex.line + startcol); for (;;) { bool match = true; char_u *s1 = match_text; - char_u *s2 = regline + col + regstart_len; // skip regstart + char_u *s2 = rex.line + col + regstart_len; // skip regstart while (*s1) { int c1_len = PTR2LEN(s1); int c1 = PTR2CHAR(s1); @@ -4973,12 +4998,12 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) && !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) { cleanup_subexpr(); if (REG_MULTI) { - rex.reg_startpos[0].lnum = reglnum; + rex.reg_startpos[0].lnum = rex.lnum; rex.reg_startpos[0].col = col; - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = s2 - regline; + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = s2 - rex.line; } else { - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; rex.reg_endp[0] = s2; } return 1L; @@ -5008,17 +5033,18 @@ static int nfa_did_time_out(void) /// Main matching routine. /// -/// Run NFA to determine whether it matches reginput. +/// Run NFA to determine whether it matches rex.input. /// /// When "nfa_endp" is not NULL it is a required end-of-match position. 
/// -/// Return TRUE if there is a match, FALSE if there is no match, +/// Return true if there is a match, false if there is no match, /// NFA_TOO_EXPENSIVE if we end up with too many states. /// When there is a match "submatch" contains the positions. /// /// Note: Caller must ensure that: start != NULL. static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m) + FUNC_ATTR_NONNULL_ARG(1, 2, 4) { int result = false; int flag = 0; @@ -5063,11 +5089,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, nfa_match = false; // Allocate memory for the lists of nodes. - size_t size = (nstate + 1) * sizeof(nfa_thread_T); + size_t size = (prog->nstate + 1) * sizeof(nfa_thread_T); list[0].t = xmalloc(size); - list[0].len = nstate + 1; + list[0].len = prog->nstate + 1; list[1].t = xmalloc(size); - list[1].len = nstate + 1; + list[1].len = prog->nstate + 1; #ifdef REGEXP_DEBUG log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); @@ -5085,23 +5111,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, thislist = &list[0]; thislist->n = 0; - thislist->has_pim = FALSE; + thislist->has_pim = false; nextlist = &list[1]; nextlist->n = 0; - nextlist->has_pim = FALSE; + nextlist->has_pim = false; #ifdef REGEXP_DEBUG fprintf(log_fd, "(---) STARTSTATE first\n"); #endif - thislist->id = nfa_listid + 1; + thislist->id = rex.nfa_listid + 1; - /* Inline optimized code for addstate(thislist, start, m, 0) if we know - * it's the first MOPEN. */ + // Inline optimized code for addstate(thislist, start, m, 0) if we know + // it's the first MOPEN. 
if (toplevel) { if (REG_MULTI) { - m->norm.list.multi[0].start_lnum = reglnum; - m->norm.list.multi[0].start_col = (colnr_T)(reginput - regline); - } else - m->norm.list.line[0].start = reginput; + m->norm.list.multi[0].start_lnum = rex.lnum; + m->norm.list.multi[0].start_col = (colnr_T)(rex.input - rex.line); + } else { + m->norm.list.line[0].start = rex.input; + } m->norm.in_use = 1; r = addstate(thislist, start->out, m, NULL, 0); } else { @@ -5122,8 +5149,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, * Run for each character. */ for (;; ) { - int curc = utf_ptr2char(reginput); - int clen = utfc_ptr2len(reginput); + int curc = utf_ptr2char(rex.input); + int clen = utfc_ptr2len(rex.input); if (curc == NUL) { clen = 0; go_to_nextline = false; @@ -5134,20 +5161,20 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, nextlist = &list[flag ^= 1]; nextlist->n = 0; // clear nextlist nextlist->has_pim = false; - nfa_listid++; + rex.nfa_listid++; if (prog->re_engine == AUTOMATIC_ENGINE - && (nfa_listid >= NFA_MAX_STATES)) { + && (rex.nfa_listid >= NFA_MAX_STATES)) { // Too many states, retry with old engine. nfa_match = NFA_TOO_EXPENSIVE; goto theend; } - thislist->id = nfa_listid; - nextlist->id = nfa_listid + 1; + thislist->id = rex.nfa_listid; + nextlist->id = rex.nfa_listid + 1; #ifdef REGEXP_DEBUG fprintf(log_fd, "------------------------------------------\n"); - fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); + fprintf(log_fd, ">>> Reginput is \"%s\"\n", rex.input); fprintf(log_fd, ">>> Advanced one character... Current char is %c (code %d) \n", curc, @@ -5200,7 +5227,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } else if (REG_MULTI) { col = t->subs.norm.list.multi[0].start_col; } else { - col = (int)(t->subs.norm.list.line[0].start - regline); + col = (int)(t->subs.norm.list.line[0].start - rex.line); } nfa_set_code(t->state->c); fprintf(log_fd, "(%d) char %d %s (start col %d)%s... 
\n", @@ -5226,64 +5253,66 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } nfa_match = true; copy_sub(&submatch->norm, &t->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&submatch->synt, &t->subs.synt); + } #ifdef REGEXP_DEBUG log_subsexpr(&t->subs); #endif - /* Found the left-most longest match, do not look at any other - * states at this position. When the list of states is going - * to be empty quit without advancing, so that "reginput" is - * correct. */ - if (nextlist->n == 0) + // Found the left-most longest match, do not look at any other + // states at this position. When the list of states is going + // to be empty quit without advancing, so that "rex.input" is + // correct. + if (nextlist->n == 0) { clen = 0; + } goto nextchar; } case NFA_END_INVISIBLE: case NFA_END_INVISIBLE_NEG: case NFA_END_PATTERN: - /* - * This is only encountered after a NFA_START_INVISIBLE or - * NFA_START_INVISIBLE_BEFORE node. - * They surround a zero-width group, used with "\@=", "\&", - * "\@!", "\@<=" and "\@<!". - * If we got here, it means that the current "invisible" group - * finished successfully, so return control to the parent - * nfa_regmatch(). For a look-behind match only when it ends - * in the position in "nfa_endp". - * Submatches are stored in *m, and used in the parent call. - */ + // This is only encountered after a NFA_START_INVISIBLE or + // NFA_START_INVISIBLE_BEFORE node. + // They surround a zero-width group, used with "\@=", "\&", + // "\@!", "\@<=" and "\@<!". + // If we got here, it means that the current "invisible" group + // finished successfully, so return control to the parent + // nfa_regmatch(). For a look-behind match only when it ends + // in the position in "nfa_endp". + // Submatches are stored in *m, and used in the parent call. 
#ifdef REGEXP_DEBUG if (nfa_endp != NULL) { - if (REG_MULTI) - fprintf( - log_fd, - "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n", - (int)reglnum, - (int)nfa_endp->se_u.pos.lnum, - (int)(reginput - regline), - nfa_endp->se_u.pos.col); - else + if (REG_MULTI) { + fprintf(log_fd, + "Current lnum: %d, endp lnum: %d;" + " current col: %d, endp col: %d\n", + (int)rex.lnum, + (int)nfa_endp->se_u.pos.lnum, + (int)(rex.input - rex.line), + nfa_endp->se_u.pos.col); + } else { fprintf(log_fd, "Current col: %d, endp col: %d\n", - (int)(reginput - regline), - (int)(nfa_endp->se_u.ptr - reginput)); + (int)(rex.input - rex.line), + (int)(nfa_endp->se_u.ptr - rex.input)); + } } #endif - /* If "nfa_endp" is set it's only a match if it ends at - * "nfa_endp" */ - if (nfa_endp != NULL && (REG_MULTI - ? (reglnum != nfa_endp->se_u.pos.lnum - || (int)(reginput - regline) - != nfa_endp->se_u.pos.col) - : reginput != nfa_endp->se_u.ptr)) + // If "nfa_endp" is set it's only a match if it ends at + // "nfa_endp" + if (nfa_endp != NULL + && (REG_MULTI + ? (rex.lnum != nfa_endp->se_u.pos.lnum + || (int)(rex.input - rex.line) != nfa_endp->se_u.pos.col) + : rex.input != nfa_endp->se_u.ptr)) { break; - - /* do not set submatches for \@! */ + } + // do not set submatches for \@! if (t->state->c != NFA_END_INVISIBLE_NEG) { copy_sub(&m->norm, &t->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub(&m->synt, &t->subs.synt); + } } #ifdef REGEXP_DEBUG fprintf(log_fd, "Match found:\n"); @@ -5322,9 +5351,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Copy submatch info for the recursive call, opposite // of what happens on success below. copy_sub_off(&m->norm, &t->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&m->synt, &t->subs.synt); - + } // First try matching the invisible match, then what // follows. 
result = recursive_regmatch(t->state, NULL, prog, submatch, m, @@ -5335,7 +5364,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } // for \@! and \@<! it is a match when the result is - // FALSE + // false if (result != (t->state->c == NFA_START_INVISIBLE_NEG || t->state->c == NFA_START_INVISIBLE_NEG_FIRST || t->state->c @@ -5344,8 +5373,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { // Copy submatch info from the recursive call copy_sub_off(&t->subs.norm, &m->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&t->subs.synt, &m->synt); + } // If the pattern has \ze and it matched in the // sub pattern, use it. copy_ze_off(&t->subs.norm, &m->norm); @@ -5369,11 +5399,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, pim.subs.norm.in_use = 0; pim.subs.synt.in_use = 0; if (REG_MULTI) { - pim.end.pos.col = (int)(reginput - regline); - pim.end.pos.lnum = reglnum; - } else - pim.end.ptr = reginput; - + pim.end.pos.col = (int)(rex.input - rex.line); + pim.end.pos.lnum = rex.lnum; + } else { + pim.end.ptr = rex.input; + } // t->state->out1 is the corresponding END_INVISIBLE // node; Add its out to the current list (zero-width // match). @@ -5426,7 +5456,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Copy submatch info to the recursive call, opposite of what // happens afterwards. 
copy_sub_off(&m->norm, &t->subs.norm); - if (nfa_has_zsubexpr) { + if (rex.nfa_has_zsubexpr) { copy_sub_off(&m->synt, &t->subs.synt); } @@ -5446,7 +5476,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, #endif // Copy submatch info from the recursive call copy_sub_off(&t->subs.norm, &m->norm); - if (nfa_has_zsubexpr) { + if (rex.nfa_has_zsubexpr) { copy_sub_off(&t->subs.synt, &m->synt); } // Now we need to skip over the matched text and then @@ -5454,9 +5484,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, if (REG_MULTI) { // TODO(RE): multi-line match bytelen = m->norm.list.multi[0].end_col - - (int)(reginput - regline); + - (int)(rex.input - rex.line); } else { - bytelen = (int)(m->norm.list.line[0].end - reginput); + bytelen = (int)(m->norm.list.line[0].end - rex.input); } #ifdef REGEXP_DEBUG @@ -5485,7 +5515,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } case NFA_BOL: - if (reginput == regline) { + if (rex.input == rex.line) { add_here = true; add_state = t->state->out; } @@ -5503,20 +5533,16 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, if (curc == NUL) { result = false; - } else if (has_mbyte) { + } else { int this_class; // Get class of current and previous char (if it exists). 
- this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); if (this_class <= 1) { result = false; } else if (reg_prev_class() == this_class) { result = false; } - } else if (!vim_iswordc_buf(curc, rex.reg_buf) - || (reginput > regline - && vim_iswordc_buf(reginput[-1], rex.reg_buf))) { - result = false; } if (result) { add_here = true; @@ -5526,22 +5552,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_EOW: result = true; - if (reginput == regline) { + if (rex.input == rex.line) { result = false; - } else if (has_mbyte) { + } else { int this_class, prev_class; // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(reginput, rex.reg_buf->b_chartab); + this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) { result = false; } - } else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) - || (reginput[0] != NUL - && vim_iswordc_buf(curc, rex.reg_buf))) { - result = false; } if (result) { add_here = true; @@ -5550,7 +5572,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_BOF: - if (reglnum == 0 && reginput == regline + if (rex.lnum == 0 && rex.input == rex.line && (!REG_MULTI || rex.reg_firstlnum == 1)) { add_here = true; add_state = t->state->out; @@ -5558,7 +5580,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_EOF: - if (reglnum == rex.reg_maxline && curc == NUL) { + if (rex.lnum == rex.reg_maxline && curc == NUL) { add_here = true; add_state = t->state->out; } @@ -5603,7 +5625,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // We don't care about the order of composing characters. // Get them into cchars[] first. 
while (len < clen) { - mc = utf_ptr2char(reginput + len); + mc = utf_ptr2char(rex.input + len); cchars[ccount++] = mc; len += mb_char2len(mc); if (ccount == MAX_MCO) @@ -5634,7 +5656,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_NEWL: if (curc == NUL && !rex.reg_line_lbr && REG_MULTI - && reglnum <= rex.reg_maxline) { + && rex.lnum <= rex.reg_maxline) { go_to_nextline = true; // Pass -1 for the offset, which means taking the position // at the start of the next line. @@ -5688,7 +5710,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, for (; c1 <= c2; c1++) { if (utf_fold(c1) == curc_low) { result = result_if_matched; - done = TRUE; + done = true; break; } } @@ -5746,13 +5768,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_KWORD: // \k - result = vim_iswordp_buf(reginput, rex.reg_buf); + result = vim_iswordp_buf(rex.input, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; case NFA_SKWORD: // \K result = !ascii_isdigit(curc) - && vim_iswordp_buf(reginput, rex.reg_buf); + && vim_iswordp_buf(rex.input, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; @@ -5767,12 +5789,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; case NFA_PRINT: // \p - result = vim_isprintc(PTR2CHAR(reginput)); + result = vim_isprintc(PTR2CHAR(rex.input)); ADD_STATE_IF_MATCH(t->state); break; case NFA_SPRINT: // \P - result = !ascii_isdigit(curc) && vim_isprintc(PTR2CHAR(reginput)); + result = !ascii_isdigit(curc) && vim_isprintc(PTR2CHAR(rex.input)); ADD_STATE_IF_MATCH(t->state); break; @@ -5959,14 +5981,14 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_LNUM_LT: assert(t->state->val >= 0 && !((rex.reg_firstlnum > 0 - && reglnum > LONG_MAX - rex.reg_firstlnum) + && rex.lnum > LONG_MAX - rex.reg_firstlnum) || (rex.reg_firstlnum < 0 - && reglnum < LONG_MIN + rex.reg_firstlnum)) - && reglnum + rex.reg_firstlnum >= 0); + && rex.lnum < LONG_MIN + 
rex.reg_firstlnum)) + && rex.lnum + rex.reg_firstlnum >= 0); result = (REG_MULTI && nfa_re_num_cmp((uintmax_t)t->state->val, t->state->c - NFA_LNUM, - (uintmax_t)(reglnum + rex.reg_firstlnum))); + (uintmax_t)(rex.lnum + rex.reg_firstlnum))); if (result) { add_here = true; add_state = t->state->out; @@ -5977,11 +5999,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_COL_GT: case NFA_COL_LT: assert(t->state->val >= 0 - && reginput >= regline - && (uintmax_t)(reginput - regline) <= UINTMAX_MAX - 1); + && rex.input >= rex.line + && (uintmax_t)(rex.input - rex.line) <= UINTMAX_MAX - 1); result = nfa_re_num_cmp((uintmax_t)t->state->val, t->state->c - NFA_COL, - (uintmax_t)(reginput - regline + 1)); + (uintmax_t)(rex.input - rex.line + 1)); if (result) { add_here = true; add_state = t->state->out; @@ -5993,7 +6015,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_VCOL_LT: { int op = t->state->c - NFA_VCOL; - colnr_T col = (colnr_T)(reginput - regline); + colnr_T col = (colnr_T)(rex.input - rex.line); // Bail out quickly when there can't be a match, avoid the overhead of // win_linetabsize() on long lines. @@ -6014,7 +6036,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, result = col > t->state->val * ts; } if (!result) { - uintmax_t lts = win_linetabsize(wp, regline, col); + uintmax_t lts = win_linetabsize(wp, rex.line, col); assert(t->state->val >= 0); result = nfa_re_num_cmp((uintmax_t)t->state->val, op, lts + 1); } @@ -6034,13 +6056,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Compare the mark position to the match position. result = (pos != NULL // mark doesn't exist && pos->lnum > 0 // mark isn't set in reg_buf - && (pos->lnum == reglnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(reginput - regline) + && (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos->col == (colnr_T)(rex.input - rex.line) ? 
t->state->c == NFA_MARK - : (pos->col < (colnr_T)(reginput - regline) + : (pos->col < (colnr_T)(rex.input - rex.line) ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT)) - : (pos->lnum < reglnum + rex.reg_firstlnum + : (pos->lnum < rex.lnum + rex.reg_firstlnum ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT))); if (result) { @@ -6051,10 +6073,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } case NFA_CURSOR: - result = (rex.reg_win != NULL - && (reglnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) - && ((colnr_T)(reginput - regline) - == rex.reg_win->w_cursor.col)); + result = rex.reg_win != NULL + && (rex.lnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) + && ((colnr_T)(rex.input - rex.line) == rex.reg_win->w_cursor.col); if (result) { add_here = true; add_state = t->state->out; @@ -6112,7 +6133,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // If rex.reg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. if (result && enc_utf8 && !rex.reg_icombine) { - clen = utf_ptr2len(reginput); + clen = utf_ptr2len(rex.input); } ADD_STATE_IF_MATCH(t->state); @@ -6143,7 +6164,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, &listids, &listids_len); pim->result = result ? NFA_PIM_MATCH : NFA_PIM_NOMATCH; // for \@! and \@<! 
it is a match when the result is - // FALSE + // false if (result != (pim->state->c == NFA_START_INVISIBLE_NEG || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST || pim->state->c @@ -6152,8 +6173,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { // Copy submatch info from the recursive call copy_sub_off(&pim->subs.norm, &m->norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&pim->subs.synt, &m->synt); + } } } else { result = (pim->result == NFA_PIM_MATCH); @@ -6163,12 +6185,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, log_fd, "Using previous recursive nfa_regmatch() result, result == %d\n", pim->result); - fprintf(log_fd, "MATCH = %s\n", result ? "OK" : "FALSE"); + fprintf(log_fd, "MATCH = %s\n", result ? "OK" : "false"); fprintf(log_fd, "\n"); #endif } - // for \@! and \@<! it is a match when result is FALSE + // for \@! and \@<! it is a match when result is false if (result != (pim->state->c == NFA_START_INVISIBLE_NEG || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST || pim->state->c @@ -6177,8 +6199,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)) { // Copy submatch info from the recursive call copy_sub_off(&t->subs.norm, &pim->subs.norm); - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) { copy_sub_off(&t->subs.synt, &pim->subs.synt); + } } else { // look-behind match failed, don't add the state continue; @@ -6222,29 +6245,28 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Also don't start a match past the first line. if (!nfa_match && ((toplevel - && reglnum == 0 + && rex.lnum == 0 && clen != 0 && (rex.reg_maxcol == 0 - || (colnr_T)(reginput - regline) < rex.reg_maxcol)) + || (colnr_T)(rex.input - rex.line) < rex.reg_maxcol)) || (nfa_endp != NULL && (REG_MULTI - ? 
(reglnum < nfa_endp->se_u.pos.lnum - || (reglnum == nfa_endp->se_u.pos.lnum - && (int)(reginput - regline) + ? (rex.lnum < nfa_endp->se_u.pos.lnum + || (rex.lnum == nfa_endp->se_u.pos.lnum + && (int)(rex.input - rex.line) < nfa_endp->se_u.pos.col)) - : reginput < nfa_endp->se_u.ptr)))) { + : rex.input < nfa_endp->se_u.ptr)))) { #ifdef REGEXP_DEBUG fprintf(log_fd, "(---) STARTSTATE\n"); #endif // Inline optimized code for addstate() if we know the state is // the first MOPEN. if (toplevel) { - int add = TRUE; - int c; + int add = true; if (prog->regstart != NUL && clen != 0) { if (nextlist->n == 0) { - colnr_T col = (colnr_T)(reginput - regline) + clen; + colnr_T col = (colnr_T)(rex.input - rex.line) + clen; // Nextlist is empty, we can skip ahead to the // character that must appear at the start. @@ -6253,13 +6275,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } #ifdef REGEXP_DEBUG fprintf(log_fd, " Skipping ahead %d bytes to regstart\n", - col - ((colnr_T)(reginput - regline) + clen)); + col - ((colnr_T)(rex.input - rex.line) + clen)); #endif - reginput = regline + col - clen; + rex.input = rex.line + col - clen; } else { // Checking if the required start character matches is // cheaper than adding a state that won't match. 
- c = PTR2CHAR(reginput + clen); + const int c = PTR2CHAR(rex.input + clen); if (c != prog->regstart && (!rex.reg_ic || utf_fold(c) != utf_fold(prog->regstart))) { @@ -6267,17 +6289,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, fprintf(log_fd, " Skipping start state, regstart does not match\n"); #endif - add = FALSE; + add = false; } } } if (add) { - if (REG_MULTI) + if (REG_MULTI) { m->norm.list.multi[0].start_col = - (colnr_T)(reginput - regline) + clen; - else - m->norm.list.line[0].start = reginput + clen; + (colnr_T)(rex.input - rex.line) + clen; + } else { + m->norm.list.line[0].start = rex.input + clen; + } if (addstate(nextlist, start->out, m, NULL, clen) == NULL) { nfa_match = NFA_TOO_EXPENSIVE; goto theend; @@ -6306,9 +6329,9 @@ nextchar: // Advance to the next character, or advance to the next line, or // finish. if (clen != 0) { - reginput += clen; + rex.input += clen; } else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI - && reglnum < nfa_endp->se_u.pos.lnum)) { + && rex.lnum < nfa_endp->se_u.pos.lnum)) { reg_nextline(); } else { break; @@ -6347,7 +6370,7 @@ theend: return nfa_match; } -// Try match of "prog" with at regline["col"]. +// Try match of "prog" with at rex.line["col"]. // Returns <= 0 for failure, number of lines contained in the match otherwise. 
static long nfa_regtry(nfa_regprog_T *prog, colnr_T col, @@ -6361,7 +6384,7 @@ static long nfa_regtry(nfa_regprog_T *prog, FILE *f; #endif - reginput = regline + col; + rex.input = rex.line + col; nfa_time_limit = tm; nfa_timed_out = timed_out; nfa_time_count = 0; @@ -6374,7 +6397,7 @@ static long nfa_regtry(nfa_regprog_T *prog, #ifdef REGEXP_DEBUG fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); #endif - fprintf(f, "\tInput text is \"%s\" \n", reginput); + fprintf(f, "\tInput text is \"%s\" \n", rex.input); fprintf(f, "\t=======================================================\n\n"); nfa_print_state(f, start); fprintf(f, "\n\n"); @@ -6412,11 +6435,11 @@ static long nfa_regtry(nfa_regprog_T *prog, } if (rex.reg_endpos[0].lnum < 0) { // pattern has a \ze but it didn't match, use current end - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = (int)(reginput - regline); + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = (int)(rex.input - rex.line); } else { // Use line number of "\ze". 
- reglnum = rex.reg_endpos[0].lnum; + rex.lnum = rex.reg_endpos[0].lnum; } } else { for (i = 0; i < subs.norm.in_use; i++) { @@ -6425,10 +6448,10 @@ static long nfa_regtry(nfa_regprog_T *prog, } if (rex.reg_startp[0] == NULL) { - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; } if (rex.reg_endp[0] == NULL) { - rex.reg_endp[0] = reginput; + rex.reg_endp[0] = rex.input; } } @@ -6463,7 +6486,7 @@ static long nfa_regtry(nfa_regprog_T *prog, } } - return 1 + reglnum; + return 1 + rex.lnum; } /// Match a regexp against a string ("line" points to the string) or multiple @@ -6481,7 +6504,6 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, { nfa_regprog_T *prog; long retval = 0L; - int i; colnr_T col = startcol; if (REG_MULTI) { @@ -6513,26 +6535,30 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, rex.reg_icombine = true; } - regline = line; - reglnum = 0; /* relative to line */ + rex.line = line; + rex.lnum = 0; // relative to line - nfa_has_zend = prog->has_zend; - nfa_has_backref = prog->has_backref; - nfa_nsubexpr = prog->nsubexp; - nfa_listid = 1; - nfa_alt_listid = 2; + rex.nfa_has_zend = prog->has_zend; + rex.nfa_has_backref = prog->has_backref; + rex.nfa_nsubexpr = prog->nsubexp; + rex.nfa_listid = 1; + rex.nfa_alt_listid = 2; +#ifdef REGEXP_DEBUG nfa_regengine.expr = prog->pattern; +#endif if (prog->reganch && col > 0) return 0L; - need_clear_subexpr = TRUE; - /* Clear the external match subpointers if necessary. */ + rex.need_clear_subexpr = true; + // Clear the external match subpointers if necessary. if (prog->reghasz == REX_SET) { - nfa_has_zsubexpr = TRUE; - need_clear_zsubexpr = TRUE; - } else - nfa_has_zsubexpr = FALSE; + rex.nfa_has_zsubexpr = true; + rex.need_clear_zsubexpr = true; + } else { + rex.nfa_has_zsubexpr = false; + rex.need_clear_zsubexpr = false; + } if (prog->regstart != NUL) { /* Skip ahead until a character we know the match must start with. 
@@ -6552,8 +6578,10 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, goto theend; } - nstate = prog->nstate; - for (i = 0; i < nstate; ++i) { + // Set the "nstate" used by nfa_regcomp() to zero to trigger an error when + // it's accidentally used during execution. + nstate = 0; + for (int i = 0; i < prog->nstate; i++) { prog->state[i].id = i; prog->state[i].lastlist[0] = 0; prog->state[i].lastlist[1] = 0; @@ -6561,7 +6589,9 @@ static long nfa_regexec_both(char_u *line, colnr_T startcol, retval = nfa_regtry(prog, col, tm, timed_out); +#ifdef REGEXP_DEBUG nfa_regengine.expr = NULL; +#endif theend: return retval; @@ -6579,7 +6609,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) if (expr == NULL) return NULL; +#ifdef REGEXP_DEBUG nfa_regengine.expr = expr; +#endif nfa_re_flags = re_flags; init_class_tab(); @@ -6616,26 +6648,27 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) * PASS 1 * Count number of NFA states in "nstate". Do not build the NFA. */ - post2nfa(postfix, post_ptr, TRUE); + post2nfa(postfix, post_ptr, true); /* allocate the regprog with space for the compiled regexp */ size_t prog_size = sizeof(nfa_regprog_T) + sizeof(nfa_state_T) * (nstate - 1); prog = xmalloc(prog_size); state_ptr = prog->state; + prog->re_in_use = false; /* * PASS 2 * Build the NFA */ - prog->start = post2nfa(postfix, post_ptr, FALSE); - if (prog->start == NULL) + prog->start = post2nfa(postfix, post_ptr, false); + if (prog->start == NULL) { goto fail; - + } prog->regflags = regflags; prog->engine = &nfa_regengine; prog->nstate = nstate; - prog->has_zend = nfa_has_zend; - prog->has_backref = nfa_has_backref; + prog->has_zend = rex.nfa_has_zend; + prog->has_backref = rex.nfa_has_backref; prog->nsubexp = regnpar; nfa_postprocess(prog); @@ -6651,7 +6684,9 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) /* Remember whether this pattern has any \z specials in it. 
*/ prog->reghasz = re_has_z; prog->pattern = vim_strsave(expr); +#ifdef REGEXP_DEBUG nfa_regengine.expr = NULL; +#endif out: xfree(post_start); @@ -6663,8 +6698,8 @@ fail: XFREE_CLEAR(prog); #ifdef REGEXP_DEBUG nfa_postfix_dump(expr, FAIL); -#endif nfa_regengine.expr = NULL; +#endif goto out; } diff --git a/src/nvim/screen.c b/src/nvim/screen.c index 7bed747e9a..c5723035d6 100644 --- a/src/nvim/screen.c +++ b/src/nvim/screen.c @@ -3068,10 +3068,12 @@ win_line ( } // When still displaying '$' of change command, stop at cursor - if ((dollar_vcol >= 0 && wp == curwin - && lnum == wp->w_cursor.lnum && vcol >= (long)wp->w_virtcol - && filler_todo <= 0) - || (number_only && draw_state > WL_NR)) { + if (((dollar_vcol >= 0 + && wp == curwin + && lnum == wp->w_cursor.lnum + && vcol >= (long)wp->w_virtcol) + || (number_only && draw_state > WL_NR)) + && filler_todo <= 0) { grid_put_linebuf(grid, row, 0, col, -grid->Columns, wp->w_p_rl, wp, wp->w_hl_attr_normal, false); // Pretend we have finished updating the window. Except when @@ -3476,6 +3478,7 @@ win_line ( * Only do this when there is no syntax highlighting, the * @Spell cluster is not used or the current syntax item * contains the @Spell cluster. */ + v = (long)(ptr - line); if (has_spell && v >= word_end && v > cur_checked_col) { spell_attr = 0; if (!attr_pri) { diff --git a/src/nvim/testdir/check.vim b/src/nvim/testdir/check.vim new file mode 100644 index 0000000000..57a8eb57b8 --- /dev/null +++ b/src/nvim/testdir/check.vim @@ -0,0 +1,11 @@ +source shared.vim +source term_util.vim + +" Command to check that making screendumps is supported. 
+" Caller must source screendump.vim +command CheckScreendump call CheckScreendump() +func CheckScreendump() + if !CanRunVimInTerminal() + throw 'Skipped: cannot make screendumps' + endif +endfunc diff --git a/src/nvim/testdir/runtest.vim b/src/nvim/testdir/runtest.vim index e249d499c4..2bf61b0719 100644 --- a/src/nvim/testdir/runtest.vim +++ b/src/nvim/testdir/runtest.vim @@ -84,6 +84,11 @@ let &runtimepath .= ','.expand($BUILD_DIR).'/runtime/' " Always use forward slashes. set shellslash +if has('win32') + " avoid prompt that is long or contains a line break + let $PROMPT = '$P$G' +endif + " Prepare for calling test_garbagecollect_now(). let v:testing = 1 diff --git a/src/nvim/testdir/test_backup.vim b/src/nvim/testdir/test_backup.vim index fa10430613..ce2bfe72bc 100644 --- a/src/nvim/testdir/test_backup.vim +++ b/src/nvim/testdir/test_backup.vim @@ -1,7 +1,7 @@ " Tests for the backup function func Test_backup() - set backup backupdir=. + set backup backupdir=. backupskip= new call setline(1, ['line1', 'line2']) :f Xbackup.txt @@ -12,13 +12,13 @@ func Test_backup() let l = readfile('Xbackup.txt~') call assert_equal(['line1', 'line2'], l) bw! - set backup&vim backupdir&vim + set backup&vim backupdir&vim backupskip&vim call delete('Xbackup.txt') call delete('Xbackup.txt~') endfunc func Test_backup2() - set backup backupdir=.// + set backup backupdir=.// backupskip= new call setline(1, ['line1', 'line2', 'line3']) :f Xbackup.txt @@ -29,16 +29,16 @@ func Test_backup2() sp *Xbackup.txt~ call assert_equal(['line1', 'line2', 'line3'], getline(1,'$')) let f=expand('%') - call assert_match('src%nvim%testdir%Xbackup.txt\~', f) + call assert_match('%testdir%Xbackup.txt\~', f) bw! bw! 
call delete('Xbackup.txt') call delete(f) - set backup&vim backupdir&vim + set backup&vim backupdir&vim backupskip&vim endfunc func Test_backup2_backupcopy() - set backup backupdir=.// backupcopy=yes + set backup backupdir=.// backupcopy=yes backupskip= new call setline(1, ['line1', 'line2', 'line3']) :f Xbackup.txt @@ -49,10 +49,10 @@ func Test_backup2_backupcopy() sp *Xbackup.txt~ call assert_equal(['line1', 'line2', 'line3'], getline(1,'$')) let f=expand('%') - call assert_match('src%nvim%testdir%Xbackup.txt\~', f) + call assert_match('%testdir%Xbackup.txt\~', f) bw! bw! call delete('Xbackup.txt') call delete(f) - set backup&vim backupdir&vim backupcopy&vim + set backup&vim backupdir&vim backupcopy&vim backupskip&vim endfunc diff --git a/src/nvim/testdir/test_diffmode.vim b/src/nvim/testdir/test_diffmode.vim index 49bbe84869..a1f1dd3bab 100644 --- a/src/nvim/testdir/test_diffmode.vim +++ b/src/nvim/testdir/test_diffmode.vim @@ -1,6 +1,7 @@ " Tests for diff mode source shared.vim source screendump.vim +source check.vim func Test_diff_fold_sync() enew! @@ -801,6 +802,34 @@ func Test_diff_closeoff() enew! 
endfunc +func Test_diff_rnu() + CheckScreendump + + let content =<< trim END + call setline(1, ['a', 'a', 'a', 'y', 'b', 'b', 'b', 'b', 'b']) + vnew + call setline(1, ['a', 'a', 'a', 'x', 'x', 'x', 'b', 'b', 'b', 'b', 'b']) + call setline(1, ['a', 'a', 'a', 'y', 'b', 'b', 'b', 'b', 'b']) + vnew + call setline(1, ['a', 'a', 'a', 'x', 'x', 'x', 'b', 'b', 'b', 'b', 'b']) + windo diffthis + setlocal number rnu foldcolumn=0 + END + call writefile(content, 'Xtest_diff_rnu') + let buf = RunVimInTerminal('-S Xtest_diff_rnu', {}) + + call VerifyScreenDump(buf, 'Test_diff_rnu_01', {}) + + call term_sendkeys(buf, "j") + call VerifyScreenDump(buf, 'Test_diff_rnu_02', {}) + call term_sendkeys(buf, "j") + call VerifyScreenDump(buf, 'Test_diff_rnu_03', {}) + + " clean up + call StopVimInTerminal(buf) + call delete('Xtest_diff_rnu') +endfunc + func Test_diff_and_scroll() " this was causing an ml_get error set ls=2 diff --git a/src/nvim/testdir/test_filetype.vim b/src/nvim/testdir/test_filetype.vim index d440bdcb1e..2e280417ae 100644 --- a/src/nvim/testdir/test_filetype.vim +++ b/src/nvim/testdir/test_filetype.vim @@ -54,6 +54,7 @@ let s:filename_checks = { \ 'acedb': ['file.wrm'], \ 'ada': ['file.adb', 'file.ads', 'file.ada', 'file.gpr'], \ 'ahdl': ['file.tdf'], + \ 'aidl': ['file.aidl'], \ 'alsaconf': ['.asoundrc', '/usr/share/alsa/alsa.conf', '/etc/asound.conf'], \ 'aml': ['file.aml'], \ 'ampl': ['file.run'], diff --git a/src/nvim/testdir/test_quickfix.vim b/src/nvim/testdir/test_quickfix.vim index 35555ca9d3..926103b69f 100644 --- a/src/nvim/testdir/test_quickfix.vim +++ b/src/nvim/testdir/test_quickfix.vim @@ -554,6 +554,33 @@ func s:test_xhelpgrep(cchar) " This wipes out the buffer, make sure that doesn't cause trouble. Xclose + " When the current window is vertically split, jumping to a help match + " should open the help window at the top. 
+ only | enew + let w1 = win_getid() + vert new + let w2 = win_getid() + Xnext + let w3 = win_getid() + call assert_true(&buftype == 'help') + call assert_true(winnr() == 1) + " See jump_to_help_window() for details + let w2_width = winwidth(w2) + if w2_width != &columns && w2_width < 80 + call assert_equal(['col', [['leaf', w3], + \ ['row', [['leaf', w2], ['leaf', w1]]]]], winlayout()) + else + call assert_equal(['row', [['col', [['leaf', w3], ['leaf', w2]]], + \ ['leaf', w1]]] , winlayout()) + endif + + new | only + set buftype=help + set modified + call assert_fails('Xnext', 'E37:') + set nomodified + new | only + if a:cchar == 'l' " When a help window is present, running :lhelpgrep should reuse the " help window and not the current window diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim index f48458566b..4466ad436a 100644 --- a/src/nvim/testdir/test_regexp_utf8.vim +++ b/src/nvim/testdir/test_regexp_utf8.vim @@ -32,6 +32,9 @@ func Test_equivalence_re2() endfunc func s:classes_test() + if has('win32') + set iskeyword=@,48-57,_,192-255 + endif set isprint=@,161-255 call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+')) @@ -51,6 +54,12 @@ func s:classes_test() let tabchar = '' let upperchars = '' let xdigitchars = '' + let identchars = '' + let identchars1 = '' + let kwordchars = '' + let kwordchars1 = '' + let fnamechars = '' + let fnamechars1 = '' let i = 1 while i <= 255 let c = nr2char(i) @@ -102,6 +111,24 @@ func s:classes_test() if c =~ '[[:xdigit:]]' let xdigitchars .= c endif + if c =~ '[[:ident:]]' + let identchars .= c + endif + if c =~ '\i' + let identchars1 .= c + endif + if c =~ '[[:keyword:]]' + let kwordchars .= c + endif + if c =~ '\k' + let kwordchars1 .= c + endif + if c =~ '[[:fname:]]' + let fnamechars .= c + endif + if c =~ '\f' + let fnamechars1 .= c + endif let i += 1 endwhile @@ -121,6 +148,37 @@ func s:classes_test() call assert_equal("\t\n\x0b\f\r ", spacechars) call 
assert_equal("\t", tabchar) call assert_equal('0123456789ABCDEFabcdef', xdigitchars) + + if has('win32') + let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ' + let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('ebcdic') + let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + let kwordchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + else + let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + endif + + if has('win32') + let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('amiga') + let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('vms') + let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('ebcdic') + let fnamechars_ok = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + else + let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + endif + + call assert_equal(identchars_ok, identchars) + call 
assert_equal(kwordchars_ok, kwordchars) + call assert_equal(fnamechars_ok, fnamechars) + + call assert_equal(identchars1, identchars) + call assert_equal(kwordchars1, kwordchars) + call assert_equal(fnamechars1, fnamechars) endfunc func Test_classes_re1() @@ -351,4 +409,128 @@ func Test_regexp_ignore_case() set regexpengine& endfunc +" Tests for regexp with multi-byte encoding and various magic settings +func Run_regexp_multibyte_magic() + let text =<< trim END + 1 a aa abb abbccc + 2 d dd dee deefff + 3 g gg ghh ghhiii + 4 j jj jkk jkklll + 5 m mm mnn mnnooo + 6 x ^aa$ x + 7 (a)(b) abbaa + 8 axx [ab]xx + 9 หม่x อมx + a อมx หม่x + b ちカヨは + c x ¬€x + d 天使x + e y + f z + g a啷bb + j 0123❤x + k combinations + l äö üᾱ̆́ + END + + new + call setline(1, text) + exe 'normal /a*b\{2}c\+/e' .. "\<CR>x" + call assert_equal('1 a aa abb abbcc', getline('.')) + exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x" + call assert_equal('2 d dd dee deeff', getline('.')) + set nomagic + exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x" + call assert_equal('3 g gg ghh ghhii', getline('.')) + exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x" + call assert_equal('4 j jj jkk jkkll', getline('.')) + exe 'normal /\vm*n{2}o+/e' .. "\<CR>x" + call assert_equal('5 m mm mnn mnnoo', getline('.')) + exe 'normal /\V^aa$/' .. "\<CR>x" + call assert_equal('6 x aa$ x', getline('.')) + set magic + exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x" + call assert_equal('7 (a)(b) abba', getline('.')) + exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x" + call assert_equal('8 axx ab]xx', getline('.')) + + " search for multi-byte without composing char + exe 'normal /ม' .. "\<CR>x" + call assert_equal('9 หม่x อx', getline('.')) + + " search for multi-byte with composing char + exe 'normal /ม่' .. "\<CR>x" + call assert_equal('a อมx หx', getline('.')) + + " find word by change of word class + exe 'normal /ち\<カヨ\>は' .. "\<CR>x" + call assert_equal('b カヨは', getline('.')) + + " Test \%u, [\u] and friends + " c + exe 'normal /\%u20ac' .. 
"\<CR>x" + call assert_equal('c x ¬x', getline('.')) + " d + exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x" + call assert_equal('d 使x', getline('.')) + " e + exe 'normal /\%U12345678' .. "\<CR>x" + call assert_equal('e y', getline('.')) + " f + exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x" + call assert_equal('f z', getline('.')) + " g + exe 'normal /\%d21879b' .. "\<CR>x" + call assert_equal('g abb', getline('.')) + + " j Test backwards search from a multi-byte char + exe "normal /x\<CR>x?.\<CR>x" + call assert_equal('j 012❤', getline('.')) + " k + let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g' + @w + call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18)) + + close! +endfunc + +func Test_regexp_multibyte_magic() + set regexpengine=1 + call Run_regexp_multibyte_magic() + set regexpengine=2 + call Run_regexp_multibyte_magic() + set regexpengine& +endfunc + +" Test for 7.3.192 +" command ":s/ \?/ /g" splits multi-byte characters into bytes +func Test_split_multibyte_to_bytes() + new + call setline(1, 'l äö üᾱ̆́') + s/ \?/ /g + call assert_equal(' l ä ö ü ᾱ̆́', getline(1)) + close! +endfunc + +" Test for matchstr() with multibyte characters +func Test_matchstr_multibyte() + new + call assert_equal('ב', matchstr("אבגד", ".", 0, 2)) + call assert_equal('בג', matchstr("אבגד", "..", 0, 2)) + call assert_equal('א', matchstr("אבגד", ".", 0, 0)) + call assert_equal('ג', matchstr("אבגד", ".", 4, -1)) + close! +endfunc + +" Test for 7.4.636 +" A search with end offset gets stuck at end of file. +func Test_search_with_end_offset() + new + call setline(1, ['', 'dog(a', 'cat(']) + exe "normal /(/e+" .. "\<CR>" + normal "ayn + call assert_equal("a\ncat(", @a) + close! 
+endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/nvim/testdir/test_spell.vim b/src/nvim/testdir/test_spell.vim index e5eaa01e92..414c7278eb 100644 --- a/src/nvim/testdir/test_spell.vim +++ b/src/nvim/testdir/test_spell.vim @@ -1,10 +1,13 @@ " Test spell checking " Note: this file uses latin1 encoding, but is used with utf-8 encoding. +source check.vim if !has('spell') finish endif +source screendump.vim + func TearDown() set nospell call delete('Xtest.aff') @@ -477,6 +480,44 @@ func RunGoodBad(good, bad, expected_words, expected_bad_words) bwipe! endfunc +func Test_spell_screendump() + CheckScreendump + + let lines =<< trim END + call setline(1, [ + \ "This is some text without any spell errors. Everything", + \ "should just be black, nothing wrong here.", + \ "", + \ "This line has a sepll error. and missing caps.", + \ "And and this is the the duplication.", + \ "with missing caps here.", + \ ]) + set spell spelllang=en_nz + END + call writefile(lines, 'XtestSpell') + let buf = RunVimInTerminal('-S XtestSpell', {'rows': 8}) + call VerifyScreenDump(buf, 'Test_spell_1', {}) + + let lines =<< trim END + call setline(1, [ + \ "This is some text without any spell errors. Everything", + \ "should just be black, nothing wrong here.", + \ "", + \ "This line has a sepll error. 
and missing caps.", + \ "And and this is the the duplication.", + \ "with missing caps here.", + \ ]) + set spell spelllang=en_nz + END + call writefile(lines, 'XtestSpell') + let buf = RunVimInTerminal('-S XtestSpell', {'rows': 8}) + call VerifyScreenDump(buf, 'Test_spell_1', {}) + + " clean up + call StopVimInTerminal(buf) + call delete('XtestSpell') +endfunc + let g:test_data_aff1 = [ \"SET ISO8859-1", \"TRY esianrtolcdugmphbyfvkwjkqxz-\xEB\xE9\xE8\xEA\xEF\xEE\xE4\xE0\xE2\xF6\xFC\xFB'ESIANRTOLCDUGMPHBYFVKWJKQXZ", diff --git a/src/nvim/testdir/test_tabline.vim b/src/nvim/testdir/test_tabline.vim index f24552088b..117d962d08 100644 --- a/src/nvim/testdir/test_tabline.vim +++ b/src/nvim/testdir/test_tabline.vim @@ -64,3 +64,28 @@ func Test_redrawtabline() let &showtabline = showtabline_save au! Bufadd endfunc + +function EmptyTabname() + return "" +endfunction + +function MakeTabLine() abort + let titles = map(range(1, tabpagenr('$')), '"%( %" . v:val . "T%{EmptyTabname()}%T %)"') + let sep = 'あ' + let tabpages = join(titles, sep) + return tabpages .. sep .. '%=%999X X' +endfunction + +func Test_tabline_empty_group() + " this was reading invalid memory + set tabline=%!MakeTabLine() + tabnew + redraw! 
+ + tabclose + set tabline= +endfunc + + + +" vim: shiftwidth=2 sts=2 expandtab diff --git a/test/functional/legacy/memory_usage_spec.lua b/test/functional/legacy/memory_usage_spec.lua index 28ca749749..251e6a5ea4 100644 --- a/test/functional/legacy/memory_usage_spec.lua +++ b/test/functional/legacy/memory_usage_spec.lua @@ -7,6 +7,9 @@ local iswin = helpers.iswin local retry = helpers.retry local ok = helpers.ok local source = helpers.source +local wait = helpers.wait +local uname = helpers.uname +local load_adjust = helpers.load_adjust local monitor_memory_usage = { memory_usage = function(self) @@ -99,6 +102,7 @@ describe('memory usage', function() call s:f(0) endfor ]]) + wait() local after = monitor_memory_usage(pid) -- Estimate the limit of max usage as 2x initial usage. -- The lower limit can fluctuate a bit, use 97%. @@ -143,16 +147,20 @@ describe('memory usage', function() call s:f() endfor ]]) + wait() local after = monitor_memory_usage(pid) for _ = 1, 3 do feed_command('so '..fname) + wait() end local last = monitor_memory_usage(pid) -- The usage may be a bit less than the last value, use 80%. -- Allow for 20% tolerance at the upper limit. That's very permissive, but - -- otherwise the test fails sometimes. + -- otherwise the test fails sometimes. On Sourcehut CI with FreeBSD we need to + -- be even more permissive. 
+ local upper_multiplier = uname() == 'freebsd' and 15 or 12 local lower = before.last * 8 / 10 - local upper = (after.max + (after.last - before.last)) * 12 / 10 + local upper = load_adjust((after.max + (after.last - before.last)) * upper_multiplier / 10) check_result({before=before, after=after, last=last}, pcall(ok, lower < last.last)) check_result({before=before, after=after, last=last}, diff --git a/test/functional/terminal/tui_spec.lua b/test/functional/terminal/tui_spec.lua index 5d82037f42..c0578c08e1 100644 --- a/test/functional/terminal/tui_spec.lua +++ b/test/functional/terminal/tui_spec.lua @@ -605,6 +605,8 @@ describe('TUI', function() wait_for_mode('i') -- "bracketed paste" feed_data('\027[200~'..expected..'\027[201~') + -- FIXME: Data race between the two feeds + if uname() == 'freebsd' then screen:sleep(1) end feed_data(' end') expected = expected..' end' screen:expect([[ diff --git a/test/functional/ui/diff_spec.lua b/test/functional/ui/diff_spec.lua index 252991aca7..69b6ab8cf0 100644 --- a/test/functional/ui/diff_spec.lua +++ b/test/functional/ui/diff_spec.lua @@ -6,6 +6,7 @@ local clear = helpers.clear local command = helpers.command local insert = helpers.insert local write_file = helpers.write_file +local source = helpers.source describe('Diff mode screen', function() local fname = 'Xtest-functional-diff-screen-1' @@ -1031,3 +1032,79 @@ it('win_update redraws lines properly', function() | ]]} end) + +it('diff updates line numbers below filler lines', function() + clear() + local screen = Screen.new(40, 14) + screen:attach() + screen:set_default_attr_ids({ + [1] = {foreground = Screen.colors.DarkBlue, background = Screen.colors.WebGray}, + [2] = {background = Screen.colors.LightCyan1, bold = true, foreground = Screen.colors.Blue1}, + [3] = {reverse = true}, + [4] = {background = Screen.colors.LightBlue}, + [5] = {foreground = Screen.colors.DarkBlue, background = Screen.colors.LightGrey}, + [6] = {bold = true, foreground = Screen.colors.Blue1}, 
+ [7] = {bold = true, reverse = true}, + [8] = {bold = true, background = Screen.colors.Red}, + [9] = {background = Screen.colors.LightMagenta}, + [10] = {bold = true, foreground = Screen.colors.Brown}, + [11] = {foreground = Screen.colors.Brown}, + }) + source([[ + call setline(1, ['a', 'a', 'a', 'y', 'b', 'b', 'b', 'b', 'b']) + vnew + call setline(1, ['a', 'a', 'a', 'x', 'x', 'x', 'b', 'b', 'b', 'b', 'b']) + windo diffthis + setlocal number rnu foldcolumn=0 + ]]) + screen:expect([[ + {1: }a {3:│}{10:1 }^a | + {1: }a {3:│}{11: 1 }a | + {1: }a {3:│}{11: 2 }a | + {1: }{8:x}{9: }{3:│}{11: 3 }{8:y}{9: }| + {1: }{4:x }{3:│}{11: }{2:----------------}| + {1: }{4:x }{3:│}{11: }{2:----------------}| + {1: }b {3:│}{11: 4 }b | + {1: }b {3:│}{11: 5 }b | + {1: }b {3:│}{11: 6 }b | + {1: }b {3:│}{11: 7 }b | + {1: }b {3:│}{11: 8 }b | + {6:~ }{3:│}{6:~ }| + {3:[No Name] [+] }{7:[No Name] [+] }| + | + ]]) + feed('j') + screen:expect([[ + {1: }a {3:│}{11: 1 }a | + {1: }a {3:│}{10:2 }^a | + {1: }a {3:│}{11: 1 }a | + {1: }{8:x}{9: }{3:│}{11: 2 }{8:y}{9: }| + {1: }{4:x }{3:│}{11: }{2:----------------}| + {1: }{4:x }{3:│}{11: }{2:----------------}| + {1: }b {3:│}{11: 3 }b | + {1: }b {3:│}{11: 4 }b | + {1: }b {3:│}{11: 5 }b | + {1: }b {3:│}{11: 6 }b | + {1: }b {3:│}{11: 7 }b | + {6:~ }{3:│}{6:~ }| + {3:[No Name] [+] }{7:[No Name] [+] }| + | + ]]) + feed('j') + screen:expect([[ + {1: }a {3:│}{11: 2 }a | + {1: }a {3:│}{11: 1 }a | + {1: }a {3:│}{10:3 }^a | + {1: }{8:x}{9: }{3:│}{11: 1 }{8:y}{9: }| + {1: }{4:x }{3:│}{11: }{2:----------------}| + {1: }{4:x }{3:│}{11: }{2:----------------}| + {1: }b {3:│}{11: 2 }b | + {1: }b {3:│}{11: 3 }b | + {1: }b {3:│}{11: 4 }b | + {1: }b {3:│}{11: 5 }b | + {1: }b {3:│}{11: 6 }b | + {6:~ }{3:│}{6:~ }| + {3:[No Name] [+] }{7:[No Name] [+] }| + | + ]]) +end) diff --git a/test/functional/ui/spell_spec.lua b/test/functional/ui/spell_spec.lua index 243b737583..2c6e586665 100644 --- a/test/functional/ui/spell_spec.lua +++ b/test/functional/ui/spell_spec.lua @@ 
-4,8 +4,9 @@ local helpers = require('test.functional.helpers')(after_each) local Screen = require('test.functional.ui.screen') local clear = helpers.clear local feed = helpers.feed -local feed_command = helpers.feed_command local insert = helpers.insert +local uname = helpers.uname +local command = helpers.command describe("'spell'", function() local screen @@ -16,12 +17,14 @@ describe("'spell'", function() screen:attach() screen:set_default_attr_ids( { [0] = {bold=true, foreground=Screen.colors.Blue}, - [1] = {special = Screen.colors.Red, undercurl = true} + [1] = {special = Screen.colors.Red, undercurl = true}, + [2] = {special = Screen.colors.Blue1, undercurl = true}, }) end) it('joins long lines #7937', function() - feed_command('set spell') + if uname() == 'openbsd' then pending('FIXME #12104', function() end) return end + command('set spell') insert([[ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, @@ -42,4 +45,26 @@ describe("'spell'", function() | ]]) end) + + it('has correct highlight at start of line', function() + insert([[ + "This is some text without any spell errors. Everything", + "should just be black, nothing wrong here.", + "", + "This line has a sepll error. and missing caps.", + "And and this is the the duplication.", + "with missing caps here.", + ]]) + command('set spell spelllang=en_nz') + screen:expect([[ + "This is some text without any spell errors. Everything", | + "should just be black, nothing wrong here.", | + "", | + "This line has a {1:sepll} error. {2:and} missing caps.", | + "{1:And and} this is {1:the the} duplication.", | + "with missing caps here.", | + ^ | + | + ]]) + end) end) |