From fb1edb2f5728d74ae811c6ab32395598cea5609b Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 88f0d781af..b951df69d6 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -4765,8 +4765,8 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) reg_getline(rp->rs_un.regsave.rs_u.pos.lnum); rp->rs_un.regsave.rs_u.pos.col -= - utf_head_off(line, - line + rp->rs_un.regsave.rs_u.pos.col - 1) + utf_head_off((char *)line, + (char *)line + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; } } else { -- cgit From bd51ac2a347c0a3efb64e4b09400b7314286844c Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index b951df69d6..9b9b33b09f 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -4039,7 +4039,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) } else { // Need to match first byte again for multi-byte. len = (int)STRLEN(opnd); - if (cstrncmp(opnd, rex.input, &len) != 0) { + if (cstrncmp((char *)opnd, (char *)rex.input, &len) != 0) { status = RA_NOMATCH; } } @@ -4270,7 +4270,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) } else { // Compare current input with back-ref in the same line. 
len = (int)(rex.reg_endp[no] - rex.reg_startp[no]); - if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0) { + if (cstrncmp((char *)rex.reg_startp[no], (char *)rex.input, &len) != 0) { status = RA_NOMATCH; } } @@ -4283,8 +4283,8 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) && rex.reg_endpos[no].lnum == rex.lnum) { // Compare back-ref within the current line. len = rex.reg_endpos[no].col - rex.reg_startpos[no].col; - if (cstrncmp(rex.line + rex.reg_startpos[no].col, - rex.input, &len) != 0) { + if (cstrncmp((char *)rex.line + rex.reg_startpos[no].col, + (char *)rex.input, &len) != 0) { status = RA_NOMATCH; } } else { @@ -4320,7 +4320,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) if (re_extmatch_in != NULL && re_extmatch_in->matches[no] != NULL) { int len = (int)STRLEN(re_extmatch_in->matches[no]); - if (cstrncmp(re_extmatch_in->matches[no], rex.input, &len) != 0) { + if (cstrncmp((char *)re_extmatch_in->matches[no], (char *)rex.input, &len) != 0) { status = RA_NOMATCH; } else { rex.input += len; @@ -5069,14 +5069,14 @@ static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *time // the loop to avoid overhead of conditions. if (!rex.reg_ic) { while ((s = (char_u *)vim_strchr((char *)s, c)) != NULL) { - if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { + if (cstrncmp((char *)s, (char *)prog->regmust, &prog->regmlen) == 0) { break; // Found it. } MB_PTR_ADV(s); } } else { while ((s = cstrchr(s, c)) != NULL) { - if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { + if (cstrncmp((char *)s, (char *)prog->regmust, &prog->regmlen) == 0) { break; // Found it. 
} MB_PTR_ADV(s); -- cgit From 49e893f296bca9eef5ff45a3d746c261d055bf10 Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 9b9b33b09f..ccf2b60da2 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -2469,7 +2469,7 @@ do_multibyte: // Need to get composing character too. for (;;) { l = utf_ptr2len((char *)regparse); - if (!utf_composinglike((char_u *)regparse, (char_u *)regparse + l)) { + if (!utf_composinglike(regparse, regparse + l)) { break; } regmbc(utf_ptr2char((char *)regparse)); @@ -3192,7 +3192,7 @@ static int regrepeat(char_u *p, long maxcount) case SKWORD: case SKWORD + ADD_NL: while (count < maxcount) { - if (vim_iswordp_buf(scan, rex.reg_buf) + if (vim_iswordp_buf((char *)scan, rex.reg_buf) && (testval || !ascii_isdigit(*scan))) { MB_PTR_ADV(scan); } else if (*scan == NUL) { @@ -3829,7 +3829,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) break; case KWORD: - if (!vim_iswordp_buf(rex.input, rex.reg_buf)) { + if (!vim_iswordp_buf((char *)rex.input, rex.reg_buf)) { status = RA_NOMATCH; } else { ADVANCE_REGINPUT(); @@ -3838,7 +3838,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) case SKWORD: if (ascii_isdigit(*rex.input) - || !vim_iswordp_buf(rex.input, rex.reg_buf)) { + || !vim_iswordp_buf((char *)rex.input, rex.reg_buf)) { status = RA_NOMATCH; } else { ADVANCE_REGINPUT(); @@ -4046,7 +4046,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) // Check for following composing character, unless %C // follows (skips over all composing chars). 
if (status != RA_NOMATCH - && utf_composinglike(rex.input, rex.input + len) + && utf_composinglike((char *)rex.input, (char *)rex.input + len) && !rex.reg_icombine && OP(next) != RE_COMPOSING) { // raaron: This code makes a composing character get @@ -4975,13 +4975,13 @@ static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_o && reg_endzpos[i].lnum == reg_startzpos[i].lnum && reg_endzpos[i].col >= reg_startzpos[i].col) { re_extmatch_out->matches[i] = - vim_strnsave(reg_getline(reg_startzpos[i].lnum) + reg_startzpos[i].col, - (size_t)(reg_endzpos[i].col - reg_startzpos[i].col)); + (char_u *)xstrnsave((char *)reg_getline(reg_startzpos[i].lnum) + reg_startzpos[i].col, + (size_t)(reg_endzpos[i].col - reg_startzpos[i].col)); } } else { if (reg_startzp[i] != NULL && reg_endzp[i] != NULL) { re_extmatch_out->matches[i] = - vim_strnsave(reg_startzp[i], (size_t)(reg_endzp[i] - reg_startzp[i])); + (char_u *)xstrnsave((char *)reg_startzp[i], (size_t)(reg_endzp[i] - reg_startzp[i])); } } } -- cgit From 73207cae611a1efb8cd17139e8228772daeb9866 Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index ccf2b60da2..63e4dd5b7e 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -1857,14 +1857,14 @@ static char_u *regatom(int *flagp) char_u *lp; ret = regnode(EXACTLY); - lp = reg_prev_sub; + lp = (char_u *)reg_prev_sub; while (*lp != NUL) { regc(*lp++); } regc(NUL); if (*reg_prev_sub != NUL) { *flagp |= HASWIDTH; - if ((lp - reg_prev_sub) == 1) { + if ((lp - (char_u *)reg_prev_sub) == 1) { *flagp |= SIMPLE; } } -- cgit From 684bc749efef0fa31395d349f4495d79ec5f3fd5 Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: 
refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 63e4dd5b7e..59a6ed28af 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -5028,8 +5028,8 @@ static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *time rex.reg_endpos = rex.reg_mmatch->endpos; } else { prog = (bt_regprog_T *)rex.reg_match->regprog; - rex.reg_startp = rex.reg_match->startp; - rex.reg_endp = rex.reg_match->endp; + rex.reg_startp = (char_u **)rex.reg_match->startp; + rex.reg_endp = (char_u **)rex.reg_match->endp; } // Be paranoid... -- cgit From 3ff46544c9872b4161fd098569c30b55fe3abd36 Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 59a6ed28af..ac33fc0f13 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -5594,7 +5594,7 @@ static char_u *regprop(char_u *op) case MOPEN + 7: case MOPEN + 8: case MOPEN + 9: - sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "MOPEN%d", OP(op) - MOPEN); p = NULL; break; case MCLOSE + 0: @@ -5609,7 +5609,7 @@ static char_u *regprop(char_u *op) case MCLOSE + 7: case MCLOSE + 8: case MCLOSE + 9: - sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "MCLOSE%d", OP(op) - MCLOSE); p = NULL; break; case BACKREF + 1: @@ -5621,7 +5621,7 @@ static char_u *regprop(char_u *op) case BACKREF + 7: case BACKREF + 8: case BACKREF + 9: - sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - 
BACKREF); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "BACKREF%d", OP(op) - BACKREF); p = NULL; break; case NOPEN: @@ -5639,7 +5639,7 @@ static char_u *regprop(char_u *op) case ZOPEN + 7: case ZOPEN + 8: case ZOPEN + 9: - sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "ZOPEN%d", OP(op) - ZOPEN); p = NULL; break; case ZCLOSE + 1: @@ -5651,7 +5651,7 @@ static char_u *regprop(char_u *op) case ZCLOSE + 7: case ZCLOSE + 8: case ZCLOSE + 9: - sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "ZCLOSE%d", OP(op) - ZCLOSE); p = NULL; break; case ZREF + 1: @@ -5663,7 +5663,7 @@ static char_u *regprop(char_u *op) case ZREF + 7: case ZREF + 8: case ZREF + 9: - sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "ZREF%d", OP(op) - ZREF); p = NULL; break; case STAR: @@ -5703,7 +5703,8 @@ static char_u *regprop(char_u *op) case BRACE_COMPLEX + 7: case BRACE_COMPLEX + 8: case BRACE_COMPLEX + 9: - sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "BRACE_COMPLEX%d", + OP(op) - BRACE_COMPLEX); p = NULL; break; case MULTIBYTECODE: @@ -5713,7 +5714,7 @@ static char_u *regprop(char_u *op) p = "NEWL"; break; default: - sprintf(buf + STRLEN(buf), "corrupt %d", OP(op)); + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "corrupt %d", OP(op)); p = NULL; break; } -- cgit From 78e69412acb481c7ad56e68c541f5c5383992d5b Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Sat, 5 Nov 2022 15:51:26 +0800 Subject: vim-patch:8.2.4688: new regexp engine does not give an error for "\%v" Problem: New regexp engine does not give an error for "\%v". Solution: Check for a value argument. 
(issue vim/vim#10079) https://github.com/vim/vim/commit/91ff3d4f52a55a7c37a52aaad524cd9dd12efae4 Co-authored-by: Bram Moolenaar --- src/nvim/regexp_bt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index ac33fc0f13..bde2962d3b 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -2117,7 +2117,7 @@ static char_u *regatom(int *flagp) break; } else if (c == 'l' || c == 'c' || c == 'v') { if (cur && n) { - semsg(_(e_regexp_number_after_dot_pos_search), no_Magic(c)); + semsg(_(e_regexp_number_after_dot_pos_search_chr), no_Magic(c)); rc_did_emsg = true; return NULL; } -- cgit From 77e25e56d8ccc0c174305f9fe64ad06f0223ab2d Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Sat, 5 Nov 2022 15:56:15 +0800 Subject: vim-patch:8.2.4693: new regexp does not accept pattern "\%>0v" Problem: new regexp does not accept pattern "\%>0v". Solution: Do accept digit zero. https://github.com/vim/vim/commit/72bb10df1fb3eb69bc91f5babfb8881ce098cba1 Co-authored-by: Bram Moolenaar --- src/nvim/regexp_bt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index bde2962d3b..f0efd06cc0 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -2091,6 +2091,7 @@ static char_u *regatom(int *flagp) uint32_t n = 0; int cmp; bool cur = false; + bool got_digit = false; cmp = c; if (cmp == '<' || cmp == '>') { @@ -2101,6 +2102,7 @@ static char_u *regatom(int *flagp) c = getchr(); } while (ascii_isdigit(c)) { + got_digit = true; n = n * 10 + (uint32_t)(c - '0'); c = getchr(); } @@ -2115,7 +2117,7 @@ static char_u *regatom(int *flagp) *regcode++ = (char_u)cmp; } break; - } else if (c == 'l' || c == 'c' || c == 'v') { + } else if ((c == 'l' || c == 'c' || c == 'v') && (cur || got_digit)) { if (cur && n) { semsg(_(e_regexp_number_after_dot_pos_search_chr), no_Magic(c)); rc_did_emsg = true; -- cgit 
From b84666d2a0dc4a7585ef6aa5a8f9060046ff9082 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Sat, 5 Nov 2022 15:59:17 +0800 Subject: vim-patch:8.2.4978: no error if engine selection atom is not at the start Problem: No error if engine selection atom is not at the start. Solution: Give an error. (Christian Brabandt, closes vim/vim#10439) https://github.com/vim/vim/commit/360da40b47a84ee8586c3b5d062f8c64a2ac9cc6 Co-authored-by: Christian Brabandt --- src/nvim/regexp_bt.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index f0efd06cc0..6f63b38a90 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -1971,6 +1971,11 @@ static char_u *regatom(int *flagp) break; case '#': + if (regparse[0] == '=' && regparse[1] >= 48 && regparse[1] <= 50) { + // misplaced \%#=1 + semsg(_(e_atom_engine_must_be_at_start_of_pattern), regparse[1]); + return FAIL; + } ret = regnode(CURSOR); break; -- cgit From bdb98de2d16ce7185a0f53740e06511904fdd814 Mon Sep 17 00:00:00 2001 From: Lewis Russell Date: Mon, 7 Nov 2022 10:21:44 +0000 Subject: refactor: more clint (#20910) --- src/nvim/regexp_bt.c | 604 ++++++++++++++++++++++----------------------------- 1 file changed, 264 insertions(+), 340 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 6f63b38a90..7b5f4cd12a 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -1,137 +1,130 @@ // This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com -/* - * - * Backtracking regular expression implementation. - * - * This file is included in "regexp.c". - * - * NOTICE: - * - * This is NOT the original regular expression code as written by Henry - * Spencer. This code has been modified specifically for use with the VIM - * editor, and should not be used separately from Vim. 
If you want a good - * regular expression library, get the original code. The copyright notice - * that follows is from the original. - * - * END NOTICE - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions. Serious changes in - * regular-expression syntax might require a total rethink. - * - * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert - * Webb, Ciaran McCreesh and Bram Moolenaar. - * Named character class support added by Walter Briscoe (1998 Jul 01) - */ - -/* - * The "internal use only" fields in regexp_defs.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - * - * regstart char that must begin a match; NUL if none obvious; Can be a - * multi-byte character. - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * regmlen length of regmust string - * regflags RF_ values or'ed together - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection - * of lines that cannot possibly match. 
The regmust tests are costly enough - * that vim_regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup). Regmlen is - * supplied because the test in vim_regexec() needs it and vim_regcomp() is - * computing it anyway. - */ - -/* - * Structure for regexp "program". This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology). Each node is an opcode - * plus a "next" pointer, possibly plus an operand. "Next" pointers of - * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" - * pointer with a BRANCH on both ends of it is connecting two alternatives. - * (Here we have one of the subtle syntax dependencies: an individual BRANCH - * (as opposed to a collection of them) is never concatenated with anything - * because of operator precedence). The "next" pointer of a BRACES_COMPLEX - * node points to the node after the stuff to be repeated. - * The operand of some types of node is a literal string; for others, it is a - * node leading into a sub-FSM. In particular, the operand of a BRANCH node - * is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects to the - * thing following the set of BRANCHes.) 
- * - * pattern is coded like: - * - * +-----------------+ - * | V - * \| BRANCH BRANCH --> END - * | ^ | ^ - * +------+ +----------+ - * - * - * +------------------+ - * V | - * * BRANCH BRANCH --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +---------------+ | - * +---------------------------------------------+ - * - * - * +----------------------+ - * V | - * \+ BRANCH --> BRANCH --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +-----------+ | - * +--------------------------------------------------+ - * - * - * +-------------------------+ - * V | - * \{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX --> BACK END - * | | ^ - * | +----------------+ - * +-----------------------------------------------+ - * - * - * \@! BRANCH NOMATCH --> END --> END - * | | ^ ^ - * | +----------------+ | - * +--------------------------------+ - * - * +---------+ - * | V - * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END - * | | | | ^ ^ - * | | | +-----+ | - * | | +----------------+ | - * | +---------------------------+ | - * +------------------------------------------------------+ - * - * They all start with a BRANCH for "\|" alternatives, even when there is only - * one alternative. - */ +// Backtracking regular expression implementation. +// +// This file is included in "regexp.c". +// +// NOTICE: +// +// This is NOT the original regular expression code as written by Henry +// Spencer. This code has been modified specifically for use with the VIM +// editor, and should not be used separately from Vim. If you want a good +// regular expression library, get the original code. The copyright notice +// that follows is from the original. +// +// END NOTICE +// +// Copyright (c) 1986 by University of Toronto. +// Written by Henry Spencer. Not derived from licensed software. +// +// Permission is granted to anyone to use this software for any +// purpose on any computer system, and to redistribute it freely, +// subject to the following restrictions: +// +// 1. 
The author is not responsible for the consequences of use of +// this software, no matter how awful, even if they arise +// from defects in it. +// +// 2. The origin of this software must not be misrepresented, either +// by explicit claim or by omission. +// +// 3. Altered versions must be plainly marked as such, and must not +// be misrepresented as being the original software. +// +// Beware that some of this code is subtly aware of the way operator +// precedence is structured in regular expressions. Serious changes in +// regular-expression syntax might require a total rethink. +// +// Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert +// Webb, Ciaran McCreesh and Bram Moolenaar. +// Named character class support added by Walter Briscoe (1998 Jul 01) + +// The "internal use only" fields in regexp_defs.h are present to pass info from +// compile to execute that permits the execute phase to run lots faster on +// simple cases. They are: +// +// regstart char that must begin a match; NUL if none obvious; Can be a +// multi-byte character. +// reganch is the match anchored (at beginning-of-line only)? +// regmust string (pointer into program) that match must include, or NULL +// regmlen length of regmust string +// regflags RF_ values or'ed together +// +// Regstart and reganch permit very fast decisions on suitable starting points +// for a match, cutting down the work a lot. Regmust permits fast rejection +// of lines that cannot possibly match. The regmust tests are costly enough +// that vim_regcomp() supplies a regmust only if the r.e. contains something +// potentially expensive (at present, the only such thing detected is * or + +// at the start of the r.e., which can involve a lot of backup). Regmlen is +// supplied because the test in vim_regexec() needs it and vim_regcomp() is +// computing it anyway. + +// Structure for regexp "program". 
This is essentially a linear encoding +// of a nondeterministic finite-state machine (aka syntax charts or +// "railroad normal form" in parsing technology). Each node is an opcode +// plus a "next" pointer, possibly plus an operand. "Next" pointers of +// all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" +// pointer with a BRANCH on both ends of it is connecting two alternatives. +// (Here we have one of the subtle syntax dependencies: an individual BRANCH +// (as opposed to a collection of them) is never concatenated with anything +// because of operator precedence). The "next" pointer of a BRACES_COMPLEX +// node points to the node after the stuff to be repeated. +// The operand of some types of node is a literal string; for others, it is a +// node leading into a sub-FSM. In particular, the operand of a BRANCH node +// is the first node of the branch. +// (NB this is *not* a tree structure: the tail of the branch connects to the +// thing following the set of BRANCHes.) +// +// pattern is coded like: +// +// +-----------------+ +// | V +// \| BRANCH BRANCH --> END +// | ^ | ^ +// +------+ +----------+ +// +// +// +------------------+ +// V | +// * BRANCH BRANCH --> BACK BRANCH --> NOTHING --> END +// | | ^ ^ +// | +---------------+ | +// +---------------------------------------------+ +// +// +// +----------------------+ +// V | +// \+ BRANCH --> BRANCH --> BACK BRANCH --> NOTHING --> END +// | | ^ ^ +// | +-----------+ | +// +--------------------------------------------------+ +// +// +// +-------------------------+ +// V | +// \{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX --> BACK END +// | | ^ +// | +----------------+ +// +-----------------------------------------------+ +// +// +// \@! 
BRANCH NOMATCH --> END --> END +// | | ^ ^ +// | +----------------+ | +// +--------------------------------+ +// +// +---------+ +// | V +// \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END +// | | | | ^ ^ +// | | | +-----+ | +// | | +----------------+ | +// | +---------------------------+ | +// +------------------------------------------------------+ +// +// They all start with a BRANCH for "\|" alternatives, even when there is only +// one alternative. #include #include @@ -141,9 +134,7 @@ #include "nvim/garray.h" #include "nvim/regexp.h" -/* - * The opcodes are: - */ +// The opcodes are: // definition number opnd? meaning #define END 0 // End of program or NOMATCH operand. @@ -240,9 +231,7 @@ #define RE_VISUAL 208 // Match Visual area #define RE_COMPOSING 209 // any composing characters -/* - * Flags to be passed up and down. - */ +// Flags to be passed up and down. #define HASWIDTH 0x1 // Known never to match null string. #define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand. #define SPSTART 0x4 // Starts with * or +. @@ -273,10 +262,8 @@ static int classcodes[] = { UPPER, NUPPER }; -/* - * When regcode is set to this value, code is not emitted and size is computed - * instead. - */ +// When regcode is set to this value, code is not emitted and size is computed +// instead. #define JUST_CALC_SIZE ((char_u *)-1) // Values for rs_state in regitem_T. @@ -297,11 +284,9 @@ typedef enum regstate_E { RS_STAR_SHORT, // STAR/PLUS/BRACE_SIMPLE shortest match } regstate_T; -/* - * Structure used to save the current input state, when it needs to be - * restored after trying a match. Used by reg_save() and reg_restore(). - * Also stores the length of "backpos". - */ +// Structure used to save the current input state, when it needs to be +// restored after trying a match. Used by reg_save() and reg_restore(). +// Also stores the length of "backpos". 
typedef struct { union { char_u *ptr; // rex.input pointer, for single-line regexp @@ -327,12 +312,10 @@ typedef struct regbehind_S { save_se_T save_end[NSUBEXP]; } regbehind_T; -/* - * When there are alternatives a regstate_T is put on the regstack to remember - * what we are doing. - * Before it may be another type of item, depending on rs_state, to remember - * more things. - */ +// When there are alternatives a regstate_T is put on the regstack to remember +// what we are doing. +// Before it may be another type of item, depending on rs_state, to remember +// more things. typedef struct regitem_S { regstate_T rs_state; // what we are doing, one of RS_ above int16_t rs_no; // submatch nr or BEHIND/NOBEHIND @@ -359,69 +342,63 @@ typedef struct backpos_S { regsave_T bp_pos; // last input position } backpos_T; -/* - * "regstack" and "backpos" are used by regmatch(). They are kept over calls - * to avoid invoking malloc() and free() often. - * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T - * or regbehind_T. - * "backpos_T" is a table with backpos_T for BACK - */ +// "regstack" and "backpos" are used by regmatch(). They are kept over calls +// to avoid invoking malloc() and free() often. +// "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T +// or regbehind_T. +// "backpos_T" is a table with backpos_T for BACK static garray_T regstack = GA_EMPTY_INIT_VALUE; static garray_T backpos = GA_EMPTY_INIT_VALUE; static regsave_T behind_pos; -/* - * Both for regstack and backpos tables we use the following strategy of - * allocation (to reduce malloc/free calls): - * - Initial size is fairly small. - * - When needed, the tables are grown bigger (8 times at first, double after - * that). - * - After executing the match we free the memory only if the array has grown. - * Thus the memory is kept allocated when it's at the initial size. - * This makes it fast while not keeping a lot of memory allocated. 
- * A three times speed increase was observed when using many simple patterns. - */ +// Both for regstack and backpos tables we use the following strategy of +// allocation (to reduce malloc/free calls): +// - Initial size is fairly small. +// - When needed, the tables are grown bigger (8 times at first, double after +// that). +// - After executing the match we free the memory only if the array has grown. +// Thus the memory is kept allocated when it's at the initial size. +// This makes it fast while not keeping a lot of memory allocated. +// A three times speed increase was observed when using many simple patterns. #define REGSTACK_INITIAL 2048 #define BACKPOS_INITIAL 64 -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '=', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX - * node, and defines the min and max limits to be used for that - * node. - * - * MOPEN,MCLOSE ...are numbered at compile time. - * ZOPEN,ZCLOSE ...ditto - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit bytes, high order first. The - * value is a positive offset from the opcode of the node containing it. 
- * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ +// Opcode notes: +// +// BRANCH The set of branches constituting a single choice are hooked +// together with their "next" pointers, since precedence prevents +// anything being concatenated to any individual branch. The +// "next" pointer of the last BRANCH in a choice points to the +// thing following the whole choice. This is also where the +// final "next" pointer of each individual branch points; each +// branch starts with the operand node of a BRANCH node. +// +// BACK Normal "next" pointers all implicitly point forward; BACK +// exists to make loop structures possible. +// +// STAR,PLUS '=', and complex '*' and '+', are implemented as circular +// BRANCH structures using BACK. Simple cases (one character +// per match) are implemented with STAR and PLUS for speed +// and to minimize recursive plunges. +// +// BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX +// node, and defines the min and max limits to be used for that +// node. +// +// MOPEN,MCLOSE ...are numbered at compile time. +// ZOPEN,ZCLOSE ...ditto +/// +// +// +// A node is one char of opcode followed by two chars of "next" pointer. +// "Next" pointers are stored as two 8-bit bytes, high order first. The +// value is a positive offset from the opcode of the node containing it. +// An operand, if any, simply follows the node. (Note that much of the +// code generation knows about this implicit relationship.) +// +// Using two bytes for the "next" pointer is vast overkill for most things, +// but allows patterns to get big without disasters. 
#define OP(p) ((int)(*(p))) #define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) #define OPERAND(p) ((p) + 3) @@ -449,9 +426,7 @@ static int regnarrate = 0; # include "regexp_bt.c.generated.h" #endif -/* - * Setup to parse the regexp. Used once to get the length and once to do it. - */ +// Setup to parse the regexp. Used once to get the length and once to do it. static void regcomp_start(char_u *expr, int re_flags) // see vim_regcomp() { initchr(expr); @@ -484,9 +459,7 @@ static bool use_multibytecode(int c) || utf_iscomposing(c)); } -/* - * Emit (if appropriate) a byte of code - */ +// Emit (if appropriate) a byte of code static void regc(int b) { if (regcode == JUST_CALC_SIZE) { @@ -496,9 +469,7 @@ static void regc(int b) } } -/* - * Emit (if appropriate) a multi-byte character of code - */ +// Emit (if appropriate) a multi-byte character of code static void regmbc(int c) { if (regcode == JUST_CALC_SIZE) { @@ -508,11 +479,9 @@ static void regmbc(int c) } } -/* - * Produce the bytes for equivalence class "c". - * Currently only handles latin1, latin9 and utf-8. - * NOTE: When changing this function, also change nfa_emit_equi_class() - */ +// Produce the bytes for equivalence class "c". +// Currently only handles latin1, latin9 and utf-8. +// NOTE: When changing this function, also change nfa_emit_equi_class() static void reg_equi_class(int c) { { @@ -1481,10 +1450,8 @@ static void reg_equi_class(int c) regmbc(c); } -/* - * Emit a node. - * Return pointer to generated code. - */ +// Emit a node. +// Return pointer to generated code. static char_u *regnode(int op) { char_u *ret; @@ -1500,9 +1467,7 @@ static char_u *regnode(int op) return ret; } -/* - * Write a four bytes number at "p" and return pointer to the next char. - */ +// Write a four bytes number at "p" and return pointer to the next char. 
static char_u *re_put_uint32(char_u *p, uint32_t val) { *p++ = (char_u)((val >> 24) & 0377); @@ -1512,11 +1477,9 @@ static char_u *re_put_uint32(char_u *p, uint32_t val) return p; } -/* - * regnext - dig the "next" pointer out of a node - * Returns NULL when calculating size, when there is no next item and when - * there is an error. - */ +// regnext - dig the "next" pointer out of a node +// Returns NULL when calculating size, when there is no next item and when +// there is an error. static char_u *regnext(char_u *p) FUNC_ATTR_NONNULL_ALL { @@ -1573,9 +1536,7 @@ static void regtail(char_u *p, char_u *val) } } -/* - * Like regtail, on item after a BRANCH; nop if none. - */ +// Like regtail, on item after a BRANCH; nop if none. static void regoptail(char_u *p, char_u *val) { // When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" @@ -1587,11 +1548,9 @@ static void regoptail(char_u *p, char_u *val) regtail(OPERAND(p), val); } -/* - * Insert an operator in front of already-emitted operand - * - * Means relocating the operand. - */ +// Insert an operator in front of already-emitted operand +// +// Means relocating the operand. static void reginsert(int op, char_u *opnd) { char_u *src; @@ -1615,10 +1574,8 @@ static void reginsert(int op, char_u *opnd) *place = NUL; } -/* - * Insert an operator in front of already-emitted operand. - * Add a number to the operator. - */ +// Insert an operator in front of already-emitted operand. +// Add a number to the operator. static void reginsert_nr(int op, long val, char_u *opnd) { char_u *src; @@ -1644,12 +1601,10 @@ static void reginsert_nr(int op, long val, char_u *opnd) re_put_uint32(place, (uint32_t)val); } -/* - * Insert an operator in front of already-emitted operand. - * The operator has the given limit values as operands. Also set next pointer. - * - * Means relocating the operand. - */ +// Insert an operator in front of already-emitted operand. +// The operator has the given limit values as operands. 
Also set next pointer. +// +// Means relocating the operand. static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) { char_u *src; @@ -1704,13 +1659,11 @@ static int seen_endbrace(int refnum) return true; } -/* - * Parse the lowest level. - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Don't do this when one_exactly is set. - */ +// Parse the lowest level. +// +// Optimization: gobbles an entire sequence of ordinary characters so that +// it can turn them into a single node, which is smaller to store and +// faster to run. Don't do this when one_exactly is set. static char_u *regatom(int *flagp) { char_u *ret; @@ -2289,8 +2242,7 @@ collection: if (c_class != 0) { // produce equivalence class reg_equi_class(c_class); - } else if ((c_class = - get_coll_element(&regparse)) != 0) { + } else if ((c_class = get_coll_element(&regparse)) != 0) { // produce a collating element regmbc(c_class); } else { @@ -2466,7 +2418,7 @@ do_multibyte: for (len = 0; c != NUL && (len == 0 || (re_multi_type(peekchr()) == NOT_MULTI && !one_exactly - && !is_Magic(c))); ++len) { + && !is_Magic(c))); len++) { c = no_Magic(c); { regmbc(c); @@ -2500,15 +2452,13 @@ do_multibyte: return ret; } -/* - * Parse something followed by possible [*+=]. - * - * Note that the branching code sequences used for = and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch.
+// It might seem that this node could be dispensed with entirely, but the +// endmarker role is not redundant. static char_u *regpiece(int *flagp) { char_u *ret; @@ -2644,10 +2594,8 @@ static char_u *regpiece(int *flagp) return ret; } -/* - * Parse one alternative of an | or & operator. - * Implements the concatenation operator. - */ +// Parse one alternative of an | or & operator. +// Implements the concatenation operator. static char_u *regconcat(int *flagp) { char_u *first = NULL; @@ -2722,10 +2670,8 @@ static char_u *regconcat(int *flagp) return first; } -/* - * Parse one alternative of an | operator. - * Implements the & operator. - */ +// Parse one alternative of an | operator. +// Implements the & operator. static char_u *regbranch(int *flagp) { char_u *ret; @@ -2874,27 +2820,25 @@ static char_u *reg(int paren, int *flagp) return ret; } -/* - * bt_regcomp() - compile a regular expression into internal code for the - * traditional back track matcher. - * Returns the program in allocated space. Returns NULL for an error. - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) - * - * Whether upper/lower case is to be ignored is decided when executing the - * program, it does not matter here. - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - * "re_flags": RE_MAGIC and/or RE_STRING. 
- */ +// bt_regcomp() - compile a regular expression into internal code for the +// traditional back track matcher. +// Returns the program in allocated space. Returns NULL for an error. +// +// We can't allocate space until we know how big the compiled form will be, +// but we can't compile it (and thus know how big it is) until we've got a +// place to put the code. So we cheat: we compile it twice, once with code +// generation turned off and size counting turned on, and once "for real". +// This also means that we don't allocate space until we are sure that the +// thing really will compile successfully, and we never have to move the +// code and thus invalidate pointers into it. (Note that it has to be in +// one piece because free() must be able to free it all.) +// +// Whether upper/lower case is to be ignored is decided when executing the +// program, it does not matter here. +// +// Beware that the optimization-preparation code in here knows about some +// of the structure of the compiled regexp. +// "re_flags": RE_MAGIC and/or RE_STRING. static regprog_T *bt_regcomp(char_u *expr, int re_flags) { char_u *scan; @@ -2999,19 +2943,15 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) return (regprog_T *)r; } -/* - * Check if during the previous call to vim_regcomp the EOL item "$" has been - * found. This is messy, but it works fine. - */ +// Check if during the previous call to vim_regcomp the EOL item "$" has been +// found. This is messy, but it works fine. int vim_regcomp_had_eol(void) { return had_eol; } -/* - * Get a number after a backslash that is inside []. - * When nothing is recognized return a backslash. - */ +// Get a number after a backslash that is inside []. +// When nothing is recognized return a backslash. static int coll_get_char(void) { int64_t nr = -1; @@ -3037,9 +2977,7 @@ static int coll_get_char(void) return (int)nr; } -/* - * Free a compiled regexp program, returned by bt_regcomp(). 
- */ +// Free a compiled regexp program, returned by bt_regcomp(). static void bt_regfree(regprog_T *prog) { xfree(prog); @@ -3047,11 +2985,9 @@ static void bt_regfree(regprog_T *prog) #define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) -/* - * The arguments from BRACE_LIMITS are stored here. They are actually local - * to regmatch(), but they are here to reduce the amount of stack space used - * (it can be called recursively many times). - */ +// The arguments from BRACE_LIMITS are stored here. They are actually local +// to regmatch(), but they are here to reduce the amount of stack space used +// (it can be called recursively many times). static long bl_minval; static long bl_maxval; @@ -3108,13 +3044,11 @@ static bool reg_save_equal(const regsave_T *save) else /* NOLINT */ \ *(pp) = (savep)->se_u.ptr; } -/* - * Tentatively set the sub-expression start to the current position (after - * calling regmatch() they will have changed). Need to save the existing - * values for when there is no match. - * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), - * depending on REG_MULTI. - */ +// Tentatively set the sub-expression start to the current position (after +// calling regmatch() they will have changed). Need to save the existing +// values for when there is no match. +// Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), +// depending on REG_MULTI. static void save_se_multi(save_se_T *savep, lpos_T *posp) { savep->se_u.pos = *posp; @@ -3494,10 +3428,8 @@ do_class: return (int)count; } -/* - * Push an item onto the regstack. - * Returns pointer to new item. Returns NULL when out of memory. - */ +// Push an item onto the regstack. +// Returns pointer to new item. Returns NULL when out of memory. static regitem_T *regstack_push(regstate_T state, char_u *scan) { regitem_T *rp; @@ -3516,9 +3448,7 @@ static regitem_T *regstack_push(regstate_T state, char_u *scan) return rp; } -/* - * Pop an item from the regstack. 
- */ +// Pop an item from the regstack. static void regstack_pop(char_u **scan) { regitem_T *rp; @@ -4643,7 +4573,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) // Pop the state. Restore pointers when there is no match. if (status == RA_NOMATCH) { reg_restore(&rp->rs_un.regsave, &backpos); - --brace_count[rp->rs_no]; // decrement match count + brace_count[rp->rs_no]--; // decrement match count } regstack_pop(&scan); break; @@ -4653,7 +4583,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) if (status == RA_NOMATCH) { // There was no match, but we did find enough matches. reg_restore(&rp->rs_un.regsave, &backpos); - --brace_count[rp->rs_no]; + brace_count[rp->rs_no]--; // continue with the items after "\{}" status = RA_CONT; } @@ -5247,9 +5177,7 @@ static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T return bt_regexec_both(NULL, col, tm, timed_out); } -/* - * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. - */ +// Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. 
static int re_num_cmp(uint32_t val, char_u *scan) { uint32_t n = (uint32_t)OPERAND_MIN(scan); @@ -5265,9 +5193,7 @@ static int re_num_cmp(uint32_t val, char_u *scan) #ifdef BT_REGEXP_DUMP -/* - * regdump - dump a regexp onto stdout in vaguely comprehensible form - */ +// regdump - dump a regexp onto stdout in vaguely comprehensible form static void regdump(char_u *pattern, bt_regprog_T *r) { char_u *s; @@ -5353,9 +5279,7 @@ static void regdump(char_u *pattern, bt_regprog_T *r) #ifdef REGEXP_DEBUG -/* - * regprop - printable representation of opcode - */ +// regprop - printable representation of opcode static char_u *regprop(char_u *op) { char *p; -- cgit From 66360675cf4d091b7460e4a8e1435c13216c1929 Mon Sep 17 00:00:00 2001 From: dundargoc Date: Sun, 11 Sep 2022 17:12:44 +0200 Subject: build: allow IWYU to fix includes for all .c files Allow Include What You Use to remove unnecessary includes and only include what is necessary. This helps with reducing compilation times and makes it easier to visualise which dependencies are actually required. Work on https://github.com/neovim/neovim/issues/549, but doesn't close it since this only works fully for .c files and not headers. --- src/nvim/regexp_bt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 7b5f4cd12a..2ac96997fc 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -132,6 +132,7 @@ #include #include "nvim/garray.h" +#include "nvim/profile.h" #include "nvim/regexp.h" // The opcodes are: -- cgit From f2b30b4d62b97da6ae1b4dd7c4e5730fc5bc95f7 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Sat, 19 Nov 2022 10:57:06 +0800 Subject: vim-patch:8.2.0260: several lines of code are duplicated (#21108) Problem: Several lines of code are duplicated. Solution: Move duplicated code to a function. 
(Yegappan Lakshmanan, closes vim/vim#5330) https://github.com/vim/vim/commit/f4140488c72cad4dbf5449dba099cfa7de7bbb22 Using sizeof seems better than ARRAY_SIZE for vim_snprintf(). --- src/nvim/regexp_bt.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 2ac96997fc..2337dbb51c 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -5164,17 +5164,7 @@ static int bt_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col, bool line_l static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm, int *timed_out) { - rex.reg_match = NULL; - rex.reg_mmatch = rmp; - rex.reg_buf = buf; - rex.reg_win = win; - rex.reg_firstlnum = lnum; - rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; - rex.reg_line_lbr = false; - rex.reg_ic = rmp->rmm_ic; - rex.reg_icombine = false; - rex.reg_maxcol = rmp->rmm_maxcol; - + init_regexec_multi(rmp, win, buf, lnum); return bt_regexec_both(NULL, col, tm, timed_out); } -- cgit From 7e9981d246a9d46f19dc6283664c229ae2efe727 Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Sat, 26 Nov 2022 21:28:47 +0800 Subject: vim-patch:9.0.0951: trying every character position for a match is inefficient (#21198) Problem: Trying every character position for a match is inefficient. Solution: Use the start position of the match ignoring "\zs". 
https://github.com/vim/vim/commit/01105b37a108022515d364201767f7f111ec4222 Co-authored-by: Bram Moolenaar --- src/nvim/regexp_bt.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 2337dbb51c..9411d17f57 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -4930,15 +4930,16 @@ static long regtry(bt_regprog_T *prog, colnr_T col, proftime_T *tm, int *timed_o /// Match a regexp against a string ("line" points to the string) or multiple /// lines (if "line" is NULL, use reg_getline()). /// -/// @param col column to start search +/// @param startcol column to start looking for match /// @param tm timeout limit or NULL /// @param timed_out flag set on timeout or NULL /// /// @return 0 for failure, or number of lines contained in the match. -static long bt_regexec_both(char_u *line, colnr_T col, proftime_T *tm, int *timed_out) +static long bt_regexec_both(char_u *line, colnr_T startcol, proftime_T *tm, int *timed_out) { bt_regprog_T *prog; char_u *s; + colnr_T col = startcol; long retval = 0L; // Create "regstack" and "backpos" if they are not allocated yet. @@ -5113,10 +5114,18 @@ theend: || (end->lnum == start->lnum && end->col < start->col)) { rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0]; } + + // startpos[0] may be set by "\zs", also return the column where + // the whole pattern matched. + rex.reg_mmatch->rmm_matchcol = col; } else { if (rex.reg_match->endp[0] < rex.reg_match->startp[0]) { rex.reg_match->endp[0] = rex.reg_match->startp[0]; } + + // startpos[0] may be set by "\zs", also return the column where + // the whole pattern matched. 
+ rex.reg_match->rm_matchcol = col; } } -- cgit From bd22585061b66d7f71d4832b4a81e950b3c9d19d Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 2337dbb51c..b430c95a6e 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -2928,9 +2928,9 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) longest = NULL; len = 0; for (; scan != NULL; scan = regnext(scan)) { - if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len) { + if (OP(scan) == EXACTLY && strlen((char *)OPERAND(scan)) >= (size_t)len) { longest = OPERAND(scan); - len = (int)STRLEN(OPERAND(scan)); + len = (int)strlen((char *)OPERAND(scan)); } } r->regmust = longest; @@ -3658,7 +3658,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) pos = &fm->mark; const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum && pos->col == MAXCOL - ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) + ? (colnr_T)strlen((char *)reg_getline(pos->lnum - rex.reg_firstlnum)) : pos->col; if (pos->lnum == rex.lnum + rex.reg_firstlnum @@ -3976,7 +3976,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) len = 1; // matched a single byte above } else { // Need to match first byte again for multi-byte. 
- len = (int)STRLEN(opnd); + len = (int)strlen((char *)opnd); if (cstrncmp((char *)opnd, (char *)rex.input, &len) != 0) { status = RA_NOMATCH; } @@ -4257,7 +4257,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) no = op - ZREF; if (re_extmatch_in != NULL && re_extmatch_in->matches[no] != NULL) { - int len = (int)STRLEN(re_extmatch_in->matches[no]); + int len = (int)strlen((char *)re_extmatch_in->matches[no]); if (cstrncmp((char *)re_extmatch_in->matches[no], (char *)rex.input, &len) != 0) { status = RA_NOMATCH; } else { @@ -4683,7 +4683,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) if (limit > 0 && ((rp->rs_un.regsave.rs_u.pos.lnum < behind_pos.rs_u.pos.lnum - ? (colnr_T)STRLEN(rex.line) + ? (colnr_T)strlen((char *)rex.line) : behind_pos.rs_u.pos.col) - rp->rs_un.regsave.rs_u.pos.col >= limit)) { no = FAIL; @@ -4696,7 +4696,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) } else { reg_restore(&rp->rs_un.regsave, &backpos); rp->rs_un.regsave.rs_u.pos.col = - (colnr_T)STRLEN(rex.line); + (colnr_T)strlen((char *)rex.line); } } else { const char_u *const line = @@ -4787,7 +4787,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) if (rex.line == NULL) { break; } - rex.input = rex.line + STRLEN(rex.line); + rex.input = rex.line + strlen((char *)rex.line); fast_breakcheck(); } else { MB_PTR_BACK(rex.line, rex.input); -- cgit From 614d382621fa0b9d19287b63edb39b637409c581 Mon Sep 17 00:00:00 2001 From: "Justin M. 
Keyes" Date: Fri, 16 Dec 2022 17:33:52 +0100 Subject: refactor: rename mch_msg => os_msg --- src/nvim/regexp_bt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 19b89bef74..4e2fa54c26 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -3539,8 +3539,8 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) #ifdef REGEXP_DEBUG if (scan != NULL && regnarrate) { - mch_errmsg((char *)regprop(scan)); - mch_errmsg("(\n"); + os_errmsg((char *)regprop(scan)); + os_errmsg("(\n"); } #endif @@ -3566,18 +3566,18 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) #ifdef REGEXP_DEBUG if (regnarrate) { - mch_errmsg((char *)regprop(scan)); - mch_errmsg("...\n"); + os_errmsg((char *)regprop(scan)); + os_errmsg("...\n"); if (re_extmatch_in != NULL) { int i; - mch_errmsg(_("External submatches:\n")); + os_errmsg(_("External submatches:\n")); for (i = 0; i < NSUBEXP; i++) { - mch_errmsg(" \""); + os_errmsg(" \""); if (re_extmatch_in->matches[i] != NULL) { - mch_errmsg((char *)re_extmatch_in->matches[i]); + os_errmsg((char *)re_extmatch_in->matches[i]); } - mch_errmsg("\"\n"); + os_errmsg("\"\n"); } } } -- cgit From 08c2c7480619ccdf0c92fe6ce76da5b73b0e395b Mon Sep 17 00:00:00 2001 From: dundargoc Date: Sat, 26 Nov 2022 18:57:46 +0100 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 4e2fa54c26..810b35a77d 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -3716,7 +3716,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) } else { // Get class of current and previous char (if it exists). 
const int this_class = - mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); + mb_get_class_tab((char *)rex.input, rex.reg_buf->b_chartab); if (this_class <= 1) { status = RA_NOMATCH; // Not on a word at all. } else if (reg_prev_class() == this_class) { @@ -3732,7 +3732,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) int this_class, prev_class; // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); + this_class = mb_get_class_tab((char *)rex.input, rex.reg_buf->b_chartab); prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) { -- cgit From e89c39d6f016a4140293755250e968e839009617 Mon Sep 17 00:00:00 2001 From: dundargoc <33953936+dundargoc@users.noreply.github.com> Date: Sat, 14 Jan 2023 08:58:28 +0100 Subject: refactor: replace char_u with char 21 (#21779) refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/regexp_bt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/regexp_bt.c') diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c index 810b35a77d..f930d872b6 100644 --- a/src/nvim/regexp_bt.c +++ b/src/nvim/regexp_bt.c @@ -3703,7 +3703,7 @@ static bool regmatch(char_u *scan, proftime_T *tm, int *timed_out) if (!re_num_cmp(win_linetabsize(rex.reg_win == NULL ? curwin : rex.reg_win, rex.reg_firstlnum + rex.lnum, - rex.line, + (char *)rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan)) { status = RA_NOMATCH; -- cgit