diff options
Diffstat (limited to 'src/nvim/regexp.c')
| -rw-r--r-- | src/nvim/regexp.c | 5503 | 
1 files changed, 277 insertions, 5226 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index b7f11b2de0..412cdac21b 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -5,41 +5,6 @@  /*   * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() - * - * NOTICE: - * - * This is NOT the original regular expression code as written by Henry - * Spencer.  This code has been modified specifically for use with the VIM - * editor, and should not be used separately from Vim.  If you want a good - * regular expression library, get the original code.  The copyright notice - * that follows is from the original. - * - * END NOTICE - * - *	Copyright (c) 1986 by University of Toronto. - *	Written by Henry Spencer.  Not derived from licensed software. - * - *	Permission is granted to anyone to use this software for any - *	purpose on any computer system, and to redistribute it freely, - *	subject to the following restrictions: - * - *	1. The author is not responsible for the consequences of use of - *		this software, no matter how awful, even if they arise - *		from defects in it. - * - *	2. The origin of this software must not be misrepresented, either - *		by explicit claim or by omission. - * - *	3. Altered versions must be plainly marked as such, and must not - *		be misrepresented as being the original software. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions.  Serious changes in - * regular-expression syntax might require a total rethink. - * - * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert - * Webb, Ciaran McCreesh and Bram Moolenaar. 
- * Named character class support added by Walter Briscoe (1998 Jul 01)   */  // By default: do not create debugging logs or files related to regular @@ -70,205 +35,15 @@  #include "nvim/strings.h"  #ifdef REGEXP_DEBUG -/* show/save debugging data when BT engine is used */ +// show/save debugging data when BT engine is used  # define BT_REGEXP_DUMP -/* save the debugging data to a file instead of displaying it */ +// save the debugging data to a file instead of displaying it  # define BT_REGEXP_LOG  # define BT_REGEXP_DEBUG_LOG  # define BT_REGEXP_DEBUG_LOG_NAME       "bt_regexp_debug.log"  #endif  /* - * The "internal use only" fields in regexp_defs.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases.  They are: - * - * regstart	char that must begin a match; NUL if none obvious; Can be a - *		multi-byte character. - * reganch	is the match anchored (at beginning-of-line only)? - * regmust	string (pointer into program) that match must include, or NULL - * regmlen	length of regmust string - * regflags	RF_ values or'ed together - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot.  Regmust permits fast rejection - * of lines that cannot possibly match.  The regmust tests are costly enough - * that vim_regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup).  Regmlen is - * supplied because the test in vim_regexec() needs it and vim_regcomp() is - * computing it anyway. - */ - -/* - * Structure for regexp "program".  This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology).  Each node is an opcode - * plus a "next" pointer, possibly plus an operand.  
"Next" pointers of - * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" - * pointer with a BRANCH on both ends of it is connecting two alternatives. - * (Here we have one of the subtle syntax dependencies:	an individual BRANCH - * (as opposed to a collection of them) is never concatenated with anything - * because of operator precedence).  The "next" pointer of a BRACES_COMPLEX - * node points to the node after the stuff to be repeated. - * The operand of some types of node is a literal string; for others, it is a - * node leading into a sub-FSM.  In particular, the operand of a BRANCH node - * is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects to the - * thing following the set of BRANCHes.) - * - * pattern	is coded like: - * - *			  +-----------------+ - *			  |		    V - * <aa>\|<bb>	BRANCH <aa> BRANCH <bb> --> END - *		     |	    ^	 |	    ^ - *		     +------+	 +----------+ - * - * - *		       +------------------+ - *		       V		  | - * <aa>*	BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END - *		     |	    |		    ^			   ^ - *		     |	    +---------------+			   | - *		     +---------------------------------------------+ - * - * - *		       +----------------------+ - *		       V		      | - * <aa>\+	BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END - *		     |		     |		 ^			^ - *		     |		     +-----------+			| - *		     +--------------------------------------------------+ - * - * - *					+-------------------------+ - *					V			  | - * <aa>\{}	BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END - *		     |				    |		     ^ - *		     |				    +----------------+ - *		     +-----------------------------------------------+ - * - * - * <aa>\@!<bb>	BRANCH NOMATCH <aa> --> END  <bb> --> END - *		     |	     |		      ^       ^ - *		     |	     +----------------+       | - *		     +--------------------------------+ - * - *						      +---------+ - *						      |		V - * 
\z[abc]	BRANCH BRANCH  a  BRANCH  b  BRANCH  c	BRANCH	NOTHING --> END - *		     |	    |	       |	  |	^		    ^ - *		     |	    |	       |	  +-----+		    | - *		     |	    |	       +----------------+		    | - *		     |	    +---------------------------+		    | - *		     +------------------------------------------------------+ - * - * They all start with a BRANCH for "\|" alternatives, even when there is only - * one alternative. - */ - -/* - * The opcodes are: - */ - -/* definition	number		   opnd?    meaning */ -#define END             0       /*	End of program or NOMATCH operand. */ -#define BOL             1       /*	Match "" at beginning of line. */ -#define EOL             2       /*	Match "" at end of line. */ -#define BRANCH          3       /* node Match this alternative, or the -                                 *	next... */ -#define BACK            4       /*	Match "", "next" ptr points backward. */ -#define EXACTLY         5       /* str	Match this string. */ -#define NOTHING         6       /*	Match empty string. */ -#define STAR            7       /* node Match this (simple) thing 0 or more -                                 *	times. */ -#define PLUS            8       /* node Match this (simple) thing 1 or more -                                 *	times. */ -#define MATCH           9       /* node match the operand zero-width */ -#define NOMATCH         10      /* node check for no match with operand */ -#define BEHIND          11      /* node look behind for a match with operand */ -#define NOBEHIND        12      /* node look behind for no match with operand */ -#define SUBPAT          13      /* node match the operand here */ -#define BRACE_SIMPLE    14      /* node Match this (simple) thing between m and -                                 *	n times (\{m,n\}). 
*/ -#define BOW             15      /*	Match "" after [^a-zA-Z0-9_] */ -#define EOW             16      /*	Match "" at    [^a-zA-Z0-9_] */ -#define BRACE_LIMITS    17      /* nr nr  define the min & max for BRACE_SIMPLE -                                 *	and BRACE_COMPLEX. */ -#define NEWL            18      /*	Match line-break */ -#define BHPOS           19      /*	End position for BEHIND or NOBEHIND */ - - -/* character classes: 20-48 normal, 50-78 include a line-break */ -#define ADD_NL          30 -#define FIRST_NL        ANY + ADD_NL -#define ANY             20      /*	Match any one character. */ -#define ANYOF           21      /* str	Match any character in this string. */ -#define ANYBUT          22      /* str	Match any character not in this -                                 *	string. */ -#define IDENT           23      /*	Match identifier char */ -#define SIDENT          24      /*	Match identifier char but no digit */ -#define KWORD           25      /*	Match keyword char */ -#define SKWORD          26      /*	Match word char but no digit */ -#define FNAME           27      /*	Match file name char */ -#define SFNAME          28      /*	Match file name char but no digit */ -#define PRINT           29      /*	Match printable char */ -#define SPRINT          30      /*	Match printable char but no digit */ -#define WHITE           31      /*	Match whitespace char */ -#define NWHITE          32      /*	Match non-whitespace char */ -#define DIGIT           33      /*	Match digit char */ -#define NDIGIT          34      /*	Match non-digit char */ -#define HEX             35      /*	Match hex char */ -#define NHEX            36      /*	Match non-hex char */ -#define OCTAL           37      /*	Match octal char */ -#define NOCTAL          38      /*	Match non-octal char */ -#define WORD            39      /*	Match word char */ -#define NWORD           40      /*	Match non-word char */ -#define HEAD            41      /*	Match head char */ -#define NHEAD           
42      /*	Match non-head char */ -#define ALPHA           43      /*	Match alpha char */ -#define NALPHA          44      /*	Match non-alpha char */ -#define LOWER           45      /*	Match lowercase char */ -#define NLOWER          46      /*	Match non-lowercase char */ -#define UPPER           47      /*	Match uppercase char */ -#define NUPPER          48      /*	Match non-uppercase char */ -#define LAST_NL         NUPPER + ADD_NL -// -V:WITH_NL:560 -#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL) - -#define MOPEN           80   // -89 Mark this point in input as start of -                             //     \( … \) subexpr.  MOPEN + 0 marks start of -                             //     match. -#define MCLOSE          90   // -99 Analogous to MOPEN.  MCLOSE + 0 marks -                             //     end of match. -#define BACKREF         100  // -109 node Match same string again \1-\9. - -# define ZOPEN          110  // -119 Mark this point in input as start of -                             //  \z( … \) subexpr. -# define ZCLOSE         120  // -129 Analogous to ZOPEN. -# define ZREF           130  // -139 node Match external submatch \z1-\z9 - -#define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */ - -#define NOPEN           150     // Mark this point in input as start of -                                // \%( subexpr. -#define NCLOSE          151     // Analogous to NOPEN. - -#define MULTIBYTECODE   200     /* mbc	Match one multi-byte character */ -#define RE_BOF          201     /*	Match "" at beginning of file. */ -#define RE_EOF          202     /*	Match "" at end of file. */ -#define CURSOR          203     /*	Match location of cursor. 
*/ - -#define RE_LNUM         204     /* nr cmp  Match line number */ -#define RE_COL          205     /* nr cmp  Match column number */ -#define RE_VCOL         206     /* nr cmp  Match virtual column number */ - -#define RE_MARK         207     /* mark cmp  Match mark position */ -#define RE_VISUAL       208     /*	Match Visual area */ -#define RE_COMPOSING    209     // any composing characters - -/*   * Magic characters have a special meaning, they don't match literally.   * Magic characters are negative.  This separates them from literal characters   * (possibly multi-byte).  Only ASCII characters can be Magic. @@ -285,107 +60,6 @@   */  typedef void (*(*fptr_T)(int *, int))(void); -typedef struct { -  char_u     *regparse; -  int prevchr_len; -  int curchr; -  int prevchr; -  int prevprevchr; -  int nextchr; -  int at_start; -  int prev_at_start; -  int regnpar; -} parse_state_T; - -/* - * Structure used to save the current input state, when it needs to be - * restored after trying a match.  Used by reg_save() and reg_restore(). - * Also stores the length of "backpos". - */ -typedef struct { -  union { -    char_u  *ptr;       ///< rex.input pointer, for single-line regexp -    lpos_T pos;         ///< rex.input pos, for multi-line regexp -  } rs_u; -  int rs_len; -} regsave_T; - -/* struct to save start/end pointer/position in for \(\) */ -typedef struct { -  union { -    char_u  *ptr; -    lpos_T pos; -  } se_u; -} save_se_T; - -/* used for BEHIND and NOBEHIND matching */ -typedef struct regbehind_S { -  regsave_T save_after; -  regsave_T save_behind; -  int save_need_clear_subexpr; -  save_se_T save_start[NSUBEXP]; -  save_se_T save_end[NSUBEXP]; -} regbehind_T; - -/* Values for rs_state in regitem_T. 
*/ -typedef enum regstate_E { -  RS_NOPEN = 0          /* NOPEN and NCLOSE */ -  , RS_MOPEN            /* MOPEN + [0-9] */ -  , RS_MCLOSE           /* MCLOSE + [0-9] */ -  , RS_ZOPEN            /* ZOPEN + [0-9] */ -  , RS_ZCLOSE           /* ZCLOSE + [0-9] */ -  , RS_BRANCH           /* BRANCH */ -  , RS_BRCPLX_MORE      /* BRACE_COMPLEX and trying one more match */ -  , RS_BRCPLX_LONG      /* BRACE_COMPLEX and trying longest match */ -  , RS_BRCPLX_SHORT     /* BRACE_COMPLEX and trying shortest match */ -  , RS_NOMATCH          /* NOMATCH */ -  , RS_BEHIND1          /* BEHIND / NOBEHIND matching rest */ -  , RS_BEHIND2          /* BEHIND / NOBEHIND matching behind part */ -  , RS_STAR_LONG        /* STAR/PLUS/BRACE_SIMPLE longest match */ -  , RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */ -} regstate_T; - -/* - * When there are alternatives a regstate_T is put on the regstack to remember - * what we are doing. - * Before it may be another type of item, depending on rs_state, to remember - * more things. - */ -typedef struct regitem_S { -  regstate_T rs_state;          // what we are doing, one of RS_ above -  uint16_t   rs_no;             // submatch nr or BEHIND/NOBEHIND -  char_u     *rs_scan;          // current node in program -  union { -    save_se_T sesave; -    regsave_T regsave; -  } rs_un;                      ///< room for saving rex.input -} regitem_T; - - -/* used for STAR, PLUS and BRACE_SIMPLE matching */ -typedef struct regstar_S { -  int nextb;                    /* next byte */ -  int nextb_ic;                 /* next byte reverse case */ -  long count; -  long minval; -  long maxval; -} regstar_T; - -/* used to store input position when a BACK was encountered, so that we now if - * we made any progress since the last time. 
*/ -typedef struct backpos_S { -  char_u      *bp_scan;         /* "scan" where BACK was encountered */ -  regsave_T bp_pos;             /* last input position */ -} backpos_T; - -typedef struct { -  int a, b, c; -} decomp_T; - - -#ifdef INCLUDE_GENERATED_DECLARATIONS -# include "regexp.c.generated.h" -#endif  static int no_Magic(int x)  {    if (is_Magic(x)) @@ -400,66 +74,13 @@ static int toggle_Magic(int x)    return Magic(x);  } -/* - * The first byte of the regexp internal "program" is actually this magic - * number; the start node begins in the second byte.  It's used to catch the - * most severe mutilation of the program by the caller. - */ - +// The first byte of the BT regexp internal "program" is actually this magic +// number; the start node begins in the second byte.  It's used to catch the +// most severe mutilation of the program by the caller.  #define REGMAGIC        0234 -/* - * Opcode notes: - * - * BRANCH	The set of branches constituting a single choice are hooked - *		together with their "next" pointers, since precedence prevents - *		anything being concatenated to any individual branch.  The - *		"next" pointer of the last BRANCH in a choice points to the - *		thing following the whole choice.  This is also where the - *		final "next" pointer of each individual branch points; each - *		branch starts with the operand node of a BRANCH node. - * - * BACK		Normal "next" pointers all implicitly point forward; BACK - *		exists to make loop structures possible. - * - * STAR,PLUS	'=', and complex '*' and '+', are implemented as circular - *		BRANCH structures using BACK.  Simple cases (one character - *		per match) are implemented with STAR and PLUS for speed - *		and to minimize recursive plunges. - * - * BRACE_LIMITS	This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX - *		node, and defines the min and max limits to be used for that - *		node. - * - * MOPEN,MCLOSE	...are numbered at compile time. 
- * ZOPEN,ZCLOSE	...ditto - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. - * "Next" pointers are stored as two 8-bit bytes, high order first.  The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node.  (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ -#define OP(p)           ((int)*(p)) -#define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) -#define OPERAND(p)      ((p) + 3) -/* Obtain an operand that was stored as four bytes, MSB first. */ -#define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \ -                         + ((long)(p)[5] << 8) + (long)(p)[6]) -/* Obtain a second operand stored as four bytes. */ -#define OPERAND_MAX(p)  OPERAND_MIN((p) + 4) -/* Obtain a second single-byte operand stored after a four bytes operand. */ -#define OPERAND_CMP(p)  (p)[7] - -/* - * Utility definitions. - */ -#define UCHARAT(p)      ((int)*(char_u *)(p)) +// Utility definitions. 
+#define UCHARAT(p)      ((int)(*(char_u *)(p)))  // Used for an error (down from) vim_regcomp(): give the error message, set  // rc_did_emsg and return NULL @@ -477,14 +98,6 @@ static int toggle_Magic(int x)  #define MAX_LIMIT       (32767L << 16L) - -#ifdef BT_REGEXP_DUMP -static void regdump(char_u *, bt_regprog_T *); -#endif -#ifdef REGEXP_DEBUG -static char_u   *regprop(char_u *); -#endif -  static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");  static char_u e_reverse_range[] = N_("E944: Reverse range in character class");  static char_u e_large_class[] = N_("E945: Range too large in character class"); @@ -500,11 +113,17 @@ static char_u e_recursive[]  = N_("E956: Cannot use pattern recursively");  #define NOT_MULTI       0  #define MULTI_ONE       1  #define MULTI_MULT      2 -/* - * Return NOT_MULTI if c is not a "multi" operator. - * Return MULTI_ONE if c is a single "multi" operator. - * Return MULTI_MULT if c is a multi "multi" operator. - */ + +// return values for regmatch() +#define RA_FAIL         1       // something failed, abort +#define RA_CONT         2       // continue in inner loop +#define RA_BREAK        3       // break inner loop +#define RA_MATCH        4       // successful match +#define RA_NOMATCH      5       // didn't match + +/// Return NOT_MULTI if c is not a "multi" operator. +/// Return MULTI_ONE if c is a single "multi" operator. +/// Return MULTI_MULT if c is a multi "multi" operator.  static int re_multi_type(int c)  {    if (c == Magic('@') || c == Magic('=') || c == Magic('?')) @@ -514,22 +133,6 @@ static int re_multi_type(int c)    return NOT_MULTI;  } -/* - * Flags to be passed up and down. - */ -#define HASWIDTH        0x1     /* Known never to match null string. */ -#define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */ -#define SPSTART         0x4     /* Starts with * or +. */ -#define HASNL           0x8     /* Contains some \n. 
*/ -#define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */ -#define WORST           0       /* Worst case. */ - -/* - * When regcode is set to this value, code is not emitted and size is computed - * instead. - */ -#define JUST_CALC_SIZE  ((char_u *) -1) -  static char_u           *reg_prev_sub = NULL;  /* @@ -684,38 +287,28 @@ static void init_class_tab(void)  # define ri_upper(c)    (c < 0x100 && (class_tab[c] & RI_UPPER))  # define ri_white(c)    (c < 0x100 && (class_tab[c] & RI_WHITE)) -/* flags for regflags */ -#define RF_ICASE    1   /* ignore case */ -#define RF_NOICASE  2   /* don't ignore case */ -#define RF_HASNL    4   /* can match a NL */ -#define RF_ICOMBINE 8   /* ignore combining characters */ -#define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */ +// flags for regflags +#define RF_ICASE    1   // ignore case +#define RF_NOICASE  2   // don't ignore case +#define RF_HASNL    4   // can match a NL +#define RF_ICOMBINE 8   // ignore combining characters +#define RF_LOOKBH   16  // uses "\@<=" or "\@<!"  // Global work variables for vim_regcomp().  static char_u *regparse;        ///< Input-scan pointer. -static int prevchr_len;         ///< byte length of previous char -static int num_complex_braces;  ///< Complex \{...} count  static int regnpar;             ///< () count.  static bool wants_nfa;          ///< regex should use NFA engine  static int regnzpar;            ///< \z() count.  static int re_has_z;            ///< \z item detected -static char_u *regcode;         ///< Code-emit pointer, or JUST_CALC_SIZE -static long regsize;            ///< Code size. 
-static int reg_toolong;         ///< true when offset out of range -static char_u had_endbrace[NSUBEXP];  ///< flags, true if end of () found -static unsigned regflags;         ///< RF_ flags for prog -static long brace_min[10];        ///< Minimums for complex brace repeats -static long brace_max[10];        ///< Maximums for complex brace repeats -static int brace_count[10];       ///< Current counts for complex brace repeats -static int had_eol;               ///< true when EOL found by vim_regcomp() -static int one_exactly = false;   ///< only do one char for EXACTLY - -static int reg_magic;           /* magicness of the pattern: */ -#define MAGIC_NONE      1       /* "\V" very unmagic */ -#define MAGIC_OFF       2       /* "\M" or 'magic' off */ -#define MAGIC_ON        3       /* "\m" or 'magic' */ -#define MAGIC_ALL       4       /* "\v" very magic */ +static unsigned regflags;       ///< RF_ flags for prog +static int had_eol;             ///< true when EOL found by vim_regcomp() + +static int reg_magic;           // magicness of the pattern: +#define MAGIC_NONE      1       // "\V" very unmagic +#define MAGIC_OFF       2       // "\M" or 'magic' off +#define MAGIC_ON        3       // "\m" or 'magic' +#define MAGIC_ALL       4       // "\v" very magic  static int reg_string;          // matching with a string instead of a buffer                                  // line @@ -725,22 +318,22 @@ static int reg_strict;          // "[abc" is illegal   * META contains all characters that may be magic, except '^' and '$'.   */ -/* META[] is used often enough to justify turning it into a table. */ +// META[] is used often enough to justify turning it into a table.  static char_u META_flags[] = { -  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -  /*		   %  &     (  )  *  +	      .    */ -  0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, -  /*     1  2  3	4  5  6  7  8  9	<  =  >  ? 
*/ -  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, -  /*  @  A     C	D     F     H  I     K	L  M	 O */ -  1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, -  /*  P	     S	   U  V  W  X	  Z  [		 _ */ -  1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, -  /*     a     c	d     f     h  i     k	l  m  n  o */ -  0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, -  /*  p	     s	   u  v  w  x	  z  {	|     ~    */ -  1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +//                 %  &     (  )  *  +        . +    0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, +//     1  2  3  4  5  6  7  8  9        <  =  >  ? +    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, +//  @  A     C  D     F     H  I     K  L  M     O +    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, +//  P        S     U  V  W  X     Z  [           _ +    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, +//     a     c  d     f     h  i     k  l  m  n  o +    0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, +//  p        s     u  v  w  x     z  {  |     ~ +    1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1  };  static int curchr;              // currently parsed character @@ -748,24 +341,36 @@ static int curchr;              // currently parsed character  // start, eg in /[ ^I]^ the pattern was never found even if it existed,  // because ^ was taken to be magic -- webb  static int prevchr; -static int prevprevchr;         /* previous-previous character */ -static int nextchr;             /* used for ungetchr() */ +static int prevprevchr;         // previous-previous character +static int nextchr;             // used for ungetchr() -/* arguments for reg() */ -#define REG_NOPAREN     0       /* toplevel reg() */ -#define REG_PAREN       1       /* \(\) */ -#define REG_ZPAREN      2       /* \z(\) */ -#define REG_NPAREN      3       /* \%(\) */ +// arguments for reg() +#define REG_NOPAREN     0       // 
toplevel reg() +#define REG_PAREN       1       // \(\) +#define REG_ZPAREN      2       // \z(\) +#define REG_NPAREN      3       // \%(\) + +typedef struct { +  char_u *regparse; +  int prevchr_len; +  int curchr; +  int prevchr; +  int prevprevchr; +  int nextchr; +  int at_start; +  int prev_at_start; +  int regnpar; +} parse_state_T; -/* - * Forward declarations for vim_regcomp()'s friends. - */ -# define REGMBC(x) regmbc(x); -# define CASEMBC(x) case x:  static regengine_T bt_regengine;  static regengine_T nfa_regengine; +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "regexp.c.generated.h" +#endif + +  // Return true if compiled regular expression "prog" can match a line break.  int re_multiline(const regprog_T *prog)    FUNC_ATTR_NONNULL_ALL @@ -797,312 +402,6 @@ static int get_equi_class(char_u **pp)  /* - * Produce the bytes for equivalence class "c". - * Currently only handles latin1, latin9 and utf-8. - * NOTE: When changing this function, also change nfa_emit_equi_class() - */ -static void reg_equi_class(int c) -{ -  { -    switch (c) { -      // Do not use '\300' style, it results in a negative number. 
-    case 'A': case 0xc0: case 0xc1: case 0xc2: -    case 0xc3: case 0xc4: case 0xc5: -      CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd) -      CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2) -      regmbc('A'); regmbc(0xc0); regmbc(0xc1); -      regmbc(0xc2); regmbc(0xc3); regmbc(0xc4); -      regmbc(0xc5); -      REGMBC(0x100) REGMBC(0x102) REGMBC(0x104) -      REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0) -      REGMBC(0x1ea2) -      return; -    case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06) -      regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06) -      return; -    case 'C': case 0xc7: -      CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c) -      regmbc('C'); regmbc(0xc7); -      REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a) -      REGMBC(0x10c) -      return; -    case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a) -      CASEMBC(0x1e0e) CASEMBC(0x1e10) -      regmbc('D'); REGMBC(0x10e) REGMBC(0x110) -      REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10) -      return; -    case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb: -      CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118) -      CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc) -      regmbc('E'); regmbc(0xc8); regmbc(0xc9); -      regmbc(0xca); regmbc(0xcb); -      REGMBC(0x112) REGMBC(0x114) REGMBC(0x116) -      REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba) -      REGMBC(0x1ebc) -      return; -    case 'F': CASEMBC(0x1e1e) -      regmbc('F'); REGMBC(0x1e1e) -      return; -    case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120) -      CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4) -      CASEMBC(0x1e20) -      regmbc('G'); REGMBC(0x11c) REGMBC(0x11e) -      REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4) -      REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20) -      return; -    case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22) -      CASEMBC(0x1e26) CASEMBC(0x1e28) -      regmbc('H'); REGMBC(0x124) REGMBC(0x126) -      REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28) -      
return; -    case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf: -      CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e) -      CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8) -      regmbc('I'); regmbc(0xcc); regmbc(0xcd); -      regmbc(0xce); regmbc(0xcf); -      REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c) -      REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf) -      REGMBC(0x1ec8) -      return; -    case 'J': CASEMBC(0x134) -      regmbc('J'); REGMBC(0x134) -      return; -    case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30) -      CASEMBC(0x1e34) -      regmbc('K'); REGMBC(0x136) REGMBC(0x1e8) -      REGMBC(0x1e30) REGMBC(0x1e34) -      return; -    case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d) -      CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a) -      regmbc('L'); REGMBC(0x139) REGMBC(0x13b) -      REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141) -      REGMBC(0x1e3a) -      return; -    case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40) -      regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40) -      return; -    case 'N': case 0xd1: -      CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44) -      CASEMBC(0x1e48) -      regmbc('N'); regmbc(0xd1); -      REGMBC(0x143) REGMBC(0x145) REGMBC(0x147) -      REGMBC(0x1e44) REGMBC(0x1e48) -      return; -    case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5: -    case 0xd6: case 0xd8: -      CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0) -      CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece) -      regmbc('O'); regmbc(0xd2); regmbc(0xd3); -      regmbc(0xd4); regmbc(0xd5); regmbc(0xd6); -      regmbc(0xd8); -      REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150) -      REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea) -      REGMBC(0x1ec) REGMBC(0x1ece) -      return; -    case 'P': case 0x1e54: case 0x1e56: -      regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56) -      return; -    case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158) -      CASEMBC(0x1e58) CASEMBC(0x1e5e) -      regmbc('R'); 
REGMBC(0x154) REGMBC(0x156) REGMBC(0x158) -      REGMBC(0x1e58) REGMBC(0x1e5e) -      return; -    case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e) -      CASEMBC(0x160) CASEMBC(0x1e60) -      regmbc('S'); REGMBC(0x15a) REGMBC(0x15c) -      REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60) -      return; -    case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166) -      CASEMBC(0x1e6a) CASEMBC(0x1e6e) -      regmbc('T'); REGMBC(0x162) REGMBC(0x164) -      REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e) -      return; -    case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc: -      CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e) -      CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) -      CASEMBC(0x1ee6) -      regmbc('U'); regmbc(0xd9); regmbc(0xda); -      regmbc(0xdb); regmbc(0xdc); -      REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c) -      REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172) -      REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6) -      return; -    case 'V': CASEMBC(0x1e7c) -      regmbc('V'); REGMBC(0x1e7c) -      return; -    case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82) -      CASEMBC(0x1e84) CASEMBC(0x1e86) -      regmbc('W'); REGMBC(0x174) REGMBC(0x1e80) -      REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86) -      return; -    case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c) -      regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c) -      return; -    case 'Y': case 0xdd: -      CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2) -      CASEMBC(0x1ef6) CASEMBC(0x1ef8) -      regmbc('Y'); regmbc(0xdd); -      REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e) -      REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8) -      return; -    case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d) -      CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94) -      regmbc('Z'); REGMBC(0x179) REGMBC(0x17b) -      REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90) -      REGMBC(0x1e94) -      return; -    case 'a': case 0xe0: case 0xe1: case 0xe2: -    case 0xe3: case 0xe4: 
case 0xe5: -      CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce) -      CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3) -      regmbc('a'); regmbc(0xe0); regmbc(0xe1); -      regmbc(0xe2); regmbc(0xe3); regmbc(0xe4); -      regmbc(0xe5); -      REGMBC(0x101) REGMBC(0x103) REGMBC(0x105) -      REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1) -      REGMBC(0x1ea3) -      return; -    case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07) -      regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07) -      return; -    case 'c': case 0xe7: -      CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d) -      regmbc('c'); regmbc(0xe7); -      REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b) -      REGMBC(0x10d) -      return; -    case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b) -      CASEMBC(0x1e0f) CASEMBC(0x1e11) -      regmbc('d'); REGMBC(0x10f) REGMBC(0x111) -      REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11) -      return; -    case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb: -      CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119) -      CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd) -      regmbc('e'); regmbc(0xe8); regmbc(0xe9); -      regmbc(0xea); regmbc(0xeb); -      REGMBC(0x113) REGMBC(0x115) REGMBC(0x117) -      REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb) -      REGMBC(0x1ebd) -      return; -    case 'f': CASEMBC(0x1e1f) -      regmbc('f'); REGMBC(0x1e1f) -      return; -    case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121) -      CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5) -      CASEMBC(0x1e21) -      regmbc('g'); REGMBC(0x11d) REGMBC(0x11f) -      REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5) -      REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21) -      return; -    case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23) -      CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96) -      regmbc('h'); REGMBC(0x125) REGMBC(0x127) -      REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29) -      REGMBC(0x1e96) -      return; -    case 'i': case 0xec: case 
0xed: case 0xee: case 0xef: -      CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f) -      CASEMBC(0x1d0) CASEMBC(0x1ec9) -      regmbc('i'); regmbc(0xec); regmbc(0xed); -      regmbc(0xee); regmbc(0xef); -      REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d) -      REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9) -      return; -    case 'j': CASEMBC(0x135) CASEMBC(0x1f0) -      regmbc('j'); REGMBC(0x135) REGMBC(0x1f0) -      return; -    case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31) -      CASEMBC(0x1e35) -      regmbc('k'); REGMBC(0x137) REGMBC(0x1e9) -      REGMBC(0x1e31) REGMBC(0x1e35) -      return; -    case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e) -      CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b) -      regmbc('l'); REGMBC(0x13a) REGMBC(0x13c) -      REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142) -      REGMBC(0x1e3b) -      return; -    case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41) -      regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41) -      return; -    case 'n': case 0xf1: -      CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149) -      CASEMBC(0x1e45) CASEMBC(0x1e49) -      regmbc('n'); regmbc(0xf1); -      REGMBC(0x144) REGMBC(0x146) REGMBC(0x148) -      REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49) -      return; -    case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5: -    case 0xf6: case 0xf8: -      CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1) -      CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf) -      regmbc('o'); regmbc(0xf2); regmbc(0xf3); -      regmbc(0xf4); regmbc(0xf5); regmbc(0xf6); -      regmbc(0xf8); -      REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151) -      REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb) -      REGMBC(0x1ed) REGMBC(0x1ecf) -      return; -    case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57) -      regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57) -      return; -    case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159) -      CASEMBC(0x1e59) CASEMBC(0x1e5f) -      regmbc('r'); REGMBC(0x155) 
REGMBC(0x157) REGMBC(0x159) -      REGMBC(0x1e59) REGMBC(0x1e5f) -      return; -    case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f) -      CASEMBC(0x161) CASEMBC(0x1e61) -      regmbc('s'); REGMBC(0x15b) REGMBC(0x15d) -      REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61) -      return; -    case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167) -      CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97) -      regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167) -      REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97) -      return; -    case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc: -      CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f) -      CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) -      CASEMBC(0x1ee7) -      regmbc('u'); regmbc(0xf9); regmbc(0xfa); -      regmbc(0xfb); regmbc(0xfc); -      REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d) -      REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173) -      REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7) -      return; -    case 'v': CASEMBC(0x1e7d) -      regmbc('v'); REGMBC(0x1e7d) -      return; -    case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83) -      CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98) -      regmbc('w'); REGMBC(0x175) REGMBC(0x1e81) -      REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87) -      REGMBC(0x1e98) -      return; -    case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d) -      regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d) -      return; -    case 'y': case 0xfd: case 0xff: -      CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99) -      CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9) -      regmbc('y'); regmbc(0xfd); regmbc(0xff); -      REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99) -      REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9) -      return; -    case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e) -      CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95) -      regmbc('z'); REGMBC(0x17a) REGMBC(0x17c) -      REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91) -      REGMBC(0x1e95) -      
return; -    } -  } -  regmbc(c); -} - -/*   * Check for a collating element "[.a.]".  "pp" points to the '['.   * Returns a character. Zero means that no item was recognized.  Otherwise   * "pp" is advanced to after the item. @@ -1125,7 +424,7 @@ static int get_coll_element(char_u **pp)    return 0;  } -static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */ +static int reg_cpo_lit;  // 'cpoptions' contains 'l' flag  static void get_cpo_flags(void)  { @@ -1141,10 +440,12 @@ static char_u *skip_anyof(char_u *p)  {    int l; -  if (*p == '^')        /* Complement of range. */ -    ++p; -  if (*p == ']' || *p == '-') -    ++p; +  if (*p == '^') {  // Complement of range. +    p++; +  } +  if (*p == ']' || *p == '-') { +    p++; +  }    while (*p != NUL && *p != ']') {      if ((l = utfc_ptr2len(p)) > 1) {        p += l; @@ -1204,1546 +505,32 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char_u **newp)          break;      } else if (p[0] == '\\' && p[1] != NUL)   {        if (dirc == '?' && newp != NULL && p[1] == '?') { -        /* change "\?" to "?", make a copy first. */ +        // change "\?" to "?", make a copy first.          if (*newp == NULL) {            *newp = vim_strsave(startp);            p = *newp + (p - startp);          }          STRMOVE(p, p + 1); -      } else -        ++p;            /* skip next character */ -      if (*p == 'v') +      } else { +        p++;            // skip next character +      } +      if (*p == 'v') {          mymagic = MAGIC_ALL; -      else if (*p == 'V') +      } else if (*p == 'V') {          mymagic = MAGIC_NONE; -    } -  } -  return p; -} - -/// Return true if the back reference is legal. We must have seen the close -/// brace. -/// TODO(vim): Should also check that we don't refer to something repeated -/// (+*=): what instance of the repetition should we match? -static int seen_endbrace(int refnum) -{ -  if (!had_endbrace[refnum]) { -      char_u *p; - -      // Trick: check if "@<=" or "@<!" 
follows, in which case -      // the \1 can appear before the referenced match. -      for (p = regparse; *p != NUL; p++) { -        if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '=')) { -          break; -        }        } - -    if (*p == NUL) { -      emsg(_("E65: Illegal back reference")); -      rc_did_emsg = true; -      return false;      }    } -  return true; -} - -/* - * bt_regcomp() - compile a regular expression into internal code for the - * traditional back track matcher. - * Returns the program in allocated space.  Returns NULL for an error. - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code.  So we cheat:  we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it.  (Note that it has to be in - * one piece because free() must be able to free it all.) - * - * Whether upper/lower case is to be ignored is decided when executing the - * program, it does not matter here. - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - * "re_flags": RE_MAGIC and/or RE_STRING. - */ -static regprog_T *bt_regcomp(char_u *expr, int re_flags) -{ -  char_u      *scan; -  char_u      *longest; -  int len; -  int flags; - -  if (expr == NULL) { -    IEMSG_RET_NULL(_(e_null)); -  } - -  init_class_tab(); - -  /* -   * First pass: determine size, legality. -   */ -  regcomp_start(expr, re_flags); -  regcode = JUST_CALC_SIZE; -  regc(REGMAGIC); -  if (reg(REG_NOPAREN, &flags) == NULL) -    return NULL; - -  /* Allocate space. 
*/ -  bt_regprog_T *r = xmalloc(sizeof(bt_regprog_T) + regsize); -  r->re_in_use = false; - -  /* -   * Second pass: emit code. -   */ -  regcomp_start(expr, re_flags); -  regcode = r->program; -  regc(REGMAGIC); -  if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong) { -    xfree(r); -    if (reg_toolong) -      EMSG_RET_NULL(_("E339: Pattern too long")); -    return NULL; -  } - -  /* Dig out information for optimizations. */ -  r->regstart = NUL;            /* Worst-case defaults. */ -  r->reganch = 0; -  r->regmust = NULL; -  r->regmlen = 0; -  r->regflags = regflags; -  if (flags & HASNL) -    r->regflags |= RF_HASNL; -  if (flags & HASLOOKBH) -    r->regflags |= RF_LOOKBH; -  /* Remember whether this pattern has any \z specials in it. */ -  r->reghasz = re_has_z; -  scan = r->program + 1;        /* First BRANCH. */ -  if (OP(regnext(scan)) == END) {   /* Only one top-level choice. */ -    scan = OPERAND(scan); - -    /* Starting-point info. */ -    if (OP(scan) == BOL || OP(scan) == RE_BOF) { -      r->reganch++; -      scan = regnext(scan); -    } - -    if (OP(scan) == EXACTLY) { -      r->regstart = utf_ptr2char(OPERAND(scan)); -    } else if (OP(scan) == BOW -               || OP(scan) == EOW -               || OP(scan) == NOTHING -               || OP(scan) == MOPEN  + 0 || OP(scan) == NOPEN -               || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) { -      char_u *regnext_scan = regnext(scan); -      if (OP(regnext_scan) == EXACTLY) { -        r->regstart = utf_ptr2char(OPERAND(regnext_scan)); -      } -    } - -    /* -     * If there's something expensive in the r.e., find the longest -     * literal string that must appear and make it the regmust.  Resolve -     * ties in favor of later strings, since the regstart check works -     * with the beginning of the r.e. and avoiding duplication -     * strengthens checking.  Not a strong reason, but sufficient in the -     * absence of others. -     */ -    /* -     * When the r.e. 
starts with BOW, it is faster to look for a regmust -     * first. Used a lot for "#" and "*" commands. (Added by mool). -     */ -    if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW) -        && !(flags & HASNL)) { -      longest = NULL; -      len = 0; -      for (; scan != NULL; scan = regnext(scan)) -        if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len) { -          longest = OPERAND(scan); -          len = (int)STRLEN(OPERAND(scan)); -        } -      r->regmust = longest; -      r->regmlen = len; -    } -  } -#ifdef BT_REGEXP_DUMP -  regdump(expr, r); -#endif -  r->engine = &bt_regengine; -  return (regprog_T *)r; -} - -/* - * Free a compiled regexp program, returned by bt_regcomp(). - */ -static void bt_regfree(regprog_T *prog) -{ -  xfree(prog); +  return p;  } -/* - * Setup to parse the regexp.  Used once to get the length and once to do it. - */ -static void  -regcomp_start ( -    char_u *expr, -    int re_flags                       /* see vim_regcomp() */ -) -{ -  initchr(expr); -  if (re_flags & RE_MAGIC) -    reg_magic = MAGIC_ON; -  else -    reg_magic = MAGIC_OFF; -  reg_string = (re_flags & RE_STRING); -  reg_strict = (re_flags & RE_STRICT); -  get_cpo_flags(); - -  num_complex_braces = 0; -  regnpar = 1; -  memset(had_endbrace, 0, sizeof(had_endbrace)); -  regnzpar = 1; -  re_has_z = 0; -  regsize = 0L; -  reg_toolong = false; -  regflags = 0; -  had_eol = false; -} - -/* - * Check if during the previous call to vim_regcomp the EOL item "$" has been - * found.  This is messy, but it works fine. - */ -int vim_regcomp_had_eol(void) -{ -  return had_eol; -}  // variables used for parsing +static int prevchr_len;    // byte length of previous char  static int at_start;       // True when on the first character  static int prev_at_start;  // True when on the second character  /* - * Parse regular expression, i.e. main body or parenthesized thing. - * - * Caller must absorb opening parenthesis. 
- * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char_u * -reg ( -    int paren,              /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */ -    int *flagp -) -{ -  char_u      *ret; -  char_u      *br; -  char_u      *ender; -  int parno = 0; -  int flags; - -  *flagp = HASWIDTH;            /* Tentatively. */ - -  if (paren == REG_ZPAREN) { -    /* Make a ZOPEN node. */ -    if (regnzpar >= NSUBEXP) -      EMSG_RET_NULL(_("E50: Too many \\z(")); -    parno = regnzpar; -    regnzpar++; -    ret = regnode(ZOPEN + parno); -  } else if (paren == REG_PAREN)    { -    /* Make a MOPEN node. */ -    if (regnpar >= NSUBEXP) -      EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); -    parno = regnpar; -    ++regnpar; -    ret = regnode(MOPEN + parno); -  } else if (paren == REG_NPAREN)   { -    /* Make a NOPEN node. */ -    ret = regnode(NOPEN); -  } else -    ret = NULL; - -  /* Pick up the branches, linking them together. */ -  br = regbranch(&flags); -  if (br == NULL) -    return NULL; -  if (ret != NULL) -    regtail(ret, br);           /* [MZ]OPEN -> first. */ -  else -    ret = br; -  /* If one of the branches can be zero-width, the whole thing can. -   * If one of the branches has * at start or matches a line-break, the -   * whole thing can. */ -  if (!(flags & HASWIDTH)) -    *flagp &= ~HASWIDTH; -  *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); -  while (peekchr() == Magic('|')) { -    skipchr(); -    br = regbranch(&flags); -    if (br == NULL || reg_toolong) -      return NULL; -    regtail(ret, br);           /* BRANCH -> BRANCH. */ -    if (!(flags & HASWIDTH)) -      *flagp &= ~HASWIDTH; -    *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); -  } - -  /* Make a closing node, and hook it on the end. */ -  ender = regnode( -      paren == REG_ZPAREN ? 
ZCLOSE + parno : -      paren == REG_PAREN ? MCLOSE + parno : -      paren == REG_NPAREN ? NCLOSE : END); -  regtail(ret, ender); - -  /* Hook the tails of the branches to the closing node. */ -  for (br = ret; br != NULL; br = regnext(br)) -    regoptail(br, ender); - -  /* Check for proper termination. */ -  if (paren != REG_NOPAREN && getchr() != Magic(')')) { -    if (paren == REG_ZPAREN) -      EMSG_RET_NULL(_("E52: Unmatched \\z(")); -    else if (paren == REG_NPAREN) -      EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); -    else -      EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); -  } else if (paren == REG_NOPAREN && peekchr() != NUL) { -    if (curchr == Magic(')')) -      EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); -    else -      EMSG_RET_NULL(_(e_trailing));             /* "Can't happen". */ -    /* NOTREACHED */ -  } -  // Here we set the flag allowing back references to this set of -  // parentheses. -  if (paren == REG_PAREN) { -    had_endbrace[parno] = true;  // have seen the close paren -  } -  return ret; -} - -/* - * Parse one alternative of an | operator. - * Implements the & operator. - */ -static char_u *regbranch(int *flagp) -{ -  char_u      *ret; -  char_u      *chain = NULL; -  char_u      *latest; -  int flags; - -  *flagp = WORST | HASNL;               /* Tentatively. */ - -  ret = regnode(BRANCH); -  for (;; ) { -    latest = regconcat(&flags); -    if (latest == NULL) -      return NULL; -    /* If one of the branches has width, the whole thing has.  If one of -     * the branches anchors at start-of-line, the whole thing does. -     * If one of the branches uses look-behind, the whole thing does. */ -    *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH); -    /* If one of the branches doesn't match a line-break, the whole thing -     * doesn't. 
*/ -    *flagp &= ~HASNL | (flags & HASNL); -    if (chain != NULL) -      regtail(chain, latest); -    if (peekchr() != Magic('&')) -      break; -    skipchr(); -    regtail(latest, regnode(END));     /* operand ends */ -    if (reg_toolong) -      break; -    reginsert(MATCH, latest); -    chain = latest; -  } - -  return ret; -} - -/* - * Parse one alternative of an | or & operator. - * Implements the concatenation operator. - */ -static char_u *regconcat(int *flagp) -{ -  char_u      *first = NULL; -  char_u      *chain = NULL; -  char_u      *latest; -  int flags; -  int cont = true; - -  *flagp = WORST;               /* Tentatively. */ - -  while (cont) { -    switch (peekchr()) { -    case NUL: -    case Magic('|'): -    case Magic('&'): -    case Magic(')'): -      cont = false; -      break; -    case Magic('Z'): -      regflags |= RF_ICOMBINE; -      skipchr_keepstart(); -      break; -    case Magic('c'): -      regflags |= RF_ICASE; -      skipchr_keepstart(); -      break; -    case Magic('C'): -      regflags |= RF_NOICASE; -      skipchr_keepstart(); -      break; -    case Magic('v'): -      reg_magic = MAGIC_ALL; -      skipchr_keepstart(); -      curchr = -1; -      break; -    case Magic('m'): -      reg_magic = MAGIC_ON; -      skipchr_keepstart(); -      curchr = -1; -      break; -    case Magic('M'): -      reg_magic = MAGIC_OFF; -      skipchr_keepstart(); -      curchr = -1; -      break; -    case Magic('V'): -      reg_magic = MAGIC_NONE; -      skipchr_keepstart(); -      curchr = -1; -      break; -    default: -      latest = regpiece(&flags); -      if (latest == NULL || reg_toolong) -        return NULL; -      *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH); -      if (chain == NULL)                        /* First piece. 
*/ -        *flagp |= flags & SPSTART; -      else -        regtail(chain, latest); -      chain = latest; -      if (first == NULL) -        first = latest; -      break; -    } -  } -  if (first == NULL)            /* Loop ran zero times. */ -    first = regnode(NOTHING); -  return first; -} - -/* - * Parse something followed by possible [*+=]. - * - * Note that the branching code sequences used for = and the general cases - * of * and + are somewhat optimized:  they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. - */ -static char_u *regpiece(int *flagp) -{ -  char_u          *ret; -  int op; -  char_u          *next; -  int flags; -  long minval; -  long maxval; - -  ret = regatom(&flags); -  if (ret == NULL) -    return NULL; - -  op = peekchr(); -  if (re_multi_type(op) == NOT_MULTI) { -    *flagp = flags; -    return ret; -  } -  /* default flags */ -  *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH))); - -  skipchr(); -  switch (op) { -  case Magic('*'): -    if (flags & SIMPLE) -      reginsert(STAR, ret); -    else { -      /* Emit x* as (x&|), where & means "self". */ -      reginsert(BRANCH, ret);           /* Either x */ -      regoptail(ret, regnode(BACK));            /* and loop */ -      regoptail(ret, ret);              /* back */ -      regtail(ret, regnode(BRANCH));            /* or */ -      regtail(ret, regnode(NOTHING));           /* null. */ -    } -    break; - -  case Magic('+'): -    if (flags & SIMPLE) -      reginsert(PLUS, ret); -    else { -      /* Emit x+ as x(&|), where & means "self". */ -      next = regnode(BRANCH);           /* Either */ -      regtail(ret, next); -      regtail(regnode(BACK), ret);              /* loop back */ -      regtail(next, regnode(BRANCH));           /* or */ -      regtail(ret, regnode(NOTHING));           /* null. 
*/ -    } -    *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH))); -    break; - -  case Magic('@'): -  { -    int lop = END; -    int64_t nr = getdecchrs(); - -    switch (no_Magic(getchr())) { -    case '=': lop = MATCH; break;                                 /* \@= */ -    case '!': lop = NOMATCH; break;                               /* \@! */ -    case '>': lop = SUBPAT; break;                                /* \@> */ -    case '<': switch (no_Magic(getchr())) { -      case '=': lop = BEHIND; break;                               /* \@<= */ -      case '!': lop = NOBEHIND; break;                             /* \@<! */ -    } -    } -    if (lop == END) -      EMSG2_RET_NULL(_("E59: invalid character after %s@"), -          reg_magic == MAGIC_ALL); -    /* Look behind must match with behind_pos. */ -    if (lop == BEHIND || lop == NOBEHIND) { -      regtail(ret, regnode(BHPOS)); -      *flagp |= HASLOOKBH; -    } -    regtail(ret, regnode(END));             /* operand ends */ -    if (lop == BEHIND || lop == NOBEHIND) { -      if (nr < 0) -        nr = 0;                 /* no limit is same as zero limit */ -      reginsert_nr(lop, (uint32_t)nr, ret); -    } else -      reginsert(lop, ret); -    break; -  } - -  case Magic('?'): -  case Magic('='): -    /* Emit x= as (x|) */ -    reginsert(BRANCH, ret);                     /* Either x */ -    regtail(ret, regnode(BRANCH));              /* or */ -    next = regnode(NOTHING);                    /* null. 
*/ -    regtail(ret, next); -    regoptail(ret, next); -    break; - -  case Magic('{'): -    if (!read_limits(&minval, &maxval)) -      return NULL; -    if (flags & SIMPLE) { -      reginsert(BRACE_SIMPLE, ret); -      reginsert_limits(BRACE_LIMITS, minval, maxval, ret); -    } else { -      if (num_complex_braces >= 10) -        EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), -            reg_magic == MAGIC_ALL); -      reginsert(BRACE_COMPLEX + num_complex_braces, ret); -      regoptail(ret, regnode(BACK)); -      regoptail(ret, ret); -      reginsert_limits(BRACE_LIMITS, minval, maxval, ret); -      ++num_complex_braces; -    } -    if (minval > 0 && maxval > 0) -      *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH))); -    break; -  } -  if (re_multi_type(peekchr()) != NOT_MULTI) { -    // Can't have a multi follow a multi. -    if (peekchr() == Magic('*')) { -      EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON); -    } -    EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL, no_Magic(peekchr())); -  } - -  return ret; -} - -/* When making changes to classchars also change nfa_classcodes. */ -static char_u   *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; -static int classcodes[] = { -  ANY, IDENT, SIDENT, KWORD, SKWORD, -  FNAME, SFNAME, PRINT, SPRINT, -  WHITE, NWHITE, DIGIT, NDIGIT, -  HEX, NHEX, OCTAL, NOCTAL, -  WORD, NWORD, HEAD, NHEAD, -  ALPHA, NALPHA, LOWER, NLOWER, -  UPPER, NUPPER -}; - -/* - * Parse the lowest level. - * - * Optimization:  gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run.  Don't do this when one_exactly is set. - */ -static char_u *regatom(int *flagp) -{ -  char_u          *ret; -  int flags; -  int c; -  char_u          *p; -  int extra = 0; -  int save_prev_at_start = prev_at_start; - -  *flagp = WORST;               /* Tentatively. 
*/ - -  c = getchr(); -  switch (c) { -  case Magic('^'): -    ret = regnode(BOL); -    break; - -  case Magic('$'): -    ret = regnode(EOL); -    had_eol = true; -    break; - -  case Magic('<'): -    ret = regnode(BOW); -    break; - -  case Magic('>'): -    ret = regnode(EOW); -    break; - -  case Magic('_'): -    c = no_Magic(getchr()); -    if (c == '^') {             /* "\_^" is start-of-line */ -      ret = regnode(BOL); -      break; -    } -    if (c == '$') {             /* "\_$" is end-of-line */ -      ret = regnode(EOL); -      had_eol = true; -      break; -    } - -    extra = ADD_NL; -    *flagp |= HASNL; - -    /* "\_[" is character range plus newline */ -    if (c == '[') -      goto collection; - -  // "\_x" is character class plus newline -  FALLTHROUGH; - -  /* -   * Character classes. -   */ -  case Magic('.'): -  case Magic('i'): -  case Magic('I'): -  case Magic('k'): -  case Magic('K'): -  case Magic('f'): -  case Magic('F'): -  case Magic('p'): -  case Magic('P'): -  case Magic('s'): -  case Magic('S'): -  case Magic('d'): -  case Magic('D'): -  case Magic('x'): -  case Magic('X'): -  case Magic('o'): -  case Magic('O'): -  case Magic('w'): -  case Magic('W'): -  case Magic('h'): -  case Magic('H'): -  case Magic('a'): -  case Magic('A'): -  case Magic('l'): -  case Magic('L'): -  case Magic('u'): -  case Magic('U'): -    p = vim_strchr(classchars, no_Magic(c)); -    if (p == NULL) -      EMSG_RET_NULL(_("E63: invalid use of \\_")); -    /* When '.' is followed by a composing char ignore the dot, so that -     * the composing char is matched here. */ -    if (c == Magic('.') && utf_iscomposing(peekchr())) { -      c = getchr(); -      goto do_multibyte; -    } -    ret = regnode(classcodes[p - classchars] + extra); -    *flagp |= HASWIDTH | SIMPLE; -    break; - -  case Magic('n'): -    if (reg_string) { -      /* In a string "\n" matches a newline character. 
*/ -      ret = regnode(EXACTLY); -      regc(NL); -      regc(NUL); -      *flagp |= HASWIDTH | SIMPLE; -    } else { -      /* In buffer text "\n" matches the end of a line. */ -      ret = regnode(NEWL); -      *flagp |= HASWIDTH | HASNL; -    } -    break; - -  case Magic('('): -    if (one_exactly) -      EMSG_ONE_RET_NULL; -    ret = reg(REG_PAREN, &flags); -    if (ret == NULL) -      return NULL; -    *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); -    break; - -  case NUL: -  case Magic('|'): -  case Magic('&'): -  case Magic(')'): -    if (one_exactly) -      EMSG_ONE_RET_NULL; -    IEMSG_RET_NULL(_(e_internal));       // Supposed to be caught earlier. -  // NOTREACHED - -  case Magic('='): -  case Magic('?'): -  case Magic('+'): -  case Magic('@'): -  case Magic('{'): -  case Magic('*'): -    c = no_Magic(c); -    EMSG3_RET_NULL(_("E64: %s%c follows nothing"), -                   (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c); -  // NOTREACHED - -  case Magic('~'):              /* previous substitute pattern */ -    if (reg_prev_sub != NULL) { -      char_u      *lp; - -      ret = regnode(EXACTLY); -      lp = reg_prev_sub; -      while (*lp != NUL) -        regc(*lp++); -      regc(NUL); -      if (*reg_prev_sub != NUL) { -        *flagp |= HASWIDTH; -        if ((lp - reg_prev_sub) == 1) -          *flagp |= SIMPLE; -      } -    } else -      EMSG_RET_NULL(_(e_nopresub)); -    break; - -  case Magic('1'): -  case Magic('2'): -  case Magic('3'): -  case Magic('4'): -  case Magic('5'): -  case Magic('6'): -  case Magic('7'): -  case Magic('8'): -  case Magic('9'): -  { -    int refnum; - -    refnum = c - Magic('0'); -    if (!seen_endbrace(refnum)) { -      return NULL; -    } -    ret = regnode(BACKREF + refnum); -  } -  break; - -  case Magic('z'): -  { -    c = no_Magic(getchr()); -    switch (c) { -    case '(': if ((reg_do_extmatch & REX_SET) == 0) -        EMSG_RET_NULL(_(e_z_not_allowed)); -      if (one_exactly) - 
       EMSG_ONE_RET_NULL; -      ret = reg(REG_ZPAREN, &flags); -      if (ret == NULL) -        return NULL; -      *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH); -      re_has_z = REX_SET; -      break; - -    case '1': -    case '2': -    case '3': -    case '4': -    case '5': -    case '6': -    case '7': -    case '8': -    case '9': if ((reg_do_extmatch & REX_USE) == 0) -        EMSG_RET_NULL(_(e_z1_not_allowed)); -      ret = regnode(ZREF + c - '0'); -      re_has_z = REX_USE; -      break; - -    case 's': ret = regnode(MOPEN + 0); -      if (!re_mult_next("\\zs")) { -        return NULL; -      } -      break; - -    case 'e': ret = regnode(MCLOSE + 0); -      if (!re_mult_next("\\ze")) { -        return NULL; -      } -      break; - -    default:  EMSG_RET_NULL(_("E68: Invalid character after \\z")); -    } -  } -  break; - -  case Magic('%'): -  { -    c = no_Magic(getchr()); -    switch (c) { -    /* () without a back reference */ -    case '(': -      if (one_exactly) -        EMSG_ONE_RET_NULL; -      ret = reg(REG_NPAREN, &flags); -      if (ret == NULL) -        return NULL; -      *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); -      break; - -    /* Catch \%^ and \%$ regardless of where they appear in the -     * pattern -- regardless of whether or not it makes sense. */ -    case '^': -      ret = regnode(RE_BOF); -      break; - -    case '$': -      ret = regnode(RE_EOF); -      break; - -    case '#': -      ret = regnode(CURSOR); -      break; - -    case 'V': -      ret = regnode(RE_VISUAL); -      break; - -    case 'C': -      ret = regnode(RE_COMPOSING); -      break; - -    /* \%[abc]: Emit as a list of branches, all ending at the last -     * branch which matches nothing. 
*/ -    case '[': -      if (one_exactly)                          /* doesn't nest */ -        EMSG_ONE_RET_NULL; -      { -        char_u    *lastbranch; -        char_u    *lastnode = NULL; -        char_u    *br; - -        ret = NULL; -        while ((c = getchr()) != ']') { -          if (c == NUL) -            EMSG2_RET_NULL(_(e_missing_sb), -                reg_magic == MAGIC_ALL); -          br = regnode(BRANCH); -          if (ret == NULL) { -            ret = br; -          } else { -            regtail(lastnode, br); -            if (reg_toolong) { -              return NULL; -            } -          } - -          ungetchr(); -          one_exactly = true; -          lastnode = regatom(flagp); -          one_exactly = false; -          if (lastnode == NULL) { -            return NULL; -          } -        } -        if (ret == NULL) -          EMSG2_RET_NULL(_(e_empty_sb), -              reg_magic == MAGIC_ALL); -        lastbranch = regnode(BRANCH); -        br = regnode(NOTHING); -        if (ret != JUST_CALC_SIZE) { -          regtail(lastnode, br); -          regtail(lastbranch, br); -          /* connect all branches to the NOTHING -           * branch at the end */ -          for (br = ret; br != lastnode; ) { -            if (OP(br) == BRANCH) { -              regtail(br, lastbranch); -              if (reg_toolong) { -                return NULL; -              } -              br = OPERAND(br); -            } else -              br = regnext(br); -          } -        } -        *flagp &= ~(HASWIDTH | SIMPLE); -        break; -      } - -    case 'd':               /* %d123 decimal */ -    case 'o':               /* %o123 octal */ -    case 'x':               /* %xab hex 2 */ -    case 'u':               /* %uabcd hex 4 */ -    case 'U':               /* %U1234abcd hex 8 */ -    { -      int64_t i; - -      switch (c) { -      case 'd': i = getdecchrs(); break; -      case 'o': i = getoctchrs(); break; -      case 'x': i = gethexchrs(2); 
break; -      case 'u': i = gethexchrs(4); break; -      case 'U': i = gethexchrs(8); break; -      default:  i = -1; break; -      } - -      if (i < 0 || i > INT_MAX) { -        EMSG2_RET_NULL(_("E678: Invalid character after %s%%[dxouU]"), -                       reg_magic == MAGIC_ALL); -      } -      if (use_multibytecode(i)) { -        ret = regnode(MULTIBYTECODE); -      } else { -        ret = regnode(EXACTLY); -      } -      if (i == 0) { -        regc(0x0a); -      } else { -        regmbc(i); -      } -      regc(NUL); -      *flagp |= HASWIDTH; -      break; -    } - -    default: -      if (ascii_isdigit(c) || c == '<' || c == '>' -          || c == '\'') { -        uint32_t n = 0; -        int cmp; - -        cmp = c; -        if (cmp == '<' || cmp == '>') -          c = getchr(); -        while (ascii_isdigit(c)) { -          n = n * 10 + (uint32_t)(c - '0'); -          c = getchr(); -        } -        if (c == '\'' && n == 0) { -          /* "\%'m", "\%<'m" and "\%>'m": Mark */ -          c = getchr(); -          ret = regnode(RE_MARK); -          if (ret == JUST_CALC_SIZE) -            regsize += 2; -          else { -            *regcode++ = c; -            *regcode++ = cmp; -          } -          break; -        } else if (c == 'l' || c == 'c' || c == 'v') { -          if (c == 'l') { -            ret = regnode(RE_LNUM); -            if (save_prev_at_start) { -              at_start = true; -            } -          } else if (c == 'c') { -            ret = regnode(RE_COL); -          } else { -            ret = regnode(RE_VCOL); -          } -          if (ret == JUST_CALC_SIZE) { -            regsize += 5; -          } else { -            // put the number and the optional -            // comparator after the opcode -            regcode = re_put_uint32(regcode, n); -            *regcode++ = cmp; -          } -          break; -        } -      } - -      EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), -          reg_magic == 
MAGIC_ALL); -    } -  } -  break; - -  case Magic('['): -collection: -    { -      char_u      *lp; - -      /* -       * If there is no matching ']', we assume the '[' is a normal -       * character.  This makes 'incsearch' and ":help [" work. -       */ -      lp = skip_anyof(regparse); -      if (*lp == ']') {         /* there is a matching ']' */ -        int startc = -1;                /* > 0 when next '-' is a range */ -        int endc; - -        /* -         * In a character class, different parsing rules apply. -         * Not even \ is special anymore, nothing is. -         */ -        if (*regparse == '^') {             /* Complement of range. */ -          ret = regnode(ANYBUT + extra); -          regparse++; -        } else -          ret = regnode(ANYOF + extra); - -        /* At the start ']' and '-' mean the literal character. */ -        if (*regparse == ']' || *regparse == '-') { -          startc = *regparse; -          regc(*regparse++); -        } - -        while (*regparse != NUL && *regparse != ']') { -          if (*regparse == '-') { -            ++regparse; -            /* The '-' is not used for a range at the end and -             * after or before a '\n'. 
*/ -            if (*regparse == ']' || *regparse == NUL -                || startc == -1 -                || (regparse[0] == '\\' && regparse[1] == 'n')) { -              regc('-'); -              startc = '-';                     /* [--x] is a range */ -            } else { -              /* Also accept "a-[.z.]" */ -              endc = 0; -              if (*regparse == '[') -                endc = get_coll_element(®parse); -              if (endc == 0) { -                endc = mb_ptr2char_adv((const char_u **)®parse); -              } - -              /* Handle \o40, \x20 and \u20AC style sequences */ -              if (endc == '\\' && !reg_cpo_lit) -                endc = coll_get_char(); - -              if (startc > endc) { -                EMSG_RET_NULL(_(e_reverse_range)); -              } -              if (utf_char2len(startc) > 1 -                  || utf_char2len(endc) > 1) { -                // Limit to a range of 256 chars -                if (endc > startc + 256) { -                  EMSG_RET_NULL(_(e_large_class)); -                } -                while (++startc <= endc) { -                  regmbc(startc); -                } -              } else { -                while (++startc <= endc) -                  regc(startc); -              } -              startc = -1; -            } -          } -          /* -           * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim -           * accepts "\t", "\e", etc., but only when the 'l' flag in -           * 'cpoptions' is not included. 
-           */ -          else if (*regparse == '\\' -                   && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL -                       || (!reg_cpo_lit -                           && vim_strchr(REGEXP_ABBR, -                               regparse[1]) != NULL))) { -            regparse++; -            if (*regparse == 'n') { -              /* '\n' in range: also match NL */ -              if (ret != JUST_CALC_SIZE) { -                /* Using \n inside [^] does not change what -                 * matches. "[^\n]" is the same as ".". */ -                if (*ret == ANYOF) { -                  *ret = ANYOF + ADD_NL; -                  *flagp |= HASNL; -                } -                /* else: must have had a \n already */ -              } -              regparse++; -              startc = -1; -            } else if (*regparse == 'd' -                       || *regparse == 'o' -                       || *regparse == 'x' -                       || *regparse == 'u' -                       || *regparse == 'U') { -              startc = coll_get_char(); -              if (startc == 0) -                regc(0x0a); -              else -                regmbc(startc); -            } else { -              startc = backslash_trans(*regparse++); -              regc(startc); -            } -          } else if (*regparse == '[') { -            int c_class; -            int cu; - -            c_class = get_char_class(®parse); -            startc = -1; -            /* Characters assumed to be 8 bits! 
*/ -            switch (c_class) { -            case CLASS_NONE: -              c_class = get_equi_class(®parse); -              if (c_class != 0) { -                /* produce equivalence class */ -                reg_equi_class(c_class); -              } else if ((c_class = -                            get_coll_element(®parse)) != 0) { -                /* produce a collating element */ -                regmbc(c_class); -              } else { -                /* literal '[', allow [[-x] as a range */ -                startc = *regparse++; -                regc(startc); -              } -              break; -            case CLASS_ALNUM: -              for (cu = 1; cu < 128; cu++) { -                if (isalnum(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_ALPHA: -              for (cu = 1; cu < 128; cu++) { -                if (isalpha(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_BLANK: -              regc(' '); -              regc('\t'); -              break; -            case CLASS_CNTRL: -              for (cu = 1; cu <= 127; cu++) { -                if (iscntrl(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_DIGIT: -              for (cu = 1; cu <= 127; cu++) { -                if (ascii_isdigit(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_GRAPH: -              for (cu = 1; cu <= 127; cu++) { -                if (isgraph(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_LOWER: -              for (cu = 1; cu <= 255; cu++) { -                if (mb_islower(cu) && cu != 170 && cu != 186) { -                  regmbc(cu); -                } -              } -              break; -            case 
CLASS_PRINT: -              for (cu = 1; cu <= 255; cu++) { -                if (vim_isprintc(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_PUNCT: -              for (cu = 1; cu < 128; cu++) { -                if (ispunct(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_SPACE: -              for (cu = 9; cu <= 13; cu++) -                regc(cu); -              regc(' '); -              break; -            case CLASS_UPPER: -              for (cu = 1; cu <= 255; cu++) { -                if (mb_isupper(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_XDIGIT: -              for (cu = 1; cu <= 255; cu++) { -                if (ascii_isxdigit(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_TAB: -              regc('\t'); -              break; -            case CLASS_RETURN: -              regc('\r'); -              break; -            case CLASS_BACKSPACE: -              regc('\b'); -              break; -            case CLASS_ESCAPE: -              regc(ESC); -              break; -            case CLASS_IDENT: -              for (cu = 1; cu <= 255; cu++) { -                if (vim_isIDc(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_KEYWORD: -              for (cu = 1; cu <= 255; cu++) { -                if (reg_iswordc(cu)) { -                  regmbc(cu); -                } -              } -              break; -            case CLASS_FNAME: -              for (cu = 1; cu <= 255; cu++) { -                if (vim_isfilec(cu)) { -                  regmbc(cu); -                } -              } -              break; -            } -          } else { -            // produce a multibyte character, including any 
-            // following composing characters. -            startc = utf_ptr2char(regparse); -            int len = utfc_ptr2len(regparse); -            if (utf_char2len(startc) != len) { -              // composing chars -              startc = -1; -            } -            while (--len >= 0) { -              regc(*regparse++); -            } -          } -        } -        regc(NUL); -        prevchr_len = 1;                /* last char was the ']' */ -        if (*regparse != ']') -          EMSG_RET_NULL(_(e_toomsbra));                 /* Cannot happen? */ -        skipchr();                  /* let's be friends with the lexer again */ -        *flagp |= HASWIDTH | SIMPLE; -        break; -      } else if (reg_strict) -        EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); -    } -    FALLTHROUGH; - -  default: -  { -    int len; - -    /* A multi-byte character is handled as a separate atom if it's -     * before a multi and when it's a composing char. */ -    if (use_multibytecode(c)) { -do_multibyte: -      ret = regnode(MULTIBYTECODE); -      regmbc(c); -      *flagp |= HASWIDTH | SIMPLE; -      break; -    } - -    ret = regnode(EXACTLY); - -    /* -     * Append characters as long as: -     * - there is no following multi, we then need the character in -     *   front of it as a single character operand -     * - not running into a Magic character -     * - "one_exactly" is not set -     * But always emit at least one character.  Might be a Multi, -     * e.g., a "[" without matching "]". -     */ -    for (len = 0; c != NUL && (len == 0 -                               || (re_multi_type(peekchr()) == NOT_MULTI -                                   && !one_exactly -                                   && !is_Magic(c))); ++len) { -      c = no_Magic(c); -      { -        regmbc(c); -        { -          int l; - -          /* Need to get composing character too. 
*/ -          for (;; ) { -            l = utf_ptr2len(regparse); -            if (!utf_composinglike(regparse, regparse + l)) { -              break; -            } -            regmbc(utf_ptr2char(regparse)); -            skipchr(); -          } -        } -      } -      c = getchr(); -    } -    ungetchr(); - -    regc(NUL); -    *flagp |= HASWIDTH; -    if (len == 1) -      *flagp |= SIMPLE; -  } -  break; -  } - -  return ret; -} - -/// Used in a place where no * or \+ can follow. -static bool re_mult_next(char *what) -{ -  if (re_multi_type(peekchr()) == MULTI_MULT) { -    semsg(_("E888: (NFA regexp) cannot repeat %s"), what); -    rc_did_emsg = true; -    return false; -  } -  return true; -} - -// Return true if MULTIBYTECODE should be used instead of EXACTLY for -// character "c". -static bool use_multibytecode(int c) -{ -  return utf_char2len(c) > 1 -         && (re_multi_type(peekchr()) != NOT_MULTI -             || utf_iscomposing(c)); -} - -/* - * Emit a node. - * Return pointer to generated code. - */ -static char_u *regnode(int op) -{ -  char_u  *ret; - -  ret = regcode; -  if (ret == JUST_CALC_SIZE) -    regsize += 3; -  else { -    *regcode++ = op; -    *regcode++ = NUL;                   /* Null "next" pointer. */ -    *regcode++ = NUL; -  } -  return ret; -} - -/* - * Emit (if appropriate) a byte of code - */ -static void regc(int b) -{ -  if (regcode == JUST_CALC_SIZE) -    regsize++; -  else -    *regcode++ = b; -} - -/* - * Emit (if appropriate) a multi-byte character of code - */ -static void regmbc(int c) -{ -  if (regcode == JUST_CALC_SIZE) { -    regsize += utf_char2len(c); -  } else { -    regcode += utf_char2bytes(c, regcode); -  } -} - -/* - * Insert an operator in front of already-emitted operand - * - * Means relocating the operand. 
- */ -static void reginsert(int op, char_u *opnd) -{ -  char_u      *src; -  char_u      *dst; -  char_u      *place; - -  if (regcode == JUST_CALC_SIZE) { -    regsize += 3; -    return; -  } -  src = regcode; -  regcode += 3; -  dst = regcode; -  while (src > opnd) -    *--dst = *--src; - -  place = opnd;                 /* Op node, where operand used to be. */ -  *place++ = op; -  *place++ = NUL; -  *place = NUL; -} - -/* - * Insert an operator in front of already-emitted operand. - * Add a number to the operator. - */ -static void reginsert_nr(int op, long val, char_u *opnd) -{ -  char_u      *src; -  char_u      *dst; -  char_u      *place; - -  if (regcode == JUST_CALC_SIZE) { -    regsize += 7; -    return; -  } -  src = regcode; -  regcode += 7; -  dst = regcode; -  while (src > opnd) -    *--dst = *--src; - -  place = opnd;                 /* Op node, where operand used to be. */ -  *place++ = op; -  *place++ = NUL; -  *place++ = NUL; -  assert(val >= 0 && (uintmax_t)val <= UINT32_MAX); -  re_put_uint32(place, (uint32_t)val); -} - -/* - * Insert an operator in front of already-emitted operand. - * The operator has the given limit values as operands.  Also set next pointer. - * - * Means relocating the operand. - */ -static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) -{ -  char_u      *src; -  char_u      *dst; -  char_u      *place; - -  if (regcode == JUST_CALC_SIZE) { -    regsize += 11; -    return; -  } -  src = regcode; -  regcode += 11; -  dst = regcode; -  while (src > opnd) -    *--dst = *--src; - -  place = opnd;                 /* Op node, where operand used to be. 
*/ -  *place++ = op; -  *place++ = NUL; -  *place++ = NUL; -  assert(minval >= 0 && (uintmax_t)minval <= UINT32_MAX); -  place = re_put_uint32(place, (uint32_t)minval); -  assert(maxval >= 0 && (uintmax_t)maxval <= UINT32_MAX); -  place = re_put_uint32(place, (uint32_t)maxval); -  regtail(opnd, place); -} - -/* - * Write a four bytes number at "p" and return pointer to the next char. - */ -static char_u *re_put_uint32(char_u *p, uint32_t val) -{ -  *p++ = (char_u) ((val >> 24) & 0377); -  *p++ = (char_u) ((val >> 16) & 0377); -  *p++ = (char_u) ((val >> 8) & 0377); -  *p++ = (char_u) (val & 0377); -  return p; -} - -// Set the next-pointer at the end of a node chain. -static void regtail(char_u *p, char_u *val) -{ -  int offset; - -  if (p == JUST_CALC_SIZE) { -    return; -  } - -  // Find last node. -  char_u *scan = p; -  for (;; ) { -    char_u *temp = regnext(scan); -    if (temp == NULL) { -      break; -    } -    scan = temp; -  } - -  if (OP(scan) == BACK) { -    offset = (int)(scan - val); -  } else { -    offset = (int)(val - scan); -  } -  // When the offset uses more than 16 bits it can no longer fit in the two -  // bytes available.  Use a global flag to avoid having to check return -  // values in too many places. -  if (offset > 0xffff) { -    reg_toolong = true; -  } else { -    *(scan + 1) = (char_u)(((unsigned)offset >> 8) & 0377); -    *(scan + 2) = (char_u)(offset & 0377); -  } -} - -/* - * Like regtail, on item after a BRANCH; nop if none. - */ -static void regoptail(char_u *p, char_u *val) -{ -  /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */ -  if (p == NULL || p == JUST_CALC_SIZE -      || (OP(p) != BRANCH -          && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9))) -    return; -  regtail(OPERAND(p), val); -} - -/* - * Functions for getting characters from the regexp input. - */ - -/*   * Start parsing at "str".   
*/  static void initchr(char_u *str) @@ -2804,9 +591,10 @@ static int peekchr(void)    case '.':    case '[':    case '~': -    /* magic when 'magic' is on */ -    if (reg_magic >= MAGIC_ON) +    // magic when 'magic' is on +    if (reg_magic >= MAGIC_ON) {        curchr = Magic(curchr); +    }      break;    case '(':    case ')': @@ -2821,18 +609,19 @@ static int peekchr(void)    case '|':    case '<':    case '>': -  case '#':           /* future ext. */ -  case '"':           /* future ext. */ -  case '\'':          /* future ext. */ -  case ',':           /* future ext. */ -  case '-':           /* future ext. */ -  case ':':           /* future ext. */ -  case ';':           /* future ext. */ -  case '`':           /* future ext. */ -  case '/':           /* Can't be used in / command */ -    /* magic only after "\v" */ -    if (reg_magic == MAGIC_ALL) +  case '#':           // future ext. +  case '"':           // future ext. +  case '\'':          // future ext. +  case ',':           // future ext. +  case '-':           // future ext. +  case ':':           // future ext. +  case ';':           // future ext. +  case '`':           // future ext. +  case '/':           // Can't be used in / command +    // magic only after "\v" +    if (reg_magic == MAGIC_ALL) {        curchr = Magic(curchr); +    }      break;    case '*':      // * is not magic as the very first character, eg "?*ptr", when @@ -2945,11 +734,12 @@ static int peekchr(void)   */  static void skipchr(void)  { -  /* peekchr() eats a backslash, do the same here */ -  if (*regparse == '\\') +  // peekchr() eats a backslash, do the same here +  if (*regparse == '\\') {      prevchr_len = 1; -  else +  } else {      prevchr_len = 0; +  }    if (regparse[prevchr_len] != NUL) {      // Exclude composing chars that utfc_ptr2len does include.      
prevchr_len += utf_ptr2len(regparse + prevchr_len); @@ -2959,7 +749,7 @@ static void skipchr(void)    at_start = false;    prevprevchr = prevchr;    prevchr = curchr; -  curchr = nextchr;         /* use previously unget char, or -1 */ +  curchr = nextchr;         // use previously unget char, or -1    nextchr = -1;  } @@ -3052,8 +842,8 @@ static int64_t getdecchrs(void)        break;      nr *= 10;      nr += c - '0'; -    ++regparse; -    curchr = -1;     /* no longer valid */ +    regparse++; +    curchr = -1;     // no longer valid    }    if (i == 0) @@ -3089,29 +879,6 @@ static int64_t getoctchrs(void)    return nr;  } -/* - * Get a number after a backslash that is inside []. - * When nothing is recognized return a backslash. - */ -static int coll_get_char(void) -{ -  int64_t nr = -1; - -  switch (*regparse++) { -  case 'd': nr = getdecchrs(); break; -  case 'o': nr = getoctchrs(); break; -  case 'x': nr = gethexchrs(2); break; -  case 'u': nr = gethexchrs(4); break; -  case 'U': nr = gethexchrs(8); break; -  } -  if (nr < 0 || nr > INT_MAX) { -    // If getting the number fails be backwards compatible: the character -    // is a backslash. -    regparse--; -    nr = '\\'; -  } -  return nr; -}  /*   * read_limits - Read two integers to be taken as a minimum and maximum. @@ -3159,7 +926,7 @@ static int read_limits(long *minval, long *maxval)      *minval = *maxval;      *maxval = tmp;    } -  skipchr();            /* let's be friends with the lexer again */ +  skipchr();            // let's be friends with the lexer again    return OK;  } @@ -3171,22 +938,6 @@ static int read_limits(long *minval, long *maxval)   * Global work variables for vim_regexec().   */ -/* Save the sub-expressions before attempting a match. */ -#define save_se(savep, posp, pp) \ -  REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)) - -/* After a failed match restore the sub-expressions. 
*/ -#define restore_se(savep, posp, pp) { \ -    if (REG_MULTI) \ -      *(posp) = (savep)->se_u.pos; \ -    else \ -      *(pp) = (savep)->se_u.ptr; } - - -#ifdef REGEXP_DEBUG -int regnarrate = 0; -#endif -  // Sometimes need to save a copy of a line.  Since alloc()/free() is very  // slow, we keep one allocated piece of memory and only re-allocate it when  // it's too small.  It's freed in bt_regexec_both() when finished. @@ -3262,41 +1013,6 @@ typedef struct {  static regexec_T rex;  static bool rex_in_use = false; -/* - * "regstack" and "backpos" are used by regmatch().  They are kept over calls - * to avoid invoking malloc() and free() often. - * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T - * or regbehind_T. - * "backpos_T" is a table with backpos_T for BACK - */ -static garray_T regstack = GA_EMPTY_INIT_VALUE; -static garray_T backpos = GA_EMPTY_INIT_VALUE; - -/* - * Both for regstack and backpos tables we use the following strategy of - * allocation (to reduce malloc/free calls): - * - Initial size is fairly small. - * - When needed, the tables are grown bigger (8 times at first, double after - *   that). - * - After executing the match we free the memory only if the array has grown. - *   Thus the memory is kept allocated when it's at the initial size. - * This makes it fast while not keeping a lot of memory allocated. - * A three times speed increase was observed when using many simple patterns. - */ -#define REGSTACK_INITIAL        2048 -#define BACKPOS_INITIAL         64 - -#if defined(EXITFREE) -void free_regexp_stuff(void) -{ -  ga_clear(®stack); -  ga_clear(&backpos); -  xfree(reg_tofree); -  xfree(reg_prev_sub); -} - -#endif -  // Return true if character 'c' is included in 'iskeyword' option for  // "reg_buf" buffer.  
static bool reg_iswordc(int c) @@ -3321,312 +1037,15 @@ static char_u *reg_getline(linenr_T lnum)    return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false);  } -static regsave_T behind_pos; - -static char_u   *reg_startzp[NSUBEXP];  /* Workspace to mark beginning */ -static char_u   *reg_endzp[NSUBEXP];    /*   and end of \z(...\) matches */ -static lpos_T reg_startzpos[NSUBEXP];   /* idem, beginning pos */ -static lpos_T reg_endzpos[NSUBEXP];     /* idem, end pos */ +static char_u   *reg_startzp[NSUBEXP];  // Workspace to mark beginning +static char_u   *reg_endzp[NSUBEXP];    //   and end of \z(...\) matches +static lpos_T reg_startzpos[NSUBEXP];   // idem, beginning pos +static lpos_T reg_endzpos[NSUBEXP];     // idem, end pos  // true if using multi-line regexp.  #define REG_MULTI       (rex.reg_match == NULL)  /* - * Match a regexp against a string. - * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). - * Uses curbuf for line count and 'iskeyword'. - * If "line_lbr" is true, consider a "\n" in "line" to be a line break. - * - * Returns 0 for failure, number of lines contained in the match otherwise. - */ -static int  -bt_regexec_nl ( -    regmatch_T *rmp, -    char_u *line,      /* string to match against */ -    colnr_T col,       /* column to start looking for match */ -    bool line_lbr -) -{ -  rex.reg_match = rmp; -  rex.reg_mmatch = NULL; -  rex.reg_maxline = 0; -  rex.reg_line_lbr = line_lbr; -  rex.reg_buf = curbuf; -  rex.reg_win = NULL; -  rex.reg_ic = rmp->rm_ic; -  rex.reg_icombine = false; -  rex.reg_maxcol = 0; - -  long r = bt_regexec_both(line, col, NULL, NULL); -  assert(r <= INT_MAX); -  return (int)r; -} - -/// Wrapper around strchr which accounts for case-insensitive searches and -/// non-ASCII characters. -/// -/// This function is used a lot for simple searches, keep it fast! 
-/// -/// @param  s  string to search -/// @param  c  character to find in @a s -/// -/// @return  NULL if no match, otherwise pointer to the position in @a s -static inline char_u *cstrchr(const char_u *const s, const int c) -  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL -  FUNC_ATTR_ALWAYS_INLINE -{ -  if (!rex.reg_ic) { -    return vim_strchr(s, c); -  } - -  // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is -  // expected to be highly optimized. -  if (c > 0x80) { -    const int folded_c = utf_fold(c); -    for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) { -      if (utf_fold(utf_ptr2char(p)) == folded_c) { -        return (char_u *)p; -      } -    } -    return NULL; -  } - -  int cc; -  if (ASCII_ISUPPER(c)) { -    cc = TOLOWER_ASC(c); -  } else if (ASCII_ISLOWER(c)) { -    cc = TOUPPER_ASC(c); -  } else { -    return vim_strchr(s, c); -  } - -  char tofind[] = { (char)c, (char)cc, NUL }; -  return (char_u *)strpbrk((const char *)s, tofind); -} - -/// Matches a regexp against multiple lines. -/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). -/// Uses curbuf for line count and 'iskeyword'. -///  -/// @param win Window in which to search or NULL -/// @param buf Buffer in which to search -/// @param lnum Number of line to start looking for match  -/// @param col Column to start looking for match -/// @param tm Timeout limit or NULL -/// -/// @return zero if there is no match and number of lines contained in the match -///         otherwise. 
-static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, -                             linenr_T lnum, colnr_T col, -                             proftime_T *tm, int *timed_out) -{ -  rex.reg_match = NULL; -  rex.reg_mmatch = rmp; -  rex.reg_buf = buf; -  rex.reg_win = win; -  rex.reg_firstlnum = lnum; -  rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; -  rex.reg_line_lbr = false; -  rex.reg_ic = rmp->rmm_ic; -  rex.reg_icombine = false; -  rex.reg_maxcol = rmp->rmm_maxcol; - -  return bt_regexec_both(NULL, col, tm, timed_out); -} - -/// Match a regexp against a string ("line" points to the string) or multiple -/// lines (if "line" is NULL, use reg_getline()). -/// @return 0 for failure, or number of lines contained in the match. -static long bt_regexec_both(char_u *line, -                            colnr_T col,      // column to start search -                            proftime_T *tm,   // timeout limit or NULL -                            int *timed_out)   // flag set on timeout or NULL -{ -  bt_regprog_T        *prog; -  char_u      *s; -  long retval = 0L; - -  /* Create "regstack" and "backpos" if they are not allocated yet. -   * We allocate *_INITIAL amount of bytes first and then set the grow size -   * to much bigger value to avoid many malloc calls in case of deep regular -   * expressions.  */ -  if (regstack.ga_data == NULL) { -    /* Use an item size of 1 byte, since we push different things -     * onto the regstack. 
*/ -    ga_init(®stack, 1, REGSTACK_INITIAL); -    ga_grow(®stack, REGSTACK_INITIAL); -    ga_set_growsize(®stack, REGSTACK_INITIAL * 8); -  } - -  if (backpos.ga_data == NULL) { -    ga_init(&backpos, sizeof(backpos_T), BACKPOS_INITIAL); -    ga_grow(&backpos, BACKPOS_INITIAL); -    ga_set_growsize(&backpos, BACKPOS_INITIAL * 8); -  } - -  if (REG_MULTI) { -    prog = (bt_regprog_T *)rex.reg_mmatch->regprog; -    line = reg_getline((linenr_T)0); -    rex.reg_startpos = rex.reg_mmatch->startpos; -    rex.reg_endpos = rex.reg_mmatch->endpos; -  } else { -    prog = (bt_regprog_T *)rex.reg_match->regprog; -    rex.reg_startp = rex.reg_match->startp; -    rex.reg_endp = rex.reg_match->endp; -  } - -  /* Be paranoid... */ -  if (prog == NULL || line == NULL) { -    iemsg(_(e_null)); -    goto theend; -  } - -  /* Check validity of program. */ -  if (prog_magic_wrong()) -    goto theend; - -  // If the start column is past the maximum column: no need to try. -  if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { -    goto theend; -  } - -  // If pattern contains "\c" or "\C": overrule value of rex.reg_ic -  if (prog->regflags & RF_ICASE) { -    rex.reg_ic = true; -  } else if (prog->regflags & RF_NOICASE) { -    rex.reg_ic = false; -  } - -  // If pattern contains "\Z" overrule value of rex.reg_icombine -  if (prog->regflags & RF_ICOMBINE) { -    rex.reg_icombine = true; -  } - -  /* If there is a "must appear" string, look for it. */ -  if (prog->regmust != NULL) { -    int c = utf_ptr2char(prog->regmust); -    s = line + col; - -    // This is used very often, esp. for ":global".  Use two versions of -    // the loop to avoid overhead of conditions. -    if (!rex.reg_ic) { -      while ((s = vim_strchr(s, c)) != NULL) { -        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { -          break;  // Found it. 
-        } -        MB_PTR_ADV(s); -      } -    } else { -      while ((s = cstrchr(s, c)) != NULL) { -        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { -          break;  // Found it. -        } -        MB_PTR_ADV(s); -      } -    } -    if (s == NULL) {  // Not present. -      goto theend; -    } -  } - -  rex.line = line; -  rex.lnum = 0; -  reg_toolong = false; - -  /* Simplest case: Anchored match need be tried only once. */ -  if (prog->reganch) { -    int c = utf_ptr2char(rex.line + col); -    if (prog->regstart == NUL -        || prog->regstart == c -        || (rex.reg_ic -            && (utf_fold(prog->regstart) == utf_fold(c) -                || (c < 255 && prog->regstart < 255 -                    && mb_tolower(prog->regstart) == mb_tolower(c))))) { -      retval = regtry(prog, col, tm, timed_out); -    } else { -      retval = 0; -    } -  } else { -    int tm_count = 0; -    /* Messy cases:  unanchored match. */ -    while (!got_int) { -      if (prog->regstart != NUL) { -        // Skip until the char we know it must start with. -        s = cstrchr(rex.line + col, prog->regstart); -        if (s == NULL) { -          retval = 0; -          break; -        } -        col = (int)(s - rex.line); -      } - -      // Check for maximum column to try. -      if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { -        retval = 0; -        break; -      } - -      retval = regtry(prog, col, tm, timed_out); -      if (retval > 0) { -        break; -      } - -      // if not currently on the first line, get it again -      if (rex.lnum != 0) { -        rex.lnum = 0; -        rex.line = reg_getline((linenr_T)0); -      } -      if (rex.line[col] == NUL) { -        break; -      } -      col += utfc_ptr2len(rex.line + col); -      // Check for timeout once in a twenty times to avoid overhead. 
-      if (tm != NULL && ++tm_count == 20) { -        tm_count = 0; -        if (profile_passed_limit(*tm)) { -          if (timed_out != NULL) { -            *timed_out = true; -          } -          break; -        } -      } -    } -  } - -theend: -  /* Free "reg_tofree" when it's a bit big. -   * Free regstack and backpos if they are bigger than their initial size. */ -  if (reg_tofreelen > 400) { -    XFREE_CLEAR(reg_tofree); -  } -  if (regstack.ga_maxlen > REGSTACK_INITIAL) -    ga_clear(®stack); -  if (backpos.ga_maxlen > BACKPOS_INITIAL) -    ga_clear(&backpos); - -  if (retval > 0) { -    // Make sure the end is never before the start.  Can happen when \zs -    // and \ze are used. -    if (REG_MULTI) { -      const lpos_T *const start = &rex.reg_mmatch->startpos[0]; -      const lpos_T *const end = &rex.reg_mmatch->endpos[0]; - -      if (end->lnum < start->lnum -          || (end->lnum == start->lnum && end->col < start->col)) { -        rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0]; -      } -    } else { -      if (rex.reg_match->endp[0] < rex.reg_match->startp[0]) { -        rex.reg_match->endp[0] = rex.reg_match->startp[0]; -      } -    } -  } - -  return retval; -} - - -/*   * Create a new extmatch and mark it as referenced once.   */  static reg_extmatch_T *make_extmatch(void) @@ -3662,75 +1081,6 @@ void unref_extmatch(reg_extmatch_T *em)    }  } -/// Try match of "prog" with at rex.line["col"]. -/// @returns 0 for failure, or number of lines contained in the match. -static long regtry(bt_regprog_T *prog, -                   colnr_T col, -                   proftime_T *tm,    // timeout limit or NULL -                   int *timed_out)    // flag set on timeout or NULL -{ -  rex.input = rex.line + col; -  rex.need_clear_subexpr = true; -  // Clear the external match subpointers if necessaey. 
-  rex.need_clear_zsubexpr = (prog->reghasz == REX_SET); - -  if (regmatch(prog->program + 1, tm, timed_out) == 0) { -    return 0; -  } - -  cleanup_subexpr(); -  if (REG_MULTI) { -    if (rex.reg_startpos[0].lnum < 0) { -      rex.reg_startpos[0].lnum = 0; -      rex.reg_startpos[0].col = col; -    } -    if (rex.reg_endpos[0].lnum < 0) { -      rex.reg_endpos[0].lnum = rex.lnum; -      rex.reg_endpos[0].col = (int)(rex.input - rex.line); -    } else { -      // Use line number of "\ze". -      rex.lnum = rex.reg_endpos[0].lnum; -    } -  } else { -    if (rex.reg_startp[0] == NULL) { -      rex.reg_startp[0] = rex.line + col; -    } -    if (rex.reg_endp[0] == NULL) { -      rex.reg_endp[0] = rex.input; -    } -  } -  /* Package any found \z(...\) matches for export. Default is none. */ -  unref_extmatch(re_extmatch_out); -  re_extmatch_out = NULL; - -  if (prog->reghasz == REX_SET) { -    int i; - -    cleanup_zsubexpr(); -    re_extmatch_out = make_extmatch(); -    for (i = 0; i < NSUBEXP; i++) { -      if (REG_MULTI) { -        /* Only accept single line matches. */ -        if (reg_startzpos[i].lnum >= 0 -            && reg_endzpos[i].lnum == reg_startzpos[i].lnum -            && reg_endzpos[i].col >= reg_startzpos[i].col) { -          re_extmatch_out->matches[i] = -            vim_strnsave(reg_getline(reg_startzpos[i].lnum) -                         + reg_startzpos[i].col, -                         reg_endzpos[i].col -                         - reg_startzpos[i].col); -        } -      } else { -        if (reg_startzp[i] != NULL && reg_endzp[i] != NULL) -          re_extmatch_out->matches[i] = -            vim_strnsave(reg_startzp[i], reg_endzp[i] - reg_startzp[i]); -      } -    } -  } -  return 1 + rex.lnum; -} - -  // Get class of previous character.  
static int reg_prev_class(void)  { @@ -3813,1802 +1163,6 @@ static bool reg_match_visual(void)    return true;  } -#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) - -/* - * The arguments from BRACE_LIMITS are stored here.  They are actually local - * to regmatch(), but they are here to reduce the amount of stack space used - * (it can be called recursively many times). - */ -static long bl_minval; -static long bl_maxval; - -/// Main matching routine -/// -/// Conceptually the strategy is simple: Check to see whether the current node -/// matches, push an item onto the regstack and loop to see whether the rest -/// matches, and then act accordingly.  In practice we make some effort to -/// avoid using the regstack, in particular by going through "ordinary" nodes -/// (that don't need to know whether the rest of the match failed) by a nested -/// loop. -/// -/// Returns true when there is a match.  Leaves rex.input and rex.lnum -/// just after the last matched character. -/// Returns false when there is no match.  Leaves rex.input and rex.lnum in an -/// undefined state! -static bool regmatch( -    char_u *scan,               // Current node. -    proftime_T *tm,             // timeout limit or NULL -    int *timed_out              // flag set on timeout or NULL -) -{ -  char_u        *next;          /* Next node. */ -  int op; -  int c; -  regitem_T     *rp; -  int no; -  int status;                   // one of the RA_ values: -  int tm_count = 0; -#define RA_FAIL         1       // something failed, abort -#define RA_CONT         2       // continue in inner loop -#define RA_BREAK        3       // break inner loop -#define RA_MATCH        4       // successful match -#define RA_NOMATCH      5       // didn't match - -  // Make "regstack" and "backpos" empty.  They are allocated and freed in -  // bt_regexec_both() to reduce malloc()/free() calls. -  regstack.ga_len = 0; -  backpos.ga_len = 0; - -  /* -   * Repeat until "regstack" is empty. 
-   */ -  for (;; ) { -    /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q". -     * Allow interrupting them with CTRL-C. */ -    fast_breakcheck(); - -#ifdef REGEXP_DEBUG -    if (scan != NULL && regnarrate) { -      mch_errmsg((char *)regprop(scan)); -      mch_errmsg("(\n"); -    } -#endif - -    /* -     * Repeat for items that can be matched sequentially, without using the -     * regstack. -     */ -    for (;; ) { -      if (got_int || scan == NULL) { -        status = RA_FAIL; -        break; -      } -      // Check for timeout once in a 100 times to avoid overhead. -      if (tm != NULL && ++tm_count == 100) { -        tm_count = 0; -        if (profile_passed_limit(*tm)) { -          if (timed_out != NULL) { -            *timed_out = true; -          } -          status = RA_FAIL; -          break; -        } -      } -      status = RA_CONT; - -#ifdef REGEXP_DEBUG -      if (regnarrate) { -        mch_errmsg((char *)regprop(scan)); -        mch_errmsg("...\n"); -        if (re_extmatch_in != NULL) { -          int i; - -          mch_errmsg(_("External submatches:\n")); -          for (i = 0; i < NSUBEXP; i++) { -            mch_errmsg("    \""); -            if (re_extmatch_in->matches[i] != NULL) -              mch_errmsg((char *)re_extmatch_in->matches[i]); -            mch_errmsg("\"\n"); -          } -        } -      } -#endif -      next = regnext(scan); - -      op = OP(scan); -      // Check for character class with NL added. 
-      if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI -          && *rex.input == NUL && rex.lnum <= rex.reg_maxline) { -        reg_nextline(); -      } else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n') { -        ADVANCE_REGINPUT(); -      } else { -        if (WITH_NL(op)) { -          op -= ADD_NL; -        } -        c = utf_ptr2char(rex.input); -        switch (op) { -        case BOL: -          if (rex.input != rex.line) { -            status = RA_NOMATCH; -          } -          break; - -        case EOL: -          if (c != NUL) { -            status = RA_NOMATCH; -          } -          break; - -        case RE_BOF: -          // We're not at the beginning of the file when below the first -          // line where we started, not at the start of the line or we -          // didn't start at the first line of the buffer. -          if (rex.lnum != 0 || rex.input != rex.line -              || (REG_MULTI && rex.reg_firstlnum > 1)) { -            status = RA_NOMATCH; -          } -          break; - -        case RE_EOF: -          if (rex.lnum != rex.reg_maxline || c != NUL) { -            status = RA_NOMATCH; -          } -          break; - -        case CURSOR: -          // Check if the buffer is in a window and compare the -          // rex.reg_win->w_cursor position to the match position. -          if (rex.reg_win == NULL -              || (rex.lnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) -              || ((colnr_T)(rex.input - rex.line) != -                  rex.reg_win->w_cursor.col)) { -            status = RA_NOMATCH; -          } -          break; - -        case RE_MARK: -          /* Compare the mark position to the match position. 
*/ -        { -          int mark = OPERAND(scan)[0]; -          int cmp = OPERAND(scan)[1]; -          pos_T   *pos; - -          pos = getmark_buf(rex.reg_buf, mark, false); -          if (pos == NULL                    // mark doesn't exist -              || pos->lnum <= 0) {           // mark isn't set in reg_buf -            status = RA_NOMATCH; -          } else { -            const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum -              && pos->col == MAXCOL -              ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) -              : pos->col; - -            if (pos->lnum == rex.lnum + rex.reg_firstlnum -                ? (pos_col == (colnr_T)(rex.input - rex.line) -                   ? (cmp == '<' || cmp == '>') -                   : (pos_col < (colnr_T)(rex.input - rex.line) -                      ? cmp != '>' -                      : cmp != '<')) -                : (pos->lnum < rex.lnum + rex.reg_firstlnum -                   ? cmp != '>' -                   : cmp != '<')) { -              status = RA_NOMATCH; -            } -          } -        } -        break; - -        case RE_VISUAL: -          if (!reg_match_visual()) -            status = RA_NOMATCH; -          break; - -        case RE_LNUM: -          assert(rex.lnum + rex.reg_firstlnum >= 0 -                 && (uintmax_t)(rex.lnum + rex.reg_firstlnum) <= UINT32_MAX); -          if (!REG_MULTI -              || !re_num_cmp((uint32_t)(rex.lnum + rex.reg_firstlnum), scan)) { -            status = RA_NOMATCH; -          } -          break; - -        case RE_COL: -          assert(rex.input - rex.line + 1 >= 0 -                 && (uintmax_t)(rex.input - rex.line + 1) <= UINT32_MAX); -          if (!re_num_cmp((uint32_t)(rex.input - rex.line + 1), scan)) { -            status = RA_NOMATCH; -          } -          break; - -        case RE_VCOL: -          if (!re_num_cmp(win_linetabsize(rex.reg_win == NULL -                                          ? 
curwin : rex.reg_win, -                                          rex.line, -                                          (colnr_T)(rex.input - rex.line)) + 1, -                          scan)) { -            status = RA_NOMATCH; -          } -          break; - -        case BOW:  // \<word; rex.input points to w -          if (c == NUL) {  // Can't match at end of line -            status = RA_NOMATCH; -          } else { -            // Get class of current and previous char (if it exists). -            const int this_class = -              mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); -            if (this_class <= 1) { -              status = RA_NOMATCH;  // Not on a word at all. -            } else if (reg_prev_class() == this_class) { -              status = RA_NOMATCH;  // Previous char is in same word. -            } -          } -          break; - -        case EOW:  // word\>; rex.input points after d -          if (rex.input == rex.line) {  // Can't match at start of line -            status = RA_NOMATCH; -          } else { -            int this_class, prev_class; - -            // Get class of current and previous char (if it exists). -            this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); -            prev_class = reg_prev_class(); -            if (this_class == prev_class -                || prev_class == 0 || prev_class == 1) { -              status = RA_NOMATCH; -            } -          } -          break;  // Matched with EOW - -        case ANY: -          // ANY does not match new lines. 
-          if (c == NUL) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case IDENT: -          if (!vim_isIDc(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case SIDENT: -          if (ascii_isdigit(*rex.input) || !vim_isIDc(c)) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case KWORD: -          if (!vim_iswordp_buf(rex.input, rex.reg_buf)) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case SKWORD: -          if (ascii_isdigit(*rex.input) -              || !vim_iswordp_buf(rex.input, rex.reg_buf)) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case FNAME: -          if (!vim_isfilec(c)) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case SFNAME: -          if (ascii_isdigit(*rex.input) || !vim_isfilec(c)) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case PRINT: -          if (!vim_isprintc(utf_ptr2char(rex.input))) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case SPRINT: -          if (ascii_isdigit(*rex.input) || !vim_isprintc(utf_ptr2char(rex.input))) { -            status = RA_NOMATCH; -          } else { -            ADVANCE_REGINPUT(); -          } -          break; - -        case WHITE: -          if (!ascii_iswhite(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NWHITE: -          if (c == NUL || ascii_iswhite(c)) -            status 
= RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case DIGIT: -          if (!ri_digit(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NDIGIT: -          if (c == NUL || ri_digit(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case HEX: -          if (!ri_hex(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NHEX: -          if (c == NUL || ri_hex(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case OCTAL: -          if (!ri_octal(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NOCTAL: -          if (c == NUL || ri_octal(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case WORD: -          if (!ri_word(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NWORD: -          if (c == NUL || ri_word(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case HEAD: -          if (!ri_head(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NHEAD: -          if (c == NUL || ri_head(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case ALPHA: -          if (!ri_alpha(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NALPHA: -          if (c == NUL || ri_alpha(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case LOWER: -   
       if (!ri_lower(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NLOWER: -          if (c == NUL || ri_lower(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case UPPER: -          if (!ri_upper(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case NUPPER: -          if (c == NUL || ri_upper(c)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case EXACTLY: -        { -          int len; -          char_u  *opnd; - -          opnd = OPERAND(scan); -          // Inline the first byte, for speed. -          if (*opnd != *rex.input -              && (!rex.reg_ic)) { -            status = RA_NOMATCH; -          } else if (*opnd == NUL) { -            // match empty string always works; happens when "~" is -            // empty. -          } else { -            if (opnd[1] == NUL && !rex.reg_ic) { -              len = 1;  // matched a single byte above -            } else { -              // Need to match first byte again for multi-byte. -              len = (int)STRLEN(opnd); -              if (cstrncmp(opnd, rex.input, &len) != 0) { -                status = RA_NOMATCH; -              } -            } -            // Check for following composing character, unless %C -            // follows (skips over all composing chars). -            if (status != RA_NOMATCH -                && utf_composinglike(rex.input, rex.input + len) -                && !rex.reg_icombine -                && OP(next) != RE_COMPOSING) { -              // raaron: This code makes a composing character get -              // ignored, which is the correct behavior (sometimes) -              // for voweled Hebrew texts. 
-              status = RA_NOMATCH; -            } -            if (status != RA_NOMATCH) { -              rex.input += len; -            } -          } -        } -        break; - -        case ANYOF: -        case ANYBUT: -          if (c == NUL) -            status = RA_NOMATCH; -          else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF)) -            status = RA_NOMATCH; -          else -            ADVANCE_REGINPUT(); -          break; - -        case MULTIBYTECODE: -          { -            int i, len; - -            const char_u *opnd = OPERAND(scan); -            // Safety check (just in case 'encoding' was changed since -            // compiling the program). -            if ((len = utfc_ptr2len(opnd)) < 2) { -              status = RA_NOMATCH; -              break; -            } -            const int opndc = utf_ptr2char(opnd); -            if (utf_iscomposing(opndc)) { -              // When only a composing char is given match at any -              // position where that composing char appears. -              status = RA_NOMATCH; -              for (i = 0; rex.input[i] != NUL; -                   i += utf_ptr2len(rex.input + i)) { -                const int inpc = utf_ptr2char(rex.input + i); -                if (!utf_iscomposing(inpc)) { -                  if (i > 0) { -                    break; -                  } -                } else if (opndc == inpc) { -                  // Include all following composing chars. 
-                  len = i + utfc_ptr2len(rex.input + i); -                  status = RA_MATCH; -                  break; -                } -              } -            } else { -              for (i = 0; i < len; i++) { -                if (opnd[i] != rex.input[i]) { -                  status = RA_NOMATCH; -                  break; -                } -              } -            } -            rex.input += len; -          } -          break; - -        case RE_COMPOSING: -          { -            // Skip composing characters. -            while (utf_iscomposing(utf_ptr2char(rex.input))) { -              MB_CPTR_ADV(rex.input); -            } -          } -          break; - -        case NOTHING: -          break; - -        case BACK: -        { -          int i; - -          /* -           * When we run into BACK we need to check if we don't keep -           * looping without matching any input.  The second and later -           * times a BACK is encountered it fails if the input is still -           * at the same position as the previous time. -           * The positions are stored in "backpos" and found by the -           * current value of "scan", the position in the RE program. -           */ -          backpos_T *bp = (backpos_T *)backpos.ga_data; -          for (i = 0; i < backpos.ga_len; ++i) -            if (bp[i].bp_scan == scan) -              break; -          if (i == backpos.ga_len) { -            backpos_T *p = GA_APPEND_VIA_PTR(backpos_T, &backpos); -            p->bp_scan = scan; -          } else if (reg_save_equal(&bp[i].bp_pos)) -            /* Still at same position as last time, fail. 
*/ -            status = RA_NOMATCH; - -          assert(status != RA_FAIL); -          if (status != RA_NOMATCH) { -            reg_save(&bp[i].bp_pos, &backpos); -          } -        } -        break; - -        case MOPEN + 0:     /* Match start: \zs */ -        case MOPEN + 1:     /* \( */ -        case MOPEN + 2: -        case MOPEN + 3: -        case MOPEN + 4: -        case MOPEN + 5: -        case MOPEN + 6: -        case MOPEN + 7: -        case MOPEN + 8: -        case MOPEN + 9: -        { -          no = op - MOPEN; -          cleanup_subexpr(); -          rp = regstack_push(RS_MOPEN, scan); -          if (rp == NULL) -            status = RA_FAIL; -          else { -            rp->rs_no = no; -            save_se(&rp->rs_un.sesave, &rex.reg_startpos[no], -                    &rex.reg_startp[no]); -            // We simply continue and handle the result when done. -          } -        } -        break; - -        case NOPEN:         /* \%( */ -        case NCLOSE:        /* \) after \%( */ -          if (regstack_push(RS_NOPEN, scan) == NULL) -            status = RA_FAIL; -          /* We simply continue and handle the result when done. */ -          break; - -        case ZOPEN + 1: -        case ZOPEN + 2: -        case ZOPEN + 3: -        case ZOPEN + 4: -        case ZOPEN + 5: -        case ZOPEN + 6: -        case ZOPEN + 7: -        case ZOPEN + 8: -        case ZOPEN + 9: -        { -          no = op - ZOPEN; -          cleanup_zsubexpr(); -          rp = regstack_push(RS_ZOPEN, scan); -          if (rp == NULL) -            status = RA_FAIL; -          else { -            rp->rs_no = no; -            save_se(&rp->rs_un.sesave, &reg_startzpos[no], -                &reg_startzp[no]); -            /* We simply continue and handle the result when done. 
*/ -          } -        } -        break; - -        case MCLOSE + 0:    /* Match end: \ze */ -        case MCLOSE + 1:    /* \) */ -        case MCLOSE + 2: -        case MCLOSE + 3: -        case MCLOSE + 4: -        case MCLOSE + 5: -        case MCLOSE + 6: -        case MCLOSE + 7: -        case MCLOSE + 8: -        case MCLOSE + 9: -        { -          no = op - MCLOSE; -          cleanup_subexpr(); -          rp = regstack_push(RS_MCLOSE, scan); -          if (rp == NULL) { -            status = RA_FAIL; -          } else { -            rp->rs_no = no; -            save_se(&rp->rs_un.sesave, &rex.reg_endpos[no], &rex.reg_endp[no]); -            // We simply continue and handle the result when done. -          } -        } -        break; - -        case ZCLOSE + 1:    /* \) after \z( */ -        case ZCLOSE + 2: -        case ZCLOSE + 3: -        case ZCLOSE + 4: -        case ZCLOSE + 5: -        case ZCLOSE + 6: -        case ZCLOSE + 7: -        case ZCLOSE + 8: -        case ZCLOSE + 9: -        { -          no = op - ZCLOSE; -          cleanup_zsubexpr(); -          rp = regstack_push(RS_ZCLOSE, scan); -          if (rp == NULL) -            status = RA_FAIL; -          else { -            rp->rs_no = no; -            save_se(&rp->rs_un.sesave, &reg_endzpos[no], -                &reg_endzp[no]); -            /* We simply continue and handle the result when done. */ -          } -        } -        break; - -        case BACKREF + 1: -        case BACKREF + 2: -        case BACKREF + 3: -        case BACKREF + 4: -        case BACKREF + 5: -        case BACKREF + 6: -        case BACKREF + 7: -        case BACKREF + 8: -        case BACKREF + 9: -        { -          int len; - -          no = op - BACKREF; -          cleanup_subexpr(); -          if (!REG_MULTI) {  // Single-line regexp -            if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL) { -              // Backref was not set: Match an empty string. 
-              len = 0; -            } else { -              // Compare current input with back-ref in the same line. -              len = (int)(rex.reg_endp[no] - rex.reg_startp[no]); -              if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0) { -                status = RA_NOMATCH; -              } -            } -          } else {  // Multi-line regexp -            if (rex.reg_startpos[no].lnum < 0 || rex.reg_endpos[no].lnum < 0) { -              // Backref was not set: Match an empty string. -              len = 0; -            } else { -              if (rex.reg_startpos[no].lnum == rex.lnum -                  && rex.reg_endpos[no].lnum == rex.lnum) { -                // Compare back-ref within the current line. -                len = rex.reg_endpos[no].col - rex.reg_startpos[no].col; -                if (cstrncmp(rex.line + rex.reg_startpos[no].col, -                             rex.input, &len) != 0) { -                  status = RA_NOMATCH; -                } -              } else { -                // Messy situation: Need to compare between two lines. -                int r = match_with_backref(rex.reg_startpos[no].lnum, -                                           rex.reg_startpos[no].col, -                                           rex.reg_endpos[no].lnum, -                                           rex.reg_endpos[no].col, -                                           &len); -                if (r != RA_MATCH) { -                  status = r; -                } -              } -            } -          } - -          // Matched the backref, skip over it. 
-          rex.input += len; -        } -        break; - -        case ZREF + 1: -        case ZREF + 2: -        case ZREF + 3: -        case ZREF + 4: -        case ZREF + 5: -        case ZREF + 6: -        case ZREF + 7: -        case ZREF + 8: -        case ZREF + 9: -        { -          cleanup_zsubexpr(); -          no = op - ZREF; -          if (re_extmatch_in != NULL -              && re_extmatch_in->matches[no] != NULL) { -            int len = (int)STRLEN(re_extmatch_in->matches[no]); -            if (cstrncmp(re_extmatch_in->matches[no], rex.input, &len) != 0) { -              status = RA_NOMATCH; -            } else { -              rex.input += len; -            } -          } else { -            // Backref was not set: Match an empty string. -          } -        } -        break; - -        case BRANCH: -        { -          if (OP(next) != BRANCH)       /* No choice. */ -            next = OPERAND(scan);               /* Avoid recursion. */ -          else { -            rp = regstack_push(RS_BRANCH, scan); -            if (rp == NULL) -              status = RA_FAIL; -            else -              status = RA_BREAK;                /* rest is below */ -          } -        } -        break; - -        case BRACE_LIMITS: -        { -          if (OP(next) == BRACE_SIMPLE) { -            bl_minval = OPERAND_MIN(scan); -            bl_maxval = OPERAND_MAX(scan); -          } else if (OP(next) >= BRACE_COMPLEX -                     && OP(next) < BRACE_COMPLEX + 10) { -            no = OP(next) - BRACE_COMPLEX; -            brace_min[no] = OPERAND_MIN(scan); -            brace_max[no] = OPERAND_MAX(scan); -            brace_count[no] = 0; -          } else { -            internal_error("BRACE_LIMITS"); -            status = RA_FAIL; -          } -        } -        break; - -        case BRACE_COMPLEX + 0: -        case BRACE_COMPLEX + 1: -        case BRACE_COMPLEX + 2: -        case BRACE_COMPLEX + 3: -        case BRACE_COMPLEX + 4: -        case 
BRACE_COMPLEX + 5: -        case BRACE_COMPLEX + 6: -        case BRACE_COMPLEX + 7: -        case BRACE_COMPLEX + 8: -        case BRACE_COMPLEX + 9: -        { -          no = op - BRACE_COMPLEX; -          ++brace_count[no]; - -          /* If not matched enough times yet, try one more */ -          if (brace_count[no] <= (brace_min[no] <= brace_max[no] -                                  ? brace_min[no] : brace_max[no])) { -            rp = regstack_push(RS_BRCPLX_MORE, scan); -            if (rp == NULL) -              status = RA_FAIL; -            else { -              rp->rs_no = no; -              reg_save(&rp->rs_un.regsave, &backpos); -              next = OPERAND(scan); -              /* We continue and handle the result when done. */ -            } -            break; -          } - -          /* If matched enough times, may try matching some more */ -          if (brace_min[no] <= brace_max[no]) { -            /* Range is the normal way around, use longest match */ -            if (brace_count[no] <= brace_max[no]) { -              rp = regstack_push(RS_BRCPLX_LONG, scan); -              if (rp == NULL) -                status = RA_FAIL; -              else { -                rp->rs_no = no; -                reg_save(&rp->rs_un.regsave, &backpos); -                next = OPERAND(scan); -                /* We continue and handle the result when done. */ -              } -            } -          } else { -            /* Range is backwards, use shortest match first */ -            if (brace_count[no] <= brace_min[no]) { -              rp = regstack_push(RS_BRCPLX_SHORT, scan); -              if (rp == NULL) -                status = RA_FAIL; -              else { -                reg_save(&rp->rs_un.regsave, &backpos); -                /* We continue and handle the result when done. 
*/ -              } -            } -          } -        } -        break; - -        case BRACE_SIMPLE: -        case STAR: -        case PLUS: -        { -          regstar_T rst; - -          /* -           * Lookahead to avoid useless match attempts when we know -           * what character comes next. -           */ -          if (OP(next) == EXACTLY) { -            rst.nextb = *OPERAND(next); -            if (rex.reg_ic) { -              if (mb_isupper(rst.nextb)) { -                rst.nextb_ic = mb_tolower(rst.nextb); -              } else { -                rst.nextb_ic = mb_toupper(rst.nextb); -              } -            } else { -              rst.nextb_ic = rst.nextb; -            } -          } else { -            rst.nextb = NUL; -            rst.nextb_ic = NUL; -          } -          if (op != BRACE_SIMPLE) { -            rst.minval = (op == STAR) ? 0 : 1; -            rst.maxval = MAX_LIMIT; -          } else { -            rst.minval = bl_minval; -            rst.maxval = bl_maxval; -          } - -          /* -           * When maxval > minval, try matching as much as possible, up -           * to maxval.  When maxval < minval, try matching at least the -           * minimal number (since the range is backwards, that's also -           * maxval!). -           */ -          rst.count = regrepeat(OPERAND(scan), rst.maxval); -          if (got_int) { -            status = RA_FAIL; -            break; -          } -          if (rst.minval <= rst.maxval -              ? rst.count >= rst.minval : rst.count >= rst.maxval) { -            /* It could match.  Prepare for trying to match what -             * follows.  The code is below.  Parameters are stored in -             * a regstar_T on the regstack. 
*/ -            if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { -              emsg(_(e_maxmempat)); -              status = RA_FAIL; -            } else { -              ga_grow(&regstack, sizeof(regstar_T)); -              regstack.ga_len += sizeof(regstar_T); -              rp = regstack_push(rst.minval <= rst.maxval -                  ? RS_STAR_LONG : RS_STAR_SHORT, scan); -              if (rp == NULL) -                status = RA_FAIL; -              else { -                *(((regstar_T *)rp) - 1) = rst; -                status = RA_BREAK;                  /* skip the restore bits */ -              } -            } -          } else -            status = RA_NOMATCH; - -        } -        break; - -        case NOMATCH: -        case MATCH: -        case SUBPAT: -          rp = regstack_push(RS_NOMATCH, scan); -          if (rp == NULL) -            status = RA_FAIL; -          else { -            rp->rs_no = op; -            reg_save(&rp->rs_un.regsave, &backpos); -            next = OPERAND(scan); -            /* We continue and handle the result when done. */ -          } -          break; - -        case BEHIND: -        case NOBEHIND: -          /* Need a bit of room to store extra positions. */ -          if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { -            emsg(_(e_maxmempat)); -            status = RA_FAIL; -          } else { -            ga_grow(&regstack, sizeof(regbehind_T)); -            regstack.ga_len += sizeof(regbehind_T); -            rp = regstack_push(RS_BEHIND1, scan); -            if (rp == NULL) -              status = RA_FAIL; -            else { -              /* Need to save the subexpr to be able to restore them -               * when there is a match but we don't use it. */ -              save_subexpr(((regbehind_T *)rp) - 1); - -              rp->rs_no = op; -              reg_save(&rp->rs_un.regsave, &backpos); -              /* First try if what follows matches.  
If it does then we -               * check the behind match by looping. */ -            } -          } -          break; - -        case BHPOS: -          if (REG_MULTI) { -            if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line) -                || behind_pos.rs_u.pos.lnum != rex.lnum) { -              status = RA_NOMATCH; -            } -          } else if (behind_pos.rs_u.ptr != rex.input) { -            status = RA_NOMATCH; -          } -          break; - -        case NEWL: -          if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline -               || rex.reg_line_lbr) && (c != '\n' || !rex.reg_line_lbr)) { -            status = RA_NOMATCH; -          } else if (rex.reg_line_lbr) { -            ADVANCE_REGINPUT(); -          } else { -            reg_nextline(); -          } -          break; - -        case END: -          status = RA_MATCH;    /* Success! */ -          break; - -        default: -          iemsg(_(e_re_corr)); -#ifdef REGEXP_DEBUG -          printf("Illegal op code %d\n", op); -#endif -          status = RA_FAIL; -          break; -        } -      } - -      /* If we can't continue sequentially, break the inner loop. */ -      if (status != RA_CONT) -        break; - -      /* Continue in inner loop, advance to next item. */ -      scan = next; - -    } /* end of inner loop */ - -    /* -     * If there is something on the regstack execute the code for the state. -     * If the state is popped then loop and use the older state. -     */ -    while (!GA_EMPTY(&regstack) && status != RA_FAIL) { -      rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1; -      switch (rp->rs_state) { -      case RS_NOPEN: -        /* Result is passed on as-is, simply pop the state. */ -        regstack_pop(&scan); -        break; - -      case RS_MOPEN: -        // Pop the state.  Restore pointers when there is no match. 
-        if (status == RA_NOMATCH) { -          restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no], -                     &rex.reg_startp[rp->rs_no]); -        } -        regstack_pop(&scan); -        break; - -      case RS_ZOPEN: -        /* Pop the state.  Restore pointers when there is no match. */ -        if (status == RA_NOMATCH) -          restore_se(&rp->rs_un.sesave, ®_startzpos[rp->rs_no], -              ®_startzp[rp->rs_no]); -        regstack_pop(&scan); -        break; - -      case RS_MCLOSE: -        // Pop the state.  Restore pointers when there is no match. -        if (status == RA_NOMATCH) { -          restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no], -                     &rex.reg_endp[rp->rs_no]); -        } -        regstack_pop(&scan); -        break; - -      case RS_ZCLOSE: -        /* Pop the state.  Restore pointers when there is no match. */ -        if (status == RA_NOMATCH) -          restore_se(&rp->rs_un.sesave, ®_endzpos[rp->rs_no], -              ®_endzp[rp->rs_no]); -        regstack_pop(&scan); -        break; - -      case RS_BRANCH: -        if (status == RA_MATCH) -          /* this branch matched, use it */ -          regstack_pop(&scan); -        else { -          if (status != RA_BREAK) { -            /* After a non-matching branch: try next one. */ -            reg_restore(&rp->rs_un.regsave, &backpos); -            scan = rp->rs_scan; -          } -          if (scan == NULL || OP(scan) != BRANCH) { -            /* no more branches, didn't find a match */ -            status = RA_NOMATCH; -            regstack_pop(&scan); -          } else { -            /* Prepare to try a branch. */ -            rp->rs_scan = regnext(scan); -            reg_save(&rp->rs_un.regsave, &backpos); -            scan = OPERAND(scan); -          } -        } -        break; - -      case RS_BRCPLX_MORE: -        /* Pop the state.  Restore pointers when there is no match. 
*/ -        if (status == RA_NOMATCH) { -          reg_restore(&rp->rs_un.regsave, &backpos); -          --brace_count[rp->rs_no];             /* decrement match count */ -        } -        regstack_pop(&scan); -        break; - -      case RS_BRCPLX_LONG: -        /* Pop the state.  Restore pointers when there is no match. */ -        if (status == RA_NOMATCH) { -          /* There was no match, but we did find enough matches. */ -          reg_restore(&rp->rs_un.regsave, &backpos); -          --brace_count[rp->rs_no]; -          /* continue with the items after "\{}" */ -          status = RA_CONT; -        } -        regstack_pop(&scan); -        if (status == RA_CONT) -          scan = regnext(scan); -        break; - -      case RS_BRCPLX_SHORT: -        /* Pop the state.  Restore pointers when there is no match. */ -        if (status == RA_NOMATCH) -          /* There was no match, try to match one more item. */ -          reg_restore(&rp->rs_un.regsave, &backpos); -        regstack_pop(&scan); -        if (status == RA_NOMATCH) { -          scan = OPERAND(scan); -          status = RA_CONT; -        } -        break; - -      case RS_NOMATCH: -        /* Pop the state.  If the operand matches for NOMATCH or -        * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup, -        * except for SUBPAT, and continue with the next item. */ -        if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH)) -          status = RA_NOMATCH; -        else { -          status = RA_CONT; -          if (rp->rs_no != SUBPAT)              /* zero-width */ -            reg_restore(&rp->rs_un.regsave, &backpos); -        } -        regstack_pop(&scan); -        if (status == RA_CONT) -          scan = regnext(scan); -        break; - -      case RS_BEHIND1: -        if (status == RA_NOMATCH) { -          regstack_pop(&scan); -          regstack.ga_len -= sizeof(regbehind_T); -        } else { -          /* The stuff after BEHIND/NOBEHIND matches.  
Now try if -           * the behind part does (not) match before the current -           * position in the input.  This must be done at every -           * position in the input and checking if the match ends at -           * the current position. */ - -          /* save the position after the found match for next */ -          reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); - -          /* Start looking for a match with operand at the current -           * position.  Go back one character until we find the -           * result, hitting the start of the line or the previous -           * line (for multi-line matching). -           * Set behind_pos to where the match should end, BHPOS -           * will match it.  Save the current value. */ -          (((regbehind_T *)rp) - 1)->save_behind = behind_pos; -          behind_pos = rp->rs_un.regsave; - -          rp->rs_state = RS_BEHIND2; - -          reg_restore(&rp->rs_un.regsave, &backpos); -          scan = OPERAND(rp->rs_scan) + 4; -        } -        break; - -      case RS_BEHIND2: -        /* -         * Looping for BEHIND / NOBEHIND match. -         */ -        if (status == RA_MATCH && reg_save_equal(&behind_pos)) { -          /* found a match that ends where "next" started */ -          behind_pos = (((regbehind_T *)rp) - 1)->save_behind; -          if (rp->rs_no == BEHIND) -            reg_restore(&(((regbehind_T *)rp) - 1)->save_after, -                &backpos); -          else { -            /* But we didn't want a match.  Need to restore the -             * subexpr, because what follows matched, so they have -             * been set. */ -            status = RA_NOMATCH; -            restore_subexpr(((regbehind_T *)rp) - 1); -          } -          regstack_pop(&scan); -          regstack.ga_len -= sizeof(regbehind_T); -        } else { -          long limit; - -          /* No match or a match that doesn't end where we want it: Go -           * back one character.  
May go to previous line once. */ -          no = OK; -          limit = OPERAND_MIN(rp->rs_scan); -          if (REG_MULTI) { -            if (limit > 0 -                && ((rp->rs_un.regsave.rs_u.pos.lnum -                     < behind_pos.rs_u.pos.lnum -                     ? (colnr_T)STRLEN(rex.line) -                     : behind_pos.rs_u.pos.col) -                    - rp->rs_un.regsave.rs_u.pos.col >= limit)) -              no = FAIL; -            else if (rp->rs_un.regsave.rs_u.pos.col == 0) { -              if (rp->rs_un.regsave.rs_u.pos.lnum -                  < behind_pos.rs_u.pos.lnum -                  || reg_getline( -                      --rp->rs_un.regsave.rs_u.pos.lnum) -                  == NULL) -                no = FAIL; -              else { -                reg_restore(&rp->rs_un.regsave, &backpos); -                rp->rs_un.regsave.rs_u.pos.col = -                  (colnr_T)STRLEN(rex.line); -              } -            } else { -              const char_u *const line = -                  reg_getline(rp->rs_un.regsave.rs_u.pos.lnum); - -              rp->rs_un.regsave.rs_u.pos.col -= -                  utf_head_off(line, -                               line + rp->rs_un.regsave.rs_u.pos.col - 1) -                  + 1; -            } -          } else { -            if (rp->rs_un.regsave.rs_u.ptr == rex.line) { -              no = FAIL; -            } else { -              MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr); -              if (limit > 0 -                  && (behind_pos.rs_u.ptr - rp->rs_un.regsave.rs_u.ptr) > (ptrdiff_t)limit) { -                no = FAIL; -              } -            } -          } -          if (no == OK) { -            /* Advanced, prepare for finding match again. 
*/ -            reg_restore(&rp->rs_un.regsave, &backpos); -            scan = OPERAND(rp->rs_scan) + 4; -            if (status == RA_MATCH) { -              /* We did match, so subexpr may have been changed, -               * need to restore them for the next try. */ -              status = RA_NOMATCH; -              restore_subexpr(((regbehind_T *)rp) - 1); -            } -          } else { -            /* Can't advance.  For NOBEHIND that's a match. */ -            behind_pos = (((regbehind_T *)rp) - 1)->save_behind; -            if (rp->rs_no == NOBEHIND) { -              reg_restore(&(((regbehind_T *)rp) - 1)->save_after, -                  &backpos); -              status = RA_MATCH; -            } else { -              /* We do want a proper match.  Need to restore the -               * subexpr if we had a match, because they may have -               * been set. */ -              if (status == RA_MATCH) { -                status = RA_NOMATCH; -                restore_subexpr(((regbehind_T *)rp) - 1); -              } -            } -            regstack_pop(&scan); -            regstack.ga_len -= sizeof(regbehind_T); -          } -        } -        break; - -      case RS_STAR_LONG: -      case RS_STAR_SHORT: -      { -        regstar_T           *rst = ((regstar_T *)rp) - 1; - -        if (status == RA_MATCH) { -          regstack_pop(&scan); -          regstack.ga_len -= sizeof(regstar_T); -          break; -        } - -        /* Tried once already, restore input pointers. */ -        if (status != RA_BREAK) -          reg_restore(&rp->rs_un.regsave, &backpos); - -        /* Repeat until we found a position where it could match. */ -        for (;; ) { -          if (status != RA_BREAK) { -            /* Tried first position already, advance. */ -            if (rp->rs_state == RS_STAR_LONG) { -              /* Trying for longest match, but couldn't or -               * didn't match -- back up one char. 
*/ -              if (--rst->count < rst->minval) -                break; -              if (rex.input == rex.line) { -                // backup to last char of previous line -                rex.lnum--; -                rex.line = reg_getline(rex.lnum); -                // Just in case regrepeat() didn't count right. -                if (rex.line == NULL) { -                  break; -                } -                rex.input = rex.line + STRLEN(rex.line); -                fast_breakcheck(); -              } else { -                MB_PTR_BACK(rex.line, rex.input); -              } -            } else { -              /* Range is backwards, use shortest match first. -               * Careful: maxval and minval are exchanged! -               * Couldn't or didn't match: try advancing one -               * char. */ -              if (rst->count == rst->minval -                  || regrepeat(OPERAND(rp->rs_scan), 1L) == 0) -                break; -              ++rst->count; -            } -            if (got_int) -              break; -          } else -            status = RA_NOMATCH; - -          // If it could match, try it. -          if (rst->nextb == NUL || *rex.input == rst->nextb -              || *rex.input == rst->nextb_ic) { -            reg_save(&rp->rs_un.regsave, &backpos); -            scan = regnext(rp->rs_scan); -            status = RA_CONT; -            break; -          } -        } -        if (status != RA_CONT) { -          /* Failed. */ -          regstack_pop(&scan); -          regstack.ga_len -= sizeof(regstar_T); -          status = RA_NOMATCH; -        } -      } -      break; -      } - -      /* If we want to continue the inner loop or didn't pop a state -       * continue matching loop */ -      if (status == RA_CONT || rp == (regitem_T *) -          ((char *)regstack.ga_data + regstack.ga_len) - 1) -        break; -    } - -    /* May need to continue with the inner loop, starting at "scan". 
*/ -    if (status == RA_CONT) -      continue; - -    /* -     * If the regstack is empty or something failed we are done. -     */ -    if (GA_EMPTY(®stack) || status == RA_FAIL) { -      if (scan == NULL) { -        /* -         * We get here only if there's trouble -- normally "case END" is -         * the terminating point. -         */ -        iemsg(_(e_re_corr)); -#ifdef REGEXP_DEBUG -        printf("Premature EOL\n"); -#endif -      } -      return status == RA_MATCH; -    } - -  } /* End of loop until the regstack is empty. */ - -  /* NOTREACHED */ -} - -/* - * Push an item onto the regstack. - * Returns pointer to new item.  Returns NULL when out of memory. - */ -static regitem_T *regstack_push(regstate_T state, char_u *scan) -{ -  regitem_T   *rp; - -  if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { -    emsg(_(e_maxmempat)); -    return NULL; -  } -  ga_grow(®stack, sizeof(regitem_T)); - -  rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len); -  rp->rs_state = state; -  rp->rs_scan = scan; - -  regstack.ga_len += sizeof(regitem_T); -  return rp; -} - -/* - * Pop an item from the regstack. - */ -static void regstack_pop(char_u **scan) -{ -  regitem_T   *rp; - -  rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1; -  *scan = rp->rs_scan; - -  regstack.ga_len -= sizeof(regitem_T); -} - -/* - * regrepeat - repeatedly match something simple, return how many. - * Advances rex.input (and rex.lnum) to just after the matched chars. - */ -static int  -regrepeat ( -    char_u *p, -    long maxcount              /* maximum number of matches allowed */ -) -{ -  long count = 0; -  char_u      *opnd; -  int mask; -  int testval = 0; - -  char_u *scan = rex.input;  // Make local copy of rex.input for speed. 
-  opnd = OPERAND(p); -  switch (OP(p)) { -  case ANY: -  case ANY + ADD_NL: -    while (count < maxcount) { -      /* Matching anything means we continue until end-of-line (or -       * end-of-file for ANY + ADD_NL), only limited by maxcount. */ -      while (*scan != NUL && count < maxcount) { -        count++; -        MB_PTR_ADV(scan); -      } -      if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline -          || rex.reg_line_lbr || count == maxcount) { -        break; -      } -      count++;  // count the line-break -      reg_nextline(); -      scan = rex.input; -      if (got_int) { -        break; -      } -    } -    break; - -  case IDENT: -  case IDENT + ADD_NL: -    testval = 1; -    FALLTHROUGH; -  case SIDENT: -  case SIDENT + ADD_NL: -    while (count < maxcount) { -      if (vim_isIDc(utf_ptr2char(scan)) && (testval || !ascii_isdigit(*scan))) { -        MB_PTR_ADV(scan); -      } else if (*scan == NUL) { -        if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline -            || rex.reg_line_lbr) { -          break; -        } -        reg_nextline(); -        scan = rex.input; -        if (got_int) { -          break; -        } -      } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { -        scan++; -      } else { -        break; -      } -      ++count; -    } -    break; - -  case KWORD: -  case KWORD + ADD_NL: -    testval = 1; -    FALLTHROUGH; -  case SKWORD: -  case SKWORD + ADD_NL: -    while (count < maxcount) { -      if (vim_iswordp_buf(scan, rex.reg_buf) -          && (testval || !ascii_isdigit(*scan))) { -        MB_PTR_ADV(scan); -      } else if (*scan == NUL) { -        if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline -            || rex.reg_line_lbr) { -          break; -        } -        reg_nextline(); -        scan = rex.input; -        if (got_int) { -          break; -        } -      } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { -        
scan++; -      } else { -        break; -      } -      count++; -    } -    break; - -  case FNAME: -  case FNAME + ADD_NL: -    testval = 1; -    FALLTHROUGH; -  case SFNAME: -  case SFNAME + ADD_NL: -    while (count < maxcount) { -      if (vim_isfilec(utf_ptr2char(scan)) && (testval || !ascii_isdigit(*scan))) { -        MB_PTR_ADV(scan); -      } else if (*scan == NUL) { -        if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline -            || rex.reg_line_lbr) { -          break; -        } -        reg_nextline(); -        scan = rex.input; -        if (got_int) { -          break; -        } -      } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { -        scan++; -      } else { -        break; -      } -      count++; -    } -    break; - -  case PRINT: -  case PRINT + ADD_NL: -    testval = 1; -    FALLTHROUGH; -  case SPRINT: -  case SPRINT + ADD_NL: -    while (count < maxcount) { -      if (*scan == NUL) { -        if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline -            || rex.reg_line_lbr) { -          break; -        } -        reg_nextline(); -        scan = rex.input; -        if (got_int) { -          break; -        } -      } else if (vim_isprintc(utf_ptr2char(scan)) == 1 -                 && (testval || !ascii_isdigit(*scan))) { -        MB_PTR_ADV(scan); -      } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { -        scan++; -      } else { -        break; -      } -      count++; -    } -    break; - -  case WHITE: -  case WHITE + ADD_NL: -    testval = mask = RI_WHITE; -do_class: -    while (count < maxcount) { -      int l; -      if (*scan == NUL) { -        if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline -            || rex.reg_line_lbr) { -          break; -        } -        reg_nextline(); -        scan = rex.input; -        if (got_int) { -          break; -        } -      } else if ((l = utfc_ptr2len(scan)) > 1) { -        if (testval != 0) { -    
      break; -        } -        scan += l; -      } else if ((class_tab[*scan] & mask) == testval) { -        scan++; -      } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { -        scan++; -      } else { -        break; -      } -      ++count; -    } -    break; - -  case NWHITE: -  case NWHITE + ADD_NL: -    mask = RI_WHITE; -    goto do_class; -  case DIGIT: -  case DIGIT + ADD_NL: -    testval = mask = RI_DIGIT; -    goto do_class; -  case NDIGIT: -  case NDIGIT + ADD_NL: -    mask = RI_DIGIT; -    goto do_class; -  case HEX: -  case HEX + ADD_NL: -    testval = mask = RI_HEX; -    goto do_class; -  case NHEX: -  case NHEX + ADD_NL: -    mask = RI_HEX; -    goto do_class; -  case OCTAL: -  case OCTAL + ADD_NL: -    testval = mask = RI_OCTAL; -    goto do_class; -  case NOCTAL: -  case NOCTAL + ADD_NL: -    mask = RI_OCTAL; -    goto do_class; -  case WORD: -  case WORD + ADD_NL: -    testval = mask = RI_WORD; -    goto do_class; -  case NWORD: -  case NWORD + ADD_NL: -    mask = RI_WORD; -    goto do_class; -  case HEAD: -  case HEAD + ADD_NL: -    testval = mask = RI_HEAD; -    goto do_class; -  case NHEAD: -  case NHEAD + ADD_NL: -    mask = RI_HEAD; -    goto do_class; -  case ALPHA: -  case ALPHA + ADD_NL: -    testval = mask = RI_ALPHA; -    goto do_class; -  case NALPHA: -  case NALPHA + ADD_NL: -    mask = RI_ALPHA; -    goto do_class; -  case LOWER: -  case LOWER + ADD_NL: -    testval = mask = RI_LOWER; -    goto do_class; -  case NLOWER: -  case NLOWER + ADD_NL: -    mask = RI_LOWER; -    goto do_class; -  case UPPER: -  case UPPER + ADD_NL: -    testval = mask = RI_UPPER; -    goto do_class; -  case NUPPER: -  case NUPPER + ADD_NL: -    mask = RI_UPPER; -    goto do_class; - -  case EXACTLY: -  { -    int cu, cl; - -    // This doesn't do a multi-byte character, because a MULTIBYTECODE -    // would have been used for it.  It does handle single-byte -    // characters, such as latin1. 
-    if (rex.reg_ic) { -      cu = mb_toupper(*opnd); -      cl = mb_tolower(*opnd); -      while (count < maxcount && (*scan == cu || *scan == cl)) { -        count++; -        scan++; -      } -    } else { -      cu = *opnd; -      while (count < maxcount && *scan == cu) { -        count++; -        scan++; -      } -    } -    break; -  } - -  case MULTIBYTECODE: -  { -    int i, len, cf = 0; - -    /* Safety check (just in case 'encoding' was changed since -     * compiling the program). */ -    if ((len = utfc_ptr2len(opnd)) > 1) { -      if (rex.reg_ic) { -        cf = utf_fold(utf_ptr2char(opnd)); -      } -      while (count < maxcount && utfc_ptr2len(scan) >= len) { -        for (i = 0; i < len; i++) { -          if (opnd[i] != scan[i]) { -            break; -          } -        } -        if (i < len && (!rex.reg_ic -                        || utf_fold(utf_ptr2char(scan)) != cf)) { -          break; -        } -        scan += len; -        ++count; -      } -    } -  } -  break; - -  case ANYOF: -  case ANYOF + ADD_NL: -    testval = 1; -    FALLTHROUGH; - -  case ANYBUT: -  case ANYBUT + ADD_NL: -    while (count < maxcount) { -      int len; -      if (*scan == NUL) { -        if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline -            || rex.reg_line_lbr) { -          break; -        } -        reg_nextline(); -        scan = rex.input; -        if (got_int) { -          break; -        } -      } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { -        scan++; -      } else if ((len = utfc_ptr2len(scan)) > 1) { -        if ((cstrchr(opnd, utf_ptr2char(scan)) == NULL) == testval) { -          break; -        } -        scan += len; -      } else { -        if ((cstrchr(opnd, *scan) == NULL) == testval) -          break; -        ++scan; -      } -      ++count; -    } -    break; - -  case NEWL: -    while (count < maxcount -           && ((*scan == NUL && rex.lnum <= rex.reg_maxline && !rex.reg_line_lbr -            
    && REG_MULTI) || (*scan == '\n' && rex.reg_line_lbr))) { -      count++; -      if (rex.reg_line_lbr) { -        ADVANCE_REGINPUT(); -      } else { -        reg_nextline(); -      } -      scan = rex.input; -      if (got_int) { -        break; -      } -    } -    break; - -  default:  // Oh dear.  Called inappropriately. -    iemsg(_(e_re_corr)); -#ifdef REGEXP_DEBUG -    printf("Called regrepeat with op code %d\n", OP(p)); -#endif -    break; -  } - -  rex.input = scan; - -  return (int)count; -} - -/* - * regnext - dig the "next" pointer out of a node - * Returns NULL when calculating size, when there is no next item and when - * there is an error. - */ -static char_u *regnext(char_u *p) -  FUNC_ATTR_NONNULL_ALL -{ -  int offset; - -  if (p == JUST_CALC_SIZE || reg_toolong) -    return NULL; - -  offset = NEXT(p); -  if (offset == 0) -    return NULL; - -  if (OP(p) == BACK) -    return p - offset; -  else -    return p + offset; -} -  /*   * Check the regexp program for its magic number.   * Return true if it's wrong. @@ -5654,7 +1208,7 @@ static void cleanup_zsubexpr(void)  {    if (rex.need_clear_zsubexpr) {      if (REG_MULTI) { -      /* Use 0xff to set lnum to -1 */ +      // Use 0xff to set lnum to -1        memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);        memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);      } else { @@ -5665,45 +1219,6 @@ static void cleanup_zsubexpr(void)    }  } -// Save the current subexpr to "bp", so that they can be restored -// later by restore_subexpr(). -static void save_subexpr(regbehind_T *bp) -  FUNC_ATTR_NONNULL_ALL -{ -  // When "rex.need_clear_subexpr" is set we don't need to save the values, only -  // remember that this flag needs to be set again when restoring. 
-  bp->save_need_clear_subexpr = rex.need_clear_subexpr; -  if (!rex.need_clear_subexpr) { -    for (int i = 0; i < NSUBEXP; i++) { -      if (REG_MULTI) { -        bp->save_start[i].se_u.pos = rex.reg_startpos[i]; -        bp->save_end[i].se_u.pos = rex.reg_endpos[i]; -      } else { -        bp->save_start[i].se_u.ptr = rex.reg_startp[i]; -        bp->save_end[i].se_u.ptr = rex.reg_endp[i]; -      } -    } -  } -} - -// Restore the subexpr from "bp". -static void restore_subexpr(regbehind_T *bp) -  FUNC_ATTR_NONNULL_ALL -{ -  // Only need to restore saved values when they are not to be cleared. -  rex.need_clear_subexpr = bp->save_need_clear_subexpr; -  if (!rex.need_clear_subexpr) { -    for (int i = 0; i < NSUBEXP; i++) { -      if (REG_MULTI) { -        rex.reg_startpos[i] = bp->save_start[i].se_u.pos; -        rex.reg_endpos[i] = bp->save_end[i].se_u.pos; -      } else { -        rex.reg_startp[i] = bp->save_start[i].se_u.ptr; -        rex.reg_endp[i] = bp->save_end[i].se_u.ptr; -      } -    } -  } -}  // Advance rex.lnum, rex.line and rex.input to the next line.  static void reg_nextline(void) @@ -5713,81 +1228,6 @@ static void reg_nextline(void)    fast_breakcheck();  } -// Save the input line and position in a regsave_T. -static void reg_save(regsave_T *save, garray_T *gap) -  FUNC_ATTR_NONNULL_ALL -{ -  if (REG_MULTI) { -    save->rs_u.pos.col = (colnr_T)(rex.input - rex.line); -    save->rs_u.pos.lnum = rex.lnum; -  } else { -    save->rs_u.ptr = rex.input; -  } -  save->rs_len = gap->ga_len; -} - -// Restore the input line and position from a regsave_T. 
-static void reg_restore(regsave_T *save, garray_T *gap) -  FUNC_ATTR_NONNULL_ALL -{ -  if (REG_MULTI) { -    if (rex.lnum != save->rs_u.pos.lnum) { -      // only call reg_getline() when the line number changed to save -      // a bit of time -      rex.lnum = save->rs_u.pos.lnum; -      rex.line = reg_getline(rex.lnum); -    } -    rex.input = rex.line + save->rs_u.pos.col; -  } else { -    rex.input = save->rs_u.ptr; -  } -  gap->ga_len = save->rs_len; -} - -// Return true if current position is equal to saved position. -static bool reg_save_equal(const regsave_T *save) -  FUNC_ATTR_NONNULL_ALL -{ -  if (REG_MULTI) { -    return rex.lnum == save->rs_u.pos.lnum -           && rex.input == rex.line + save->rs_u.pos.col; -  } -  return rex.input == save->rs_u.ptr; -} - -/* - * Tentatively set the sub-expression start to the current position (after - * calling regmatch() they will have changed).  Need to save the existing - * values for when there is no match. - * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), - * depending on REG_MULTI. - */ -static void save_se_multi(save_se_T *savep, lpos_T *posp) -{ -  savep->se_u.pos = *posp; -  posp->lnum = rex.lnum; -  posp->col = (colnr_T)(rex.input - rex.line); -} - -static void save_se_one(save_se_T *savep, char_u **pp) -{ -  savep->se_u.ptr = *pp; -  *pp = rex.input; -} - -/* - * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. - */ -static int re_num_cmp(uint32_t val, char_u *scan) -{ -  uint32_t n = (uint32_t)OPERAND_MIN(scan); - -  if (OPERAND_CMP(scan) == '>') -    return val > n; -  if (OPERAND_CMP(scan) == '<') -    return val < n; -  return val == n; -}  /*   * Check whether a backreference matches. @@ -5795,7 +1235,12 @@ static int re_num_cmp(uint32_t val, char_u *scan)   * If "bytelen" is not NULL, it is set to the byte length of the match in the   * last line.   
*/ -static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen) +static int match_with_backref( +    linenr_T start_lnum, +    colnr_T start_col, +    linenr_T end_lnum, +    colnr_T end_col, +    int *bytelen)  {    linenr_T clnum = start_lnum;    colnr_T ccol = start_col; @@ -5810,7 +1255,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e      if (rex.line != reg_tofree) {        len = (int)STRLEN(rex.line);        if (reg_tofree == NULL || len >= (int)reg_tofreelen) { -        len += 50;              /* get some extra */ +        len += 50;              // get some extra          xfree(reg_tofree);          reg_tofree = xmalloc(len);          reg_tofreelen = len; @@ -5820,7 +1265,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e        rex.line = reg_tofree;      } -    /* Get the line to compare with. */ +    // Get the line to compare with.      p = reg_getline(clnum);      assert(p); @@ -5842,7 +1287,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e        return RA_NOMATCH;  // text too short      } -    /* Advance to next line. */ +    // Advance to next line.      reg_nextline();      if (bytelen != NULL)        *bytelen = 0; @@ -5857,520 +1302,72 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e    return RA_MATCH;  } -#ifdef BT_REGEXP_DUMP - -/* - * regdump - dump a regexp onto stdout in vaguely comprehensible form - */ -static void regdump(char_u *pattern, bt_regprog_T *r) -{ -  char_u  *s; -  int op = EXACTLY;             /* Arbitrary non-END op. 
*/ -  char_u  *next; -  char_u  *end = NULL; -  FILE    *f; - -#ifdef BT_REGEXP_LOG -  f = fopen("bt_regexp_log.log", "a"); -#else -  f = stdout; -#endif -  if (f == NULL) -    return; -  fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", -      pattern); - -  s = r->program + 1; -  /* -   * Loop until we find the END that isn't before a referred next (an END -   * can also appear in a NOMATCH operand). -   */ -  while (op != END || s <= end) { -    op = OP(s); -    fprintf(f, "%2d%s", (int)(s - r->program), regprop(s));     /* Where, what. */ -    next = regnext(s); -    if (next == NULL)           /* Next ptr. */ -      fprintf(f, "(0)"); -    else -      fprintf(f, "(%d)", (int)((s - r->program) + (next - s))); -    if (end < next) -      end = next; -    if (op == BRACE_LIMITS) { -      /* Two ints */ -      fprintf(f, " minval %" PRId64 ", maxval %" PRId64, -              (int64_t)OPERAND_MIN(s), (int64_t)OPERAND_MAX(s)); -      s += 8; -    } else if (op == BEHIND || op == NOBEHIND) { -      /* one int */ -      fprintf(f, " count %" PRId64, (int64_t)OPERAND_MIN(s)); -      s += 4; -    } else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL) { -      // one int plus comparator -      fprintf(f, " count %" PRId64, (int64_t)OPERAND_MIN(s)); -      s += 5; -    } -    s += 3; -    if (op == ANYOF || op == ANYOF + ADD_NL -        || op == ANYBUT || op == ANYBUT + ADD_NL -        || op == EXACTLY) { -      /* Literal string, where present. */ -      fprintf(f, "\nxxxxxxxxx\n"); -      while (*s != NUL) -        fprintf(f, "%c", *s++); -      fprintf(f, "\nxxxxxxxxx\n"); -      s++; -    } -    fprintf(f, "\r\n"); -  } - -  /* Header fields of interest. */ -  if (r->regstart != NUL) -    fprintf(f, "start `%s' 0x%x; ", r->regstart < 256 -        ? 
(char *)transchar(r->regstart) -        : "multibyte", r->regstart); -  if (r->reganch) -    fprintf(f, "anchored; "); -  if (r->regmust != NULL) -    fprintf(f, "must have \"%s\"", r->regmust); -  fprintf(f, "\r\n"); - -#ifdef BT_REGEXP_LOG -  fclose(f); -#endif -} -#endif      /* BT_REGEXP_DUMP */ - -#ifdef REGEXP_DEBUG -/* - * regprop - printable representation of opcode - */ -static char_u *regprop(char_u *op) +/// Used in a place where no * or \+ can follow. +static bool re_mult_next(char *what)  { -  char            *p; -  static char buf[50]; - -  STRCPY(buf, ":"); - -  switch ((int) OP(op)) { -  case BOL: -    p = "BOL"; -    break; -  case EOL: -    p = "EOL"; -    break; -  case RE_BOF: -    p = "BOF"; -    break; -  case RE_EOF: -    p = "EOF"; -    break; -  case CURSOR: -    p = "CURSOR"; -    break; -  case RE_VISUAL: -    p = "RE_VISUAL"; -    break; -  case RE_LNUM: -    p = "RE_LNUM"; -    break; -  case RE_MARK: -    p = "RE_MARK"; -    break; -  case RE_COL: -    p = "RE_COL"; -    break; -  case RE_VCOL: -    p = "RE_VCOL"; -    break; -  case BOW: -    p = "BOW"; -    break; -  case EOW: -    p = "EOW"; -    break; -  case ANY: -    p = "ANY"; -    break; -  case ANY + ADD_NL: -    p = "ANY+NL"; -    break; -  case ANYOF: -    p = "ANYOF"; -    break; -  case ANYOF + ADD_NL: -    p = "ANYOF+NL"; -    break; -  case ANYBUT: -    p = "ANYBUT"; -    break; -  case ANYBUT + ADD_NL: -    p = "ANYBUT+NL"; -    break; -  case IDENT: -    p = "IDENT"; -    break; -  case IDENT + ADD_NL: -    p = "IDENT+NL"; -    break; -  case SIDENT: -    p = "SIDENT"; -    break; -  case SIDENT + ADD_NL: -    p = "SIDENT+NL"; -    break; -  case KWORD: -    p = "KWORD"; -    break; -  case KWORD + ADD_NL: -    p = "KWORD+NL"; -    break; -  case SKWORD: -    p = "SKWORD"; -    break; -  case SKWORD + ADD_NL: -    p = "SKWORD+NL"; -    break; -  case FNAME: -    p = "FNAME"; -    break; -  case FNAME + ADD_NL: -    p = "FNAME+NL"; -    break; -  case SFNAME: -    p = 
"SFNAME"; -    break; -  case SFNAME + ADD_NL: -    p = "SFNAME+NL"; -    break; -  case PRINT: -    p = "PRINT"; -    break; -  case PRINT + ADD_NL: -    p = "PRINT+NL"; -    break; -  case SPRINT: -    p = "SPRINT"; -    break; -  case SPRINT + ADD_NL: -    p = "SPRINT+NL"; -    break; -  case WHITE: -    p = "WHITE"; -    break; -  case WHITE + ADD_NL: -    p = "WHITE+NL"; -    break; -  case NWHITE: -    p = "NWHITE"; -    break; -  case NWHITE + ADD_NL: -    p = "NWHITE+NL"; -    break; -  case DIGIT: -    p = "DIGIT"; -    break; -  case DIGIT + ADD_NL: -    p = "DIGIT+NL"; -    break; -  case NDIGIT: -    p = "NDIGIT"; -    break; -  case NDIGIT + ADD_NL: -    p = "NDIGIT+NL"; -    break; -  case HEX: -    p = "HEX"; -    break; -  case HEX + ADD_NL: -    p = "HEX+NL"; -    break; -  case NHEX: -    p = "NHEX"; -    break; -  case NHEX + ADD_NL: -    p = "NHEX+NL"; -    break; -  case OCTAL: -    p = "OCTAL"; -    break; -  case OCTAL + ADD_NL: -    p = "OCTAL+NL"; -    break; -  case NOCTAL: -    p = "NOCTAL"; -    break; -  case NOCTAL + ADD_NL: -    p = "NOCTAL+NL"; -    break; -  case WORD: -    p = "WORD"; -    break; -  case WORD + ADD_NL: -    p = "WORD+NL"; -    break; -  case NWORD: -    p = "NWORD"; -    break; -  case NWORD + ADD_NL: -    p = "NWORD+NL"; -    break; -  case HEAD: -    p = "HEAD"; -    break; -  case HEAD + ADD_NL: -    p = "HEAD+NL"; -    break; -  case NHEAD: -    p = "NHEAD"; -    break; -  case NHEAD + ADD_NL: -    p = "NHEAD+NL"; -    break; -  case ALPHA: -    p = "ALPHA"; -    break; -  case ALPHA + ADD_NL: -    p = "ALPHA+NL"; -    break; -  case NALPHA: -    p = "NALPHA"; -    break; -  case NALPHA + ADD_NL: -    p = "NALPHA+NL"; -    break; -  case LOWER: -    p = "LOWER"; -    break; -  case LOWER + ADD_NL: -    p = "LOWER+NL"; -    break; -  case NLOWER: -    p = "NLOWER"; -    break; -  case NLOWER + ADD_NL: -    p = "NLOWER+NL"; -    break; -  case UPPER: -    p = "UPPER"; -    break; -  case UPPER + ADD_NL: -    p = 
"UPPER+NL"; -    break; -  case NUPPER: -    p = "NUPPER"; -    break; -  case NUPPER + ADD_NL: -    p = "NUPPER+NL"; -    break; -  case BRANCH: -    p = "BRANCH"; -    break; -  case EXACTLY: -    p = "EXACTLY"; -    break; -  case NOTHING: -    p = "NOTHING"; -    break; -  case BACK: -    p = "BACK"; -    break; -  case END: -    p = "END"; -    break; -  case MOPEN + 0: -    p = "MATCH START"; -    break; -  case MOPEN + 1: -  case MOPEN + 2: -  case MOPEN + 3: -  case MOPEN + 4: -  case MOPEN + 5: -  case MOPEN + 6: -  case MOPEN + 7: -  case MOPEN + 8: -  case MOPEN + 9: -    sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN); -    p = NULL; -    break; -  case MCLOSE + 0: -    p = "MATCH END"; -    break; -  case MCLOSE + 1: -  case MCLOSE + 2: -  case MCLOSE + 3: -  case MCLOSE + 4: -  case MCLOSE + 5: -  case MCLOSE + 6: -  case MCLOSE + 7: -  case MCLOSE + 8: -  case MCLOSE + 9: -    sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE); -    p = NULL; -    break; -  case BACKREF + 1: -  case BACKREF + 2: -  case BACKREF + 3: -  case BACKREF + 4: -  case BACKREF + 5: -  case BACKREF + 6: -  case BACKREF + 7: -  case BACKREF + 8: -  case BACKREF + 9: -    sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF); -    p = NULL; -    break; -  case NOPEN: -    p = "NOPEN"; -    break; -  case NCLOSE: -    p = "NCLOSE"; -    break; -  case ZOPEN + 1: -  case ZOPEN + 2: -  case ZOPEN + 3: -  case ZOPEN + 4: -  case ZOPEN + 5: -  case ZOPEN + 6: -  case ZOPEN + 7: -  case ZOPEN + 8: -  case ZOPEN + 9: -    sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN); -    p = NULL; -    break; -  case ZCLOSE + 1: -  case ZCLOSE + 2: -  case ZCLOSE + 3: -  case ZCLOSE + 4: -  case ZCLOSE + 5: -  case ZCLOSE + 6: -  case ZCLOSE + 7: -  case ZCLOSE + 8: -  case ZCLOSE + 9: -    sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE); -    p = NULL; -    break; -  case ZREF + 1: -  case ZREF + 2: -  case ZREF + 3: -  case ZREF + 4: -  case ZREF + 5: -  case ZREF + 
6: -  case ZREF + 7: -  case ZREF + 8: -  case ZREF + 9: -    sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF); -    p = NULL; -    break; -  case STAR: -    p = "STAR"; -    break; -  case PLUS: -    p = "PLUS"; -    break; -  case NOMATCH: -    p = "NOMATCH"; -    break; -  case MATCH: -    p = "MATCH"; -    break; -  case BEHIND: -    p = "BEHIND"; -    break; -  case NOBEHIND: -    p = "NOBEHIND"; -    break; -  case SUBPAT: -    p = "SUBPAT"; -    break; -  case BRACE_LIMITS: -    p = "BRACE_LIMITS"; -    break; -  case BRACE_SIMPLE: -    p = "BRACE_SIMPLE"; -    break; -  case BRACE_COMPLEX + 0: -  case BRACE_COMPLEX + 1: -  case BRACE_COMPLEX + 2: -  case BRACE_COMPLEX + 3: -  case BRACE_COMPLEX + 4: -  case BRACE_COMPLEX + 5: -  case BRACE_COMPLEX + 6: -  case BRACE_COMPLEX + 7: -  case BRACE_COMPLEX + 8: -  case BRACE_COMPLEX + 9: -    sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX); -    p = NULL; -    break; -  case MULTIBYTECODE: -    p = "MULTIBYTECODE"; -    break; -  case NEWL: -    p = "NEWL"; -    break; -  default: -    sprintf(buf + STRLEN(buf), "corrupt %d", OP(op)); -    p = NULL; -    break; +  if (re_multi_type(peekchr()) == MULTI_MULT) { +    semsg(_("E888: (NFA regexp) cannot repeat %s"), what); +    rc_did_emsg = true; +    return false;    } -  if (p != NULL) -    STRCAT(buf, p); -  return (char_u *)buf; +  return true;  } -#endif      /* REGEXP_DEBUG */ - +typedef struct { +  int a, b, c; +} decomp_T; -/* 0xfb20 - 0xfb4f */ +// 0xfb20 - 0xfb4f  static decomp_T decomp_table[0xfb4f-0xfb20+1] =  { -  {0x5e2,0,0},                  /* 0xfb20	alt ayin */ -  {0x5d0,0,0},                  /* 0xfb21	alt alef */ -  {0x5d3,0,0},                  /* 0xfb22	alt dalet */ -  {0x5d4,0,0},                  /* 0xfb23	alt he */ -  {0x5db,0,0},                  /* 0xfb24	alt kaf */ -  {0x5dc,0,0},                  /* 0xfb25	alt lamed */ -  {0x5dd,0,0},                  /* 0xfb26	alt mem-sofit */ -  {0x5e8,0,0},                  /* 
0xfb27	alt resh */ -  {0x5ea,0,0},                  /* 0xfb28	alt tav */ -  {'+', 0, 0},                  /* 0xfb29	alt plus */ -  {0x5e9, 0x5c1, 0},            /* 0xfb2a	shin+shin-dot */ -  {0x5e9, 0x5c2, 0},            /* 0xfb2b	shin+sin-dot */ -  {0x5e9, 0x5c1, 0x5bc},        /* 0xfb2c	shin+shin-dot+dagesh */ -  {0x5e9, 0x5c2, 0x5bc},        /* 0xfb2d	shin+sin-dot+dagesh */ -  {0x5d0, 0x5b7, 0},            /* 0xfb2e	alef+patah */ -  {0x5d0, 0x5b8, 0},            /* 0xfb2f	alef+qamats */ -  {0x5d0, 0x5b4, 0},            /* 0xfb30	alef+hiriq */ -  {0x5d1, 0x5bc, 0},            /* 0xfb31	bet+dagesh */ -  {0x5d2, 0x5bc, 0},            /* 0xfb32	gimel+dagesh */ -  {0x5d3, 0x5bc, 0},            /* 0xfb33	dalet+dagesh */ -  {0x5d4, 0x5bc, 0},            /* 0xfb34	he+dagesh */ -  {0x5d5, 0x5bc, 0},            /* 0xfb35	vav+dagesh */ -  {0x5d6, 0x5bc, 0},            /* 0xfb36	zayin+dagesh */ -  {0xfb37, 0, 0},               /* 0xfb37 -- */ -  {0x5d8, 0x5bc, 0},            /* 0xfb38	tet+dagesh */ -  {0x5d9, 0x5bc, 0},            /* 0xfb39	yud+dagesh */ -  {0x5da, 0x5bc, 0},            /* 0xfb3a	kaf sofit+dagesh */ -  {0x5db, 0x5bc, 0},            /* 0xfb3b	kaf+dagesh */ -  {0x5dc, 0x5bc, 0},            /* 0xfb3c	lamed+dagesh */ -  {0xfb3d, 0, 0},               /* 0xfb3d -- */ -  {0x5de, 0x5bc, 0},            /* 0xfb3e	mem+dagesh */ -  {0xfb3f, 0, 0},               /* 0xfb3f -- */ -  {0x5e0, 0x5bc, 0},            /* 0xfb40	nun+dagesh */ -  {0x5e1, 0x5bc, 0},            /* 0xfb41	samech+dagesh */ -  {0xfb42, 0, 0},               /* 0xfb42 -- */ -  {0x5e3, 0x5bc, 0},            /* 0xfb43	pe sofit+dagesh */ -  {0x5e4, 0x5bc,0},             /* 0xfb44	pe+dagesh */ -  {0xfb45, 0, 0},               /* 0xfb45 -- */ -  {0x5e6, 0x5bc, 0},            /* 0xfb46	tsadi+dagesh */ -  {0x5e7, 0x5bc, 0},            /* 0xfb47	qof+dagesh */ -  {0x5e8, 0x5bc, 0},            /* 0xfb48	resh+dagesh */ -  {0x5e9, 0x5bc, 0},            /* 0xfb49	shin+dagesh */ -  {0x5ea, 0x5bc, 0},            /* 
0xfb4a	tav+dagesh */ -  {0x5d5, 0x5b9, 0},            /* 0xfb4b	vav+holam */ -  {0x5d1, 0x5bf, 0},            /* 0xfb4c	bet+rafe */ -  {0x5db, 0x5bf, 0},            /* 0xfb4d	kaf+rafe */ -  {0x5e4, 0x5bf, 0},            /* 0xfb4e	pe+rafe */ -  {0x5d0, 0x5dc, 0}             /* 0xfb4f	alef-lamed */ +  { 0x5e2, 0, 0 },          // 0xfb20       alt ayin +  { 0x5d0, 0, 0 },          // 0xfb21       alt alef +  { 0x5d3, 0, 0 },          // 0xfb22       alt dalet +  { 0x5d4, 0, 0 },          // 0xfb23       alt he +  { 0x5db, 0, 0 },          // 0xfb24       alt kaf +  { 0x5dc, 0, 0 },          // 0xfb25       alt lamed +  { 0x5dd, 0, 0 },          // 0xfb26       alt mem-sofit +  { 0x5e8, 0, 0 },          // 0xfb27       alt resh +  { 0x5ea, 0, 0 },          // 0xfb28       alt tav +  { '+', 0, 0 },            // 0xfb29       alt plus +  { 0x5e9, 0x5c1, 0 },      // 0xfb2a       shin+shin-dot +  { 0x5e9, 0x5c2, 0 },      // 0xfb2b       shin+sin-dot +  { 0x5e9, 0x5c1, 0x5bc },  // 0xfb2c       shin+shin-dot+dagesh +  { 0x5e9, 0x5c2, 0x5bc },  // 0xfb2d       shin+sin-dot+dagesh +  { 0x5d0, 0x5b7, 0 },      // 0xfb2e       alef+patah +  { 0x5d0, 0x5b8, 0 },      // 0xfb2f       alef+qamats +  { 0x5d0, 0x5b4, 0 },      // 0xfb30       alef+hiriq +  { 0x5d1, 0x5bc, 0 },      // 0xfb31       bet+dagesh +  { 0x5d2, 0x5bc, 0 },      // 0xfb32       gimel+dagesh +  { 0x5d3, 0x5bc, 0 },      // 0xfb33       dalet+dagesh +  { 0x5d4, 0x5bc, 0 },      // 0xfb34       he+dagesh +  { 0x5d5, 0x5bc, 0 },      // 0xfb35       vav+dagesh +  { 0x5d6, 0x5bc, 0 },      // 0xfb36       zayin+dagesh +  { 0xfb37, 0, 0 },         // 0xfb37 -- UNUSED +  { 0x5d8, 0x5bc, 0 },      // 0xfb38       tet+dagesh +  { 0x5d9, 0x5bc, 0 },      // 0xfb39       yud+dagesh +  { 0x5da, 0x5bc, 0 },      // 0xfb3a       kaf sofit+dagesh +  { 0x5db, 0x5bc, 0 },      // 0xfb3b       kaf+dagesh +  { 0x5dc, 0x5bc, 0 },      // 0xfb3c       lamed+dagesh +  { 0xfb3d, 0, 0 },         // 0xfb3d -- UNUSED +  { 0x5de, 
0x5bc, 0 },      // 0xfb3e       mem+dagesh +  { 0xfb3f, 0, 0 },         // 0xfb3f -- UNUSED +  { 0x5e0, 0x5bc, 0 },      // 0xfb40       nun+dagesh +  { 0x5e1, 0x5bc, 0 },      // 0xfb41       samech+dagesh +  { 0xfb42, 0, 0 },         // 0xfb42 -- UNUSED +  { 0x5e3, 0x5bc, 0 },      // 0xfb43       pe sofit+dagesh +  { 0x5e4, 0x5bc, 0 },      // 0xfb44       pe+dagesh +  { 0xfb45, 0, 0 },         // 0xfb45 -- UNUSED +  { 0x5e6, 0x5bc, 0 },      // 0xfb46       tsadi+dagesh +  { 0x5e7, 0x5bc, 0 },      // 0xfb47       qof+dagesh +  { 0x5e8, 0x5bc, 0 },      // 0xfb48       resh+dagesh +  { 0x5e9, 0x5bc, 0 },      // 0xfb49       shin+dagesh +  { 0x5ea, 0x5bc, 0 },      // 0xfb4a       tav+dagesh +  { 0x5d5, 0x5b9, 0 },      // 0xfb4b       vav+holam +  { 0x5d1, 0x5bf, 0 },      // 0xfb4c       bet+rafe +  { 0x5db, 0x5bf, 0 },      // 0xfb4d       kaf+rafe +  { 0x5e4, 0x5bf, 0 },      // 0xfb4e       pe+rafe +  { 0x5d0, 0x5dc, 0 }       // 0xfb4f       alef-lamed  };  static void mb_decompose(int c, int *c1, int *c2, int *c3) @@ -6439,12 +1436,53 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n)    return result;  } +/// Wrapper around strchr which accounts for case-insensitive searches and +/// non-ASCII characters. +/// +/// This function is used a lot for simple searches, keep it fast! +/// +/// @param  s  string to search +/// @param  c  character to find in @a s +/// +/// @return  NULL if no match, otherwise pointer to the position in @a s +static inline char_u *cstrchr(const char_u *const s, const int c) +  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +  FUNC_ATTR_ALWAYS_INLINE +{ +  if (!rex.reg_ic) { +    return vim_strchr(s, c); +  } + +  // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is +  // expected to be highly optimized. 
+  if (c > 0x80) { +    const int folded_c = utf_fold(c); +    for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) { +      if (utf_fold(utf_ptr2char(p)) == folded_c) { +        return (char_u *)p; +      } +    } +    return NULL; +  } + +  int cc; +  if (ASCII_ISUPPER(c)) { +    cc = TOLOWER_ASC(c); +  } else if (ASCII_ISLOWER(c)) { +    cc = TOUPPER_ASC(c); +  } else { +    return vim_strchr(s, c); +  } + +  char tofind[] = { (char)c, (char)cc, NUL }; +  return (char_u *)strpbrk((const char *)s, tofind); +} +  ////////////////////////////////////////////////////////////////  //                    regsub stuff                            //  //////////////////////////////////////////////////////////////// -/* This stuff below really confuses cc on an SGI -- webb */ - +// This stuff below really confuses cc on an SGI -- webb  static fptr_T do_upper(int *d, int c) @@ -6498,13 +1536,13 @@ char_u *regtilde(char_u *source, int magic)    for (p = newsub; *p; ++p) {      if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) {        if (reg_prev_sub != NULL) { -        /* length = len(newsub) - 1 + len(prev_sub) + 1 */ +        // length = len(newsub) - 1 + len(prev_sub) + 1          prevlen = (int)STRLEN(reg_prev_sub);          tmpsub = xmalloc(STRLEN(newsub) + prevlen); -        /* copy prefix */ -        len = (int)(p - newsub);              /* not including ~ */ +        // copy prefix +        len = (int)(p - newsub);              // not including ~          memmove(tmpsub, newsub, (size_t)len); -        /* interpret tilde */ +        // interpret tilde          memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);          // copy postfix          if (!magic) { @@ -6512,15 +1550,17 @@ char_u *regtilde(char_u *source, int magic)          }          STRCPY(tmpsub + len + prevlen, p + 1); -        if (newsub != source)                 /* already allocated newsub */ +        if (newsub != source) {               // already allocated newsub       
     xfree(newsub); +        }          newsub = tmpsub;          p = newsub + len + prevlen; -      } else if (magic) -        STRMOVE(p, p + 1);              /* remove '~' */ -      else -        STRMOVE(p, p + 2);              /* remove '\~' */ -      --p; +      } else if (magic) { +        STRMOVE(p, p + 1);              // remove '~' +      } else { +        STRMOVE(p, p + 2);              // remove '\~' +      } +      p--;      } else {        if (*p == '\\' && p[1]) {         // skip escaped characters          p++; @@ -6639,7 +1679,8 @@ int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest,    return result;  } -int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int copy, int magic, int backslash) +int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, +                     int copy, int magic, int backslash)  {    regexec_T rex_save;    bool rex_in_use_save = rex_in_use; @@ -6677,8 +1718,8 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,    int no = -1;    fptr_T func_all = (fptr_T)NULL;    fptr_T func_one = (fptr_T)NULL; -  linenr_T clnum = 0;           /* init for GCC */ -  int len = 0;                  /* init for GCC */ +  linenr_T clnum = 0;           // init for GCC +  int len = 0;                  // init for GCC    static char_u *eval_result = NULL;    // We need to keep track of how many backslashes we escape, so that the byte @@ -6790,7 +1831,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,            }          }          if (had_backslash && backslash) { -          /* Backslashes will be consumed, need to double them. */ +          // Backslashes will be consumed, need to double them.            
s = vim_strsave_escaped(eval_result, (char_u *)"\\");            xfree(eval_result);            eval_result = s; @@ -6830,9 +1871,9 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,            }          }        } -      if (no < 0) {           /* Ordinary character. */ +      if (no < 0) {           // Ordinary character.          if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL) { -          /* Copy a special key as-is. */ +          // Copy a special key as-is.            if (copy) {              *dst++ = c;              *dst++ = *src++; @@ -6960,14 +2001,15 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,                } else {                  c = utf_ptr2char(s); -                if (func_one != (fptr_T)NULL) -                  /* Turbo C complains without the typecast */ +                if (func_one != (fptr_T)NULL) { +                  // Turbo C complains without the typecast                    func_one = (fptr_T)(func_one(&cc, c)); -                else if (func_all != (fptr_T)NULL) -                  /* Turbo C complains without the typecast */ +                } else if (func_all != (fptr_T)NULL) { +                  // Turbo C complains without the typecast                    func_all = (fptr_T)(func_all(&cc, c)); -                else             /* just copy */ +                } else {  // just copy                    cc = c; +                }                  {                    int l; @@ -7162,6 +2204,12 @@ list_T *reg_submatch_list(int no)    return list;  } +// XXX Do not allow headers generator to catch definitions from regexp_nfa.c +#ifndef DO_NOT_DEFINE_EMPTY_ATTRIBUTES +# include "nvim/regexp_bt.c" +# include "nvim/regexp_nfa.c" +#endif +  static regengine_T bt_regengine =  {    bt_regcomp, @@ -7171,12 +2219,6 @@ static regengine_T bt_regengine =    (char_u *)""  }; - -// XXX Do not allow headers generator to catch definitions from regexp_nfa.c -#ifndef 
DO_NOT_DEFINE_EMPTY_ATTRIBUTES -# include "nvim/regexp_nfa.c" -#endif -  static regengine_T nfa_regengine =  {    nfa_regcomp, @@ -7212,7 +2254,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)    regexp_engine = p_re; -  /* Check for prefix "\%#=", that sets the regexp engine */ +  // Check for prefix "\%#=", that sets the regexp engine    if (STRNCMP(expr, "\\%#=", 4) == 0) {      int newengine = expr[4] - '0'; @@ -7297,6 +2339,18 @@ void vim_regfree(regprog_T *prog)      prog->engine->regfree(prog);  } + +#if defined(EXITFREE) +void free_regexp_stuff(void) +{ +  ga_clear(&regstack); +  ga_clear(&backpos); +  xfree(reg_tofree); +  xfree(reg_prev_sub); +} + +#endif +  static void report_re_switch(char_u *pat)  {    if (p_verbose > 0) { @@ -7319,8 +2373,7 @@ static void report_re_switch(char_u *pat)  /// @param nl  ///  /// @return true if there is a match, false if not. -static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, -                               bool nl) +static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)  {    regexec_T rex_save;    bool rex_in_use_save = rex_in_use; @@ -7377,8 +2430,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col,  // Note: "*prog" may be freed and changed.  // Return true if there is a match, false if not. -bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, -                      colnr_T col) +bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T col)  {    regmatch_T regmatch = { .regprog = *prog, .rm_ic = ignore_case };    bool r = vim_regexec_string(&regmatch, line, col, false); @@ -7410,13 +2462,12 @@ bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)  /// match otherwise.  
long vim_regexec_multi(      regmmatch_T *rmp, -    win_T       *win,               // window in which to search or NULL -    buf_T       *buf,               // buffer in which to search -    linenr_T lnum,                  // nr of line to start looking for match -    colnr_T col,                    // column to start looking for match -    proftime_T  *tm,                // timeout limit or NULL -    int         *timed_out          // flag is set when timeout limit reached -) +    win_T       *win,                 // window in which to search or NULL +    buf_T       *buf,                 // buffer in which to search +    linenr_T lnum,                    // nr of line to start looking for match +    colnr_T col,                      // column to start looking for match +    proftime_T  *tm,                  // timeout limit or NULL +    int         *timed_out)           // flag is set when timeout limit reached    FUNC_ATTR_NONNULL_ARG(1)  {    regexec_T rex_save;  | 
