diff options
Diffstat (limited to 'src/nvim/regexp.c')
-rw-r--r-- | src/nvim/regexp.c | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 86f2463ab7..088f3bda70 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -42,6 +42,104 @@ #include "nvim/types_defs.h" #include "nvim/vim_defs.h" +enum { + /// In the NFA engine: how many braces are allowed. + /// TODO(RE): Use dynamic memory allocation instead of static, like here + NFA_MAX_BRACES = 20, +}; + +enum { + /// In the NFA engine: how many states are allowed. + NFA_MAX_STATES = 100000, + NFA_TOO_EXPENSIVE = -1, +}; + +/// Which regexp engine to use? Needed for vim_regcomp(). +/// Must match the values of 'regexpengine'. +enum { + AUTOMATIC_ENGINE = 0, + BACKTRACKING_ENGINE = 1, + NFA_ENGINE = 2, +}; + +/// Structure returned by vim_regcomp() to pass on to vim_regexec(). +/// This is the general structure. For the actual matcher, two specific +/// structures are used. See code below. +struct regprog { + regengine_T *engine; + unsigned regflags; + unsigned re_engine; ///< Automatic, backtracking or NFA engine. + unsigned re_flags; ///< Second argument for vim_regcomp(). + bool re_in_use; ///< prog is being executed +}; + +/// Structure used by the backtracking matcher. +/// These fields are only to be used in regexp.c! +/// See regexp.c for an explanation. +typedef struct { + // These five members mirror struct regprog (same types, same order). + regengine_T *engine; + unsigned regflags; + unsigned re_engine; + unsigned re_flags; + bool re_in_use; + + int regstart; + uint8_t reganch; + uint8_t *regmust; + int regmlen; + uint8_t reghasz; + uint8_t program[]; ///< flexible array member +} bt_regprog_T; + +/// Structure representing an NFA state. +/// An NFA state may have no outgoing edge, when it is an NFA_MATCH state. +typedef struct nfa_state nfa_state_T; +struct nfa_state { + int c; + nfa_state_T *out; + nfa_state_T *out1; + int id; + int lastlist[2]; ///< 0: normal, 1: recursive + int val; +}; + +/// Structure used by the NFA matcher. +typedef struct { + // These five members mirror struct regprog (same types, same order). 
+ regengine_T *engine; + unsigned regflags; + unsigned re_engine; + unsigned re_flags; + bool re_in_use; + + nfa_state_T *start; ///< points into state[] + + int reganch; ///< pattern starts with ^ + int regstart; ///< char at start of pattern + uint8_t *match_text; ///< plain text to match with + + int has_zend; ///< pattern contains \ze + int has_backref; ///< pattern contains \1 .. \9 + int reghasz; + char *pattern; + int nsubexp; ///< number of () + int nstate; + nfa_state_T state[]; ///< flexible array member +} nfa_regprog_T; + +struct regengine { + /// bt_regcomp or nfa_regcomp + regprog_T *(*regcomp)(uint8_t *, int); + /// bt_regfree or nfa_regfree + void (*regfree)(regprog_T *); + /// bt_regexec_nl or nfa_regexec_nl + int (*regexec_nl)(regmatch_T *, uint8_t *, colnr_T, bool); + /// bt_regexec_multi or nfa_regexec_multi + int (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *); + // uint8_t *expr; +}; + // Structure used to save the current input state, when it needs to be // restored after trying a match. Used by reg_save() and reg_restore(). // Also stores the length of "backpos". |