diff options
78 files changed, 5974 insertions, 5690 deletions
diff --git a/.builds/freebsd.yml b/.builds/freebsd.yml index b2b4aa4a5c..2d06b1e685 100644 --- a/.builds/freebsd.yml +++ b/.builds/freebsd.yml @@ -35,10 +35,6 @@ tasks: - unittest: | cd neovim gmake unittest - -# Unfortunately, oldtest is tanking hard on sourcehut's FreeBSD instance -# and not producing any logs as a result. So don't do this task for now. -# Ref: https://github.com/neovim/neovim/pull/11477#discussion_r352095005. -# - test-oldtest: | -# cd neovim -# gmake oldtest +- oldtest: | + cd neovim + gmake oldtest diff --git a/.clang-format b/.clang-format index c86f5a3ddf..d5e328f461 100644 --- a/.clang-format +++ b/.clang-format @@ -2,10 +2,10 @@ BasedOnStyle: Google Language: Cpp ColumnLimit: 100 IndentWidth: 2 -TabWidth: 2 +TabWidth: 8 UseTab: Never -IndentCaseLabels: true -BreakBeforeBraces: Linux +IndentCaseLabels: false +BreakBeforeBraces: Custom AlignEscapedNewlinesLeft: false AllowShortFunctionsOnASingleLine: false AlignTrailingComments: true @@ -17,4 +17,24 @@ AllowShortLoopsOnASingleLine: false BinPackParameters: false BreakBeforeBinaryOperators: true BreakBeforeTernaryOperators: true -ContinuationIndentWidth: 4 +ContinuationIndentWidth: 2 +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: No +AlwaysBreakTemplateDeclarations: No +AlignEscapedNewlines: DontAlign +BinPackArguments: false +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +PointerAlignment: Right +SortIncludes: false +Cpp11BracedListStyle: false diff --git a/runtime/autoload/dist/ft.vim b/runtime/autoload/dist/ft.vim index 54cb6ce70f..c52def1051 100644 --- a/runtime/autoload/dist/ft.vim +++ b/runtime/autoload/dist/ft.vim @@ -1,7 +1,7 @@ " Vim functions for file type detection " " Maintainer: Bram Moolenaar <Bram@vim.org> -" Last 
Change: 2022 Feb 22 +" Last Change: 2022 Mar 05 " These functions are moved here from runtime/filetype.vim to make startup " faster. diff --git a/runtime/compiler/jest.vim b/runtime/compiler/jest.vim index fee70b7c55..a4bb549de1 100644 --- a/runtime/compiler/jest.vim +++ b/runtime/compiler/jest.vim @@ -1,7 +1,7 @@ " Vim compiler file " Compiler: Jest " Maintainer: Doug Kearns <dougkearns@gmail.com> -" Last Change: 2018 May 15 +" Last Change: 2021 Nov 20 if exists("current_compiler") finish @@ -15,12 +15,14 @@ endif let s:cpo_save = &cpo set cpo&vim -" CompilerSet makeprg=npx\ jest\ --no-colors +" CompilerSet makeprg=npx\ --no-install\ jest\ --no-colors CompilerSet makeprg=jest\ --no-colors -CompilerSet errorformat=%E\ \ ●\ %m, +CompilerSet errorformat=%-A\ \ ●\ Console, + \%E\ \ ●\ %m, \%Z\ %\\{4}%.%#Error:\ %f:\ %m\ (%l:%c):%\\=, \%Z\ %\\{6}at\ %\\S%#\ (%f:%l:%c), + \%Z\ %\\{6}at\ %\\S%#\ %f:%l:%c, \%+C\ %\\{4}%\\w%.%#, \%+C\ %\\{4}%[-+]%.%#, \%-C%.%#, diff --git a/runtime/compiler/sml.vim b/runtime/compiler/sml.vim index c7e1b1bf16..a0b13b6c8a 100644 --- a/runtime/compiler/sml.vim +++ b/runtime/compiler/sml.vim @@ -1,7 +1,7 @@ " Vim compiler file " Compiler: SML/NJ Compiler " Maintainer: Doug Kearns <dougkearns@gmail.com> -" Last Change: 2020 Feb 10 +" Last Change: 2022 Feb 09 if exists("current_compiler") finish @@ -16,10 +16,10 @@ let s:cpo_save = &cpo set cpo&vim CompilerSet makeprg=sml -CompilerSet errorformat=%f:%l.%c-%\\d%\\+.%\\d%\\+\ %trror:\ %m, +CompilerSet errorformat=%f:%l.%c-%e.%k\ %trror:\ %m, \%f:%l.%c\ %trror:\ %m, - \%trror:\ %m - \%f:%l.%c-%\\d%\\+.%\\d%\\+\ %tarning:\ %m, + \%trror:\ %m, + \%f:%l.%c-%e.%k\ %tarning:\ %m, \%f:%l.%c\ %tarning:\ %m, \%tarning:\ %m, \%-G%.%# diff --git a/runtime/doc/autocmd.txt b/runtime/doc/autocmd.txt index 4b8c07fde4..ab0b0cd07c 100644 --- a/runtime/doc/autocmd.txt +++ b/runtime/doc/autocmd.txt @@ -1079,7 +1079,7 @@ WinLeave Before leaving a window. 
If the window to be executes the BufLeave autocommands before the WinLeave autocommands (but not for ":new"). Not used for ":qa" or ":q" when exiting Vim. - After WinClosed. + Before WinClosed. *WinNew* WinNew When a new window was created. Not done for the first window, when Vim has just started. diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index b0859d1cea..5b0c7918e0 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -4900,7 +4900,7 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* empty list is returned. If length of {str} is greater than 256, then returns an empty list. - Refer to |fuzzy-match| for more information about fuzzy + Refer to |fuzzy-matching| for more information about fuzzy matching strings. Example: > diff --git a/runtime/doc/channel.txt b/runtime/doc/channel.txt index e14427494d..d4bed7a5f2 100644 --- a/runtime/doc/channel.txt +++ b/runtime/doc/channel.txt @@ -229,12 +229,12 @@ prompt. > call chansend(g:shell_job, [a:text, '']) endfunc - " Function handling output from the shell: Added above the prompt. + " Function handling output from the shell: Add it above the prompt. func GotOutput(channel, msg, name) call append(line("$") - 1, a:msg) endfunc - " Function handling the shell exit: close the window. + " Function handling the shell exits: close the window. func JobExit(job, status, event) quit! endfunc diff --git a/runtime/doc/diagnostic.txt b/runtime/doc/diagnostic.txt index 781539cfb6..e33c786482 100644 --- a/runtime/doc/diagnostic.txt +++ b/runtime/doc/diagnostic.txt @@ -74,7 +74,7 @@ Functions that take a severity as an optional parameter (e.g. 2. A table with a "min" or "max" key (or both): > - vim.diagnostic.get(0, { severity = {min=vim.diagnostic.severity.WARN}) + vim.diagnostic.get(0, { severity = {min=vim.diagnostic.severity.WARN} }) The latter form allows users to specify a range of severities. 
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index 3015e232a7..9fcdbe4cc3 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -1651,6 +1651,7 @@ Some variables can be set by the user, but the type cannot be changed. *v:argv* *argv-variable* v:argv The command line arguments Vim was invoked with. This is a list of strings. The first item is the Vim command. + See |v:progpath| for the command with full path. *v:beval_col* *beval_col-variable* v:beval_col The number of the column, over which the mouse pointer is. @@ -2027,6 +2028,9 @@ v:null Special value used to put "null" in JSON and NIL in msgpack. used as a String (e.g. in |expr5| with string concatenation operator) and to zero when used as a Number (e.g. in |expr5| or |expr7| when used with numeric operators). Read-only. + In some places `v:null` can be used for a List, Dict, etc. + that is not set. That is slightly different than an empty + List, Dict, etc. *v:numbermax* *numbermax-variable* v:numbermax Maximum value of a number. @@ -3062,16 +3066,17 @@ text... opposite of |:lockvar|. :if {expr1} *:if* *:end* *:endif* *:en* *E171* *E579* *E580* -:en[dif] Execute the commands until the next matching ":else" - or ":endif" if {expr1} evaluates to non-zero. +:en[dif] Execute the commands until the next matching `:else` + or `:endif` if {expr1} evaluates to non-zero. Although the short forms work, it is recommended to - always use `:endif` to avoid confusion. + always use `:endif` to avoid confusion and to make + auto-indenting work properly. From Vim version 4.5 until 5.0, every Ex command in - between the ":if" and ":endif" is ignored. These two + between the `:if` and `:endif` is ignored. These two commands were just to allow for future expansions in a backward compatible way. Nesting was allowed. Note - that any ":else" or ":elseif" was ignored, the "else" + that any `:else` or `:elseif` was ignored, the `else` part was not executed either. 
You can use this to remain compatible with older @@ -3080,32 +3085,32 @@ text... : version-5-specific-commands :endif < The commands still need to be parsed to find the - "endif". Sometimes an older Vim has a problem with a - new command. For example, ":silent" is recognized as - a ":substitute" command. In that case ":execute" can + `endif`. Sometimes an older Vim has a problem with a + new command. For example, `:silent` is recognized as + a `:substitute` command. In that case `:execute` can avoid problems: > :if version >= 600 : execute "silent 1,$delete" :endif < - NOTE: The ":append" and ":insert" commands don't work - properly in between ":if" and ":endif". + NOTE: The `:append` and `:insert` commands don't work + properly in between `:if` and `:endif`. *:else* *:el* *E581* *E583* -:el[se] Execute the commands until the next matching ":else" - or ":endif" if they previously were not being +:el[se] Execute the commands until the next matching `:else` + or `:endif` if they previously were not being executed. *:elseif* *:elsei* *E582* *E584* -:elsei[f] {expr1} Short for ":else" ":if", with the addition that there - is no extra ":endif". +:elsei[f] {expr1} Short for `:else` `:if`, with the addition that there + is no extra `:endif`. :wh[ile] {expr1} *:while* *:endwhile* *:wh* *:endw* *E170* *E585* *E588* *E733* -:endw[hile] Repeat the commands between ":while" and ":endwhile", +:endw[hile] Repeat the commands between `:while` and `:endwhile`, as long as {expr1} evaluates to non-zero. When an error is detected from a command inside the - loop, execution continues after the "endwhile". + loop, execution continues after the `endwhile`. Example: > :let lnum = 1 :while lnum <= line("$") @@ -3113,16 +3118,16 @@ text... :let lnum = lnum + 1 :endwhile < - NOTE: The ":append" and ":insert" commands don't work - properly inside a ":while" and ":for" loop. + NOTE: The `:append` and `:insert` commands don't work + properly inside a `:while` and `:for` loop. 
:for {var} in {object} *:for* *E690* *E732* :endfo[r] *:endfo* *:endfor* - Repeat the commands between ":for" and ":endfor" for + Repeat the commands between `:for` and `:endfor` for each item in {object}. {object} can be a |List| or a |Blob|. Variable {var} is set to the value of each item. When an error is detected for a command inside - the loop, execution continues after the "endfor". + the loop, execution continues after the `endfor`. Changing {object} inside the loop affects what items are used. Make a copy if this is unwanted: > :for item in copy(mylist) @@ -3146,7 +3151,7 @@ text... :for [{var1}, {var2}, ...] in {listlist} :endfo[r] - Like ":for" above, but each item in {listlist} must be + Like `:for` above, but each item in {listlist} must be a list, of which each item is assigned to {var1}, {var2}, etc. Example: > :for [lnum, col] in [[1, 3], [2, 5], [3, 8]] @@ -3154,38 +3159,39 @@ text... :endfor < *:continue* *:con* *E586* -:con[tinue] When used inside a ":while" or ":for" loop, jumps back +:con[tinue] When used inside a `:while` or `:for` loop, jumps back to the start of the loop. - If it is used after a |:try| inside the loop but - before the matching |:finally| (if present), the - commands following the ":finally" up to the matching - |:endtry| are executed first. This process applies to - all nested ":try"s inside the loop. The outermost - ":endtry" then jumps back to the start of the loop. + + If it is used after a `:try` inside the loop but + before the matching `:finally` (if present), the + commands following the `:finally` up to the matching + `:endtry` are executed first. This process applies to + all nested `:try`s inside the loop. The outermost + `:endtry` then jumps back to the start of the loop. *:break* *:brea* *E587* -:brea[k] When used inside a ":while" or ":for" loop, skips to - the command after the matching ":endwhile" or - ":endfor". 
- If it is used after a |:try| inside the loop but - before the matching |:finally| (if present), the - commands following the ":finally" up to the matching - |:endtry| are executed first. This process applies to - all nested ":try"s inside the loop. The outermost - ":endtry" then jumps to the command after the loop. +:brea[k] When used inside a `:while` or `:for` loop, skips to + the command after the matching `:endwhile` or + `:endfor`. + If it is used after a `:try` inside the loop but + before the matching `:finally` (if present), the + commands following the `:finally` up to the matching + `:endtry` are executed first. This process applies to + all nested `:try`s inside the loop. The outermost + `:endtry` then jumps to the command after the loop. :try *:try* *:endt* *:endtry* *E600* *E601* *E602* :endt[ry] Change the error handling for the commands between - ":try" and ":endtry" including everything being - executed across ":source" commands, function calls, + `:try` and `:endtry` including everything being + executed across `:source` commands, function calls, or autocommand invocations. When an error or interrupt is detected and there is - a |:finally| command following, execution continues - after the ":finally". Otherwise, or when the - ":endtry" is reached thereafter, the next - (dynamically) surrounding ":try" is checked for - a corresponding ":finally" etc. Then the script + a `:finally` command following, execution continues + after the `:finally`. Otherwise, or when the + `:endtry` is reached thereafter, the next + (dynamically) surrounding `:try` is checked for + a corresponding `:finally` etc. Then the script processing is terminated. Whether a function definition has an "abort" argument does not matter. Example: > @@ -3193,9 +3199,9 @@ text... echomsg "not reached" < Moreover, an error or interrupt (dynamically) inside - ":try" and ":endtry" is converted to an exception. It - can be caught as if it were thrown by a |:throw| - command (see |:catch|). 
In this case, the script + `:try` and `:endtry` is converted to an exception. It + can be caught as if it were thrown by a `:throw` + command (see `:catch`). In this case, the script processing is not terminated. The value "Vim:Interrupt" is used for an interrupt @@ -3211,11 +3217,11 @@ text... try | edit | catch /^Vim(edit):E\d\+/ | echo "error" | endtry < *:cat* *:catch* *E603* *E604* *E605* -:cat[ch] /{pattern}/ The following commands until the next |:catch|, - |:finally|, or |:endtry| that belongs to the same - |:try| as the ":catch" are executed when an exception +:cat[ch] /{pattern}/ The following commands until the next `:catch`, + `:finally`, or `:endtry` that belongs to the same + `:try` as the `:catch` are executed when an exception matching {pattern} is being thrown and has not yet - been caught by a previous ":catch". Otherwise, these + been caught by a previous `:catch`. Otherwise, these commands are skipped. When {pattern} is omitted all errors are caught. Examples: > @@ -3239,27 +3245,27 @@ text... locales. *:fina* *:finally* *E606* *E607* -:fina[lly] The following commands until the matching |:endtry| +:fina[lly] The following commands until the matching `:endtry` are executed whenever the part between the matching - |:try| and the ":finally" is left: either by falling - through to the ":finally" or by a |:continue|, - |:break|, |:finish|, or |:return|, or by an error or - interrupt or exception (see |:throw|). + `:try` and the `:finally` is left: either by falling + through to the `:finally` or by a `:continue`, + `:break`, `:finish`, or `:return`, or by an error or + interrupt or exception (see `:throw`). *:th* *:throw* *E608* :th[row] {expr1} The {expr1} is evaluated and thrown as an exception. - If the ":throw" is used after a |:try| but before the - first corresponding |:catch|, commands are skipped - until the first ":catch" matching {expr1} is reached. 
- If there is no such ":catch" or if the ":throw" is - used after a ":catch" but before the |:finally|, the - commands following the ":finally" (if present) up to - the matching |:endtry| are executed. If the ":throw" - is after the ":finally", commands up to the ":endtry" - are skipped. At the ":endtry", this process applies - again for the next dynamically surrounding ":try" + If the `:throw` is used after a `:try` but before the + first corresponding `:catch`, commands are skipped + until the first `:catch` matching {expr1} is reached. + If there is no such `:catch` or if the `:throw` is + used after a `:catch` but before the `:finally`, the + commands following the `:finally` (if present) up to + the matching `:endtry` are executed. If the `:throw` + is after the `:finally`, commands up to the `:endtry` + are skipped. At the `:endtry`, this process applies + again for the next dynamically surrounding `:try` (which may be found in a calling function or sourcing - script), until a matching ":catch" has been found. + script), until a matching `:catch` has been found. If the exception is not caught, the command processing is terminated. Example: > @@ -3274,7 +3280,7 @@ text... Also see |:comment|. Use "\n" to start a new line. Use "\r" to move the cursor to the first column. - Uses the highlighting set by the |:echohl| command. + Uses the highlighting set by the `:echohl` command. Cannot be followed by a comment. Example: > :echo "the value of 'shell' is" &shell @@ -3283,9 +3289,9 @@ text... And since Vim mostly postpones redrawing until it's finished with a sequence of commands this happens quite often. To avoid that a command from before the - ":echo" causes a redraw afterwards (redraws are often + `:echo` causes a redraw afterwards (redraws are often postponed until you type something), force a redraw - with the |:redraw| command. Example: > + with the `:redraw` command. 
Example: > :new | redraw | echo "there is a new window" < *:echo-self-refer* When printing nested containers echo prints second @@ -3304,13 +3310,13 @@ text... *:echon* :echon {expr1} .. Echoes each {expr1}, without anything added. Also see |:comment|. - Uses the highlighting set by the |:echohl| command. + Uses the highlighting set by the `:echohl` command. Cannot be followed by a comment. Example: > :echon "the value of 'shell' is " &shell < - Note the difference between using ":echo", which is a - Vim command, and ":!echo", which is an external shell + Note the difference between using `:echo`, which is a + Vim command, and `:!echo`, which is an external shell command: > :!echo % --> filename < The arguments of ":!" are expanded, see |:_%|. > @@ -3326,8 +3332,8 @@ text... *:echoh* *:echohl* :echoh[l] {name} Use the highlight group {name} for the following - |:echo|, |:echon| and |:echomsg| commands. Also used - for the |input()| prompt. Example: > + `:echo`, `:echon` and `:echomsg` commands. Also used + for the `input()` prompt. Example: > :echohl WarningMsg | echo "Don't panic!" | echohl None < Don't forget to set the group back to "None", otherwise all following echo's will be highlighted. @@ -3336,14 +3342,14 @@ text... :echom[sg] {expr1} .. Echo the expression(s) as a true message, saving the message in the |message-history|. Spaces are placed between the arguments as with the - |:echo| command. But unprintable characters are + `:echo` command. But unprintable characters are displayed, not interpreted. - The parsing works slightly different from |:echo|, - more like |:execute|. All the expressions are first + The parsing works slightly different from `:echo`, + more like `:execute`. All the expressions are first evaluated and concatenated before echoing anything. If expressions does not evaluate to a Number or String, string() is used to turn it into a string. - Uses the highlighting set by the |:echohl| command. 
+ Uses the highlighting set by the `:echohl` command. Example: > :echomsg "It's a Zizzer Zazzer Zuzz, as you can plainly see." < See |:echo-redraw| to avoid the message disappearing @@ -3353,12 +3359,12 @@ text... message in the |message-history|. When used in a script or function the line number will be added. Spaces are placed between the arguments as with the - |:echomsg| command. When used inside a try conditional, + `:echomsg` command. When used inside a try conditional, the message is raised as an error exception instead (see |try-echoerr|). Example: > :echoerr "This script just failed!" -< If you just want a highlighted message use |:echohl|. +< If you just want a highlighted message use `:echohl`. And to get a beep: > :exe "normal \<Esc>" < diff --git a/runtime/doc/insert.txt b/runtime/doc/insert.txt index 39682a2ab2..d9aecfe4fd 100644 --- a/runtime/doc/insert.txt +++ b/runtime/doc/insert.txt @@ -269,8 +269,8 @@ start allow backspacing over the start position of insert; CTRL-W and When 'backspace' is empty, Vi compatible backspacing is used. You cannot backspace over autoindent, before column 1 or before where insert started. -For backwards compatibility the values "0", "1" and "2" are also allowed, see -|'backspace'|. +For backwards compatibility the values "0", "1", "2" and "3" are also allowed, +see |'backspace'|. If the 'backspace' option does contain "eol" and the cursor is in column 1 when one of the three keys is used, the current line is joined with the @@ -798,6 +798,7 @@ CTRL-X CTRL-K Search the files given with the 'dictionary' option the 'dictionary' option is empty. For suggestions where to find a list of words, see the 'dictionary' option. + 'ignorecase', 'smartcase' and 'infercase' apply. CTRL-K or CTRL-N Search forward for next matching keyword. 
This diff --git a/runtime/doc/lsp.txt b/runtime/doc/lsp.txt index d717759444..54c648e171 100644 --- a/runtime/doc/lsp.txt +++ b/runtime/doc/lsp.txt @@ -355,8 +355,8 @@ To configure the behavior of a builtin |lsp-handler|, the convenient method *lsp-handler-resolution* Handlers can be set by: -- Setting a field in |vim.lsp.handlers|. *vim.lsp.handlers* - |vim.lsp.handlers| is a global table that contains the default mapping of +- Setting a field in vim.lsp.handlers. *vim.lsp.handlers* + vim.lsp.handlers is a global table that contains the default mapping of |lsp-method| names to |lsp-handlers|. To override the handler for the `"textDocument/definition"` method: > diff --git a/runtime/doc/lua.txt b/runtime/doc/lua.txt index 98af84e1cb..11629332ae 100644 --- a/runtime/doc/lua.txt +++ b/runtime/doc/lua.txt @@ -1083,7 +1083,7 @@ from within Lua. `vim.opt.wildignore = '*.o,*.a,__pycache__'` However, vim.opt also supports a more elegant way of setting - list-style options, but using lua tables: + list-style options by using lua tables: `vim.opt.wildignore = { '*.o', '*.a', '__pycache__' }` To replicate the behavior of |:set+=|, use: > diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index c3bd5baff2..680b853dab 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -153,9 +153,10 @@ index, on which the cursor is. This can look like this: > Note: the count does not take offset into account. When no match is found you get the error: *E486* Pattern not found -Note that for the |:global| command this behaves like a normal message, for Vi -compatibility. For the |:s| command the "e" flag can be used to avoid the -error message |:s_flags|. +Note that for the `:global` command, you get a normal message "Pattern not +found", for Vi compatibility. +For the |:s| command the "e" flag can be used to avoid the error message +|:s_flags|. *search-offset* *{offset}* These commands search for the specified pattern. With "/" and "?" 
an @@ -1422,7 +1423,7 @@ Finally, these constructs are unique to Perl: ":2match" for another plugin. ============================================================================== -11. Fuzzy matching *fuzzy-match* +11. Fuzzy matching *fuzzy-matching* Fuzzy matching refers to matching strings using a non-exact search string. Fuzzy matching will match a string, if all the characters in the search string diff --git a/runtime/doc/quickfix.txt b/runtime/doc/quickfix.txt index 601384a71f..0345b14b7a 100644 --- a/runtime/doc/quickfix.txt +++ b/runtime/doc/quickfix.txt @@ -642,22 +642,25 @@ instead. If the buffer in the used window has changed, and the error is in another file, jumping to the error will fail. You will first have to make sure the window contains a buffer which can be abandoned. -The following steps are used to find a window to open the file selected from -the quickfix window: -1. If 'switchbuf' contains "usetab", then find a window in any tabpage - (starting with the first tabpage) that has the selected file and jump to - it. -2. Otherwise find a window displaying the selected file in the current tab - page (starting with the window before the quickfix window) and use it. -3. Otherwise find a window displaying a normal buffer ('buftype' is empty) - starting with the window before the quickfix window. If a window is found, - open the file in that window. -4. If a usable window is not found and 'switchbuf' contains "uselast", then - open the file in the last used window. -5. Otherwise open the file in the window before the quickfix window. If there - is no previous window, then open the file in the next window. -6. If a usable window is not found in the above steps, then create a new - horizontally split window above the quickfix window and open the file. +When you select a file from the quickfix window, the following steps are used +to find a window to edit the file: + +1. 
If a window displaying the selected file is present in the current tabpage + (starting with the window before the quickfix window), then that window is + used. +2. If the above step fails and if 'switchbuf' contains "usetab" and a window + displaying the selected file is present in any one of the tabpages + (starting with the first tabpage) then that window is used. +3. If the above step fails then a window in the current tabpage displaying a + buffer with 'buftype' not set (starting with the window before the quickfix + window) is used. +4. If the above step fails and if 'switchbuf' contains "uselast", then the + previously accessed window is used. +5. If the above step fails then the window before the quickfix window is used. + If there is no previous window, then the window after the quickfix window + is used. +6. If the above step fails, then a new horizontally split window above the + quickfix window is used. *CTRL-W_<Enter>* *CTRL-W_<CR>* You can use CTRL-W <Enter> to open a new window and jump to the error there. @@ -697,13 +700,15 @@ this window, the displayed location list is used. When you select a file from the location list window, the following steps are used to find a window to edit the file: -1. If a window with the location list displayed in the location list window is - present, then the file is opened in that window. -2. If the above step fails and if the file is already opened in another - window, then that window is used. -3. If the above step fails then an existing window showing a buffer with - 'buftype' not set is used. -4. If the above step fails, then the file is edited in a new window. +1. If a non-quickfix window associated with the location list is present in + the current tabpage, then that window is used. +2. If the above step fails and if the file is already opened in another window + in the current tabpage, then that window is used. +3. 
If the above step fails and 'switchbuf' contains "usetab" and if the file + is opened in a window in any one of the tabpages, then that window is used. +4. If the above step fails then a window in the current tabpage showing a + buffer with 'buftype' not set is used. +5. If the above step fails, then the file is edited in a new window. In all of the above cases, if the location list for the selected window is not yet set, then it is set to the location list displayed in the location list @@ -1036,7 +1041,7 @@ commands can be combined to create a NewGrep command: > matching is used to find matching lines. In this case, {pattern} is treated as a literal string instead of a regular expression. See - |fuzzy-match| for more information about fuzzy + |fuzzy-matching| for more information about fuzzy matching strings. |QuickFixCmdPre| and |QuickFixCmdPost| are triggered. diff --git a/runtime/doc/repeat.txt b/runtime/doc/repeat.txt index 05529dc90a..994f97bba0 100644 --- a/runtime/doc/repeat.txt +++ b/runtime/doc/repeat.txt @@ -161,6 +161,11 @@ Q Repeat the last recorded register [count] times. result of evaluating the expression is executed as an Ex command. Mappings are not recognized in these commands. + When the |line-continuation| character (\) is present + at the beginning of a line in a linewise register, + then it is combined with the previous line. This is + useful for yanking and executing parts of a Vim + script. *:@:* :[addr]@: Repeat last command-line. First set cursor at line diff --git a/runtime/doc/syntax.txt b/runtime/doc/syntax.txt index 9084c5315a..c8280173fb 100644 --- a/runtime/doc/syntax.txt +++ b/runtime/doc/syntax.txt @@ -4388,7 +4388,7 @@ Leading context *:syn-lc* *:syn-leading* *:syn-context* Note: This is an obsolete feature, only included for backwards compatibility with previous Vim versions. It's now recommended to use the |/\@<=| construct -in the pattern. +in the pattern. You can also often use |/\zs|. 
The "lc" offset specifies leading context -- a part of the pattern that must be present, but is not considered part of the match. An offset of "lc=n" will diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt index dbd8ec6fef..02be20c5e8 100644 --- a/runtime/doc/treesitter.txt +++ b/runtime/doc/treesitter.txt @@ -207,14 +207,14 @@ Here is a list of built-in predicates : `eq?` *ts-predicate-eq?* This predicate will check text correspondence between nodes or - strings : > + strings: > ((identifier) @foo (#eq? @foo "foo")) ((node1) @left (node2) @right (#eq? @left @right)) < `match?` *ts-predicate-match?* `vim-match?` *ts-predicate-vim-match?* This will match if the provided vim regex matches the text - corresponding to a node : > + corresponding to a node: > ((identifier) @constant (#match? @constant "^[A-Z_]+$")) < Note: the `^` and `$` anchors will respectively match the start and end of the node's text. @@ -225,17 +225,18 @@ Here is a list of built-in predicates : `contains?` *ts-predicate-contains?* Will check if any of the following arguments appears in the - text corresponding to the node : > + text corresponding to the node: > ((identifier) @foo (#contains? @foo "foo")) ((identifier) @foo-bar (#contains @foo-bar "foo" "bar")) < `any-of?` *ts-predicate-any-of?* - Will check if the text is the same as any of the following. + Will check if the text is the same as any of the following + arguments: > + ((identifier) @foo (#any-of? @foo "foo" "bar")) +< This is the recommended way to check if the node matches one of many keywords for example, as it has been optimized for this. - arguments : > - ((identifier) @foo (#any-of? 
@foo "foo" "bar")) < *lua-treesitter-not-predicate* Each predicate has a `not-` prefixed predicate that is just the negation of diff --git a/runtime/doc/uganda.txt b/runtime/doc/uganda.txt index 79519da51e..23dfa082a0 100644 --- a/runtime/doc/uganda.txt +++ b/runtime/doc/uganda.txt @@ -129,11 +129,12 @@ Kibaale Children's Centre *kcc* *Kibaale* *charity* Kibaale Children's Centre (KCC) is located in Kibaale, a small town in the south of Uganda, near Tanzania, in East Africa. The area is known as Rakai District. The population is mostly farmers. Although people are poor, there -is enough food. But this district is suffering from AIDS more than any other -part of the world. Some say that it started there. Estimations are that 10 -to 30% of the Ugandans are infected with HIV. Because parents die, there are -many orphans. In this district about 60,000 children have lost one or both -parents, out of a population of 350,000. And this is still continuing. +usually is enough food. But this district is suffering from AIDS more than +any other part of the world. Some say that it started there. Estimations are +that in the past 10 to 30% of the Ugandans are infected with HIV. Because +parents die, there are many orphans. In this district about 60,000 children +have lost one or both parents, out of a population of 350,000. Although AIDS +is now mostly under control, the problems are still continuing. The children need a lot of help. The KCC is working hard to provide the needy with food, medical care and education. Food and medical care to keep them diff --git a/runtime/ftplugin/ant.vim b/runtime/ftplugin/ant.vim index 5905858896..aee07ca4b9 100644 --- a/runtime/ftplugin/ant.vim +++ b/runtime/ftplugin/ant.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: ant -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. 
+" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/aspvbs.vim b/runtime/ftplugin/aspvbs.vim index 660dab4685..70a130d287 100644 --- a/runtime/ftplugin/aspvbs.vim +++ b/runtime/ftplugin/aspvbs.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: aspvbs -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/config.vim b/runtime/ftplugin/config.vim index 7fde42ebf5..73136cbc66 100644 --- a/runtime/ftplugin/config.vim +++ b/runtime/ftplugin/config.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: config -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/csc.vim b/runtime/ftplugin/csc.vim index 3a09c3bf8b..7b4126a503 100644 --- a/runtime/ftplugin/csc.vim +++ b/runtime/ftplugin/csc.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: csc -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. 
+" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif let b:did_ftplugin = 1 diff --git a/runtime/ftplugin/csh.vim b/runtime/ftplugin/csh.vim index 929823219c..ca5da5a8b9 100644 --- a/runtime/ftplugin/csh.vim +++ b/runtime/ftplugin/csh.vim @@ -1,7 +1,7 @@ " Vim filetype plugin file " Language: csh " Maintainer: Doug Kearns <dougkearns@gmail.com> -" Previous Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" Previous Maintainer: Dan Sharp " Contributor: Johannes Zellner <johannes@zellner.org> " Last Change: 2021 Oct 15 diff --git a/runtime/ftplugin/dtd.vim b/runtime/ftplugin/dtd.vim index 6c08f6691d..a046118c70 100644 --- a/runtime/ftplugin/dtd.vim +++ b/runtime/ftplugin/dtd.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: dtd -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif let b:did_ftplugin = 1 diff --git a/runtime/ftplugin/html.vim b/runtime/ftplugin/html.vim index 7579080ea5..3179aa2e88 100644 --- a/runtime/ftplugin/html.vim +++ b/runtime/ftplugin/html.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: html -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. 
+" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif let b:did_ftplugin = 1 diff --git a/runtime/ftplugin/java.vim b/runtime/ftplugin/java.vim index 292cb6b166..74c8e8d1c1 100644 --- a/runtime/ftplugin/java.vim +++ b/runtime/ftplugin/java.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: Java -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Change: 2012 Mar 11 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif let b:did_ftplugin = 1 diff --git a/runtime/ftplugin/jsp.vim b/runtime/ftplugin/jsp.vim index fbba863b32..18136ccc24 100644 --- a/runtime/ftplugin/jsp.vim +++ b/runtime/ftplugin/jsp.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: jsp -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. 
+" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/pascal.vim b/runtime/ftplugin/pascal.vim index 2de92563ae..aba1e54f27 100644 --- a/runtime/ftplugin/pascal.vim +++ b/runtime/ftplugin/pascal.vim @@ -1,7 +1,7 @@ " Vim filetype plugin file " Language: Pascal " Maintainer: Doug Kearns <dougkearns@gmail.com> -" Previous Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" Previous Maintainer: Dan Sharp " Last Change: 2021 Apr 23 if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/php.vim b/runtime/ftplugin/php.vim index a2f8b4d8d3..3ff0828ffe 100644 --- a/runtime/ftplugin/php.vim +++ b/runtime/ftplugin/php.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: php -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/sgml.vim b/runtime/ftplugin/sgml.vim index bf63efbf1f..ef52125c68 100644 --- a/runtime/ftplugin/sgml.vim +++ b/runtime/ftplugin/sgml.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: sgml -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. 
+" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/sh.vim b/runtime/ftplugin/sh.vim index 593fcec927..93a46f63e2 100644 --- a/runtime/ftplugin/sh.vim +++ b/runtime/ftplugin/sh.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: sh -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif let b:did_ftplugin = 1 diff --git a/runtime/ftplugin/svg.vim b/runtime/ftplugin/svg.vim index 8fff6ea32c..6f16b1a0f4 100644 --- a/runtime/ftplugin/svg.vim +++ b/runtime/ftplugin/svg.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: svg -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. 
+" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/tcsh.vim b/runtime/ftplugin/tcsh.vim index 33f1aabf68..85d3873b33 100644 --- a/runtime/ftplugin/tcsh.vim +++ b/runtime/ftplugin/tcsh.vim @@ -1,7 +1,7 @@ " Vim filetype plugin file " Language: tcsh " Maintainer: Doug Kearns <dougkearns@gmail.com> -" Previous Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" Previous Maintainer: Dan Sharp " Last Change: 2021 Oct 15 if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/xhtml.vim b/runtime/ftplugin/xhtml.vim index 21ed3e1100..d2a1c0b566 100644 --- a/runtime/ftplugin/xhtml.vim +++ b/runtime/ftplugin/xhtml.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: xhtml -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/xml.vim b/runtime/ftplugin/xml.vim index 1d43521155..9aa188cecc 100644 --- a/runtime/ftplugin/xml.vim +++ b/runtime/ftplugin/xml.vim @@ -3,7 +3,7 @@ " Maintainer: Christian Brabandt <cb@256bit.org> " Last Changed: Dec 07th, 2018 " Repository: https://github.com/chrisbra/vim-xml-ftplugin -" Previous Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" Previous Maintainer: Dan Sharp " URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/xsd.vim b/runtime/ftplugin/xsd.vim index 6a4a193656..7d3efbb390 100644 --- a/runtime/ftplugin/xsd.vim +++ b/runtime/ftplugin/xsd.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: xsd -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is 
looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/ftplugin/xslt.vim b/runtime/ftplugin/xslt.vim index 1a5ee62865..9d2def107b 100644 --- a/runtime/ftplugin/xslt.vim +++ b/runtime/ftplugin/xslt.vim @@ -1,8 +1,10 @@ " Vim filetype plugin file " Language: xslt -" Maintainer: Dan Sharp <dwsharp at users dot sourceforge dot net> +" +" This runtime file is looking for a new maintainer. +" +" Former maintainer: Dan Sharp " Last Changed: 20 Jan 2009 -" URL: http://dwsharp.users.sourceforge.net/vim/ftplugin if exists("b:did_ftplugin") | finish | endif diff --git a/runtime/indent/vim.vim b/runtime/indent/vim.vim index f5a94940bd..cd2d4982d8 100644 --- a/runtime/indent/vim.vim +++ b/runtime/indent/vim.vim @@ -1,7 +1,7 @@ " Vim indent file " Language: Vim script " Maintainer: Bram Moolenaar <Bram@vim.org> -" Last Change: 2022 Feb 23 +" Last Change: 2022 Mar 01 " Only load this indent file when no other was loaded. if exists("b:did_indent") @@ -105,7 +105,7 @@ function GetVimIndentIntern() " terminated dict. " ":function" starts a block but "function(" doesn't. if prev_text !~ '^\s*au\%[tocmd]' && prev_text !~ '^\s*{.*}' - let i = match(prev_text, '\(^\||\)\s*\(export\s\+\)\?\({\|\(if\|wh\%[ile]\|for\|try\|cat\%[ch]\|fina\|finall\%[y]\|def\|el\%[seif]\)\>\|fu\%[nction]\s\)') + let i = match(prev_text, '\(^\||\)\s*\(export\s\+\)\?\({\|\(if\|wh\%[ile]\|for\|try\|cat\%[ch]\|fina\|finall\%[y]\|def\|el\%[seif]\)\>\|fu\%[nction][! 
]\)') if i >= 0 let ind += shiftwidth() if strpart(prev_text, i, 1) == '|' && has('syntax_items') diff --git a/runtime/lua/vim/keymap.lua b/runtime/lua/vim/keymap.lua index df49eff4b6..7f12372502 100644 --- a/runtime/lua/vim/keymap.lua +++ b/runtime/lua/vim/keymap.lua @@ -25,7 +25,7 @@ local keymap = {} --- vim.keymap.set('n', 'asdf', require('jkl').my_fun) --- </pre> --- ---- the require('jkl') gets evaluated during this call in order to access the function. If you want to +--- the `require('jkl')` gets evaluated during this call in order to access the function. If you want to --- avoid this cost at startup you can wrap it in a function, for example: --- <pre> --- vim.keymap.set('n', 'asdf', function() return require('jkl').my_fun() end) diff --git a/runtime/menu.vim b/runtime/menu.vim index 701dd33cdd..956b941971 100644 --- a/runtime/menu.vim +++ b/runtime/menu.vim @@ -569,9 +569,9 @@ func! s:XxdConv() %!mc vim:xxd else call s:XxdFind() - exe '%!' . g:xxdprogram + exe ':%!' . g:xxdprogram endif - if getline(1) =~ "^0000000:" " only if it worked + if getline(1) =~ "^00000000:" " only if it worked set ft=xxd endif let &mod = mod @@ -583,7 +583,7 @@ func! s:XxdBack() %!mc vim:xxd -r else call s:XxdFind() - exe '%!' . g:xxdprogram . ' -r' + exe ':%!' . g:xxdprogram . 
' -r' endif set ft= doautocmd filetypedetect BufReadPost diff --git a/scripts/stripdecls.py b/scripts/stripdecls.py deleted file mode 100755 index b4ac34dcfe..0000000000 --- a/scripts/stripdecls.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/python -# vim: set fileencoding=utf-8: - -from __future__ import print_function, unicode_literals, division - -from clang.cindex import Index, CursorKind -from collections import namedtuple, OrderedDict, defaultdict -import sys -import os - - -DECL_KINDS = { - CursorKind.FUNCTION_DECL, -} - - -Strip = namedtuple('Strip', 'start_line start_column end_line end_column') - - -def main(progname, cfname, only_static, move_all): - cfname = os.path.abspath(os.path.normpath(cfname)) - - hfname1 = os.path.splitext(cfname)[0] + os.extsep + 'h' - hfname2 = os.path.splitext(cfname)[0] + '_defs' + os.extsep + 'h' - - files_to_modify = (cfname, hfname1, hfname2) - - index = Index.create() - src_dirname = os.path.join(os.path.dirname(__file__), '..', 'src') - src_dirname = os.path.abspath(os.path.normpath(src_dirname)) - relname = os.path.join(src_dirname, 'nvim') - unit = index.parse(cfname, args=('-I' + src_dirname, - '-DUNIX', - '-DEXITFREE', - '-DFEAT_USR_CMDS', - '-DFEAT_CMDL_COMPL', - '-DFEAT_COMPL_FUNC', - '-DPROTO', - '-DUSE_MCH_ERRMSG')) - cursor = unit.cursor - - tostrip = defaultdict(OrderedDict) - definitions = set() - - for child in cursor.get_children(): - if not (child.location and child.location.file): - continue - fname = os.path.abspath(os.path.normpath(child.location.file.name)) - if fname not in files_to_modify: - continue - if child.kind not in DECL_KINDS: - continue - if only_static and next(child.get_tokens()).spelling == 'static': - continue - - if child.is_definition() and fname == cfname: - definitions.add(child.spelling) - else: - stripdict = tostrip[fname] - assert(child.spelling not in stripdict) - stripdict[child.spelling] = Strip( - child.extent.start.line, - child.extent.start.column, - child.extent.end.line, - 
child.extent.end.column, - ) - - for (fname, stripdict) in tostrip.items(): - if not move_all: - for name in set(stripdict) - definitions: - stripdict.pop(name) - - if not stripdict: - continue - - if fname.endswith('.h'): - is_h_file = True - include_line = next(reversed(stripdict.values())).start_line + 1 - else: - is_h_file = False - include_line = next(iter(stripdict.values())).start_line - - lines = None - generated_existed = os.path.exists(fname + '.generated.h') - with open(fname, 'rb') as F: - lines = list(F) - - stripped = [] - - for name, position in reversed(stripdict.items()): - sl = slice(position.start_line - 1, position.end_line) - if is_h_file: - include_line -= sl.stop - sl.start - stripped += lines[sl] - lines[sl] = () - - if not generated_existed: - lines[include_line:include_line] = [ - '#ifdef INCLUDE_GENERATED_DECLARATIONS\n', - '# include "{0}.generated.h"\n'.format( - os.path.relpath(fname, relname)), - '#endif\n', - ] - - with open(fname, 'wb') as F: - F.writelines(lines) - - -if __name__ == '__main__': - progname = sys.argv[0] - args = sys.argv[1:] - if not args or '--help' in args: - print('Usage:') - print('') - print(' {0} [--static [--all]] file.c...'.format(progname)) - print('') - print('Stripts all declarations from file.c, file.h and file_defs.h.') - print('If --static argument is given then only static declarations are') - print('stripped. 
Declarations are stripped only if corresponding') - print('definition is found unless --all argument was given.') - print('') - print('Note: it is assumed that static declarations starts with "static"') - print(' keyword.') - sys.exit(0 if args else 1) - - if args[0] == '--static': - only_static = True - args = args[1:] - else: - only_static = False - - if args[0] == '--all': - move_all = True - args = args[1:] - else: - move_all = False - - for cfname in args: - print('Processing {0}'.format(cfname)) - main(progname, cfname, only_static, move_all) diff --git a/src/clint.py b/src/clint.py index 4b7bf002e6..10f33d22e2 100755 --- a/src/clint.py +++ b/src/clint.py @@ -191,7 +191,6 @@ _ERROR_CATEGORIES = [ 'readability/fn_size', 'readability/multiline_comment', 'readability/multiline_string', - 'readability/nolint', 'readability/nul', 'readability/todo', 'readability/utf8', @@ -298,9 +297,6 @@ def ParseNolintSuppressions(filename, raw_line, linenum, error): if category in _ERROR_CATEGORIES: _error_suppressions.setdefault( category, set()).add(linenum) - else: - error(filename, linenum, 'readability/nolint', 5, - 'Unknown NOLINT error category: %s' % category) def ParseKnownErrorSuppressions(filename, raw_lines, linenum): @@ -373,7 +369,7 @@ def Search(pattern, s): return _regexp_compile_cache[pattern].search(s) -class _IncludeState(dict): +class _IncludeState(dict): # lgtm [py/missing-equals] """Tracks line numbers for includes, and the order in which includes appear. 
diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt index 9f29cae29d..96e5d1e77c 100644 --- a/src/nvim/CMakeLists.txt +++ b/src/nvim/CMakeLists.txt @@ -135,6 +135,9 @@ foreach(sfile ${NVIM_SOURCES}) if(${f} MATCHES "^(regexp_nfa.c)$") list(APPEND to_remove ${sfile}) endif() + if(${f} MATCHES "^(regexp_bt.c)$") + list(APPEND to_remove ${sfile}) + endif() if(WIN32 AND ${f} MATCHES "^(pty_process_unix.c)$") list(APPEND to_remove ${sfile}) endif() @@ -261,6 +264,7 @@ endif() # NVIM_GENERATED_SOURCES: generated source files # These lists must be mutually exclusive. foreach(sfile ${NVIM_SOURCES} + "${CMAKE_CURRENT_LIST_DIR}/regexp_bt.c" "${CMAKE_CURRENT_LIST_DIR}/regexp_nfa.c" ${GENERATED_API_DISPATCH} "${GENERATED_UI_EVENTS_CALL}" diff --git a/src/nvim/api/buffer.c b/src/nvim/api/buffer.c index 3dd647e76f..3bddbe8fe6 100644 --- a/src/nvim/api/buffer.c +++ b/src/nvim/api/buffer.c @@ -133,7 +133,7 @@ Integer nvim_buf_line_count(Buffer buffer, Error *err) /// - buffer handle /// - on_reload: Lua callback invoked on reload. The entire buffer /// content should be considered changed. Args: -/// - the string "detach" +/// - the string "reload" /// - buffer handle /// - utf_sizes: include UTF-32 and UTF-16 size of the replaced /// region, as args to `on_lines`. diff --git a/src/nvim/api/vim.c b/src/nvim/api/vim.c index c37df45c14..1a324bfaa5 100644 --- a/src/nvim/api/vim.c +++ b/src/nvim/api/vim.c @@ -2241,7 +2241,7 @@ Array nvim_get_mark(String name, Dictionary opts, Error *err) /// - winid: (number) |window-ID| of the window to use as context for statusline. /// - maxwidth: (number) Maximum width of statusline. /// - fillchar: (string) Character to fill blank spaces in the statusline (see -/// 'fillchars'). +/// 'fillchars'). Treated as single-width even if it isn't. /// - highlights: (boolean) Return highlight information. /// - use_tabline: (boolean) Evaluate tabline instead of statusline. When |TRUE|, {winid} /// is ignored. 
@@ -2277,11 +2277,12 @@ Dictionary nvim_eval_statusline(String str, Dict(eval_statusline) *opts, Error * if (HAS_KEY(opts->fillchar)) { if (opts->fillchar.type != kObjectTypeString || opts->fillchar.data.string.size == 0 - || char2cells(fillchar = utf_ptr2char((char_u *)opts->fillchar.data.string.data)) != 1 - || (size_t)utf_char2len(fillchar) != opts->fillchar.data.string.size) { - api_set_error(err, kErrorTypeValidation, "fillchar must be a single-width character"); + || ((size_t)utf_ptr2len((char_u *)opts->fillchar.data.string.data) + != opts->fillchar.data.string.size)) { + api_set_error(err, kErrorTypeValidation, "fillchar must be a single character"); return result; } + fillchar = utf_ptr2char((char_u *)opts->fillchar.data.string.data); } if (HAS_KEY(opts->highlights)) { diff --git a/src/nvim/charset.c b/src/nvim/charset.c index 97aac67627..3383dd2a76 100644 --- a/src/nvim/charset.c +++ b/src/nvim/charset.c @@ -928,10 +928,12 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en // continue until the NUL posptr = NULL; } else { - // Special check for an empty line, which can happen on exit, when - // ml_get_buf() always returns an empty string. - if (*ptr == NUL) { - pos->col = 0; + // In a few cases the position can be beyond the end of the line. 
+ for (colnr_T i = 0; i < pos->col; i++) { + if (ptr[i] == NUL) { + pos->col = i; + break; + } } posptr = ptr + pos->col; posptr -= utf_head_off(line, posptr); diff --git a/src/nvim/decoration.c b/src/nvim/decoration.c index 619e8fdadc..b533f4c46f 100644 --- a/src/nvim/decoration.c +++ b/src/nvim/decoration.c @@ -150,7 +150,6 @@ bool decor_redraw_reset(buf_T *buf, DecorState *state) { state->row = -1; state->buf = buf; - state->has_sign_decor = false; for (size_t i = 0; i < kv_size(state->active); i++) { DecorRange item = kv_A(state->active, i); if (item.virt_text_owned) { @@ -202,11 +201,6 @@ bool decor_redraw_start(buf_T *buf, int top_row, DecorState *state) goto next_mark; } - // Don't add signs for end marks as the start mark has already been added. - if (mt_end(mark) && decor_has_sign(&decor)) { - goto next_mark; - } - mtpos_t altpos = marktree_get_altpos(buf->b_marktree, mark, NULL); // Exclude start marks if the end mark position is above the top row @@ -270,10 +264,6 @@ static void decor_add(DecorState *state, int start_row, int start_col, int end_r kv_A(state->active, index) = kv_A(state->active, index-1); } kv_A(state->active, index) = range; - - if (decor_has_sign(decor)) { - state->has_sign_decor = true; - } } int decor_redraw_col(buf_T *buf, int col, int win_col, bool hidden, DecorState *state) @@ -327,8 +317,7 @@ next_mark: bool active = false, keep = true; if (item.end_row < state->row || (item.end_row == state->row && item.end_col <= col)) { - if (!(item.start_row >= state->row && kv_size(item.decor.virt_text)) - && !decor_has_sign(&item.decor)) { + if (!(item.start_row >= state->row && kv_size(item.decor.virt_text))) { keep = false; } } else { @@ -363,19 +352,29 @@ next_mark: return attr; } -void decor_redraw_signs(buf_T *buf, DecorState *state, int row, - int *num_signs, sign_attrs_T sattrs[]) +void decor_redraw_signs(buf_T *buf, int row, int *num_signs, sign_attrs_T sattrs[]) { - for (size_t i = 0; i < kv_size(state->active); i++) { - DecorRange 
item = kv_A(state->active, i); - Decoration *decor = &item.decor; + if (!buf->b_signs) { + return; + } + + MarkTreeIter itr[1] = { 0 }; + marktree_itr_get(buf->b_marktree, row, 0, itr); + + while (true) { + mtkey_t mark = marktree_itr_current(itr); + if (mark.pos.row < 0 || mark.pos.row > row) { + break; + } - if (!decor_has_sign(decor)) { - continue; + if (mt_end(mark) || marktree_decor_level(mark) < kDecorLevelVisible) { + goto next_mark; } - if (state->row != item.start_row) { - continue; + Decoration *decor = mark.decor_full; + + if (!decor || !decor_has_sign(decor)) { + goto next_mark; } int j; @@ -403,6 +402,9 @@ void decor_redraw_signs(buf_T *buf, DecorState *state, int row, sattrs[j].sat_prio = decor->priority; (*num_signs)++; } + +next_mark: + marktree_itr_next(buf->b_marktree, itr); } } diff --git a/src/nvim/decoration.h b/src/nvim/decoration.h index 7e6dbdf1a7..2277a0ef1c 100644 --- a/src/nvim/decoration.h +++ b/src/nvim/decoration.h @@ -80,7 +80,6 @@ typedef struct { int col_until; int current; int eol_col; - bool has_sign_decor; } DecorState; typedef struct { diff --git a/src/nvim/eval.c b/src/nvim/eval.c index c5c03455b7..8cdb03c341 100644 --- a/src/nvim/eval.c +++ b/src/nvim/eval.c @@ -7789,8 +7789,7 @@ bool callback_call(Callback *const callback, const int argcount_in, typval_T *co break; case kCallbackLua: - ILOG(" We tryin to call dat dang lua ref "); - nlua_call_ref(callback->data.luaref, "aucmd", args, false, NULL); + nlua_call_ref(callback->data.luaref, NULL, args, false, NULL); return false; break; @@ -9441,7 +9440,7 @@ void new_script_vars(scid_T id) hashtab_T *ht; scriptvar_T *sv; - ga_grow(&ga_scripts, (int)(id - ga_scripts.ga_len)); + ga_grow(&ga_scripts, id - ga_scripts.ga_len); { /* Re-allocating ga_data means that an ht_array pointing to * ht_smallarray becomes invalid. 
We can recognize this: ht_mask is diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index 7da4fe62e8..d81a408663 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -9706,8 +9706,7 @@ free_lstval: if (set_unnamed) { // Discard the result. We already handle the error case. - if (op_reg_set_previous(regname)) { - } + op_reg_set_previous(regname); } } @@ -11413,13 +11412,13 @@ static void f_synIDattr(typval_T *argvars, typval_T *rettv, FunPtr fptr) if (len <= 5 || (TOLOWER_ASC(what[5]) == 'l' && len <= 9)) { // underline p = highlight_has_attr(id, HL_UNDERLINE, modec); } else if (TOLOWER_ASC(what[5]) == 'c') { // undercurl - p = highlight_has_attr(id, HL_UNDERCURL, modec); + p = highlight_has_attr(id, HL_UNDERCURL, modec); } else if (len > 9 && TOLOWER_ASC(what[9]) == 'l') { // underlineline - p = highlight_has_attr(id, HL_UNDERLINELINE, modec); + p = highlight_has_attr(id, HL_UNDERLINELINE, modec); } else if (len > 6 && TOLOWER_ASC(what[6]) == 'o') { // underdot - p = highlight_has_attr(id, HL_UNDERDOT, modec); + p = highlight_has_attr(id, HL_UNDERDOT, modec); } else { // underdash - p = highlight_has_attr(id, HL_UNDERDASH, modec); + p = highlight_has_attr(id, HL_UNDERDASH, modec); } break; } diff --git a/src/nvim/eval/userfunc.c b/src/nvim/eval/userfunc.c index 5764f9fbd4..471c4092fe 100644 --- a/src/nvim/eval/userfunc.c +++ b/src/nvim/eval/userfunc.c @@ -516,7 +516,7 @@ static char_u *fname_trans_sid(const char_u *const name, char_u *const fname_buf if (llen > 0) { fname_buf[0] = K_SPECIAL; fname_buf[1] = KS_EXTRA; - fname_buf[2] = (int)KE_SNR; + fname_buf[2] = KE_SNR; int i = 3; if (eval_fname_sid((const char *)name)) { // "<SID>" or "s:" if (current_sctx.sc_sid <= 0) { @@ -1713,7 +1713,7 @@ char_u *trans_function_name(char_u **pp, bool skip, int flags, funcdict_T *fdp, // Check for hard coded <SNR>: already translated function ID (from a user // command). 
if ((*pp)[0] == K_SPECIAL && (*pp)[1] == KS_EXTRA - && (*pp)[2] == (int)KE_SNR) { + && (*pp)[2] == KE_SNR) { *pp += 3; len = get_id_len((const char **)pp) + 3; return (char_u *)xmemdupz(start, len); @@ -1821,7 +1821,7 @@ char_u *trans_function_name(char_u **pp, bool skip, int flags, funcdict_T *fdp, // Change "<SNR>" to the byte sequence. name[0] = K_SPECIAL; name[1] = KS_EXTRA; - name[2] = (int)KE_SNR; + name[2] = KE_SNR; memmove(name + 3, name + 5, strlen((char *)name + 5) + 1); } goto theend; @@ -1888,7 +1888,7 @@ char_u *trans_function_name(char_u **pp, bool skip, int flags, funcdict_T *fdp, if (!skip && lead > 0) { name[0] = K_SPECIAL; name[1] = KS_EXTRA; - name[2] = (int)KE_SNR; + name[2] = KE_SNR; if (sid_buf_len > 0) { // If it's "<SID>" memcpy(name + 3, sid_buf, sid_buf_len); } diff --git a/src/nvim/ex_cmds2.c b/src/nvim/ex_cmds2.c index 8666c7e33a..5c040adc1c 100644 --- a/src/nvim/ex_cmds2.c +++ b/src/nvim/ex_cmds2.c @@ -1698,7 +1698,7 @@ static bool concat_continued_line(garray_T *const ga, const int init_growsize, return false; } if (ga->ga_len > init_growsize) { - ga_set_growsize(ga, MAX(ga->ga_len, 8000)); + ga_set_growsize(ga, MIN(ga->ga_len, 8000)); } ga_concat_len(ga, (const char *)line + 1, len - 1); return true; @@ -1796,7 +1796,7 @@ scriptitem_T *new_script_item(char_u *const name, scid_T *const sid_out) if (sid_out != NULL) { *sid_out = sid; } - ga_grow(&script_items, (int)(sid - script_items.ga_len)); + ga_grow(&script_items, sid - script_items.ga_len); while (script_items.ga_len < sid) { script_items.ga_len++; SCRIPT_ITEM(script_items.ga_len).sn_name = NULL; @@ -1852,7 +1852,7 @@ static void cmd_source_buffer(const exarg_T *const eap) for (linenr_T curr_lnum = eap->line1; curr_lnum <= final_lnum; curr_lnum++) { // Adjust growsize to current length to speed up concatenating many lines. 
if (ga.ga_len > 400) { - ga_set_growsize(&ga, MAX(ga.ga_len, 8000)); + ga_set_growsize(&ga, MIN(ga.ga_len, 8000)); } ga_concat(&ga, (char *)ml_get(curr_lnum)); ga_append(&ga, NL); @@ -2188,7 +2188,6 @@ scriptitem_T *get_current_script_id(char_u *fname, sctx_T *ret_sctx) } - /// ":scriptnames" void ex_scriptnames(exarg_T *eap) { diff --git a/src/nvim/ex_docmd.c b/src/nvim/ex_docmd.c index 6e915d98dc..a140d76858 100644 --- a/src/nvim/ex_docmd.c +++ b/src/nvim/ex_docmd.c @@ -8160,7 +8160,7 @@ static void ex_operators(exarg_T *eap) case CMD_yank: oa.op_type = OP_YANK; - (void)op_yank(&oa, true, false); + (void)op_yank(&oa, true); break; default: // CMD_rshift or CMD_lshift diff --git a/src/nvim/ex_eval.c b/src/nvim/ex_eval.c index 6395bbc70b..25b6aa7d8a 100644 --- a/src/nvim/ex_eval.c +++ b/src/nvim/ex_eval.c @@ -430,7 +430,7 @@ char *get_exception_string(void *value, except_type_T type, char_u *cmdname, int STRCAT(val, p); p[-2] = NUL; - sprintf((char *)(val + STRLEN(p)), " (%s)", &mesg[1]); + snprintf(val + STRLEN(p), strlen(" (%s)"), " (%s)", &mesg[1]); p[-2] = '"'; } break; diff --git a/src/nvim/fileio.c b/src/nvim/fileio.c index 965aa8749d..971ae9abae 100644 --- a/src/nvim/fileio.c +++ b/src/nvim/fileio.c @@ -3911,13 +3911,13 @@ static int check_mtime(buf_T *buf, FileInfo *file_info) static bool time_differs(const FileInfo *file_info, long mtime, long mtime_ns) FUNC_ATTR_CONST { - return (long)file_info->stat.st_mtim.tv_nsec != mtime_ns + return file_info->stat.st_mtim.tv_nsec != mtime_ns #if defined(__linux__) || defined(MSWIN) // On a FAT filesystem, esp. under Linux, there are only 5 bits to store // the seconds. Since the roundoff is done when flushing the inode, the // time may change unexpectedly by one second!!! 
- || (long)file_info->stat.st_mtim.tv_sec - mtime > 1 - || mtime - (long)file_info->stat.st_mtim.tv_sec > 1; + || file_info->stat.st_mtim.tv_sec - mtime > 1 + || mtime - file_info->stat.st_mtim.tv_sec > 1; #else || (long)file_info->stat.st_mtim.tv_sec != mtime; #endif @@ -4961,9 +4961,8 @@ int buf_check_timestamp(buf_T *buf) buf_store_file_info(buf, &file_info); } - // Don't do anything for a directory. Might contain the file - // explorer. if (os_isdir(buf->b_fname)) { + // Don't do anything for a directory. Might contain the file explorer. } else if ((buf->b_p_ar >= 0 ? buf->b_p_ar : p_ar) && !bufIsChanged(buf) && file_info_ok) { // If 'autoread' is set, the buffer has no changes and the file still diff --git a/src/nvim/getchar.c b/src/nvim/getchar.c index 22d957d03d..299456f688 100644 --- a/src/nvim/getchar.c +++ b/src/nvim/getchar.c @@ -4140,7 +4140,7 @@ int makemap(FILE *fd, buf_T *buf) } for (p = mp->m_str; *p != NUL; p++) { if (p[0] == K_SPECIAL && p[1] == KS_EXTRA - && p[2] == (int)KE_SNR) { + && p[2] == KE_SNR) { break; } } diff --git a/src/nvim/highlight.c b/src/nvim/highlight.c index a01d8369ba..a1fd0d0d66 100644 --- a/src/nvim/highlight.c +++ b/src/nvim/highlight.c @@ -327,7 +327,7 @@ void update_window_hl(win_T *wp, bool invalid) wp->w_hl_attr_normal); } - for (int hlf = 0; hlf < (int)HLF_COUNT; hlf++) { + for (int hlf = 0; hlf < HLF_COUNT; hlf++) { int attr; if (wp->w_hl_ids[hlf] != 0) { attr = hl_get_ui_attr(hlf, wp->w_hl_ids[hlf], false); diff --git a/src/nvim/lua/xdiff.c b/src/nvim/lua/xdiff.c index ea7a700e1e..37855630d1 100644 --- a/src/nvim/lua/xdiff.c +++ b/src/nvim/lua/xdiff.c @@ -171,6 +171,7 @@ static NluaXdiffMode process_xdl_diff_opts(lua_State *lstate, xdemitconf_t *cfg, goto exit_1; } if (strequal("unified", v->data.string.data)) { + // the default } else if (strequal("indices", v->data.string.data)) { had_result_type_indices = true; } else { diff --git a/src/nvim/marktree.c b/src/nvim/marktree.c index 918db8b76c..4456b28293 100644 --- 
a/src/nvim/marktree.c +++ b/src/nvim/marktree.c @@ -558,8 +558,8 @@ void marktree_revise(MarkTree *b, MarkTreeIter *itr, uint8_t decor_level, mtkey_ { // TODO(bfredl): clean up this mess and re-instantiate &= and |= forms // once we upgrade to a non-broken version of gcc in functionaltest-lua CI - rawkey(itr).flags = (uint16_t)((uint16_t)rawkey(itr).flags & (uint16_t)~MT_FLAG_DECOR_MASK); - rawkey(itr).flags = (uint16_t)((uint16_t)rawkey(itr).flags + rawkey(itr).flags = (uint16_t)(rawkey(itr).flags & (uint16_t)~MT_FLAG_DECOR_MASK); + rawkey(itr).flags = (uint16_t)(rawkey(itr).flags | (uint16_t)(decor_level << MT_FLAG_DECOR_OFFSET) | (uint16_t)(key.flags & MT_FLAG_DECOR_MASK)); rawkey(itr).decor_full = key.decor_full; @@ -1159,8 +1159,6 @@ static size_t check_node(MarkTree *b, mtnode_t *x, mtpos_t *last, bool *last_rig if (i > 0) { unrelative(x->key[i-1].pos, last); } - if (x->level) { - } assert(pos_leq(*last, x->key[i].pos)); if (last->row == x->key[i].pos.row && last->col == x->key[i].pos.col) { assert(!*last_right || mt_right(x->key[i])); diff --git a/src/nvim/memline.c b/src/nvim/memline.c index 004ef36b36..59f57aa667 100644 --- a/src/nvim/memline.c +++ b/src/nvim/memline.c @@ -303,7 +303,7 @@ int ml_open(buf_T *buf) b0p->b0_id[0] = BLOCK0_ID0; b0p->b0_id[1] = BLOCK0_ID1; - b0p->b0_magic_long = (long)B0_MAGIC_LONG; + b0p->b0_magic_long = B0_MAGIC_LONG; b0p->b0_magic_int = (int)B0_MAGIC_INT; b0p->b0_magic_short = (short)B0_MAGIC_SHORT; b0p->b0_magic_char = B0_MAGIC_CHAR; @@ -3709,7 +3709,7 @@ static char *findswapname(buf_T *buf, char **dirp, char *old_fname, bool *found_ static int b0_magic_wrong(ZERO_BL *b0p) { - return b0p->b0_magic_long != (long)B0_MAGIC_LONG + return b0p->b0_magic_long != B0_MAGIC_LONG || b0p->b0_magic_int != (int)B0_MAGIC_INT || b0p->b0_magic_short != (short)B0_MAGIC_SHORT || b0p->b0_magic_char != B0_MAGIC_CHAR; diff --git a/src/nvim/normal.c b/src/nvim/normal.c index 7fe6469527..5c39e9c8bf 100644 --- a/src/nvim/normal.c +++ 
b/src/nvim/normal.c @@ -2105,10 +2105,9 @@ bool do_mouse(oparg_T *oap, int c, int dir, long count, bool fixindent) } else { // MOUSE_RIGHT stuffcharReadbuff('#'); } - } - // Handle double clicks, unless on status line - else if (in_status_line) { - } else if (in_sep_line) { + } else if (in_status_line || in_sep_line) { + // Do nothing if on status line or vertical separator + // Handle double clicks otherwise } else if ((mod_mask & MOD_MASK_MULTI_CLICK) && (State & (NORMAL | INSERT))) { if (is_click || !VIsual_active) { if (VIsual_active) { diff --git a/src/nvim/ops.c b/src/nvim/ops.c index b5c7020dee..244a1881aa 100644 --- a/src/nvim/ops.c +++ b/src/nvim/ops.c @@ -1023,6 +1023,60 @@ static int stuff_yank(int regname, char_u *p) static int execreg_lastc = NUL; +/// When executing a register as a series of ex-commands, if the +/// line-continuation character is used for a line, then join it with one or +/// more previous lines. Note that lines are processed backwards starting from +/// the last line in the register. +/// +/// @param lines list of lines in the register +/// @param idx index of the line starting with \ or "\. Join this line with all the immediate +/// predecessor lines that start with a \ and the first line that doesn't start +/// with a \. Lines that start with a comment "\ character are ignored. +/// @returns the concatenated line. The index of the line that should be +/// processed next is returned in idx. +static char_u *execreg_line_continuation(char_u **lines, size_t *idx) +{ + size_t i = *idx; + assert(i > 0); + const size_t cmd_end = i; + + garray_T ga; + ga_init(&ga, (int)sizeof(char_u), 400); + + char_u *p; + + // search backwards to find the first line of this command. + // Any line not starting with \ or "\ is the start of the + // command. 
+ while (--i > 0) { + p = skipwhite(lines[i]); + if (*p != '\\' && (p[0] != '"' || p[1] != '\\' || p[2] != ' ')) { + break; + } + } + const size_t cmd_start = i; + + // join all the lines + ga_concat(&ga, (char *)lines[cmd_start]); + for (size_t j = cmd_start + 1; j <= cmd_end; j++) { + p = skipwhite(lines[j]); + if (*p == '\\') { + // Adjust the growsize to the current length to + // speed up concatenating many lines. + if (ga.ga_len > 400) { + ga_set_growsize(&ga, MIN(ga.ga_len, 8000)); + } + ga_concat(&ga, (char *)(p + 1)); + } + } + ga_append(&ga, NUL); + char_u *str = vim_strsave(ga.ga_data); + ga_clear(&ga); + + *idx = i; + return str; +} + /// Execute a yank register: copy it into the stuff buffer /// /// @param colon insert ':' before each line @@ -1111,7 +1165,21 @@ int do_execreg(int regname, int colon, int addcr, int silent) return FAIL; } } - escaped = vim_strsave_escape_ks(reg->y_array[i]); + + // Handle line-continuation for :@<register> + char_u *str = reg->y_array[i]; + bool free_str = false; + if (colon && i > 0) { + p = skipwhite(str); + if (*p == '\\' || (p[0] == '"' && p[1] == '\\' && p[2] == ' ')) { + str = execreg_line_continuation(reg->y_array, &i); + free_str = true; + } + } + escaped = vim_strsave_escape_ks(str); + if (free_str) { + xfree(str); + } retval = ins_typebuf(escaped, remap, 0, true, silent); xfree(escaped); if (retval == FAIL) { @@ -1504,20 +1572,20 @@ int op_delete(oparg_T *oap) yankreg_T *reg = NULL; int did_yank = false; if (oap->regname != 0) { - // yank without message - did_yank = op_yank(oap, false, true); - if (!did_yank) { - // op_yank failed, don't do anything + // check for read-only register + if (!valid_yank_reg(oap->regname, true)) { + beep_flush(); return OK; } + reg = get_yank_register(oap->regname, YREG_YANK); // yank into specif'd reg + op_yank_reg(oap, false, reg, is_append_register(oap->regname)); // yank without message + did_yank = true; } - /* - * Put deleted text into register 1 and shift number registers 
if the - * delete contains a line break, or when a regname has been specified. - */ - if (oap->regname != 0 || oap->motion_type == kMTLineWise - || oap->line_count > 1 || oap->use_reg_one) { + // Put deleted text into register 1 and shift number registers if the + // delete contains a line break, or when using a specific operator (Vi + // compatible) + if (oap->motion_type == kMTLineWise || oap->line_count > 1 || oap->use_reg_one) { shift_delete_registers(is_append_register(oap->regname)); reg = &y_regs[1]; op_yank_reg(oap, false, reg, false); @@ -2579,12 +2647,12 @@ void free_register(yankreg_T *reg) /// Yanks the text between "oap->start" and "oap->end" into a yank register. /// If we are to append (uppercase register), we first yank into a new yank /// register and then concatenate the old and the new one. +/// Do not call this from a delete operation. Use op_yank_reg() instead. /// /// @param oap operator arguments /// @param message show message when more than `&report` lines are yanked. -/// @param deleting whether the function was called from a delete operation. /// @returns whether the operation register was writable. 
-bool op_yank(oparg_T *oap, bool message, int deleting) +bool op_yank(oparg_T *oap, bool message) FUNC_ATTR_NONNULL_ALL { // check for read-only register @@ -2598,11 +2666,8 @@ bool op_yank(oparg_T *oap, bool message, int deleting) yankreg_T *reg = get_yank_register(oap->regname, YREG_YANK); op_yank_reg(oap, message, reg, is_append_register(oap->regname)); - // op_delete will set_clipboard and do_autocmd - if (!deleting) { - set_clipboard(oap->regname, reg); - do_autocmd_textyankpost(oap, reg); - } + set_clipboard(oap->regname, reg); + do_autocmd_textyankpost(oap, reg); return true; } @@ -6630,7 +6695,7 @@ void do_pending_operator(cmdarg_T *cap, int old_col, bool gui_yank) } else { curwin->w_p_lbr = lbr_saved; oap->excl_tr_ws = cap->cmdchar == 'z'; - (void)op_yank(oap, !gui_yank, false); + (void)op_yank(oap, !gui_yank); } check_cursor_col(); break; @@ -7314,7 +7379,6 @@ const yankreg_T *op_reg_get(const char name) /// /// @return true on success, false on failure. bool op_reg_set_previous(const char name) - FUNC_ATTR_WARN_UNUSED_RESULT { int i = op_reg_index(name); if (i == -1) { diff --git a/src/nvim/option.c b/src/nvim/option.c index 9068c90dd1..a0706dfa14 100644 --- a/src/nvim/option.c +++ b/src/nvim/option.c @@ -3851,7 +3851,7 @@ static bool parse_winhl_opt(win_T *wp) if (strncmp("Normal", p, nlen) == 0) { w_hl_id_normal = hl_id; } else { - for (hlf = 0; hlf < (int)HLF_COUNT; hlf++) { + for (hlf = 0; hlf < HLF_COUNT; hlf++) { if (strlen(hlf_names[hlf]) == nlen && strncmp(hlf_names[hlf], p, nlen) == 0) { w_hl_ids[hlf] = hl_id; @@ -5267,7 +5267,7 @@ static void showoptions(int all, int opt_flags) && Columns + GAP >= INT_MIN + 3 && (Columns + GAP - 3) / INC >= INT_MIN && (Columns + GAP - 3) / INC <= INT_MAX); - cols = (int)((Columns + GAP - 3) / INC); + cols = (Columns + GAP - 3) / INC; if (cols == 0) { cols = 1; } @@ -5666,11 +5666,11 @@ void comp_col(void) assert(sc_col >= 0 && INT_MIN + sc_col <= Columns && Columns - sc_col <= INT_MAX); - sc_col = (int)(Columns 
- sc_col); + sc_col = Columns - sc_col; assert(ru_col >= 0 && INT_MIN + ru_col <= Columns && Columns - ru_col <= INT_MAX); - ru_col = (int)(Columns - ru_col); + ru_col = Columns - ru_col; if (sc_col <= 0) { // screen too narrow, will become a mess sc_col = 1; } diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 6a6c915094..009a26d4e0 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -5,41 +5,6 @@ /* * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() - * - * NOTICE: - * - * This is NOT the original regular expression code as written by Henry - * Spencer. This code has been modified specifically for use with the VIM - * editor, and should not be used separately from Vim. If you want a good - * regular expression library, get the original code. The copyright notice - * that follows is from the original. - * - * END NOTICE - * - * Copyright (c) 1986 by University of Toronto. - * Written by Henry Spencer. Not derived from licensed software. - * - * Permission is granted to anyone to use this software for any - * purpose on any computer system, and to redistribute it freely, - * subject to the following restrictions: - * - * 1. The author is not responsible for the consequences of use of - * this software, no matter how awful, even if they arise - * from defects in it. - * - * 2. The origin of this software must not be misrepresented, either - * by explicit claim or by omission. - * - * 3. Altered versions must be plainly marked as such, and must not - * be misrepresented as being the original software. - * - * Beware that some of this code is subtly aware of the way operator - * precedence is structured in regular expressions. Serious changes in - * regular-expression syntax might require a total rethink. - * - * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert - * Webb, Ciaran McCreesh and Bram Moolenaar. 
- * Named character class support added by Walter Briscoe (1998 Jul 01) */ // By default: do not create debugging logs or files related to regular @@ -70,205 +35,15 @@ #include "nvim/strings.h" #ifdef REGEXP_DEBUG -/* show/save debugging data when BT engine is used */ +// show/save debugging data when BT engine is used # define BT_REGEXP_DUMP -/* save the debugging data to a file instead of displaying it */ +// save the debugging data to a file instead of displaying it # define BT_REGEXP_LOG # define BT_REGEXP_DEBUG_LOG # define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log" #endif /* - * The "internal use only" fields in regexp_defs.h are present to pass info from - * compile to execute that permits the execute phase to run lots faster on - * simple cases. They are: - * - * regstart char that must begin a match; NUL if none obvious; Can be a - * multi-byte character. - * reganch is the match anchored (at beginning-of-line only)? - * regmust string (pointer into program) that match must include, or NULL - * regmlen length of regmust string - * regflags RF_ values or'ed together - * - * Regstart and reganch permit very fast decisions on suitable starting points - * for a match, cutting down the work a lot. Regmust permits fast rejection - * of lines that cannot possibly match. The regmust tests are costly enough - * that vim_regcomp() supplies a regmust only if the r.e. contains something - * potentially expensive (at present, the only such thing detected is * or + - * at the start of the r.e., which can involve a lot of backup). Regmlen is - * supplied because the test in vim_regexec() needs it and vim_regcomp() is - * computing it anyway. - */ - -/* - * Structure for regexp "program". This is essentially a linear encoding - * of a nondeterministic finite-state machine (aka syntax charts or - * "railroad normal form" in parsing technology). Each node is an opcode - * plus a "next" pointer, possibly plus an operand. 
"Next" pointers of - * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" - * pointer with a BRANCH on both ends of it is connecting two alternatives. - * (Here we have one of the subtle syntax dependencies: an individual BRANCH - * (as opposed to a collection of them) is never concatenated with anything - * because of operator precedence). The "next" pointer of a BRACES_COMPLEX - * node points to the node after the stuff to be repeated. - * The operand of some types of node is a literal string; for others, it is a - * node leading into a sub-FSM. In particular, the operand of a BRANCH node - * is the first node of the branch. - * (NB this is *not* a tree structure: the tail of the branch connects to the - * thing following the set of BRANCHes.) - * - * pattern is coded like: - * - * +-----------------+ - * | V - * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END - * | ^ | ^ - * +------+ +----------+ - * - * - * +------------------+ - * V | - * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +---------------+ | - * +---------------------------------------------+ - * - * - * +----------------------+ - * V | - * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END - * | | ^ ^ - * | +-----------+ | - * +--------------------------------------------------+ - * - * - * +-------------------------+ - * V | - * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END - * | | ^ - * | +----------------+ - * +-----------------------------------------------+ - * - * - * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END - * | | ^ ^ - * | +----------------+ | - * +--------------------------------+ - * - * +---------+ - * | V - * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END - * | | | | ^ ^ - * | | | +-----+ | - * | | +----------------+ | - * | +---------------------------+ | - * +------------------------------------------------------+ - * - * They all start with a BRANCH for "\|" 
alternatives, even when there is only - * one alternative. - */ - -/* - * The opcodes are: - */ - -/* definition number opnd? meaning */ -#define END 0 /* End of program or NOMATCH operand. */ -#define BOL 1 /* Match "" at beginning of line. */ -#define EOL 2 /* Match "" at end of line. */ -#define BRANCH 3 /* node Match this alternative, or the - * next... */ -#define BACK 4 /* Match "", "next" ptr points backward. */ -#define EXACTLY 5 /* str Match this string. */ -#define NOTHING 6 /* Match empty string. */ -#define STAR 7 /* node Match this (simple) thing 0 or more - * times. */ -#define PLUS 8 /* node Match this (simple) thing 1 or more - * times. */ -#define MATCH 9 /* node match the operand zero-width */ -#define NOMATCH 10 /* node check for no match with operand */ -#define BEHIND 11 /* node look behind for a match with operand */ -#define NOBEHIND 12 /* node look behind for no match with operand */ -#define SUBPAT 13 /* node match the operand here */ -#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and - * n times (\{m,n\}). */ -#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */ -#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */ -#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE - * and BRACE_COMPLEX. */ -#define NEWL 18 /* Match line-break */ -#define BHPOS 19 /* End position for BEHIND or NOBEHIND */ - - -/* character classes: 20-48 normal, 50-78 include a line-break */ -#define ADD_NL 30 -#define FIRST_NL ANY + ADD_NL -#define ANY 20 /* Match any one character. */ -#define ANYOF 21 /* str Match any character in this string. */ -#define ANYBUT 22 /* str Match any character not in this - * string. 
*/ -#define IDENT 23 /* Match identifier char */ -#define SIDENT 24 /* Match identifier char but no digit */ -#define KWORD 25 /* Match keyword char */ -#define SKWORD 26 /* Match word char but no digit */ -#define FNAME 27 /* Match file name char */ -#define SFNAME 28 /* Match file name char but no digit */ -#define PRINT 29 /* Match printable char */ -#define SPRINT 30 /* Match printable char but no digit */ -#define WHITE 31 /* Match whitespace char */ -#define NWHITE 32 /* Match non-whitespace char */ -#define DIGIT 33 /* Match digit char */ -#define NDIGIT 34 /* Match non-digit char */ -#define HEX 35 /* Match hex char */ -#define NHEX 36 /* Match non-hex char */ -#define OCTAL 37 /* Match octal char */ -#define NOCTAL 38 /* Match non-octal char */ -#define WORD 39 /* Match word char */ -#define NWORD 40 /* Match non-word char */ -#define HEAD 41 /* Match head char */ -#define NHEAD 42 /* Match non-head char */ -#define ALPHA 43 /* Match alpha char */ -#define NALPHA 44 /* Match non-alpha char */ -#define LOWER 45 /* Match lowercase char */ -#define NLOWER 46 /* Match non-lowercase char */ -#define UPPER 47 /* Match uppercase char */ -#define NUPPER 48 /* Match non-uppercase char */ -#define LAST_NL NUPPER + ADD_NL -// -V:WITH_NL:560 -#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL) - -#define MOPEN 80 // -89 Mark this point in input as start of - // \( … \) subexpr. MOPEN + 0 marks start of - // match. -#define MCLOSE 90 // -99 Analogous to MOPEN. MCLOSE + 0 marks - // end of match. -#define BACKREF 100 // -109 node Match same string again \1-\9. - -# define ZOPEN 110 // -119 Mark this point in input as start of - // \z( … \) subexpr. -# define ZCLOSE 120 // -129 Analogous to ZOPEN. -# define ZREF 130 // -139 node Match external submatch \z1-\z9 - -#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */ - -#define NOPEN 150 // Mark this point in input as start of - // \%( subexpr. -#define NCLOSE 151 // Analogous to NOPEN. 
- -#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */ -#define RE_BOF 201 /* Match "" at beginning of file. */ -#define RE_EOF 202 /* Match "" at end of file. */ -#define CURSOR 203 /* Match location of cursor. */ - -#define RE_LNUM 204 /* nr cmp Match line number */ -#define RE_COL 205 /* nr cmp Match column number */ -#define RE_VCOL 206 /* nr cmp Match virtual column number */ - -#define RE_MARK 207 /* mark cmp Match mark position */ -#define RE_VISUAL 208 /* Match Visual area */ -#define RE_COMPOSING 209 // any composing characters - -/* * Magic characters have a special meaning, they don't match literally. * Magic characters are negative. This separates them from literal characters * (possibly multi-byte). Only ASCII characters can be Magic. @@ -285,107 +60,6 @@ */ typedef void (*(*fptr_T)(int *, int))(void); -typedef struct { - char_u *regparse; - int prevchr_len; - int curchr; - int prevchr; - int prevprevchr; - int nextchr; - int at_start; - int prev_at_start; - int regnpar; -} parse_state_T; - -/* - * Structure used to save the current input state, when it needs to be - * restored after trying a match. Used by reg_save() and reg_restore(). - * Also stores the length of "backpos". - */ -typedef struct { - union { - char_u *ptr; ///< rex.input pointer, for single-line regexp - lpos_T pos; ///< rex.input pos, for multi-line regexp - } rs_u; - int rs_len; -} regsave_T; - -/* struct to save start/end pointer/position in for \(\) */ -typedef struct { - union { - char_u *ptr; - lpos_T pos; - } se_u; -} save_se_T; - -/* used for BEHIND and NOBEHIND matching */ -typedef struct regbehind_S { - regsave_T save_after; - regsave_T save_behind; - int save_need_clear_subexpr; - save_se_T save_start[NSUBEXP]; - save_se_T save_end[NSUBEXP]; -} regbehind_T; - -/* Values for rs_state in regitem_T. 
*/ -typedef enum regstate_E { - RS_NOPEN = 0 /* NOPEN and NCLOSE */ - , RS_MOPEN /* MOPEN + [0-9] */ - , RS_MCLOSE /* MCLOSE + [0-9] */ - , RS_ZOPEN /* ZOPEN + [0-9] */ - , RS_ZCLOSE /* ZCLOSE + [0-9] */ - , RS_BRANCH /* BRANCH */ - , RS_BRCPLX_MORE /* BRACE_COMPLEX and trying one more match */ - , RS_BRCPLX_LONG /* BRACE_COMPLEX and trying longest match */ - , RS_BRCPLX_SHORT /* BRACE_COMPLEX and trying shortest match */ - , RS_NOMATCH /* NOMATCH */ - , RS_BEHIND1 /* BEHIND / NOBEHIND matching rest */ - , RS_BEHIND2 /* BEHIND / NOBEHIND matching behind part */ - , RS_STAR_LONG /* STAR/PLUS/BRACE_SIMPLE longest match */ - , RS_STAR_SHORT /* STAR/PLUS/BRACE_SIMPLE shortest match */ -} regstate_T; - -/* - * When there are alternatives a regstate_T is put on the regstack to remember - * what we are doing. - * Before it may be another type of item, depending on rs_state, to remember - * more things. - */ -typedef struct regitem_S { - regstate_T rs_state; // what we are doing, one of RS_ above - uint16_t rs_no; // submatch nr or BEHIND/NOBEHIND - char_u *rs_scan; // current node in program - union { - save_se_T sesave; - regsave_T regsave; - } rs_un; ///< room for saving rex.input -} regitem_T; - - -/* used for STAR, PLUS and BRACE_SIMPLE matching */ -typedef struct regstar_S { - int nextb; /* next byte */ - int nextb_ic; /* next byte reverse case */ - long count; - long minval; - long maxval; -} regstar_T; - -/* used to store input position when a BACK was encountered, so that we now if - * we made any progress since the last time. 
*/ -typedef struct backpos_S { - char_u *bp_scan; /* "scan" where BACK was encountered */ - regsave_T bp_pos; /* last input position */ -} backpos_T; - -typedef struct { - int a, b, c; -} decomp_T; - - -#ifdef INCLUDE_GENERATED_DECLARATIONS -# include "regexp.c.generated.h" -#endif static int no_Magic(int x) { if (is_Magic(x)) @@ -400,66 +74,13 @@ static int toggle_Magic(int x) return Magic(x); } -/* - * The first byte of the regexp internal "program" is actually this magic - * number; the start node begins in the second byte. It's used to catch the - * most severe mutilation of the program by the caller. - */ - +// The first byte of the BT regexp internal "program" is actually this magic +// number; the start node begins in the second byte. It's used to catch the +// most severe mutilation of the program by the caller. #define REGMAGIC 0234 -/* - * Opcode notes: - * - * BRANCH The set of branches constituting a single choice are hooked - * together with their "next" pointers, since precedence prevents - * anything being concatenated to any individual branch. The - * "next" pointer of the last BRANCH in a choice points to the - * thing following the whole choice. This is also where the - * final "next" pointer of each individual branch points; each - * branch starts with the operand node of a BRANCH node. - * - * BACK Normal "next" pointers all implicitly point forward; BACK - * exists to make loop structures possible. - * - * STAR,PLUS '=', and complex '*' and '+', are implemented as circular - * BRANCH structures using BACK. Simple cases (one character - * per match) are implemented with STAR and PLUS for speed - * and to minimize recursive plunges. - * - * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX - * node, and defines the min and max limits to be used for that - * node. - * - * MOPEN,MCLOSE ...are numbered at compile time. - * ZOPEN,ZCLOSE ...ditto - */ - -/* - * A node is one char of opcode followed by two chars of "next" pointer. 
- * "Next" pointers are stored as two 8-bit bytes, high order first. The - * value is a positive offset from the opcode of the node containing it. - * An operand, if any, simply follows the node. (Note that much of the - * code generation knows about this implicit relationship.) - * - * Using two bytes for the "next" pointer is vast overkill for most things, - * but allows patterns to get big without disasters. - */ -#define OP(p) ((int)*(p)) -#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) -#define OPERAND(p) ((p) + 3) -/* Obtain an operand that was stored as four bytes, MSB first. */ -#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \ - + ((long)(p)[5] << 8) + (long)(p)[6]) -/* Obtain a second operand stored as four bytes. */ -#define OPERAND_MAX(p) OPERAND_MIN((p) + 4) -/* Obtain a second single-byte operand stored after a four bytes operand. */ -#define OPERAND_CMP(p) (p)[7] - -/* - * Utility definitions. - */ -#define UCHARAT(p) ((int)*(char_u *)(p)) +// Utility definitions. +#define UCHARAT(p) ((int)(*(char_u *)(p))) // Used for an error (down from) vim_regcomp(): give the error message, set // rc_did_emsg and return NULL @@ -468,6 +89,8 @@ static int toggle_Magic(int x) #define EMSG_RET_FAIL(m) return (emsg(m), rc_did_emsg = true, FAIL) #define EMSG2_RET_NULL(m, c) \ return (semsg((m), (c) ? "" : "\\"), rc_did_emsg = true, (void *)NULL) +#define EMSG3_RET_NULL(m, c, a) \ + return (semsg((const char *)(m), (c) ? "" : "\\", (a)), rc_did_emsg = true, (void *)NULL) #define EMSG2_RET_FAIL(m, c) \ return (semsg((m), (c) ? 
"" : "\\"), rc_did_emsg = true, FAIL) #define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_( \ @@ -475,14 +98,6 @@ static int toggle_Magic(int x) #define MAX_LIMIT (32767L << 16L) - -#ifdef BT_REGEXP_DUMP -static void regdump(char_u *, bt_regprog_T *); -#endif -#ifdef REGEXP_DEBUG -static char_u *regprop(char_u *); -#endif - static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); static char_u e_reverse_range[] = N_("E944: Reverse range in character class"); static char_u e_large_class[] = N_("E945: Range too large in character class"); @@ -498,11 +113,17 @@ static char_u e_recursive[] = N_("E956: Cannot use pattern recursively"); #define NOT_MULTI 0 #define MULTI_ONE 1 #define MULTI_MULT 2 -/* - * Return NOT_MULTI if c is not a "multi" operator. - * Return MULTI_ONE if c is a single "multi" operator. - * Return MULTI_MULT if c is a multi "multi" operator. - */ + +// return values for regmatch() +#define RA_FAIL 1 // something failed, abort +#define RA_CONT 2 // continue in inner loop +#define RA_BREAK 3 // break inner loop +#define RA_MATCH 4 // successful match +#define RA_NOMATCH 5 // didn't match + +/// Return NOT_MULTI if c is not a "multi" operator. +/// Return MULTI_ONE if c is a single "multi" operator. +/// Return MULTI_MULT if c is a multi "multi" operator. static int re_multi_type(int c) { if (c == Magic('@') || c == Magic('=') || c == Magic('?')) @@ -512,22 +133,6 @@ static int re_multi_type(int c) return NOT_MULTI; } -/* - * Flags to be passed up and down. - */ -#define HASWIDTH 0x1 /* Known never to match null string. */ -#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */ -#define SPSTART 0x4 /* Starts with * or +. */ -#define HASNL 0x8 /* Contains some \n. */ -#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */ -#define WORST 0 /* Worst case. */ - -/* - * When regcode is set to this value, code is not emitted and size is computed - * instead. 
- */ -#define JUST_CALC_SIZE ((char_u *) -1) - static char_u *reg_prev_sub = NULL; /* @@ -682,38 +287,28 @@ static void init_class_tab(void) # define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER)) # define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE)) -/* flags for regflags */ -#define RF_ICASE 1 /* ignore case */ -#define RF_NOICASE 2 /* don't ignore case */ -#define RF_HASNL 4 /* can match a NL */ -#define RF_ICOMBINE 8 /* ignore combining characters */ -#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */ +// flags for regflags +#define RF_ICASE 1 // ignore case +#define RF_NOICASE 2 // don't ignore case +#define RF_HASNL 4 // can match a NL +#define RF_ICOMBINE 8 // ignore combining characters +#define RF_LOOKBH 16 // uses "\@<=" or "\@<!" // Global work variables for vim_regcomp(). static char_u *regparse; ///< Input-scan pointer. -static int prevchr_len; ///< byte length of previous char -static int num_complex_braces; ///< Complex \{...} count static int regnpar; ///< () count. static bool wants_nfa; ///< regex should use NFA engine static int regnzpar; ///< \z() count. static int re_has_z; ///< \z item detected -static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE -static long regsize; ///< Code size. 
-static int reg_toolong; ///< true when offset out of range -static char_u had_endbrace[NSUBEXP]; ///< flags, true if end of () found -static unsigned regflags; ///< RF_ flags for prog -static long brace_min[10]; ///< Minimums for complex brace repeats -static long brace_max[10]; ///< Maximums for complex brace repeats -static int brace_count[10]; ///< Current counts for complex brace repeats -static int had_eol; ///< true when EOL found by vim_regcomp() -static int one_exactly = false; ///< only do one char for EXACTLY - -static int reg_magic; /* magicness of the pattern: */ -#define MAGIC_NONE 1 /* "\V" very unmagic */ -#define MAGIC_OFF 2 /* "\M" or 'magic' off */ -#define MAGIC_ON 3 /* "\m" or 'magic' */ -#define MAGIC_ALL 4 /* "\v" very magic */ +static unsigned regflags; ///< RF_ flags for prog +static int had_eol; ///< true when EOL found by vim_regcomp() + +static int reg_magic; // magicness of the pattern: +#define MAGIC_NONE 1 // "\V" very unmagic +#define MAGIC_OFF 2 // "\M" or 'magic' off +#define MAGIC_ON 3 // "\m" or 'magic' +#define MAGIC_ALL 4 // "\v" very magic static int reg_string; // matching with a string instead of a buffer // line @@ -723,22 +318,22 @@ static int reg_strict; // "[abc" is illegal * META contains all characters that may be magic, except '^' and '$'. */ -/* META[] is used often enough to justify turning it into a table. */ +// META[] is used often enough to justify turning it into a table. static char_u META_flags[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* % & ( ) * + . */ - 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, - /* 1 2 3 4 5 6 7 8 9 < = > ? 
*/ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, - /* @ A C D F H I K L M O */ - 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, - /* P S U V W X Z [ _ */ - 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, - /* a c d f h i k l m n o */ - 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, - /* p s u v w x z { | ~ */ - 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// % & ( ) * + . + 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, +// 1 2 3 4 5 6 7 8 9 < = > ? + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, +// @ A C D F H I K L M O + 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, +// P S U V W X Z [ _ + 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, +// a c d f h i k l m n o + 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, +// p s u v w x z { | ~ + 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 }; static int curchr; // currently parsed character @@ -746,24 +341,36 @@ static int curchr; // currently parsed character // start, eg in /[ ^I]^ the pattern was never found even if it existed, // because ^ was taken to be magic -- webb static int prevchr; -static int prevprevchr; /* previous-previous character */ -static int nextchr; /* used for ungetchr() */ +static int prevprevchr; // previous-previous character +static int nextchr; // used for ungetchr() -/* arguments for reg() */ -#define REG_NOPAREN 0 /* toplevel reg() */ -#define REG_PAREN 1 /* \(\) */ -#define REG_ZPAREN 2 /* \z(\) */ -#define REG_NPAREN 3 /* \%(\) */ +// arguments for reg() +#define REG_NOPAREN 0 // toplevel reg() +#define REG_PAREN 1 // \(\) +#define REG_ZPAREN 2 // \z(\) +#define REG_NPAREN 3 // \%(\) + +typedef struct { + char_u *regparse; + int prevchr_len; + int curchr; + int prevchr; + int prevprevchr; + int nextchr; + int at_start; + int prev_at_start; + int regnpar; +} parse_state_T; -/* - * Forward declarations for vim_regcomp()'s friends. 
- */ -# define REGMBC(x) regmbc(x); -# define CASEMBC(x) case x: static regengine_T bt_regengine; static regengine_T nfa_regengine; +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "regexp.c.generated.h" +#endif + + // Return true if compiled regular expression "prog" can match a line break. int re_multiline(const regprog_T *prog) FUNC_ATTR_NONNULL_ALL @@ -795,312 +402,6 @@ static int get_equi_class(char_u **pp) /* - * Produce the bytes for equivalence class "c". - * Currently only handles latin1, latin9 and utf-8. - * NOTE: When changing this function, also change nfa_emit_equi_class() - */ -static void reg_equi_class(int c) -{ - { - switch (c) { - // Do not use '\300' style, it results in a negative number. - case 'A': case 0xc0: case 0xc1: case 0xc2: - case 0xc3: case 0xc4: case 0xc5: - CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd) - CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2) - regmbc('A'); regmbc(0xc0); regmbc(0xc1); - regmbc(0xc2); regmbc(0xc3); regmbc(0xc4); - regmbc(0xc5); - REGMBC(0x100) REGMBC(0x102) REGMBC(0x104) - REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0) - REGMBC(0x1ea2) - return; - case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06) - regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06) - return; - case 'C': case 0xc7: - CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c) - regmbc('C'); regmbc(0xc7); - REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a) - REGMBC(0x10c) - return; - case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a) - CASEMBC(0x1e0e) CASEMBC(0x1e10) - regmbc('D'); REGMBC(0x10e) REGMBC(0x110) - REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10) - return; - case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb: - CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118) - CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc) - regmbc('E'); regmbc(0xc8); regmbc(0xc9); - regmbc(0xca); regmbc(0xcb); - REGMBC(0x112) REGMBC(0x114) REGMBC(0x116) - REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba) - REGMBC(0x1ebc) - return; - case 'F': CASEMBC(0x1e1e) - 
regmbc('F'); REGMBC(0x1e1e) - return; - case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120) - CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4) - CASEMBC(0x1e20) - regmbc('G'); REGMBC(0x11c) REGMBC(0x11e) - REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4) - REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20) - return; - case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22) - CASEMBC(0x1e26) CASEMBC(0x1e28) - regmbc('H'); REGMBC(0x124) REGMBC(0x126) - REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28) - return; - case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf: - CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e) - CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8) - regmbc('I'); regmbc(0xcc); regmbc(0xcd); - regmbc(0xce); regmbc(0xcf); - REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c) - REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf) - REGMBC(0x1ec8) - return; - case 'J': CASEMBC(0x134) - regmbc('J'); REGMBC(0x134) - return; - case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30) - CASEMBC(0x1e34) - regmbc('K'); REGMBC(0x136) REGMBC(0x1e8) - REGMBC(0x1e30) REGMBC(0x1e34) - return; - case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d) - CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a) - regmbc('L'); REGMBC(0x139) REGMBC(0x13b) - REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141) - REGMBC(0x1e3a) - return; - case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40) - regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40) - return; - case 'N': case 0xd1: - CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44) - CASEMBC(0x1e48) - regmbc('N'); regmbc(0xd1); - REGMBC(0x143) REGMBC(0x145) REGMBC(0x147) - REGMBC(0x1e44) REGMBC(0x1e48) - return; - case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5: - case 0xd6: case 0xd8: - CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0) - CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece) - regmbc('O'); regmbc(0xd2); regmbc(0xd3); - regmbc(0xd4); regmbc(0xd5); regmbc(0xd6); - regmbc(0xd8); - REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150) - REGMBC(0x1a0) 
REGMBC(0x1d1) REGMBC(0x1ea) - REGMBC(0x1ec) REGMBC(0x1ece) - return; - case 'P': case 0x1e54: case 0x1e56: - regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56) - return; - case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158) - CASEMBC(0x1e58) CASEMBC(0x1e5e) - regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158) - REGMBC(0x1e58) REGMBC(0x1e5e) - return; - case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e) - CASEMBC(0x160) CASEMBC(0x1e60) - regmbc('S'); REGMBC(0x15a) REGMBC(0x15c) - REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60) - return; - case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166) - CASEMBC(0x1e6a) CASEMBC(0x1e6e) - regmbc('T'); REGMBC(0x162) REGMBC(0x164) - REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e) - return; - case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc: - CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e) - CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) - CASEMBC(0x1ee6) - regmbc('U'); regmbc(0xd9); regmbc(0xda); - regmbc(0xdb); regmbc(0xdc); - REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c) - REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172) - REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6) - return; - case 'V': CASEMBC(0x1e7c) - regmbc('V'); REGMBC(0x1e7c) - return; - case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82) - CASEMBC(0x1e84) CASEMBC(0x1e86) - regmbc('W'); REGMBC(0x174) REGMBC(0x1e80) - REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86) - return; - case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c) - regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c) - return; - case 'Y': case 0xdd: - CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2) - CASEMBC(0x1ef6) CASEMBC(0x1ef8) - regmbc('Y'); regmbc(0xdd); - REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e) - REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8) - return; - case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d) - CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94) - regmbc('Z'); REGMBC(0x179) REGMBC(0x17b) - REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90) - REGMBC(0x1e94) - return; - case 'a': case 
0xe0: case 0xe1: case 0xe2: - case 0xe3: case 0xe4: case 0xe5: - CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce) - CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3) - regmbc('a'); regmbc(0xe0); regmbc(0xe1); - regmbc(0xe2); regmbc(0xe3); regmbc(0xe4); - regmbc(0xe5); - REGMBC(0x101) REGMBC(0x103) REGMBC(0x105) - REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1) - REGMBC(0x1ea3) - return; - case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07) - regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07) - return; - case 'c': case 0xe7: - CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d) - regmbc('c'); regmbc(0xe7); - REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b) - REGMBC(0x10d) - return; - case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b) - CASEMBC(0x1e0f) CASEMBC(0x1e11) - regmbc('d'); REGMBC(0x10f) REGMBC(0x111) - REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11) - return; - case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb: - CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119) - CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd) - regmbc('e'); regmbc(0xe8); regmbc(0xe9); - regmbc(0xea); regmbc(0xeb); - REGMBC(0x113) REGMBC(0x115) REGMBC(0x117) - REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb) - REGMBC(0x1ebd) - return; - case 'f': CASEMBC(0x1e1f) - regmbc('f'); REGMBC(0x1e1f) - return; - case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121) - CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5) - CASEMBC(0x1e21) - regmbc('g'); REGMBC(0x11d) REGMBC(0x11f) - REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5) - REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21) - return; - case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23) - CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96) - regmbc('h'); REGMBC(0x125) REGMBC(0x127) - REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29) - REGMBC(0x1e96) - return; - case 'i': case 0xec: case 0xed: case 0xee: case 0xef: - CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f) - CASEMBC(0x1d0) CASEMBC(0x1ec9) - regmbc('i'); regmbc(0xec); regmbc(0xed); - 
regmbc(0xee); regmbc(0xef); - REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d) - REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9) - return; - case 'j': CASEMBC(0x135) CASEMBC(0x1f0) - regmbc('j'); REGMBC(0x135) REGMBC(0x1f0) - return; - case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31) - CASEMBC(0x1e35) - regmbc('k'); REGMBC(0x137) REGMBC(0x1e9) - REGMBC(0x1e31) REGMBC(0x1e35) - return; - case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e) - CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b) - regmbc('l'); REGMBC(0x13a) REGMBC(0x13c) - REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142) - REGMBC(0x1e3b) - return; - case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41) - regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41) - return; - case 'n': case 0xf1: - CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149) - CASEMBC(0x1e45) CASEMBC(0x1e49) - regmbc('n'); regmbc(0xf1); - REGMBC(0x144) REGMBC(0x146) REGMBC(0x148) - REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49) - return; - case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5: - case 0xf6: case 0xf8: - CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1) - CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf) - regmbc('o'); regmbc(0xf2); regmbc(0xf3); - regmbc(0xf4); regmbc(0xf5); regmbc(0xf6); - regmbc(0xf8); - REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151) - REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb) - REGMBC(0x1ed) REGMBC(0x1ecf) - return; - case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57) - regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57) - return; - case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159) - CASEMBC(0x1e59) CASEMBC(0x1e5f) - regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159) - REGMBC(0x1e59) REGMBC(0x1e5f) - return; - case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f) - CASEMBC(0x161) CASEMBC(0x1e61) - regmbc('s'); REGMBC(0x15b) REGMBC(0x15d) - REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61) - return; - case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167) - CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97) - regmbc('t'); REGMBC(0x163) 
REGMBC(0x165) REGMBC(0x167) - REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97) - return; - case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc: - CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f) - CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) - CASEMBC(0x1ee7) - regmbc('u'); regmbc(0xf9); regmbc(0xfa); - regmbc(0xfb); regmbc(0xfc); - REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d) - REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173) - REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7) - return; - case 'v': CASEMBC(0x1e7d) - regmbc('v'); REGMBC(0x1e7d) - return; - case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83) - CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98) - regmbc('w'); REGMBC(0x175) REGMBC(0x1e81) - REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87) - REGMBC(0x1e98) - return; - case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d) - regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d) - return; - case 'y': case 0xfd: case 0xff: - CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99) - CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9) - regmbc('y'); regmbc(0xfd); regmbc(0xff); - REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99) - REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9) - return; - case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e) - CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95) - regmbc('z'); REGMBC(0x17a) REGMBC(0x17c) - REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91) - REGMBC(0x1e95) - return; - } - } - regmbc(c); -} - -/* * Check for a collating element "[.a.]". "pp" points to the '['. * Returns a character. Zero means that no item was recognized. Otherwise * "pp" is advanced to after the item. @@ -1123,7 +424,7 @@ static int get_coll_element(char_u **pp) return 0; } -static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */ +static int reg_cpo_lit; // 'cpoptions' contains 'l' flag static void get_cpo_flags(void) { @@ -1139,10 +440,12 @@ static char_u *skip_anyof(char_u *p) { int l; - if (*p == '^') /* Complement of range. 
*/ - ++p; - if (*p == ']' || *p == '-') - ++p; + if (*p == '^') { // Complement of range. + p++; + } + if (*p == ']' || *p == '-') { + p++; + } while (*p != NUL && *p != ']') { if ((l = utfc_ptr2len(p)) > 1) { p += l; @@ -1202,1550 +505,32 @@ char_u *skip_regexp(char_u *startp, int dirc, int magic, char_u **newp) break; } else if (p[0] == '\\' && p[1] != NUL) { if (dirc == '?' && newp != NULL && p[1] == '?') { - /* change "\?" to "?", make a copy first. */ + // change "\?" to "?", make a copy first. if (*newp == NULL) { *newp = vim_strsave(startp); p = *newp + (p - startp); } STRMOVE(p, p + 1); - } else - ++p; /* skip next character */ - if (*p == 'v') + } else { + p++; // skip next character + } + if (*p == 'v') { mymagic = MAGIC_ALL; - else if (*p == 'V') + } else if (*p == 'V') { mymagic = MAGIC_NONE; - } - } - return p; -} - -/// Return true if the back reference is legal. We must have seen the close -/// brace. -/// TODO(vim): Should also check that we don't refer to something repeated -/// (+*=): what instance of the repetition should we match? -static int seen_endbrace(int refnum) -{ - if (!had_endbrace[refnum]) { - char_u *p; - - // Trick: check if "@<=" or "@<!" follows, in which case - // the \1 can appear before the referenced match. - for (p = regparse; *p != NUL; p++) { - if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '=')) { - break; - } } - - if (*p == NUL) { - emsg(_("E65: Illegal back reference")); - rc_did_emsg = true; - return false; } } - return true; -} - -/* - * bt_regcomp() - compile a regular expression into internal code for the - * traditional back track matcher. - * Returns the program in allocated space. Returns NULL for an error. - * - * We can't allocate space until we know how big the compiled form will be, - * but we can't compile it (and thus know how big it is) until we've got a - * place to put the code. 
So we cheat: we compile it twice, once with code - * generation turned off and size counting turned on, and once "for real". - * This also means that we don't allocate space until we are sure that the - * thing really will compile successfully, and we never have to move the - * code and thus invalidate pointers into it. (Note that it has to be in - * one piece because free() must be able to free it all.) - * - * Whether upper/lower case is to be ignored is decided when executing the - * program, it does not matter here. - * - * Beware that the optimization-preparation code in here knows about some - * of the structure of the compiled regexp. - * "re_flags": RE_MAGIC and/or RE_STRING. - */ -static regprog_T *bt_regcomp(char_u *expr, int re_flags) -{ - char_u *scan; - char_u *longest; - int len; - int flags; - - if (expr == NULL) { - IEMSG_RET_NULL(_(e_null)); - } - - init_class_tab(); - - /* - * First pass: determine size, legality. - */ - regcomp_start(expr, re_flags); - regcode = JUST_CALC_SIZE; - regc(REGMAGIC); - if (reg(REG_NOPAREN, &flags) == NULL) - return NULL; - - /* Allocate space. */ - bt_regprog_T *r = xmalloc(sizeof(bt_regprog_T) + regsize); - r->re_in_use = false; - - /* - * Second pass: emit code. - */ - regcomp_start(expr, re_flags); - regcode = r->program; - regc(REGMAGIC); - if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong) { - xfree(r); - if (reg_toolong) - EMSG_RET_NULL(_("E339: Pattern too long")); - return NULL; - } - - /* Dig out information for optimizations. */ - r->regstart = NUL; /* Worst-case defaults. */ - r->reganch = 0; - r->regmust = NULL; - r->regmlen = 0; - r->regflags = regflags; - if (flags & HASNL) - r->regflags |= RF_HASNL; - if (flags & HASLOOKBH) - r->regflags |= RF_LOOKBH; - /* Remember whether this pattern has any \z specials in it. */ - r->reghasz = re_has_z; - scan = r->program + 1; /* First BRANCH. */ - if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ - scan = OPERAND(scan); - - /* Starting-point info. 
*/ - if (OP(scan) == BOL || OP(scan) == RE_BOF) { - r->reganch++; - scan = regnext(scan); - } - - if (OP(scan) == EXACTLY) { - r->regstart = utf_ptr2char(OPERAND(scan)); - } else if (OP(scan) == BOW - || OP(scan) == EOW - || OP(scan) == NOTHING - || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN - || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) { - char_u *regnext_scan = regnext(scan); - if (OP(regnext_scan) == EXACTLY) { - r->regstart = utf_ptr2char(OPERAND(regnext_scan)); - } - } - - /* - * If there's something expensive in the r.e., find the longest - * literal string that must appear and make it the regmust. Resolve - * ties in favor of later strings, since the regstart check works - * with the beginning of the r.e. and avoiding duplication - * strengthens checking. Not a strong reason, but sufficient in the - * absence of others. - */ - /* - * When the r.e. starts with BOW, it is faster to look for a regmust - * first. Used a lot for "#" and "*" commands. (Added by mool). - */ - if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW) - && !(flags & HASNL)) { - longest = NULL; - len = 0; - for (; scan != NULL; scan = regnext(scan)) - if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len) { - longest = OPERAND(scan); - len = (int)STRLEN(OPERAND(scan)); - } - r->regmust = longest; - r->regmlen = len; - } - } -#ifdef BT_REGEXP_DUMP - regdump(expr, r); -#endif - r->engine = &bt_regengine; - return (regprog_T *)r; -} - -/* - * Free a compiled regexp program, returned by bt_regcomp(). - */ -static void bt_regfree(regprog_T *prog) -{ - xfree(prog); + return p; } -/* - * Setup to parse the regexp. Used once to get the length and once to do it. 
- */ -static void -regcomp_start ( - char_u *expr, - int re_flags /* see vim_regcomp() */ -) -{ - initchr(expr); - if (re_flags & RE_MAGIC) - reg_magic = MAGIC_ON; - else - reg_magic = MAGIC_OFF; - reg_string = (re_flags & RE_STRING); - reg_strict = (re_flags & RE_STRICT); - get_cpo_flags(); - - num_complex_braces = 0; - regnpar = 1; - memset(had_endbrace, 0, sizeof(had_endbrace)); - regnzpar = 1; - re_has_z = 0; - regsize = 0L; - reg_toolong = false; - regflags = 0; - had_eol = false; -} - -/* - * Check if during the previous call to vim_regcomp the EOL item "$" has been - * found. This is messy, but it works fine. - */ -int vim_regcomp_had_eol(void) -{ - return had_eol; -} // variables used for parsing +static int prevchr_len; // byte length of previous char static int at_start; // True when on the first character static int prev_at_start; // True when on the second character /* - * Parse regular expression, i.e. main body or parenthesized thing. - * - * Caller must absorb opening parenthesis. - * - * Combining parenthesis handling with the base level of regular expression - * is a trifle forced, but the need to tie the tails of the branches to what - * follows makes it hard to avoid. - */ -static char_u * -reg ( - int paren, /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */ - int *flagp -) -{ - char_u *ret; - char_u *br; - char_u *ender; - int parno = 0; - int flags; - - *flagp = HASWIDTH; /* Tentatively. */ - - if (paren == REG_ZPAREN) { - /* Make a ZOPEN node. */ - if (regnzpar >= NSUBEXP) - EMSG_RET_NULL(_("E50: Too many \\z(")); - parno = regnzpar; - regnzpar++; - ret = regnode(ZOPEN + parno); - } else if (paren == REG_PAREN) { - /* Make a MOPEN node. */ - if (regnpar >= NSUBEXP) - EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); - parno = regnpar; - ++regnpar; - ret = regnode(MOPEN + parno); - } else if (paren == REG_NPAREN) { - /* Make a NOPEN node. 
*/ - ret = regnode(NOPEN); - } else - ret = NULL; - - /* Pick up the branches, linking them together. */ - br = regbranch(&flags); - if (br == NULL) - return NULL; - if (ret != NULL) - regtail(ret, br); /* [MZ]OPEN -> first. */ - else - ret = br; - /* If one of the branches can be zero-width, the whole thing can. - * If one of the branches has * at start or matches a line-break, the - * whole thing can. */ - if (!(flags & HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); - while (peekchr() == Magic('|')) { - skipchr(); - br = regbranch(&flags); - if (br == NULL || reg_toolong) - return NULL; - regtail(ret, br); /* BRANCH -> BRANCH. */ - if (!(flags & HASWIDTH)) - *flagp &= ~HASWIDTH; - *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); - } - - /* Make a closing node, and hook it on the end. */ - ender = regnode( - paren == REG_ZPAREN ? ZCLOSE + parno : - paren == REG_PAREN ? MCLOSE + parno : - paren == REG_NPAREN ? NCLOSE : END); - regtail(ret, ender); - - /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) - regoptail(br, ender); - - /* Check for proper termination. */ - if (paren != REG_NOPAREN && getchr() != Magic(')')) { - if (paren == REG_ZPAREN) - EMSG_RET_NULL(_("E52: Unmatched \\z(")); - else if (paren == REG_NPAREN) - EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); - else - EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); - } else if (paren == REG_NOPAREN && peekchr() != NUL) { - if (curchr == Magic(')')) - EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); - else - EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ - /* NOTREACHED */ - } - // Here we set the flag allowing back references to this set of - // parentheses. - if (paren == REG_PAREN) { - had_endbrace[parno] = true; // have seen the close paren - } - return ret; -} - -/* - * Parse one alternative of an | operator. - * Implements the & operator. 
- */ -static char_u *regbranch(int *flagp) -{ - char_u *ret; - char_u *chain = NULL; - char_u *latest; - int flags; - - *flagp = WORST | HASNL; /* Tentatively. */ - - ret = regnode(BRANCH); - for (;; ) { - latest = regconcat(&flags); - if (latest == NULL) - return NULL; - /* If one of the branches has width, the whole thing has. If one of - * the branches anchors at start-of-line, the whole thing does. - * If one of the branches uses look-behind, the whole thing does. */ - *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH); - /* If one of the branches doesn't match a line-break, the whole thing - * doesn't. */ - *flagp &= ~HASNL | (flags & HASNL); - if (chain != NULL) - regtail(chain, latest); - if (peekchr() != Magic('&')) - break; - skipchr(); - regtail(latest, regnode(END)); /* operand ends */ - if (reg_toolong) - break; - reginsert(MATCH, latest); - chain = latest; - } - - return ret; -} - -/* - * Parse one alternative of an | or & operator. - * Implements the concatenation operator. - */ -static char_u *regconcat(int *flagp) -{ - char_u *first = NULL; - char_u *chain = NULL; - char_u *latest; - int flags; - int cont = true; - - *flagp = WORST; /* Tentatively. 
*/ - - while (cont) { - switch (peekchr()) { - case NUL: - case Magic('|'): - case Magic('&'): - case Magic(')'): - cont = false; - break; - case Magic('Z'): - regflags |= RF_ICOMBINE; - skipchr_keepstart(); - break; - case Magic('c'): - regflags |= RF_ICASE; - skipchr_keepstart(); - break; - case Magic('C'): - regflags |= RF_NOICASE; - skipchr_keepstart(); - break; - case Magic('v'): - reg_magic = MAGIC_ALL; - skipchr_keepstart(); - curchr = -1; - break; - case Magic('m'): - reg_magic = MAGIC_ON; - skipchr_keepstart(); - curchr = -1; - break; - case Magic('M'): - reg_magic = MAGIC_OFF; - skipchr_keepstart(); - curchr = -1; - break; - case Magic('V'): - reg_magic = MAGIC_NONE; - skipchr_keepstart(); - curchr = -1; - break; - default: - latest = regpiece(&flags); - if (latest == NULL || reg_toolong) - return NULL; - *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH); - if (chain == NULL) /* First piece. */ - *flagp |= flags & SPSTART; - else - regtail(chain, latest); - chain = latest; - if (first == NULL) - first = latest; - break; - } - } - if (first == NULL) /* Loop ran zero times. */ - first = regnode(NOTHING); - return first; -} - -/* - * Parse something followed by possible [*+=]. - * - * Note that the branching code sequences used for = and the general cases - * of * and + are somewhat optimized: they use the same NOTHING node as - * both the endmarker for their branch list and the body of the last branch. - * It might seem that this node could be dispensed with entirely, but the - * endmarker role is not redundant. 
- */ -static char_u *regpiece(int *flagp) -{ - char_u *ret; - int op; - char_u *next; - int flags; - long minval; - long maxval; - - ret = regatom(&flags); - if (ret == NULL) - return NULL; - - op = peekchr(); - if (re_multi_type(op) == NOT_MULTI) { - *flagp = flags; - return ret; - } - /* default flags */ - *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH))); - - skipchr(); - switch (op) { - case Magic('*'): - if (flags & SIMPLE) - reginsert(STAR, ret); - else { - /* Emit x* as (x&|), where & means "self". */ - reginsert(BRANCH, ret); /* Either x */ - regoptail(ret, regnode(BACK)); /* and loop */ - regoptail(ret, ret); /* back */ - regtail(ret, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } - break; - - case Magic('+'): - if (flags & SIMPLE) - reginsert(PLUS, ret); - else { - /* Emit x+ as x(&|), where & means "self". */ - next = regnode(BRANCH); /* Either */ - regtail(ret, next); - regtail(regnode(BACK), ret); /* loop back */ - regtail(next, regnode(BRANCH)); /* or */ - regtail(ret, regnode(NOTHING)); /* null. */ - } - *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH))); - break; - - case Magic('@'): - { - int lop = END; - int64_t nr = getdecchrs(); - - switch (no_Magic(getchr())) { - case '=': lop = MATCH; break; /* \@= */ - case '!': lop = NOMATCH; break; /* \@! */ - case '>': lop = SUBPAT; break; /* \@> */ - case '<': switch (no_Magic(getchr())) { - case '=': lop = BEHIND; break; /* \@<= */ - case '!': lop = NOBEHIND; break; /* \@<! */ - } - } - if (lop == END) - EMSG2_RET_NULL(_("E59: invalid character after %s@"), - reg_magic == MAGIC_ALL); - /* Look behind must match with behind_pos. 
*/ - if (lop == BEHIND || lop == NOBEHIND) { - regtail(ret, regnode(BHPOS)); - *flagp |= HASLOOKBH; - } - regtail(ret, regnode(END)); /* operand ends */ - if (lop == BEHIND || lop == NOBEHIND) { - if (nr < 0) - nr = 0; /* no limit is same as zero limit */ - reginsert_nr(lop, (uint32_t)nr, ret); - } else - reginsert(lop, ret); - break; - } - - case Magic('?'): - case Magic('='): - /* Emit x= as (x|) */ - reginsert(BRANCH, ret); /* Either x */ - regtail(ret, regnode(BRANCH)); /* or */ - next = regnode(NOTHING); /* null. */ - regtail(ret, next); - regoptail(ret, next); - break; - - case Magic('{'): - if (!read_limits(&minval, &maxval)) - return NULL; - if (flags & SIMPLE) { - reginsert(BRACE_SIMPLE, ret); - reginsert_limits(BRACE_LIMITS, minval, maxval, ret); - } else { - if (num_complex_braces >= 10) - EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), - reg_magic == MAGIC_ALL); - reginsert(BRACE_COMPLEX + num_complex_braces, ret); - regoptail(ret, regnode(BACK)); - regoptail(ret, ret); - reginsert_limits(BRACE_LIMITS, minval, maxval, ret); - ++num_complex_braces; - } - if (minval > 0 && maxval > 0) - *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH))); - break; - } - if (re_multi_type(peekchr()) != NOT_MULTI) { - // Can't have a multi follow a multi. - if (peekchr() == Magic('*')) { - snprintf((char *)IObuff, IOSIZE, _("E61: Nested %s*"), - reg_magic >= MAGIC_ON ? "" : "\\"); - } else { - snprintf((char *)IObuff, IOSIZE, _("E62: Nested %s%c"), - reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr())); - } - EMSG_RET_NULL((char *)IObuff); - } - - return ret; -} - -/* When making changes to classchars also change nfa_classcodes. 
*/ -static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; -static int classcodes[] = { - ANY, IDENT, SIDENT, KWORD, SKWORD, - FNAME, SFNAME, PRINT, SPRINT, - WHITE, NWHITE, DIGIT, NDIGIT, - HEX, NHEX, OCTAL, NOCTAL, - WORD, NWORD, HEAD, NHEAD, - ALPHA, NALPHA, LOWER, NLOWER, - UPPER, NUPPER -}; - -/* - * Parse the lowest level. - * - * Optimization: gobbles an entire sequence of ordinary characters so that - * it can turn them into a single node, which is smaller to store and - * faster to run. Don't do this when one_exactly is set. - */ -static char_u *regatom(int *flagp) -{ - char_u *ret; - int flags; - int c; - char_u *p; - int extra = 0; - int save_prev_at_start = prev_at_start; - - *flagp = WORST; /* Tentatively. */ - - c = getchr(); - switch (c) { - case Magic('^'): - ret = regnode(BOL); - break; - - case Magic('$'): - ret = regnode(EOL); - had_eol = true; - break; - - case Magic('<'): - ret = regnode(BOW); - break; - - case Magic('>'): - ret = regnode(EOW); - break; - - case Magic('_'): - c = no_Magic(getchr()); - if (c == '^') { /* "\_^" is start-of-line */ - ret = regnode(BOL); - break; - } - if (c == '$') { /* "\_$" is end-of-line */ - ret = regnode(EOL); - had_eol = true; - break; - } - - extra = ADD_NL; - *flagp |= HASNL; - - /* "\_[" is character range plus newline */ - if (c == '[') - goto collection; - - // "\_x" is character class plus newline - FALLTHROUGH; - - /* - * Character classes. 
- */ - case Magic('.'): - case Magic('i'): - case Magic('I'): - case Magic('k'): - case Magic('K'): - case Magic('f'): - case Magic('F'): - case Magic('p'): - case Magic('P'): - case Magic('s'): - case Magic('S'): - case Magic('d'): - case Magic('D'): - case Magic('x'): - case Magic('X'): - case Magic('o'): - case Magic('O'): - case Magic('w'): - case Magic('W'): - case Magic('h'): - case Magic('H'): - case Magic('a'): - case Magic('A'): - case Magic('l'): - case Magic('L'): - case Magic('u'): - case Magic('U'): - p = vim_strchr(classchars, no_Magic(c)); - if (p == NULL) - EMSG_RET_NULL(_("E63: invalid use of \\_")); - /* When '.' is followed by a composing char ignore the dot, so that - * the composing char is matched here. */ - if (c == Magic('.') && utf_iscomposing(peekchr())) { - c = getchr(); - goto do_multibyte; - } - ret = regnode(classcodes[p - classchars] + extra); - *flagp |= HASWIDTH | SIMPLE; - break; - - case Magic('n'): - if (reg_string) { - /* In a string "\n" matches a newline character. */ - ret = regnode(EXACTLY); - regc(NL); - regc(NUL); - *flagp |= HASWIDTH | SIMPLE; - } else { - /* In buffer text "\n" matches the end of a line. */ - ret = regnode(NEWL); - *flagp |= HASWIDTH | HASNL; - } - break; - - case Magic('('): - if (one_exactly) - EMSG_ONE_RET_NULL; - ret = reg(REG_PAREN, &flags); - if (ret == NULL) - return NULL; - *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); - break; - - case NUL: - case Magic('|'): - case Magic('&'): - case Magic(')'): - if (one_exactly) - EMSG_ONE_RET_NULL; - IEMSG_RET_NULL(_(e_internal)); // Supposed to be caught earlier. - // NOTREACHED - - case Magic('='): - case Magic('?'): - case Magic('+'): - case Magic('@'): - case Magic('{'): - case Magic('*'): - c = no_Magic(c); - snprintf((char *)IObuff, IOSIZE, _("E64: %s%c follows nothing"), - (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL) - ? 
"" : "\\", c); - EMSG_RET_NULL((char *)IObuff); - // NOTREACHED - - case Magic('~'): /* previous substitute pattern */ - if (reg_prev_sub != NULL) { - char_u *lp; - - ret = regnode(EXACTLY); - lp = reg_prev_sub; - while (*lp != NUL) - regc(*lp++); - regc(NUL); - if (*reg_prev_sub != NUL) { - *flagp |= HASWIDTH; - if ((lp - reg_prev_sub) == 1) - *flagp |= SIMPLE; - } - } else - EMSG_RET_NULL(_(e_nopresub)); - break; - - case Magic('1'): - case Magic('2'): - case Magic('3'): - case Magic('4'): - case Magic('5'): - case Magic('6'): - case Magic('7'): - case Magic('8'): - case Magic('9'): - { - int refnum; - - refnum = c - Magic('0'); - if (!seen_endbrace(refnum)) { - return NULL; - } - ret = regnode(BACKREF + refnum); - } - break; - - case Magic('z'): - { - c = no_Magic(getchr()); - switch (c) { - case '(': if ((reg_do_extmatch & REX_SET) == 0) - EMSG_RET_NULL(_(e_z_not_allowed)); - if (one_exactly) - EMSG_ONE_RET_NULL; - ret = reg(REG_ZPAREN, &flags); - if (ret == NULL) - return NULL; - *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH); - re_has_z = REX_SET; - break; - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': if ((reg_do_extmatch & REX_USE) == 0) - EMSG_RET_NULL(_(e_z1_not_allowed)); - ret = regnode(ZREF + c - '0'); - re_has_z = REX_USE; - break; - - case 's': ret = regnode(MOPEN + 0); - if (!re_mult_next("\\zs")) { - return NULL; - } - break; - - case 'e': ret = regnode(MCLOSE + 0); - if (!re_mult_next("\\ze")) { - return NULL; - } - break; - - default: EMSG_RET_NULL(_("E68: Invalid character after \\z")); - } - } - break; - - case Magic('%'): - { - c = no_Magic(getchr()); - switch (c) { - /* () without a back reference */ - case '(': - if (one_exactly) - EMSG_ONE_RET_NULL; - ret = reg(REG_NPAREN, &flags); - if (ret == NULL) - return NULL; - *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); - break; - - /* Catch \%^ and \%$ regardless of where they appear in the - * pattern -- regardless 
of whether or not it makes sense. */ - case '^': - ret = regnode(RE_BOF); - break; - - case '$': - ret = regnode(RE_EOF); - break; - - case '#': - ret = regnode(CURSOR); - break; - - case 'V': - ret = regnode(RE_VISUAL); - break; - - case 'C': - ret = regnode(RE_COMPOSING); - break; - - /* \%[abc]: Emit as a list of branches, all ending at the last - * branch which matches nothing. */ - case '[': - if (one_exactly) /* doesn't nest */ - EMSG_ONE_RET_NULL; - { - char_u *lastbranch; - char_u *lastnode = NULL; - char_u *br; - - ret = NULL; - while ((c = getchr()) != ']') { - if (c == NUL) - EMSG2_RET_NULL(_(e_missing_sb), - reg_magic == MAGIC_ALL); - br = regnode(BRANCH); - if (ret == NULL) { - ret = br; - } else { - regtail(lastnode, br); - if (reg_toolong) { - return NULL; - } - } - - ungetchr(); - one_exactly = true; - lastnode = regatom(flagp); - one_exactly = false; - if (lastnode == NULL) { - return NULL; - } - } - if (ret == NULL) - EMSG2_RET_NULL(_(e_empty_sb), - reg_magic == MAGIC_ALL); - lastbranch = regnode(BRANCH); - br = regnode(NOTHING); - if (ret != JUST_CALC_SIZE) { - regtail(lastnode, br); - regtail(lastbranch, br); - /* connect all branches to the NOTHING - * branch at the end */ - for (br = ret; br != lastnode; ) { - if (OP(br) == BRANCH) { - regtail(br, lastbranch); - if (reg_toolong) { - return NULL; - } - br = OPERAND(br); - } else - br = regnext(br); - } - } - *flagp &= ~(HASWIDTH | SIMPLE); - break; - } - - case 'd': /* %d123 decimal */ - case 'o': /* %o123 octal */ - case 'x': /* %xab hex 2 */ - case 'u': /* %uabcd hex 4 */ - case 'U': /* %U1234abcd hex 8 */ - { - int64_t i; - - switch (c) { - case 'd': i = getdecchrs(); break; - case 'o': i = getoctchrs(); break; - case 'x': i = gethexchrs(2); break; - case 'u': i = gethexchrs(4); break; - case 'U': i = gethexchrs(8); break; - default: i = -1; break; - } - - if (i < 0 || i > INT_MAX) { - EMSG2_RET_NULL(_("E678: Invalid character after %s%%[dxouU]"), - reg_magic == MAGIC_ALL); - } - if 
(use_multibytecode(i)) { - ret = regnode(MULTIBYTECODE); - } else { - ret = regnode(EXACTLY); - } - if (i == 0) { - regc(0x0a); - } else { - regmbc(i); - } - regc(NUL); - *flagp |= HASWIDTH; - break; - } - - default: - if (ascii_isdigit(c) || c == '<' || c == '>' - || c == '\'') { - uint32_t n = 0; - int cmp; - - cmp = c; - if (cmp == '<' || cmp == '>') - c = getchr(); - while (ascii_isdigit(c)) { - n = n * 10 + (uint32_t)(c - '0'); - c = getchr(); - } - if (c == '\'' && n == 0) { - /* "\%'m", "\%<'m" and "\%>'m": Mark */ - c = getchr(); - ret = regnode(RE_MARK); - if (ret == JUST_CALC_SIZE) - regsize += 2; - else { - *regcode++ = c; - *regcode++ = cmp; - } - break; - } else if (c == 'l' || c == 'c' || c == 'v') { - if (c == 'l') { - ret = regnode(RE_LNUM); - if (save_prev_at_start) { - at_start = true; - } - } else if (c == 'c') { - ret = regnode(RE_COL); - } else { - ret = regnode(RE_VCOL); - } - if (ret == JUST_CALC_SIZE) { - regsize += 5; - } else { - // put the number and the optional - // comparator after the opcode - regcode = re_put_uint32(regcode, n); - *regcode++ = cmp; - } - break; - } - } - - EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), - reg_magic == MAGIC_ALL); - } - } - break; - - case Magic('['): -collection: - { - char_u *lp; - - /* - * If there is no matching ']', we assume the '[' is a normal - * character. This makes 'incsearch' and ":help [" work. - */ - lp = skip_anyof(regparse); - if (*lp == ']') { /* there is a matching ']' */ - int startc = -1; /* > 0 when next '-' is a range */ - int endc; - - /* - * In a character class, different parsing rules apply. - * Not even \ is special anymore, nothing is. - */ - if (*regparse == '^') { /* Complement of range. */ - ret = regnode(ANYBUT + extra); - regparse++; - } else - ret = regnode(ANYOF + extra); - - /* At the start ']' and '-' mean the literal character. 
*/ - if (*regparse == ']' || *regparse == '-') { - startc = *regparse; - regc(*regparse++); - } - - while (*regparse != NUL && *regparse != ']') { - if (*regparse == '-') { - ++regparse; - /* The '-' is not used for a range at the end and - * after or before a '\n'. */ - if (*regparse == ']' || *regparse == NUL - || startc == -1 - || (regparse[0] == '\\' && regparse[1] == 'n')) { - regc('-'); - startc = '-'; /* [--x] is a range */ - } else { - /* Also accept "a-[.z.]" */ - endc = 0; - if (*regparse == '[') - endc = get_coll_element(®parse); - if (endc == 0) { - endc = mb_ptr2char_adv((const char_u **)®parse); - } - - /* Handle \o40, \x20 and \u20AC style sequences */ - if (endc == '\\' && !reg_cpo_lit) - endc = coll_get_char(); - - if (startc > endc) { - EMSG_RET_NULL(_(e_reverse_range)); - } - if (utf_char2len(startc) > 1 - || utf_char2len(endc) > 1) { - // Limit to a range of 256 chars - if (endc > startc + 256) { - EMSG_RET_NULL(_(e_large_class)); - } - while (++startc <= endc) { - regmbc(startc); - } - } else { - while (++startc <= endc) - regc(startc); - } - startc = -1; - } - } - /* - * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim - * accepts "\t", "\e", etc., but only when the 'l' flag in - * 'cpoptions' is not included. - */ - else if (*regparse == '\\' - && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL - || (!reg_cpo_lit - && vim_strchr(REGEXP_ABBR, - regparse[1]) != NULL))) { - regparse++; - if (*regparse == 'n') { - /* '\n' in range: also match NL */ - if (ret != JUST_CALC_SIZE) { - /* Using \n inside [^] does not change what - * matches. "[^\n]" is the same as ".". 
*/ - if (*ret == ANYOF) { - *ret = ANYOF + ADD_NL; - *flagp |= HASNL; - } - /* else: must have had a \n already */ - } - regparse++; - startc = -1; - } else if (*regparse == 'd' - || *regparse == 'o' - || *regparse == 'x' - || *regparse == 'u' - || *regparse == 'U') { - startc = coll_get_char(); - if (startc == 0) - regc(0x0a); - else - regmbc(startc); - } else { - startc = backslash_trans(*regparse++); - regc(startc); - } - } else if (*regparse == '[') { - int c_class; - int cu; - - c_class = get_char_class(®parse); - startc = -1; - /* Characters assumed to be 8 bits! */ - switch (c_class) { - case CLASS_NONE: - c_class = get_equi_class(®parse); - if (c_class != 0) { - /* produce equivalence class */ - reg_equi_class(c_class); - } else if ((c_class = - get_coll_element(®parse)) != 0) { - /* produce a collating element */ - regmbc(c_class); - } else { - /* literal '[', allow [[-x] as a range */ - startc = *regparse++; - regc(startc); - } - break; - case CLASS_ALNUM: - for (cu = 1; cu < 128; cu++) { - if (isalnum(cu)) { - regmbc(cu); - } - } - break; - case CLASS_ALPHA: - for (cu = 1; cu < 128; cu++) { - if (isalpha(cu)) { - regmbc(cu); - } - } - break; - case CLASS_BLANK: - regc(' '); - regc('\t'); - break; - case CLASS_CNTRL: - for (cu = 1; cu <= 127; cu++) { - if (iscntrl(cu)) { - regmbc(cu); - } - } - break; - case CLASS_DIGIT: - for (cu = 1; cu <= 127; cu++) { - if (ascii_isdigit(cu)) { - regmbc(cu); - } - } - break; - case CLASS_GRAPH: - for (cu = 1; cu <= 127; cu++) { - if (isgraph(cu)) { - regmbc(cu); - } - } - break; - case CLASS_LOWER: - for (cu = 1; cu <= 255; cu++) { - if (mb_islower(cu) && cu != 170 && cu != 186) { - regmbc(cu); - } - } - break; - case CLASS_PRINT: - for (cu = 1; cu <= 255; cu++) { - if (vim_isprintc(cu)) { - regmbc(cu); - } - } - break; - case CLASS_PUNCT: - for (cu = 1; cu < 128; cu++) { - if (ispunct(cu)) { - regmbc(cu); - } - } - break; - case CLASS_SPACE: - for (cu = 9; cu <= 13; cu++) - regc(cu); - regc(' '); - break; - case 
CLASS_UPPER: - for (cu = 1; cu <= 255; cu++) { - if (mb_isupper(cu)) { - regmbc(cu); - } - } - break; - case CLASS_XDIGIT: - for (cu = 1; cu <= 255; cu++) { - if (ascii_isxdigit(cu)) { - regmbc(cu); - } - } - break; - case CLASS_TAB: - regc('\t'); - break; - case CLASS_RETURN: - regc('\r'); - break; - case CLASS_BACKSPACE: - regc('\b'); - break; - case CLASS_ESCAPE: - regc(ESC); - break; - case CLASS_IDENT: - for (cu = 1; cu <= 255; cu++) { - if (vim_isIDc(cu)) { - regmbc(cu); - } - } - break; - case CLASS_KEYWORD: - for (cu = 1; cu <= 255; cu++) { - if (reg_iswordc(cu)) { - regmbc(cu); - } - } - break; - case CLASS_FNAME: - for (cu = 1; cu <= 255; cu++) { - if (vim_isfilec(cu)) { - regmbc(cu); - } - } - break; - } - } else { - // produce a multibyte character, including any - // following composing characters. - startc = utf_ptr2char(regparse); - int len = utfc_ptr2len(regparse); - if (utf_char2len(startc) != len) { - // composing chars - startc = -1; - } - while (--len >= 0) { - regc(*regparse++); - } - } - } - regc(NUL); - prevchr_len = 1; /* last char was the ']' */ - if (*regparse != ']') - EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */ - skipchr(); /* let's be friends with the lexer again */ - *flagp |= HASWIDTH | SIMPLE; - break; - } else if (reg_strict) - EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); - } - FALLTHROUGH; - - default: - { - int len; - - /* A multi-byte character is handled as a separate atom if it's - * before a multi and when it's a composing char. */ - if (use_multibytecode(c)) { -do_multibyte: - ret = regnode(MULTIBYTECODE); - regmbc(c); - *flagp |= HASWIDTH | SIMPLE; - break; - } - - ret = regnode(EXACTLY); - - /* - * Append characters as long as: - * - there is no following multi, we then need the character in - * front of it as a single character operand - * - not running into a Magic character - * - "one_exactly" is not set - * But always emit at least one character. 
Might be a Multi, - * e.g., a "[" without matching "]". - */ - for (len = 0; c != NUL && (len == 0 - || (re_multi_type(peekchr()) == NOT_MULTI - && !one_exactly - && !is_Magic(c))); ++len) { - c = no_Magic(c); - { - regmbc(c); - { - int l; - - /* Need to get composing character too. */ - for (;; ) { - l = utf_ptr2len(regparse); - if (!utf_composinglike(regparse, regparse + l)) { - break; - } - regmbc(utf_ptr2char(regparse)); - skipchr(); - } - } - } - c = getchr(); - } - ungetchr(); - - regc(NUL); - *flagp |= HASWIDTH; - if (len == 1) - *flagp |= SIMPLE; - } - break; - } - - return ret; -} - -/// Used in a place where no * or \+ can follow. -static bool re_mult_next(char *what) -{ - if (re_multi_type(peekchr()) == MULTI_MULT) { - EMSG2_RET_FAIL(_("E888: (NFA regexp) cannot repeat %s"), what); - } - return true; -} - -// Return true if MULTIBYTECODE should be used instead of EXACTLY for -// character "c". -static bool use_multibytecode(int c) -{ - return utf_char2len(c) > 1 - && (re_multi_type(peekchr()) != NOT_MULTI - || utf_iscomposing(c)); -} - -/* - * Emit a node. - * Return pointer to generated code. - */ -static char_u *regnode(int op) -{ - char_u *ret; - - ret = regcode; - if (ret == JUST_CALC_SIZE) - regsize += 3; - else { - *regcode++ = op; - *regcode++ = NUL; /* Null "next" pointer. */ - *regcode++ = NUL; - } - return ret; -} - -/* - * Emit (if appropriate) a byte of code - */ -static void regc(int b) -{ - if (regcode == JUST_CALC_SIZE) - regsize++; - else - *regcode++ = b; -} - -/* - * Emit (if appropriate) a multi-byte character of code - */ -static void regmbc(int c) -{ - if (regcode == JUST_CALC_SIZE) { - regsize += utf_char2len(c); - } else { - regcode += utf_char2bytes(c, regcode); - } -} - -/* - * Insert an operator in front of already-emitted operand - * - * Means relocating the operand. 
- */ -static void reginsert(int op, char_u *opnd) -{ - char_u *src; - char_u *dst; - char_u *place; - - if (regcode == JUST_CALC_SIZE) { - regsize += 3; - return; - } - src = regcode; - regcode += 3; - dst = regcode; - while (src > opnd) - *--dst = *--src; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = NUL; - *place = NUL; -} - -/* - * Insert an operator in front of already-emitted operand. - * Add a number to the operator. - */ -static void reginsert_nr(int op, long val, char_u *opnd) -{ - char_u *src; - char_u *dst; - char_u *place; - - if (regcode == JUST_CALC_SIZE) { - regsize += 7; - return; - } - src = regcode; - regcode += 7; - dst = regcode; - while (src > opnd) - *--dst = *--src; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = NUL; - *place++ = NUL; - assert(val >= 0 && (uintmax_t)val <= UINT32_MAX); - re_put_uint32(place, (uint32_t)val); -} - -/* - * Insert an operator in front of already-emitted operand. - * The operator has the given limit values as operands. Also set next pointer. - * - * Means relocating the operand. - */ -static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) -{ - char_u *src; - char_u *dst; - char_u *place; - - if (regcode == JUST_CALC_SIZE) { - regsize += 11; - return; - } - src = regcode; - regcode += 11; - dst = regcode; - while (src > opnd) - *--dst = *--src; - - place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = NUL; - *place++ = NUL; - assert(minval >= 0 && (uintmax_t)minval <= UINT32_MAX); - place = re_put_uint32(place, (uint32_t)minval); - assert(maxval >= 0 && (uintmax_t)maxval <= UINT32_MAX); - place = re_put_uint32(place, (uint32_t)maxval); - regtail(opnd, place); -} - -/* - * Write a four bytes number at "p" and return pointer to the next char. 
- */ -static char_u *re_put_uint32(char_u *p, uint32_t val) -{ - *p++ = (char_u) ((val >> 24) & 0377); - *p++ = (char_u) ((val >> 16) & 0377); - *p++ = (char_u) ((val >> 8) & 0377); - *p++ = (char_u) (val & 0377); - return p; -} - -// Set the next-pointer at the end of a node chain. -static void regtail(char_u *p, char_u *val) -{ - int offset; - - if (p == JUST_CALC_SIZE) { - return; - } - - // Find last node. - char_u *scan = p; - for (;; ) { - char_u *temp = regnext(scan); - if (temp == NULL) { - break; - } - scan = temp; - } - - if (OP(scan) == BACK) { - offset = (int)(scan - val); - } else { - offset = (int)(val - scan); - } - // When the offset uses more than 16 bits it can no longer fit in the two - // bytes available. Use a global flag to avoid having to check return - // values in too many places. - if (offset > 0xffff) { - reg_toolong = true; - } else { - *(scan + 1) = (char_u)(((unsigned)offset >> 8) & 0377); - *(scan + 2) = (char_u)(offset & 0377); - } -} - -/* - * Like regtail, on item after a BRANCH; nop if none. - */ -static void regoptail(char_u *p, char_u *val) -{ - /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */ - if (p == NULL || p == JUST_CALC_SIZE - || (OP(p) != BRANCH - && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9))) - return; - regtail(OPERAND(p), val); -} - -/* - * Functions for getting characters from the regexp input. - */ - -/* * Start parsing at "str". */ static void initchr(char_u *str) @@ -2806,9 +591,10 @@ static int peekchr(void) case '.': case '[': case '~': - /* magic when 'magic' is on */ - if (reg_magic >= MAGIC_ON) + // magic when 'magic' is on + if (reg_magic >= MAGIC_ON) { curchr = Magic(curchr); + } break; case '(': case ')': @@ -2823,18 +609,19 @@ static int peekchr(void) case '|': case '<': case '>': - case '#': /* future ext. */ - case '"': /* future ext. */ - case '\'': /* future ext. */ - case ',': /* future ext. */ - case '-': /* future ext. */ - case ':': /* future ext. 
*/ - case ';': /* future ext. */ - case '`': /* future ext. */ - case '/': /* Can't be used in / command */ - /* magic only after "\v" */ - if (reg_magic == MAGIC_ALL) + case '#': // future ext. + case '"': // future ext. + case '\'': // future ext. + case ',': // future ext. + case '-': // future ext. + case ':': // future ext. + case ';': // future ext. + case '`': // future ext. + case '/': // Can't be used in / command + // magic only after "\v" + if (reg_magic == MAGIC_ALL) { curchr = Magic(curchr); + } break; case '*': // * is not magic as the very first character, eg "?*ptr", when @@ -2947,11 +734,12 @@ static int peekchr(void) */ static void skipchr(void) { - /* peekchr() eats a backslash, do the same here */ - if (*regparse == '\\') + // peekchr() eats a backslash, do the same here + if (*regparse == '\\') { prevchr_len = 1; - else + } else { prevchr_len = 0; + } if (regparse[prevchr_len] != NUL) { // Exclude composing chars that utfc_ptr2len does include. prevchr_len += utf_ptr2len(regparse + prevchr_len); @@ -2961,7 +749,7 @@ static void skipchr(void) at_start = false; prevprevchr = prevchr; prevchr = curchr; - curchr = nextchr; /* use previously unget char, or -1 */ + curchr = nextchr; // use previously unget char, or -1 nextchr = -1; } @@ -3054,8 +842,8 @@ static int64_t getdecchrs(void) break; nr *= 10; nr += c - '0'; - ++regparse; - curchr = -1; /* no longer valid */ + regparse++; + curchr = -1; // no longer valid } if (i == 0) @@ -3091,29 +879,6 @@ static int64_t getoctchrs(void) return nr; } -/* - * Get a number after a backslash that is inside []. - * When nothing is recognized return a backslash. 
- */ -static int coll_get_char(void) -{ - int64_t nr = -1; - - switch (*regparse++) { - case 'd': nr = getdecchrs(); break; - case 'o': nr = getoctchrs(); break; - case 'x': nr = gethexchrs(2); break; - case 'u': nr = gethexchrs(4); break; - case 'U': nr = gethexchrs(8); break; - } - if (nr < 0 || nr > INT_MAX) { - // If getting the number fails be backwards compatible: the character - // is a backslash. - regparse--; - nr = '\\'; - } - return nr; -} /* * read_limits - Read two integers to be taken as a minimum and maximum. @@ -3149,9 +914,7 @@ static int read_limits(long *minval, long *maxval) regparse++; // Allow either \{...} or \{...\} } if (*regparse != '}') { - snprintf((char *)IObuff, IOSIZE, _("E554: Syntax error in %s{...}"), - reg_magic == MAGIC_ALL ? "" : "\\"); - EMSG_RET_FAIL((char *)IObuff); + EMSG2_RET_FAIL(_("E554: Syntax error in %s{...}"), reg_magic == MAGIC_ALL); } /* @@ -3163,7 +926,7 @@ static int read_limits(long *minval, long *maxval) *minval = *maxval; *maxval = tmp; } - skipchr(); /* let's be friends with the lexer again */ + skipchr(); // let's be friends with the lexer again return OK; } @@ -3175,22 +938,6 @@ static int read_limits(long *minval, long *maxval) * Global work variables for vim_regexec(). */ -/* Save the sub-expressions before attempting a match. */ -#define save_se(savep, posp, pp) \ - REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)) - -/* After a failed match restore the sub-expressions. */ -#define restore_se(savep, posp, pp) { \ - if (REG_MULTI) \ - *(posp) = (savep)->se_u.pos; \ - else \ - *(pp) = (savep)->se_u.ptr; } - - -#ifdef REGEXP_DEBUG -int regnarrate = 0; -#endif - // Sometimes need to save a copy of a line. Since alloc()/free() is very // slow, we keep one allocated piece of memory and only re-allocate it when // it's too small. It's freed in bt_regexec_both() when finished. 
@@ -3266,41 +1013,6 @@ typedef struct { static regexec_T rex; static bool rex_in_use = false; -/* - * "regstack" and "backpos" are used by regmatch(). They are kept over calls - * to avoid invoking malloc() and free() often. - * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T - * or regbehind_T. - * "backpos_T" is a table with backpos_T for BACK - */ -static garray_T regstack = GA_EMPTY_INIT_VALUE; -static garray_T backpos = GA_EMPTY_INIT_VALUE; - -/* - * Both for regstack and backpos tables we use the following strategy of - * allocation (to reduce malloc/free calls): - * - Initial size is fairly small. - * - When needed, the tables are grown bigger (8 times at first, double after - * that). - * - After executing the match we free the memory only if the array has grown. - * Thus the memory is kept allocated when it's at the initial size. - * This makes it fast while not keeping a lot of memory allocated. - * A three times speed increase was observed when using many simple patterns. - */ -#define REGSTACK_INITIAL 2048 -#define BACKPOS_INITIAL 64 - -#if defined(EXITFREE) -void free_regexp_stuff(void) -{ - ga_clear(®stack); - ga_clear(&backpos); - xfree(reg_tofree); - xfree(reg_prev_sub); -} - -#endif - // Return true if character 'c' is included in 'iskeyword' option for // "reg_buf" buffer. 
static bool reg_iswordc(int c) @@ -3325,312 +1037,15 @@ static char_u *reg_getline(linenr_T lnum) return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, false); } -static regsave_T behind_pos; - -static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */ -static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */ -static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */ -static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ +static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning +static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches +static lpos_T reg_startzpos[NSUBEXP]; // idem, beginning pos +static lpos_T reg_endzpos[NSUBEXP]; // idem, end pos // true if using multi-line regexp. #define REG_MULTI (rex.reg_match == NULL) /* - * Match a regexp against a string. - * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). - * Uses curbuf for line count and 'iskeyword'. - * If "line_lbr" is true, consider a "\n" in "line" to be a line break. - * - * Returns 0 for failure, number of lines contained in the match otherwise. - */ -static int -bt_regexec_nl ( - regmatch_T *rmp, - char_u *line, /* string to match against */ - colnr_T col, /* column to start looking for match */ - bool line_lbr -) -{ - rex.reg_match = rmp; - rex.reg_mmatch = NULL; - rex.reg_maxline = 0; - rex.reg_line_lbr = line_lbr; - rex.reg_buf = curbuf; - rex.reg_win = NULL; - rex.reg_ic = rmp->rm_ic; - rex.reg_icombine = false; - rex.reg_maxcol = 0; - - long r = bt_regexec_both(line, col, NULL, NULL); - assert(r <= INT_MAX); - return (int)r; -} - -/// Wrapper around strchr which accounts for case-insensitive searches and -/// non-ASCII characters. -/// -/// This function is used a lot for simple searches, keep it fast! 
-/// -/// @param s string to search -/// @param c character to find in @a s -/// -/// @return NULL if no match, otherwise pointer to the position in @a s -static inline char_u *cstrchr(const char_u *const s, const int c) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL - FUNC_ATTR_ALWAYS_INLINE -{ - if (!rex.reg_ic) { - return vim_strchr(s, c); - } - - // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is - // expected to be highly optimized. - if (c > 0x80) { - const int folded_c = utf_fold(c); - for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) { - if (utf_fold(utf_ptr2char(p)) == folded_c) { - return (char_u *)p; - } - } - return NULL; - } - - int cc; - if (ASCII_ISUPPER(c)) { - cc = TOLOWER_ASC(c); - } else if (ASCII_ISLOWER(c)) { - cc = TOUPPER_ASC(c); - } else { - return vim_strchr(s, c); - } - - char tofind[] = { (char)c, (char)cc, NUL }; - return (char_u *)strpbrk((const char *)s, tofind); -} - -/// Matches a regexp against multiple lines. -/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). -/// Uses curbuf for line count and 'iskeyword'. -/// -/// @param win Window in which to search or NULL -/// @param buf Buffer in which to search -/// @param lnum Number of line to start looking for match -/// @param col Column to start looking for match -/// @param tm Timeout limit or NULL -/// -/// @return zero if there is no match and number of lines contained in the match -/// otherwise. 
-static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, - linenr_T lnum, colnr_T col, - proftime_T *tm, int *timed_out) -{ - rex.reg_match = NULL; - rex.reg_mmatch = rmp; - rex.reg_buf = buf; - rex.reg_win = win; - rex.reg_firstlnum = lnum; - rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; - rex.reg_line_lbr = false; - rex.reg_ic = rmp->rmm_ic; - rex.reg_icombine = false; - rex.reg_maxcol = rmp->rmm_maxcol; - - return bt_regexec_both(NULL, col, tm, timed_out); -} - -/// Match a regexp against a string ("line" points to the string) or multiple -/// lines (if "line" is NULL, use reg_getline()). -/// @return 0 for failure, or number of lines contained in the match. -static long bt_regexec_both(char_u *line, - colnr_T col, // column to start search - proftime_T *tm, // timeout limit or NULL - int *timed_out) // flag set on timeout or NULL -{ - bt_regprog_T *prog; - char_u *s; - long retval = 0L; - - /* Create "regstack" and "backpos" if they are not allocated yet. - * We allocate *_INITIAL amount of bytes first and then set the grow size - * to much bigger value to avoid many malloc calls in case of deep regular - * expressions. */ - if (regstack.ga_data == NULL) { - /* Use an item size of 1 byte, since we push different things - * onto the regstack. */ - ga_init(®stack, 1, REGSTACK_INITIAL); - ga_grow(®stack, REGSTACK_INITIAL); - ga_set_growsize(®stack, REGSTACK_INITIAL * 8); - } - - if (backpos.ga_data == NULL) { - ga_init(&backpos, sizeof(backpos_T), BACKPOS_INITIAL); - ga_grow(&backpos, BACKPOS_INITIAL); - ga_set_growsize(&backpos, BACKPOS_INITIAL * 8); - } - - if (REG_MULTI) { - prog = (bt_regprog_T *)rex.reg_mmatch->regprog; - line = reg_getline((linenr_T)0); - rex.reg_startpos = rex.reg_mmatch->startpos; - rex.reg_endpos = rex.reg_mmatch->endpos; - } else { - prog = (bt_regprog_T *)rex.reg_match->regprog; - rex.reg_startp = rex.reg_match->startp; - rex.reg_endp = rex.reg_match->endp; - } - - /* Be paranoid... 
*/ - if (prog == NULL || line == NULL) { - iemsg(_(e_null)); - goto theend; - } - - /* Check validity of program. */ - if (prog_magic_wrong()) - goto theend; - - // If the start column is past the maximum column: no need to try. - if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { - goto theend; - } - - // If pattern contains "\c" or "\C": overrule value of rex.reg_ic - if (prog->regflags & RF_ICASE) { - rex.reg_ic = true; - } else if (prog->regflags & RF_NOICASE) { - rex.reg_ic = false; - } - - // If pattern contains "\Z" overrule value of rex.reg_icombine - if (prog->regflags & RF_ICOMBINE) { - rex.reg_icombine = true; - } - - /* If there is a "must appear" string, look for it. */ - if (prog->regmust != NULL) { - int c = utf_ptr2char(prog->regmust); - s = line + col; - - // This is used very often, esp. for ":global". Use two versions of - // the loop to avoid overhead of conditions. - if (!rex.reg_ic) { - while ((s = vim_strchr(s, c)) != NULL) { - if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { - break; // Found it. - } - MB_PTR_ADV(s); - } - } else { - while ((s = cstrchr(s, c)) != NULL) { - if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { - break; // Found it. - } - MB_PTR_ADV(s); - } - } - if (s == NULL) { // Not present. - goto theend; - } - } - - rex.line = line; - rex.lnum = 0; - reg_toolong = false; - - /* Simplest case: Anchored match need be tried only once. */ - if (prog->reganch) { - int c = utf_ptr2char(rex.line + col); - if (prog->regstart == NUL - || prog->regstart == c - || (rex.reg_ic - && (utf_fold(prog->regstart) == utf_fold(c) - || (c < 255 && prog->regstart < 255 - && mb_tolower(prog->regstart) == mb_tolower(c))))) { - retval = regtry(prog, col, tm, timed_out); - } else { - retval = 0; - } - } else { - int tm_count = 0; - /* Messy cases: unanchored match. */ - while (!got_int) { - if (prog->regstart != NUL) { - // Skip until the char we know it must start with. 
- s = cstrchr(rex.line + col, prog->regstart); - if (s == NULL) { - retval = 0; - break; - } - col = (int)(s - rex.line); - } - - // Check for maximum column to try. - if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { - retval = 0; - break; - } - - retval = regtry(prog, col, tm, timed_out); - if (retval > 0) { - break; - } - - // if not currently on the first line, get it again - if (rex.lnum != 0) { - rex.lnum = 0; - rex.line = reg_getline((linenr_T)0); - } - if (rex.line[col] == NUL) { - break; - } - col += utfc_ptr2len(rex.line + col); - // Check for timeout once in a twenty times to avoid overhead. - if (tm != NULL && ++tm_count == 20) { - tm_count = 0; - if (profile_passed_limit(*tm)) { - if (timed_out != NULL) { - *timed_out = true; - } - break; - } - } - } - } - -theend: - /* Free "reg_tofree" when it's a bit big. - * Free regstack and backpos if they are bigger than their initial size. */ - if (reg_tofreelen > 400) { - XFREE_CLEAR(reg_tofree); - } - if (regstack.ga_maxlen > REGSTACK_INITIAL) - ga_clear(®stack); - if (backpos.ga_maxlen > BACKPOS_INITIAL) - ga_clear(&backpos); - - if (retval > 0) { - // Make sure the end is never before the start. Can happen when \zs - // and \ze are used. - if (REG_MULTI) { - const lpos_T *const start = &rex.reg_mmatch->startpos[0]; - const lpos_T *const end = &rex.reg_mmatch->endpos[0]; - - if (end->lnum < start->lnum - || (end->lnum == start->lnum && end->col < start->col)) { - rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0]; - } - } else { - if (rex.reg_match->endp[0] < rex.reg_match->startp[0]) { - rex.reg_match->endp[0] = rex.reg_match->startp[0]; - } - } - } - - return retval; -} - - -/* * Create a new extmatch and mark it as referenced once. */ static reg_extmatch_T *make_extmatch(void) @@ -3666,75 +1081,6 @@ void unref_extmatch(reg_extmatch_T *em) } } -/// Try match of "prog" with at rex.line["col"]. -/// @returns 0 for failure, or number of lines contained in the match. 
-static long regtry(bt_regprog_T *prog, - colnr_T col, - proftime_T *tm, // timeout limit or NULL - int *timed_out) // flag set on timeout or NULL -{ - rex.input = rex.line + col; - rex.need_clear_subexpr = true; - // Clear the external match subpointers if necessaey. - rex.need_clear_zsubexpr = (prog->reghasz == REX_SET); - - if (regmatch(prog->program + 1, tm, timed_out) == 0) { - return 0; - } - - cleanup_subexpr(); - if (REG_MULTI) { - if (rex.reg_startpos[0].lnum < 0) { - rex.reg_startpos[0].lnum = 0; - rex.reg_startpos[0].col = col; - } - if (rex.reg_endpos[0].lnum < 0) { - rex.reg_endpos[0].lnum = rex.lnum; - rex.reg_endpos[0].col = (int)(rex.input - rex.line); - } else { - // Use line number of "\ze". - rex.lnum = rex.reg_endpos[0].lnum; - } - } else { - if (rex.reg_startp[0] == NULL) { - rex.reg_startp[0] = rex.line + col; - } - if (rex.reg_endp[0] == NULL) { - rex.reg_endp[0] = rex.input; - } - } - /* Package any found \z(...\) matches for export. Default is none. */ - unref_extmatch(re_extmatch_out); - re_extmatch_out = NULL; - - if (prog->reghasz == REX_SET) { - int i; - - cleanup_zsubexpr(); - re_extmatch_out = make_extmatch(); - for (i = 0; i < NSUBEXP; i++) { - if (REG_MULTI) { - /* Only accept single line matches. */ - if (reg_startzpos[i].lnum >= 0 - && reg_endzpos[i].lnum == reg_startzpos[i].lnum - && reg_endzpos[i].col >= reg_startzpos[i].col) { - re_extmatch_out->matches[i] = - vim_strnsave(reg_getline(reg_startzpos[i].lnum) - + reg_startzpos[i].col, - reg_endzpos[i].col - - reg_startzpos[i].col); - } - } else { - if (reg_startzp[i] != NULL && reg_endzp[i] != NULL) - re_extmatch_out->matches[i] = - vim_strnsave(reg_startzp[i], reg_endzp[i] - reg_startzp[i]); - } - } - } - return 1 + rex.lnum; -} - - // Get class of previous character. 
static int reg_prev_class(void) { @@ -3790,8 +1136,8 @@ static bool reg_match_visual(void) return false; } + col = (colnr_T)(rex.input - rex.line); if (mode == 'v') { - col = (colnr_T)(rex.input - rex.line); if ((lnum == top.lnum && col < top.col) || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) { return false; @@ -3806,8 +1152,12 @@ static bool reg_match_visual(void) if (top.col == MAXCOL || bot.col == MAXCOL || curswant == MAXCOL) { end = MAXCOL; } - unsigned int cols_u = win_linetabsize(wp, rex.line, - (colnr_T)(rex.input - rex.line)); + + // getvvcol() flushes rex.line, need to get it again + rex.line = reg_getline(rex.lnum); + rex.input = rex.line + col; + + unsigned int cols_u = win_linetabsize(wp, rex.line, col); assert(cols_u <= MAXCOL); colnr_T cols = (colnr_T)cols_u; if (cols < start || cols > end - (*p_sel == 'e')) { @@ -3817,1803 +1167,6 @@ static bool reg_match_visual(void) return true; } -#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) - -/* - * The arguments from BRACE_LIMITS are stored here. They are actually local - * to regmatch(), but they are here to reduce the amount of stack space used - * (it can be called recursively many times). - */ -static long bl_minval; -static long bl_maxval; - -/// Main matching routine -/// -/// Conceptually the strategy is simple: Check to see whether the current node -/// matches, push an item onto the regstack and loop to see whether the rest -/// matches, and then act accordingly. In practice we make some effort to -/// avoid using the regstack, in particular by going through "ordinary" nodes -/// (that don't need to know whether the rest of the match failed) by a nested -/// loop. -/// -/// Returns true when there is a match. Leaves rex.input and rex.lnum -/// just after the last matched character. -/// Returns false when there is no match. Leaves rex.input and rex.lnum in an -/// undefined state! -static bool regmatch( - char_u *scan, // Current node. 
- proftime_T *tm, // timeout limit or NULL - int *timed_out // flag set on timeout or NULL -) -{ - char_u *next; /* Next node. */ - int op; - int c; - regitem_T *rp; - int no; - int status; // one of the RA_ values: - int tm_count = 0; -#define RA_FAIL 1 // something failed, abort -#define RA_CONT 2 // continue in inner loop -#define RA_BREAK 3 // break inner loop -#define RA_MATCH 4 // successful match -#define RA_NOMATCH 5 // didn't match - - // Make "regstack" and "backpos" empty. They are allocated and freed in - // bt_regexec_both() to reduce malloc()/free() calls. - regstack.ga_len = 0; - backpos.ga_len = 0; - - /* - * Repeat until "regstack" is empty. - */ - for (;; ) { - /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q". - * Allow interrupting them with CTRL-C. */ - fast_breakcheck(); - -#ifdef REGEXP_DEBUG - if (scan != NULL && regnarrate) { - mch_errmsg((char *)regprop(scan)); - mch_errmsg("(\n"); - } -#endif - - /* - * Repeat for items that can be matched sequentially, without using the - * regstack. - */ - for (;; ) { - if (got_int || scan == NULL) { - status = RA_FAIL; - break; - } - // Check for timeout once in a 100 times to avoid overhead. - if (tm != NULL && ++tm_count == 100) { - tm_count = 0; - if (profile_passed_limit(*tm)) { - if (timed_out != NULL) { - *timed_out = true; - } - status = RA_FAIL; - break; - } - } - status = RA_CONT; - -#ifdef REGEXP_DEBUG - if (regnarrate) { - mch_errmsg((char *)regprop(scan)); - mch_errmsg("...\n"); - if (re_extmatch_in != NULL) { - int i; - - mch_errmsg(_("External submatches:\n")); - for (i = 0; i < NSUBEXP; i++) { - mch_errmsg(" \""); - if (re_extmatch_in->matches[i] != NULL) - mch_errmsg((char *)re_extmatch_in->matches[i]); - mch_errmsg("\"\n"); - } - } - } -#endif - next = regnext(scan); - - op = OP(scan); - // Check for character class with NL added. 
- if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI - && *rex.input == NUL && rex.lnum <= rex.reg_maxline) { - reg_nextline(); - } else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n') { - ADVANCE_REGINPUT(); - } else { - if (WITH_NL(op)) { - op -= ADD_NL; - } - c = utf_ptr2char(rex.input); - switch (op) { - case BOL: - if (rex.input != rex.line) { - status = RA_NOMATCH; - } - break; - - case EOL: - if (c != NUL) { - status = RA_NOMATCH; - } - break; - - case RE_BOF: - // We're not at the beginning of the file when below the first - // line where we started, not at the start of the line or we - // didn't start at the first line of the buffer. - if (rex.lnum != 0 || rex.input != rex.line - || (REG_MULTI && rex.reg_firstlnum > 1)) { - status = RA_NOMATCH; - } - break; - - case RE_EOF: - if (rex.lnum != rex.reg_maxline || c != NUL) { - status = RA_NOMATCH; - } - break; - - case CURSOR: - // Check if the buffer is in a window and compare the - // rex.reg_win->w_cursor position to the match position. - if (rex.reg_win == NULL - || (rex.lnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) - || ((colnr_T)(rex.input - rex.line) != - rex.reg_win->w_cursor.col)) { - status = RA_NOMATCH; - } - break; - - case RE_MARK: - /* Compare the mark position to the match position. */ - { - int mark = OPERAND(scan)[0]; - int cmp = OPERAND(scan)[1]; - pos_T *pos; - - pos = getmark_buf(rex.reg_buf, mark, false); - if (pos == NULL // mark doesn't exist - || pos->lnum <= 0) { // mark isn't set in reg_buf - status = RA_NOMATCH; - } else { - const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum - && pos->col == MAXCOL - ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) - : pos->col; - - if (pos->lnum == rex.lnum + rex.reg_firstlnum - ? (pos_col == (colnr_T)(rex.input - rex.line) - ? (cmp == '<' || cmp == '>') - : (pos_col < (colnr_T)(rex.input - rex.line) - ? cmp != '>' - : cmp != '<')) - : (pos->lnum < rex.lnum + rex.reg_firstlnum - ? 
cmp != '>' - : cmp != '<')) { - status = RA_NOMATCH; - } - } - } - break; - - case RE_VISUAL: - if (!reg_match_visual()) - status = RA_NOMATCH; - break; - - case RE_LNUM: - assert(rex.lnum + rex.reg_firstlnum >= 0 - && (uintmax_t)(rex.lnum + rex.reg_firstlnum) <= UINT32_MAX); - if (!REG_MULTI - || !re_num_cmp((uint32_t)(rex.lnum + rex.reg_firstlnum), scan)) { - status = RA_NOMATCH; - } - break; - - case RE_COL: - assert(rex.input - rex.line + 1 >= 0 - && (uintmax_t)(rex.input - rex.line + 1) <= UINT32_MAX); - if (!re_num_cmp((uint32_t)(rex.input - rex.line + 1), scan)) { - status = RA_NOMATCH; - } - break; - - case RE_VCOL: - if (!re_num_cmp(win_linetabsize(rex.reg_win == NULL - ? curwin : rex.reg_win, - rex.line, - (colnr_T)(rex.input - rex.line)) + 1, - scan)) { - status = RA_NOMATCH; - } - break; - - case BOW: // \<word; rex.input points to w - if (c == NUL) { // Can't match at end of line - status = RA_NOMATCH; - } else { - // Get class of current and previous char (if it exists). - const int this_class = - mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); - if (this_class <= 1) { - status = RA_NOMATCH; // Not on a word at all. - } else if (reg_prev_class() == this_class) { - status = RA_NOMATCH; // Previous char is in same word. - } - } - break; - - case EOW: // word\>; rex.input points after d - if (rex.input == rex.line) { // Can't match at start of line - status = RA_NOMATCH; - } else { - int this_class, prev_class; - - // Get class of current and previous char (if it exists). - this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); - prev_class = reg_prev_class(); - if (this_class == prev_class - || prev_class == 0 || prev_class == 1) { - status = RA_NOMATCH; - } - } - break; // Matched with EOW - - case ANY: - // ANY does not match new lines. 
- if (c == NUL) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case IDENT: - if (!vim_isIDc(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case SIDENT: - if (ascii_isdigit(*rex.input) || !vim_isIDc(c)) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case KWORD: - if (!vim_iswordp_buf(rex.input, rex.reg_buf)) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case SKWORD: - if (ascii_isdigit(*rex.input) - || !vim_iswordp_buf(rex.input, rex.reg_buf)) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case FNAME: - if (!vim_isfilec(c)) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case SFNAME: - if (ascii_isdigit(*rex.input) || !vim_isfilec(c)) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case PRINT: - if (!vim_isprintc(utf_ptr2char(rex.input))) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case SPRINT: - if (ascii_isdigit(*rex.input) || !vim_isprintc(utf_ptr2char(rex.input))) { - status = RA_NOMATCH; - } else { - ADVANCE_REGINPUT(); - } - break; - - case WHITE: - if (!ascii_iswhite(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NWHITE: - if (c == NUL || ascii_iswhite(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case DIGIT: - if (!ri_digit(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NDIGIT: - if (c == NUL || ri_digit(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case HEX: - if (!ri_hex(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NHEX: - if (c == NUL || ri_hex(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case OCTAL: - if (!ri_octal(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NOCTAL: - if (c == NUL || ri_octal(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - 
- case WORD: - if (!ri_word(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NWORD: - if (c == NUL || ri_word(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case HEAD: - if (!ri_head(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NHEAD: - if (c == NUL || ri_head(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case ALPHA: - if (!ri_alpha(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NALPHA: - if (c == NUL || ri_alpha(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case LOWER: - if (!ri_lower(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NLOWER: - if (c == NUL || ri_lower(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case UPPER: - if (!ri_upper(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case NUPPER: - if (c == NUL || ri_upper(c)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case EXACTLY: - { - int len; - char_u *opnd; - - opnd = OPERAND(scan); - // Inline the first byte, for speed. - if (*opnd != *rex.input - && (!rex.reg_ic)) { - status = RA_NOMATCH; - } else if (*opnd == NUL) { - // match empty string always works; happens when "~" is - // empty. - } else { - if (opnd[1] == NUL && !rex.reg_ic) { - len = 1; // matched a single byte above - } else { - // Need to match first byte again for multi-byte. - len = (int)STRLEN(opnd); - if (cstrncmp(opnd, rex.input, &len) != 0) { - status = RA_NOMATCH; - } - } - // Check for following composing character, unless %C - // follows (skips over all composing chars). - if (status != RA_NOMATCH - && utf_composinglike(rex.input, rex.input + len) - && !rex.reg_icombine - && OP(next) != RE_COMPOSING) { - // raaron: This code makes a composing character get - // ignored, which is the correct behavior (sometimes) - // for voweled Hebrew texts. 
- status = RA_NOMATCH; - } - if (status != RA_NOMATCH) { - rex.input += len; - } - } - } - break; - - case ANYOF: - case ANYBUT: - if (c == NUL) - status = RA_NOMATCH; - else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF)) - status = RA_NOMATCH; - else - ADVANCE_REGINPUT(); - break; - - case MULTIBYTECODE: - { - int i, len; - - const char_u *opnd = OPERAND(scan); - // Safety check (just in case 'encoding' was changed since - // compiling the program). - if ((len = utfc_ptr2len(opnd)) < 2) { - status = RA_NOMATCH; - break; - } - const int opndc = utf_ptr2char(opnd); - if (utf_iscomposing(opndc)) { - // When only a composing char is given match at any - // position where that composing char appears. - status = RA_NOMATCH; - for (i = 0; rex.input[i] != NUL; - i += utf_ptr2len(rex.input + i)) { - const int inpc = utf_ptr2char(rex.input + i); - if (!utf_iscomposing(inpc)) { - if (i > 0) { - break; - } - } else if (opndc == inpc) { - // Include all following composing chars. - len = i + utfc_ptr2len(rex.input + i); - status = RA_MATCH; - break; - } - } - } else { - for (i = 0; i < len; i++) { - if (opnd[i] != rex.input[i]) { - status = RA_NOMATCH; - break; - } - } - } - rex.input += len; - } - break; - - case RE_COMPOSING: - { - // Skip composing characters. - while (utf_iscomposing(utf_ptr2char(rex.input))) { - MB_CPTR_ADV(rex.input); - } - } - break; - - case NOTHING: - break; - - case BACK: - { - int i; - - /* - * When we run into BACK we need to check if we don't keep - * looping without matching any input. The second and later - * times a BACK is encountered it fails if the input is still - * at the same position as the previous time. - * The positions are stored in "backpos" and found by the - * current value of "scan", the position in the RE program. 
- */ - backpos_T *bp = (backpos_T *)backpos.ga_data; - for (i = 0; i < backpos.ga_len; ++i) - if (bp[i].bp_scan == scan) - break; - if (i == backpos.ga_len) { - backpos_T *p = GA_APPEND_VIA_PTR(backpos_T, &backpos); - p->bp_scan = scan; - } else if (reg_save_equal(&bp[i].bp_pos)) - /* Still at same position as last time, fail. */ - status = RA_NOMATCH; - - assert(status != RA_FAIL); - if (status != RA_NOMATCH) { - reg_save(&bp[i].bp_pos, &backpos); - } - } - break; - - case MOPEN + 0: /* Match start: \zs */ - case MOPEN + 1: /* \( */ - case MOPEN + 2: - case MOPEN + 3: - case MOPEN + 4: - case MOPEN + 5: - case MOPEN + 6: - case MOPEN + 7: - case MOPEN + 8: - case MOPEN + 9: - { - no = op - MOPEN; - cleanup_subexpr(); - rp = regstack_push(RS_MOPEN, scan); - if (rp == NULL) - status = RA_FAIL; - else { - rp->rs_no = no; - save_se(&rp->rs_un.sesave, &rex.reg_startpos[no], - &rex.reg_startp[no]); - // We simply continue and handle the result when done. - } - } - break; - - case NOPEN: /* \%( */ - case NCLOSE: /* \) after \%( */ - if (regstack_push(RS_NOPEN, scan) == NULL) - status = RA_FAIL; - /* We simply continue and handle the result when done. */ - break; - - case ZOPEN + 1: - case ZOPEN + 2: - case ZOPEN + 3: - case ZOPEN + 4: - case ZOPEN + 5: - case ZOPEN + 6: - case ZOPEN + 7: - case ZOPEN + 8: - case ZOPEN + 9: - { - no = op - ZOPEN; - cleanup_zsubexpr(); - rp = regstack_push(RS_ZOPEN, scan); - if (rp == NULL) - status = RA_FAIL; - else { - rp->rs_no = no; - save_se(&rp->rs_un.sesave, ®_startzpos[no], - ®_startzp[no]); - /* We simply continue and handle the result when done. 
*/ - } - } - break; - - case MCLOSE + 0: /* Match end: \ze */ - case MCLOSE + 1: /* \) */ - case MCLOSE + 2: - case MCLOSE + 3: - case MCLOSE + 4: - case MCLOSE + 5: - case MCLOSE + 6: - case MCLOSE + 7: - case MCLOSE + 8: - case MCLOSE + 9: - { - no = op - MCLOSE; - cleanup_subexpr(); - rp = regstack_push(RS_MCLOSE, scan); - if (rp == NULL) { - status = RA_FAIL; - } else { - rp->rs_no = no; - save_se(&rp->rs_un.sesave, &rex.reg_endpos[no], &rex.reg_endp[no]); - // We simply continue and handle the result when done. - } - } - break; - - case ZCLOSE + 1: /* \) after \z( */ - case ZCLOSE + 2: - case ZCLOSE + 3: - case ZCLOSE + 4: - case ZCLOSE + 5: - case ZCLOSE + 6: - case ZCLOSE + 7: - case ZCLOSE + 8: - case ZCLOSE + 9: - { - no = op - ZCLOSE; - cleanup_zsubexpr(); - rp = regstack_push(RS_ZCLOSE, scan); - if (rp == NULL) - status = RA_FAIL; - else { - rp->rs_no = no; - save_se(&rp->rs_un.sesave, ®_endzpos[no], - ®_endzp[no]); - /* We simply continue and handle the result when done. */ - } - } - break; - - case BACKREF + 1: - case BACKREF + 2: - case BACKREF + 3: - case BACKREF + 4: - case BACKREF + 5: - case BACKREF + 6: - case BACKREF + 7: - case BACKREF + 8: - case BACKREF + 9: - { - int len; - - no = op - BACKREF; - cleanup_subexpr(); - if (!REG_MULTI) { // Single-line regexp - if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL) { - // Backref was not set: Match an empty string. - len = 0; - } else { - // Compare current input with back-ref in the same line. - len = (int)(rex.reg_endp[no] - rex.reg_startp[no]); - if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0) { - status = RA_NOMATCH; - } - } - } else { // Multi-line regexp - if (rex.reg_startpos[no].lnum < 0 || rex.reg_endpos[no].lnum < 0) { - // Backref was not set: Match an empty string. - len = 0; - } else { - if (rex.reg_startpos[no].lnum == rex.lnum - && rex.reg_endpos[no].lnum == rex.lnum) { - // Compare back-ref within the current line. 
- len = rex.reg_endpos[no].col - rex.reg_startpos[no].col; - if (cstrncmp(rex.line + rex.reg_startpos[no].col, - rex.input, &len) != 0) { - status = RA_NOMATCH; - } - } else { - // Messy situation: Need to compare between two lines. - int r = match_with_backref(rex.reg_startpos[no].lnum, - rex.reg_startpos[no].col, - rex.reg_endpos[no].lnum, - rex.reg_endpos[no].col, - &len); - if (r != RA_MATCH) { - status = r; - } - } - } - } - - // Matched the backref, skip over it. - rex.input += len; - } - break; - - case ZREF + 1: - case ZREF + 2: - case ZREF + 3: - case ZREF + 4: - case ZREF + 5: - case ZREF + 6: - case ZREF + 7: - case ZREF + 8: - case ZREF + 9: - { - cleanup_zsubexpr(); - no = op - ZREF; - if (re_extmatch_in != NULL - && re_extmatch_in->matches[no] != NULL) { - int len = (int)STRLEN(re_extmatch_in->matches[no]); - if (cstrncmp(re_extmatch_in->matches[no], rex.input, &len) != 0) { - status = RA_NOMATCH; - } else { - rex.input += len; - } - } else { - // Backref was not set: Match an empty string. - } - } - break; - - case BRANCH: - { - if (OP(next) != BRANCH) /* No choice. */ - next = OPERAND(scan); /* Avoid recursion. 
*/ - else { - rp = regstack_push(RS_BRANCH, scan); - if (rp == NULL) - status = RA_FAIL; - else - status = RA_BREAK; /* rest is below */ - } - } - break; - - case BRACE_LIMITS: - { - if (OP(next) == BRACE_SIMPLE) { - bl_minval = OPERAND_MIN(scan); - bl_maxval = OPERAND_MAX(scan); - } else if (OP(next) >= BRACE_COMPLEX - && OP(next) < BRACE_COMPLEX + 10) { - no = OP(next) - BRACE_COMPLEX; - brace_min[no] = OPERAND_MIN(scan); - brace_max[no] = OPERAND_MAX(scan); - brace_count[no] = 0; - } else { - internal_error("BRACE_LIMITS"); - status = RA_FAIL; - } - } - break; - - case BRACE_COMPLEX + 0: - case BRACE_COMPLEX + 1: - case BRACE_COMPLEX + 2: - case BRACE_COMPLEX + 3: - case BRACE_COMPLEX + 4: - case BRACE_COMPLEX + 5: - case BRACE_COMPLEX + 6: - case BRACE_COMPLEX + 7: - case BRACE_COMPLEX + 8: - case BRACE_COMPLEX + 9: - { - no = op - BRACE_COMPLEX; - ++brace_count[no]; - - /* If not matched enough times yet, try one more */ - if (brace_count[no] <= (brace_min[no] <= brace_max[no] - ? brace_min[no] : brace_max[no])) { - rp = regstack_push(RS_BRCPLX_MORE, scan); - if (rp == NULL) - status = RA_FAIL; - else { - rp->rs_no = no; - reg_save(&rp->rs_un.regsave, &backpos); - next = OPERAND(scan); - /* We continue and handle the result when done. */ - } - break; - } - - /* If matched enough times, may try matching some more */ - if (brace_min[no] <= brace_max[no]) { - /* Range is the normal way around, use longest match */ - if (brace_count[no] <= brace_max[no]) { - rp = regstack_push(RS_BRCPLX_LONG, scan); - if (rp == NULL) - status = RA_FAIL; - else { - rp->rs_no = no; - reg_save(&rp->rs_un.regsave, &backpos); - next = OPERAND(scan); - /* We continue and handle the result when done. 
*/ - } - } - } else { - /* Range is backwards, use shortest match first */ - if (brace_count[no] <= brace_min[no]) { - rp = regstack_push(RS_BRCPLX_SHORT, scan); - if (rp == NULL) - status = RA_FAIL; - else { - reg_save(&rp->rs_un.regsave, &backpos); - /* We continue and handle the result when done. */ - } - } - } - } - break; - - case BRACE_SIMPLE: - case STAR: - case PLUS: - { - regstar_T rst; - - /* - * Lookahead to avoid useless match attempts when we know - * what character comes next. - */ - if (OP(next) == EXACTLY) { - rst.nextb = *OPERAND(next); - if (rex.reg_ic) { - if (mb_isupper(rst.nextb)) { - rst.nextb_ic = mb_tolower(rst.nextb); - } else { - rst.nextb_ic = mb_toupper(rst.nextb); - } - } else { - rst.nextb_ic = rst.nextb; - } - } else { - rst.nextb = NUL; - rst.nextb_ic = NUL; - } - if (op != BRACE_SIMPLE) { - rst.minval = (op == STAR) ? 0 : 1; - rst.maxval = MAX_LIMIT; - } else { - rst.minval = bl_minval; - rst.maxval = bl_maxval; - } - - /* - * When maxval > minval, try matching as much as possible, up - * to maxval. When maxval < minval, try matching at least the - * minimal number (since the range is backwards, that's also - * maxval!). - */ - rst.count = regrepeat(OPERAND(scan), rst.maxval); - if (got_int) { - status = RA_FAIL; - break; - } - if (rst.minval <= rst.maxval - ? rst.count >= rst.minval : rst.count >= rst.maxval) { - /* It could match. Prepare for trying to match what - * follows. The code is below. Parameters are stored in - * a regstar_T on the regstack. */ - if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { - emsg(_(e_maxmempat)); - status = RA_FAIL; - } else { - ga_grow(®stack, sizeof(regstar_T)); - regstack.ga_len += sizeof(regstar_T); - rp = regstack_push(rst.minval <= rst.maxval - ? 
RS_STAR_LONG : RS_STAR_SHORT, scan); - if (rp == NULL) - status = RA_FAIL; - else { - *(((regstar_T *)rp) - 1) = rst; - status = RA_BREAK; /* skip the restore bits */ - } - } - } else - status = RA_NOMATCH; - - } - break; - - case NOMATCH: - case MATCH: - case SUBPAT: - rp = regstack_push(RS_NOMATCH, scan); - if (rp == NULL) - status = RA_FAIL; - else { - rp->rs_no = op; - reg_save(&rp->rs_un.regsave, &backpos); - next = OPERAND(scan); - /* We continue and handle the result when done. */ - } - break; - - case BEHIND: - case NOBEHIND: - /* Need a bit of room to store extra positions. */ - if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { - emsg(_(e_maxmempat)); - status = RA_FAIL; - } else { - ga_grow(®stack, sizeof(regbehind_T)); - regstack.ga_len += sizeof(regbehind_T); - rp = regstack_push(RS_BEHIND1, scan); - if (rp == NULL) - status = RA_FAIL; - else { - /* Need to save the subexpr to be able to restore them - * when there is a match but we don't use it. */ - save_subexpr(((regbehind_T *)rp) - 1); - - rp->rs_no = op; - reg_save(&rp->rs_un.regsave, &backpos); - /* First try if what follows matches. If it does then we - * check the behind match by looping. */ - } - } - break; - - case BHPOS: - if (REG_MULTI) { - if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line) - || behind_pos.rs_u.pos.lnum != rex.lnum) { - status = RA_NOMATCH; - } - } else if (behind_pos.rs_u.ptr != rex.input) { - status = RA_NOMATCH; - } - break; - - case NEWL: - if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr) && (c != '\n' || !rex.reg_line_lbr)) { - status = RA_NOMATCH; - } else if (rex.reg_line_lbr) { - ADVANCE_REGINPUT(); - } else { - reg_nextline(); - } - break; - - case END: - status = RA_MATCH; /* Success! */ - break; - - default: - iemsg(_(e_re_corr)); -#ifdef REGEXP_DEBUG - printf("Illegal op code %d\n", op); -#endif - status = RA_FAIL; - break; - } - } - - /* If we can't continue sequentially, break the inner loop. 
*/ - if (status != RA_CONT) - break; - - /* Continue in inner loop, advance to next item. */ - scan = next; - - } /* end of inner loop */ - - /* - * If there is something on the regstack execute the code for the state. - * If the state is popped then loop and use the older state. - */ - while (!GA_EMPTY(®stack) && status != RA_FAIL) { - rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1; - switch (rp->rs_state) { - case RS_NOPEN: - /* Result is passed on as-is, simply pop the state. */ - regstack_pop(&scan); - break; - - case RS_MOPEN: - // Pop the state. Restore pointers when there is no match. - if (status == RA_NOMATCH) { - restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no], - &rex.reg_startp[rp->rs_no]); - } - regstack_pop(&scan); - break; - - case RS_ZOPEN: - /* Pop the state. Restore pointers when there is no match. */ - if (status == RA_NOMATCH) - restore_se(&rp->rs_un.sesave, ®_startzpos[rp->rs_no], - ®_startzp[rp->rs_no]); - regstack_pop(&scan); - break; - - case RS_MCLOSE: - // Pop the state. Restore pointers when there is no match. - if (status == RA_NOMATCH) { - restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no], - &rex.reg_endp[rp->rs_no]); - } - regstack_pop(&scan); - break; - - case RS_ZCLOSE: - /* Pop the state. Restore pointers when there is no match. */ - if (status == RA_NOMATCH) - restore_se(&rp->rs_un.sesave, ®_endzpos[rp->rs_no], - ®_endzp[rp->rs_no]); - regstack_pop(&scan); - break; - - case RS_BRANCH: - if (status == RA_MATCH) - /* this branch matched, use it */ - regstack_pop(&scan); - else { - if (status != RA_BREAK) { - /* After a non-matching branch: try next one. */ - reg_restore(&rp->rs_un.regsave, &backpos); - scan = rp->rs_scan; - } - if (scan == NULL || OP(scan) != BRANCH) { - /* no more branches, didn't find a match */ - status = RA_NOMATCH; - regstack_pop(&scan); - } else { - /* Prepare to try a branch. 
*/ - rp->rs_scan = regnext(scan); - reg_save(&rp->rs_un.regsave, &backpos); - scan = OPERAND(scan); - } - } - break; - - case RS_BRCPLX_MORE: - /* Pop the state. Restore pointers when there is no match. */ - if (status == RA_NOMATCH) { - reg_restore(&rp->rs_un.regsave, &backpos); - --brace_count[rp->rs_no]; /* decrement match count */ - } - regstack_pop(&scan); - break; - - case RS_BRCPLX_LONG: - /* Pop the state. Restore pointers when there is no match. */ - if (status == RA_NOMATCH) { - /* There was no match, but we did find enough matches. */ - reg_restore(&rp->rs_un.regsave, &backpos); - --brace_count[rp->rs_no]; - /* continue with the items after "\{}" */ - status = RA_CONT; - } - regstack_pop(&scan); - if (status == RA_CONT) - scan = regnext(scan); - break; - - case RS_BRCPLX_SHORT: - /* Pop the state. Restore pointers when there is no match. */ - if (status == RA_NOMATCH) - /* There was no match, try to match one more item. */ - reg_restore(&rp->rs_un.regsave, &backpos); - regstack_pop(&scan); - if (status == RA_NOMATCH) { - scan = OPERAND(scan); - status = RA_CONT; - } - break; - - case RS_NOMATCH: - /* Pop the state. If the operand matches for NOMATCH or - * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup, - * except for SUBPAT, and continue with the next item. */ - if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH)) - status = RA_NOMATCH; - else { - status = RA_CONT; - if (rp->rs_no != SUBPAT) /* zero-width */ - reg_restore(&rp->rs_un.regsave, &backpos); - } - regstack_pop(&scan); - if (status == RA_CONT) - scan = regnext(scan); - break; - - case RS_BEHIND1: - if (status == RA_NOMATCH) { - regstack_pop(&scan); - regstack.ga_len -= sizeof(regbehind_T); - } else { - /* The stuff after BEHIND/NOBEHIND matches. Now try if - * the behind part does (not) match before the current - * position in the input. This must be done at every - * position in the input and checking if the match ends at - * the current position. 
*/ - - /* save the position after the found match for next */ - reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); - - /* Start looking for a match with operand at the current - * position. Go back one character until we find the - * result, hitting the start of the line or the previous - * line (for multi-line matching). - * Set behind_pos to where the match should end, BHPOS - * will match it. Save the current value. */ - (((regbehind_T *)rp) - 1)->save_behind = behind_pos; - behind_pos = rp->rs_un.regsave; - - rp->rs_state = RS_BEHIND2; - - reg_restore(&rp->rs_un.regsave, &backpos); - scan = OPERAND(rp->rs_scan) + 4; - } - break; - - case RS_BEHIND2: - /* - * Looping for BEHIND / NOBEHIND match. - */ - if (status == RA_MATCH && reg_save_equal(&behind_pos)) { - /* found a match that ends where "next" started */ - behind_pos = (((regbehind_T *)rp) - 1)->save_behind; - if (rp->rs_no == BEHIND) - reg_restore(&(((regbehind_T *)rp) - 1)->save_after, - &backpos); - else { - /* But we didn't want a match. Need to restore the - * subexpr, because what follows matched, so they have - * been set. */ - status = RA_NOMATCH; - restore_subexpr(((regbehind_T *)rp) - 1); - } - regstack_pop(&scan); - regstack.ga_len -= sizeof(regbehind_T); - } else { - long limit; - - /* No match or a match that doesn't end where we want it: Go - * back one character. May go to previous line once. */ - no = OK; - limit = OPERAND_MIN(rp->rs_scan); - if (REG_MULTI) { - if (limit > 0 - && ((rp->rs_un.regsave.rs_u.pos.lnum - < behind_pos.rs_u.pos.lnum - ? 
(colnr_T)STRLEN(rex.line) - : behind_pos.rs_u.pos.col) - - rp->rs_un.regsave.rs_u.pos.col >= limit)) - no = FAIL; - else if (rp->rs_un.regsave.rs_u.pos.col == 0) { - if (rp->rs_un.regsave.rs_u.pos.lnum - < behind_pos.rs_u.pos.lnum - || reg_getline( - --rp->rs_un.regsave.rs_u.pos.lnum) - == NULL) - no = FAIL; - else { - reg_restore(&rp->rs_un.regsave, &backpos); - rp->rs_un.regsave.rs_u.pos.col = - (colnr_T)STRLEN(rex.line); - } - } else { - const char_u *const line = - reg_getline(rp->rs_un.regsave.rs_u.pos.lnum); - - rp->rs_un.regsave.rs_u.pos.col -= - utf_head_off(line, - line + rp->rs_un.regsave.rs_u.pos.col - 1) - + 1; - } - } else { - if (rp->rs_un.regsave.rs_u.ptr == rex.line) { - no = FAIL; - } else { - MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr); - if (limit > 0 - && (long)(behind_pos.rs_u.ptr - - rp->rs_un.regsave.rs_u.ptr) > limit) { - no = FAIL; - } - } - } - if (no == OK) { - /* Advanced, prepare for finding match again. */ - reg_restore(&rp->rs_un.regsave, &backpos); - scan = OPERAND(rp->rs_scan) + 4; - if (status == RA_MATCH) { - /* We did match, so subexpr may have been changed, - * need to restore them for the next try. */ - status = RA_NOMATCH; - restore_subexpr(((regbehind_T *)rp) - 1); - } - } else { - /* Can't advance. For NOBEHIND that's a match. */ - behind_pos = (((regbehind_T *)rp) - 1)->save_behind; - if (rp->rs_no == NOBEHIND) { - reg_restore(&(((regbehind_T *)rp) - 1)->save_after, - &backpos); - status = RA_MATCH; - } else { - /* We do want a proper match. Need to restore the - * subexpr if we had a match, because they may have - * been set. 
*/ - if (status == RA_MATCH) { - status = RA_NOMATCH; - restore_subexpr(((regbehind_T *)rp) - 1); - } - } - regstack_pop(&scan); - regstack.ga_len -= sizeof(regbehind_T); - } - } - break; - - case RS_STAR_LONG: - case RS_STAR_SHORT: - { - regstar_T *rst = ((regstar_T *)rp) - 1; - - if (status == RA_MATCH) { - regstack_pop(&scan); - regstack.ga_len -= sizeof(regstar_T); - break; - } - - /* Tried once already, restore input pointers. */ - if (status != RA_BREAK) - reg_restore(&rp->rs_un.regsave, &backpos); - - /* Repeat until we found a position where it could match. */ - for (;; ) { - if (status != RA_BREAK) { - /* Tried first position already, advance. */ - if (rp->rs_state == RS_STAR_LONG) { - /* Trying for longest match, but couldn't or - * didn't match -- back up one char. */ - if (--rst->count < rst->minval) - break; - if (rex.input == rex.line) { - // backup to last char of previous line - rex.lnum--; - rex.line = reg_getline(rex.lnum); - // Just in case regrepeat() didn't count right. - if (rex.line == NULL) { - break; - } - rex.input = rex.line + STRLEN(rex.line); - fast_breakcheck(); - } else { - MB_PTR_BACK(rex.line, rex.input); - } - } else { - /* Range is backwards, use shortest match first. - * Careful: maxval and minval are exchanged! - * Couldn't or didn't match: try advancing one - * char. */ - if (rst->count == rst->minval - || regrepeat(OPERAND(rp->rs_scan), 1L) == 0) - break; - ++rst->count; - } - if (got_int) - break; - } else - status = RA_NOMATCH; - - // If it could match, try it. - if (rst->nextb == NUL || *rex.input == rst->nextb - || *rex.input == rst->nextb_ic) { - reg_save(&rp->rs_un.regsave, &backpos); - scan = regnext(rp->rs_scan); - status = RA_CONT; - break; - } - } - if (status != RA_CONT) { - /* Failed. 
*/ - regstack_pop(&scan); - regstack.ga_len -= sizeof(regstar_T); - status = RA_NOMATCH; - } - } - break; - } - - /* If we want to continue the inner loop or didn't pop a state - * continue matching loop */ - if (status == RA_CONT || rp == (regitem_T *) - ((char *)regstack.ga_data + regstack.ga_len) - 1) - break; - } - - /* May need to continue with the inner loop, starting at "scan". */ - if (status == RA_CONT) - continue; - - /* - * If the regstack is empty or something failed we are done. - */ - if (GA_EMPTY(&regstack) || status == RA_FAIL) { - if (scan == NULL) { - /* - * We get here only if there's trouble -- normally "case END" is - * the terminating point. - */ - iemsg(_(e_re_corr)); -#ifdef REGEXP_DEBUG - printf("Premature EOL\n"); -#endif - } - return status == RA_MATCH; - } - - } /* End of loop until the regstack is empty. */ - - /* NOTREACHED */ -} - -/* - * Push an item onto the regstack. - * Returns pointer to new item. Returns NULL when out of memory. - */ -static regitem_T *regstack_push(regstate_T state, char_u *scan) -{ - regitem_T *rp; - - if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { - emsg(_(e_maxmempat)); - return NULL; - } - ga_grow(&regstack, sizeof(regitem_T)); - - rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len); - rp->rs_state = state; - rp->rs_scan = scan; - - regstack.ga_len += sizeof(regitem_T); - return rp; -} - -/* - * Pop an item from the regstack. - */ -static void regstack_pop(char_u **scan) -{ - regitem_T *rp; - - rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1; - *scan = rp->rs_scan; - - regstack.ga_len -= sizeof(regitem_T); -} - -/* - * regrepeat - repeatedly match something simple, return how many. - * Advances rex.input (and rex.lnum) to just after the matched chars.
- */ -static int -regrepeat ( - char_u *p, - long maxcount /* maximum number of matches allowed */ -) -{ - long count = 0; - char_u *opnd; - int mask; - int testval = 0; - - char_u *scan = rex.input; // Make local copy of rex.input for speed. - opnd = OPERAND(p); - switch (OP(p)) { - case ANY: - case ANY + ADD_NL: - while (count < maxcount) { - /* Matching anything means we continue until end-of-line (or - * end-of-file for ANY + ADD_NL), only limited by maxcount. */ - while (*scan != NUL && count < maxcount) { - count++; - MB_PTR_ADV(scan); - } - if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr || count == maxcount) { - break; - } - count++; // count the line-break - reg_nextline(); - scan = rex.input; - if (got_int) { - break; - } - } - break; - - case IDENT: - case IDENT + ADD_NL: - testval = 1; - FALLTHROUGH; - case SIDENT: - case SIDENT + ADD_NL: - while (count < maxcount) { - if (vim_isIDc(utf_ptr2char(scan)) && (testval || !ascii_isdigit(*scan))) { - MB_PTR_ADV(scan); - } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr) { - break; - } - reg_nextline(); - scan = rex.input; - if (got_int) { - break; - } - } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { - scan++; - } else { - break; - } - ++count; - } - break; - - case KWORD: - case KWORD + ADD_NL: - testval = 1; - FALLTHROUGH; - case SKWORD: - case SKWORD + ADD_NL: - while (count < maxcount) { - if (vim_iswordp_buf(scan, rex.reg_buf) - && (testval || !ascii_isdigit(*scan))) { - MB_PTR_ADV(scan); - } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr) { - break; - } - reg_nextline(); - scan = rex.input; - if (got_int) { - break; - } - } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { - scan++; - } else { - break; - } - count++; - } - break; - - case FNAME: - case FNAME + ADD_NL: - testval = 1; - FALLTHROUGH; - case 
SFNAME: - case SFNAME + ADD_NL: - while (count < maxcount) { - if (vim_isfilec(utf_ptr2char(scan)) && (testval || !ascii_isdigit(*scan))) { - MB_PTR_ADV(scan); - } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr) { - break; - } - reg_nextline(); - scan = rex.input; - if (got_int) { - break; - } - } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { - scan++; - } else { - break; - } - count++; - } - break; - - case PRINT: - case PRINT + ADD_NL: - testval = 1; - FALLTHROUGH; - case SPRINT: - case SPRINT + ADD_NL: - while (count < maxcount) { - if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr) { - break; - } - reg_nextline(); - scan = rex.input; - if (got_int) { - break; - } - } else if (vim_isprintc(utf_ptr2char(scan)) == 1 - && (testval || !ascii_isdigit(*scan))) { - MB_PTR_ADV(scan); - } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { - scan++; - } else { - break; - } - count++; - } - break; - - case WHITE: - case WHITE + ADD_NL: - testval = mask = RI_WHITE; -do_class: - while (count < maxcount) { - int l; - if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr) { - break; - } - reg_nextline(); - scan = rex.input; - if (got_int) { - break; - } - } else if ((l = utfc_ptr2len(scan)) > 1) { - if (testval != 0) { - break; - } - scan += l; - } else if ((class_tab[*scan] & mask) == testval) { - scan++; - } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { - scan++; - } else { - break; - } - ++count; - } - break; - - case NWHITE: - case NWHITE + ADD_NL: - mask = RI_WHITE; - goto do_class; - case DIGIT: - case DIGIT + ADD_NL: - testval = mask = RI_DIGIT; - goto do_class; - case NDIGIT: - case NDIGIT + ADD_NL: - mask = RI_DIGIT; - goto do_class; - case HEX: - case HEX + ADD_NL: - testval = mask = RI_HEX; - goto do_class; - case NHEX: - case NHEX + ADD_NL: - 
mask = RI_HEX; - goto do_class; - case OCTAL: - case OCTAL + ADD_NL: - testval = mask = RI_OCTAL; - goto do_class; - case NOCTAL: - case NOCTAL + ADD_NL: - mask = RI_OCTAL; - goto do_class; - case WORD: - case WORD + ADD_NL: - testval = mask = RI_WORD; - goto do_class; - case NWORD: - case NWORD + ADD_NL: - mask = RI_WORD; - goto do_class; - case HEAD: - case HEAD + ADD_NL: - testval = mask = RI_HEAD; - goto do_class; - case NHEAD: - case NHEAD + ADD_NL: - mask = RI_HEAD; - goto do_class; - case ALPHA: - case ALPHA + ADD_NL: - testval = mask = RI_ALPHA; - goto do_class; - case NALPHA: - case NALPHA + ADD_NL: - mask = RI_ALPHA; - goto do_class; - case LOWER: - case LOWER + ADD_NL: - testval = mask = RI_LOWER; - goto do_class; - case NLOWER: - case NLOWER + ADD_NL: - mask = RI_LOWER; - goto do_class; - case UPPER: - case UPPER + ADD_NL: - testval = mask = RI_UPPER; - goto do_class; - case NUPPER: - case NUPPER + ADD_NL: - mask = RI_UPPER; - goto do_class; - - case EXACTLY: - { - int cu, cl; - - // This doesn't do a multi-byte character, because a MULTIBYTECODE - // would have been used for it. It does handle single-byte - // characters, such as latin1. - if (rex.reg_ic) { - cu = mb_toupper(*opnd); - cl = mb_tolower(*opnd); - while (count < maxcount && (*scan == cu || *scan == cl)) { - count++; - scan++; - } - } else { - cu = *opnd; - while (count < maxcount && *scan == cu) { - count++; - scan++; - } - } - break; - } - - case MULTIBYTECODE: - { - int i, len, cf = 0; - - /* Safety check (just in case 'encoding' was changed since - * compiling the program). 
*/ - if ((len = utfc_ptr2len(opnd)) > 1) { - if (rex.reg_ic) { - cf = utf_fold(utf_ptr2char(opnd)); - } - while (count < maxcount && utfc_ptr2len(scan) >= len) { - for (i = 0; i < len; i++) { - if (opnd[i] != scan[i]) { - break; - } - } - if (i < len && (!rex.reg_ic - || utf_fold(utf_ptr2char(scan)) != cf)) { - break; - } - scan += len; - ++count; - } - } - } - break; - - case ANYOF: - case ANYOF + ADD_NL: - testval = 1; - FALLTHROUGH; - - case ANYBUT: - case ANYBUT + ADD_NL: - while (count < maxcount) { - int len; - if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline - || rex.reg_line_lbr) { - break; - } - reg_nextline(); - scan = rex.input; - if (got_int) { - break; - } - } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { - scan++; - } else if ((len = utfc_ptr2len(scan)) > 1) { - if ((cstrchr(opnd, utf_ptr2char(scan)) == NULL) == testval) { - break; - } - scan += len; - } else { - if ((cstrchr(opnd, *scan) == NULL) == testval) - break; - ++scan; - } - ++count; - } - break; - - case NEWL: - while (count < maxcount - && ((*scan == NUL && rex.lnum <= rex.reg_maxline && !rex.reg_line_lbr - && REG_MULTI) || (*scan == '\n' && rex.reg_line_lbr))) { - count++; - if (rex.reg_line_lbr) { - ADVANCE_REGINPUT(); - } else { - reg_nextline(); - } - scan = rex.input; - if (got_int) { - break; - } - } - break; - - default: // Oh dear. Called inappropriately. - iemsg(_(e_re_corr)); -#ifdef REGEXP_DEBUG - printf("Called regrepeat with op code %d\n", OP(p)); -#endif - break; - } - - rex.input = scan; - - return (int)count; -} - -/* - * regnext - dig the "next" pointer out of a node - * Returns NULL when calculating size, when there is no next item and when - * there is an error. 
- */ -static char_u *regnext(char_u *p) - FUNC_ATTR_NONNULL_ALL -{ - int offset; - - if (p == JUST_CALC_SIZE || reg_toolong) - return NULL; - - offset = NEXT(p); - if (offset == 0) - return NULL; - - if (OP(p) == BACK) - return p - offset; - else - return p + offset; -} - /* * Check the regexp program for its magic number. * Return true if it's wrong. @@ -5659,7 +1212,7 @@ static void cleanup_zsubexpr(void) { if (rex.need_clear_zsubexpr) { if (REG_MULTI) { - /* Use 0xff to set lnum to -1 */ + // Use 0xff to set lnum to -1 memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP); memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP); } else { @@ -5670,45 +1223,6 @@ static void cleanup_zsubexpr(void) } } -// Save the current subexpr to "bp", so that they can be restored -// later by restore_subexpr(). -static void save_subexpr(regbehind_T *bp) - FUNC_ATTR_NONNULL_ALL -{ - // When "rex.need_clear_subexpr" is set we don't need to save the values, only - // remember that this flag needs to be set again when restoring. - bp->save_need_clear_subexpr = rex.need_clear_subexpr; - if (!rex.need_clear_subexpr) { - for (int i = 0; i < NSUBEXP; i++) { - if (REG_MULTI) { - bp->save_start[i].se_u.pos = rex.reg_startpos[i]; - bp->save_end[i].se_u.pos = rex.reg_endpos[i]; - } else { - bp->save_start[i].se_u.ptr = rex.reg_startp[i]; - bp->save_end[i].se_u.ptr = rex.reg_endp[i]; - } - } - } -} - -// Restore the subexpr from "bp". -static void restore_subexpr(regbehind_T *bp) - FUNC_ATTR_NONNULL_ALL -{ - // Only need to restore saved values when they are not to be cleared. 
- rex.need_clear_subexpr = bp->save_need_clear_subexpr; - if (!rex.need_clear_subexpr) { - for (int i = 0; i < NSUBEXP; i++) { - if (REG_MULTI) { - rex.reg_startpos[i] = bp->save_start[i].se_u.pos; - rex.reg_endpos[i] = bp->save_end[i].se_u.pos; - } else { - rex.reg_startp[i] = bp->save_start[i].se_u.ptr; - rex.reg_endp[i] = bp->save_end[i].se_u.ptr; - } - } - } -} // Advance rex.lnum, rex.line and rex.input to the next line. static void reg_nextline(void) @@ -5718,81 +1232,6 @@ static void reg_nextline(void) fast_breakcheck(); } -// Save the input line and position in a regsave_T. -static void reg_save(regsave_T *save, garray_T *gap) - FUNC_ATTR_NONNULL_ALL -{ - if (REG_MULTI) { - save->rs_u.pos.col = (colnr_T)(rex.input - rex.line); - save->rs_u.pos.lnum = rex.lnum; - } else { - save->rs_u.ptr = rex.input; - } - save->rs_len = gap->ga_len; -} - -// Restore the input line and position from a regsave_T. -static void reg_restore(regsave_T *save, garray_T *gap) - FUNC_ATTR_NONNULL_ALL -{ - if (REG_MULTI) { - if (rex.lnum != save->rs_u.pos.lnum) { - // only call reg_getline() when the line number changed to save - // a bit of time - rex.lnum = save->rs_u.pos.lnum; - rex.line = reg_getline(rex.lnum); - } - rex.input = rex.line + save->rs_u.pos.col; - } else { - rex.input = save->rs_u.ptr; - } - gap->ga_len = save->rs_len; -} - -// Return true if current position is equal to saved position. -static bool reg_save_equal(const regsave_T *save) - FUNC_ATTR_NONNULL_ALL -{ - if (REG_MULTI) { - return rex.lnum == save->rs_u.pos.lnum - && rex.input == rex.line + save->rs_u.pos.col; - } - return rex.input == save->rs_u.ptr; -} - -/* - * Tentatively set the sub-expression start to the current position (after - * calling regmatch() they will have changed). Need to save the existing - * values for when there is no match. - * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), - * depending on REG_MULTI. 
- */ -static void save_se_multi(save_se_T *savep, lpos_T *posp) -{ - savep->se_u.pos = *posp; - posp->lnum = rex.lnum; - posp->col = (colnr_T)(rex.input - rex.line); -} - -static void save_se_one(save_se_T *savep, char_u **pp) -{ - savep->se_u.ptr = *pp; - *pp = rex.input; -} - -/* - * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. - */ -static int re_num_cmp(uint32_t val, char_u *scan) -{ - uint32_t n = (uint32_t)OPERAND_MIN(scan); - - if (OPERAND_CMP(scan) == '>') - return val > n; - if (OPERAND_CMP(scan) == '<') - return val < n; - return val == n; -} /* * Check whether a backreference matches. @@ -5800,7 +1239,12 @@ static int re_num_cmp(uint32_t val, char_u *scan) * If "bytelen" is not NULL, it is set to the byte length of the match in the * last line. */ -static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen) +static int match_with_backref( + linenr_T start_lnum, + colnr_T start_col, + linenr_T end_lnum, + colnr_T end_col, + int *bytelen) { linenr_T clnum = start_lnum; colnr_T ccol = start_col; @@ -5815,7 +1259,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e if (rex.line != reg_tofree) { len = (int)STRLEN(rex.line); if (reg_tofree == NULL || len >= (int)reg_tofreelen) { - len += 50; /* get some extra */ + len += 50; // get some extra xfree(reg_tofree); reg_tofree = xmalloc(len); reg_tofreelen = len; @@ -5825,7 +1269,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e rex.line = reg_tofree; } - /* Get the line to compare with. */ + // Get the line to compare with. p = reg_getline(clnum); assert(p); @@ -5847,7 +1291,7 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e return RA_NOMATCH; // text too short } - /* Advance to next line. */ + // Advance to next line. 
reg_nextline(); if (bytelen != NULL) *bytelen = 0; @@ -5862,520 +1306,72 @@ static int match_with_backref(linenr_T start_lnum, colnr_T start_col, linenr_T e return RA_MATCH; } -#ifdef BT_REGEXP_DUMP - -/* - * regdump - dump a regexp onto stdout in vaguely comprehensible form - */ -static void regdump(char_u *pattern, bt_regprog_T *r) -{ - char_u *s; - int op = EXACTLY; /* Arbitrary non-END op. */ - char_u *next; - char_u *end = NULL; - FILE *f; - -#ifdef BT_REGEXP_LOG - f = fopen("bt_regexp_log.log", "a"); -#else - f = stdout; -#endif - if (f == NULL) - return; - fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", - pattern); - - s = r->program + 1; - /* - * Loop until we find the END that isn't before a referred next (an END - * can also appear in a NOMATCH operand). - */ - while (op != END || s <= end) { - op = OP(s); - fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */ - next = regnext(s); - if (next == NULL) /* Next ptr. */ - fprintf(f, "(0)"); - else - fprintf(f, "(%d)", (int)((s - r->program) + (next - s))); - if (end < next) - end = next; - if (op == BRACE_LIMITS) { - /* Two ints */ - fprintf(f, " minval %" PRId64 ", maxval %" PRId64, - (int64_t)OPERAND_MIN(s), (int64_t)OPERAND_MAX(s)); - s += 8; - } else if (op == BEHIND || op == NOBEHIND) { - /* one int */ - fprintf(f, " count %" PRId64, (int64_t)OPERAND_MIN(s)); - s += 4; - } else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL) { - // one int plus comparator - fprintf(f, " count %" PRId64, (int64_t)OPERAND_MIN(s)); - s += 5; - } - s += 3; - if (op == ANYOF || op == ANYOF + ADD_NL - || op == ANYBUT || op == ANYBUT + ADD_NL - || op == EXACTLY) { - /* Literal string, where present. */ - fprintf(f, "\nxxxxxxxxx\n"); - while (*s != NUL) - fprintf(f, "%c", *s++); - fprintf(f, "\nxxxxxxxxx\n"); - s++; - } - fprintf(f, "\r\n"); - } - - /* Header fields of interest. */ - if (r->regstart != NUL) - fprintf(f, "start `%s' 0x%x; ", r->regstart < 256 - ? 
(char *)transchar(r->regstart) - : "multibyte", r->regstart); - if (r->reganch) - fprintf(f, "anchored; "); - if (r->regmust != NULL) - fprintf(f, "must have \"%s\"", r->regmust); - fprintf(f, "\r\n"); - -#ifdef BT_REGEXP_LOG - fclose(f); -#endif -} -#endif /* BT_REGEXP_DUMP */ - -#ifdef REGEXP_DEBUG -/* - * regprop - printable representation of opcode - */ -static char_u *regprop(char_u *op) +/// Used in a place where no * or \+ can follow. +static bool re_mult_next(char *what) { - char *p; - static char buf[50]; - - STRCPY(buf, ":"); - - switch ((int) OP(op)) { - case BOL: - p = "BOL"; - break; - case EOL: - p = "EOL"; - break; - case RE_BOF: - p = "BOF"; - break; - case RE_EOF: - p = "EOF"; - break; - case CURSOR: - p = "CURSOR"; - break; - case RE_VISUAL: - p = "RE_VISUAL"; - break; - case RE_LNUM: - p = "RE_LNUM"; - break; - case RE_MARK: - p = "RE_MARK"; - break; - case RE_COL: - p = "RE_COL"; - break; - case RE_VCOL: - p = "RE_VCOL"; - break; - case BOW: - p = "BOW"; - break; - case EOW: - p = "EOW"; - break; - case ANY: - p = "ANY"; - break; - case ANY + ADD_NL: - p = "ANY+NL"; - break; - case ANYOF: - p = "ANYOF"; - break; - case ANYOF + ADD_NL: - p = "ANYOF+NL"; - break; - case ANYBUT: - p = "ANYBUT"; - break; - case ANYBUT + ADD_NL: - p = "ANYBUT+NL"; - break; - case IDENT: - p = "IDENT"; - break; - case IDENT + ADD_NL: - p = "IDENT+NL"; - break; - case SIDENT: - p = "SIDENT"; - break; - case SIDENT + ADD_NL: - p = "SIDENT+NL"; - break; - case KWORD: - p = "KWORD"; - break; - case KWORD + ADD_NL: - p = "KWORD+NL"; - break; - case SKWORD: - p = "SKWORD"; - break; - case SKWORD + ADD_NL: - p = "SKWORD+NL"; - break; - case FNAME: - p = "FNAME"; - break; - case FNAME + ADD_NL: - p = "FNAME+NL"; - break; - case SFNAME: - p = "SFNAME"; - break; - case SFNAME + ADD_NL: - p = "SFNAME+NL"; - break; - case PRINT: - p = "PRINT"; - break; - case PRINT + ADD_NL: - p = "PRINT+NL"; - break; - case SPRINT: - p = "SPRINT"; - break; - case SPRINT + ADD_NL: - p = 
"SPRINT+NL"; - break; - case WHITE: - p = "WHITE"; - break; - case WHITE + ADD_NL: - p = "WHITE+NL"; - break; - case NWHITE: - p = "NWHITE"; - break; - case NWHITE + ADD_NL: - p = "NWHITE+NL"; - break; - case DIGIT: - p = "DIGIT"; - break; - case DIGIT + ADD_NL: - p = "DIGIT+NL"; - break; - case NDIGIT: - p = "NDIGIT"; - break; - case NDIGIT + ADD_NL: - p = "NDIGIT+NL"; - break; - case HEX: - p = "HEX"; - break; - case HEX + ADD_NL: - p = "HEX+NL"; - break; - case NHEX: - p = "NHEX"; - break; - case NHEX + ADD_NL: - p = "NHEX+NL"; - break; - case OCTAL: - p = "OCTAL"; - break; - case OCTAL + ADD_NL: - p = "OCTAL+NL"; - break; - case NOCTAL: - p = "NOCTAL"; - break; - case NOCTAL + ADD_NL: - p = "NOCTAL+NL"; - break; - case WORD: - p = "WORD"; - break; - case WORD + ADD_NL: - p = "WORD+NL"; - break; - case NWORD: - p = "NWORD"; - break; - case NWORD + ADD_NL: - p = "NWORD+NL"; - break; - case HEAD: - p = "HEAD"; - break; - case HEAD + ADD_NL: - p = "HEAD+NL"; - break; - case NHEAD: - p = "NHEAD"; - break; - case NHEAD + ADD_NL: - p = "NHEAD+NL"; - break; - case ALPHA: - p = "ALPHA"; - break; - case ALPHA + ADD_NL: - p = "ALPHA+NL"; - break; - case NALPHA: - p = "NALPHA"; - break; - case NALPHA + ADD_NL: - p = "NALPHA+NL"; - break; - case LOWER: - p = "LOWER"; - break; - case LOWER + ADD_NL: - p = "LOWER+NL"; - break; - case NLOWER: - p = "NLOWER"; - break; - case NLOWER + ADD_NL: - p = "NLOWER+NL"; - break; - case UPPER: - p = "UPPER"; - break; - case UPPER + ADD_NL: - p = "UPPER+NL"; - break; - case NUPPER: - p = "NUPPER"; - break; - case NUPPER + ADD_NL: - p = "NUPPER+NL"; - break; - case BRANCH: - p = "BRANCH"; - break; - case EXACTLY: - p = "EXACTLY"; - break; - case NOTHING: - p = "NOTHING"; - break; - case BACK: - p = "BACK"; - break; - case END: - p = "END"; - break; - case MOPEN + 0: - p = "MATCH START"; - break; - case MOPEN + 1: - case MOPEN + 2: - case MOPEN + 3: - case MOPEN + 4: - case MOPEN + 5: - case MOPEN + 6: - case MOPEN + 7: - case MOPEN + 8: - 
case MOPEN + 9: - sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN); - p = NULL; - break; - case MCLOSE + 0: - p = "MATCH END"; - break; - case MCLOSE + 1: - case MCLOSE + 2: - case MCLOSE + 3: - case MCLOSE + 4: - case MCLOSE + 5: - case MCLOSE + 6: - case MCLOSE + 7: - case MCLOSE + 8: - case MCLOSE + 9: - sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE); - p = NULL; - break; - case BACKREF + 1: - case BACKREF + 2: - case BACKREF + 3: - case BACKREF + 4: - case BACKREF + 5: - case BACKREF + 6: - case BACKREF + 7: - case BACKREF + 8: - case BACKREF + 9: - sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF); - p = NULL; - break; - case NOPEN: - p = "NOPEN"; - break; - case NCLOSE: - p = "NCLOSE"; - break; - case ZOPEN + 1: - case ZOPEN + 2: - case ZOPEN + 3: - case ZOPEN + 4: - case ZOPEN + 5: - case ZOPEN + 6: - case ZOPEN + 7: - case ZOPEN + 8: - case ZOPEN + 9: - sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN); - p = NULL; - break; - case ZCLOSE + 1: - case ZCLOSE + 2: - case ZCLOSE + 3: - case ZCLOSE + 4: - case ZCLOSE + 5: - case ZCLOSE + 6: - case ZCLOSE + 7: - case ZCLOSE + 8: - case ZCLOSE + 9: - sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE); - p = NULL; - break; - case ZREF + 1: - case ZREF + 2: - case ZREF + 3: - case ZREF + 4: - case ZREF + 5: - case ZREF + 6: - case ZREF + 7: - case ZREF + 8: - case ZREF + 9: - sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF); - p = NULL; - break; - case STAR: - p = "STAR"; - break; - case PLUS: - p = "PLUS"; - break; - case NOMATCH: - p = "NOMATCH"; - break; - case MATCH: - p = "MATCH"; - break; - case BEHIND: - p = "BEHIND"; - break; - case NOBEHIND: - p = "NOBEHIND"; - break; - case SUBPAT: - p = "SUBPAT"; - break; - case BRACE_LIMITS: - p = "BRACE_LIMITS"; - break; - case BRACE_SIMPLE: - p = "BRACE_SIMPLE"; - break; - case BRACE_COMPLEX + 0: - case BRACE_COMPLEX + 1: - case BRACE_COMPLEX + 2: - case BRACE_COMPLEX + 3: - case BRACE_COMPLEX + 4: - case BRACE_COMPLEX + 5: - case 
BRACE_COMPLEX + 6: - case BRACE_COMPLEX + 7: - case BRACE_COMPLEX + 8: - case BRACE_COMPLEX + 9: - sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX); - p = NULL; - break; - case MULTIBYTECODE: - p = "MULTIBYTECODE"; - break; - case NEWL: - p = "NEWL"; - break; - default: - sprintf(buf + STRLEN(buf), "corrupt %d", OP(op)); - p = NULL; - break; + if (re_multi_type(peekchr()) == MULTI_MULT) { + semsg(_("E888: (NFA regexp) cannot repeat %s"), what); + rc_did_emsg = true; + return false; } - if (p != NULL) - STRCAT(buf, p); - return (char_u *)buf; + return true; } -#endif /* REGEXP_DEBUG */ - +typedef struct { + int a, b, c; +} decomp_T; -/* 0xfb20 - 0xfb4f */ +// 0xfb20 - 0xfb4f static decomp_T decomp_table[0xfb4f-0xfb20+1] = { - {0x5e2,0,0}, /* 0xfb20 alt ayin */ - {0x5d0,0,0}, /* 0xfb21 alt alef */ - {0x5d3,0,0}, /* 0xfb22 alt dalet */ - {0x5d4,0,0}, /* 0xfb23 alt he */ - {0x5db,0,0}, /* 0xfb24 alt kaf */ - {0x5dc,0,0}, /* 0xfb25 alt lamed */ - {0x5dd,0,0}, /* 0xfb26 alt mem-sofit */ - {0x5e8,0,0}, /* 0xfb27 alt resh */ - {0x5ea,0,0}, /* 0xfb28 alt tav */ - {'+', 0, 0}, /* 0xfb29 alt plus */ - {0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */ - {0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */ - {0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */ - {0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */ - {0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */ - {0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */ - {0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */ - {0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */ - {0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */ - {0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */ - {0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */ - {0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */ - {0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */ - {0xfb37, 0, 0}, /* 0xfb37 -- */ - {0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */ - {0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */ - {0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */ - {0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */ - {0x5dc, 0x5bc, 0}, /* 
0xfb3c lamed+dagesh */ - {0xfb3d, 0, 0}, /* 0xfb3d -- */ - {0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */ - {0xfb3f, 0, 0}, /* 0xfb3f -- */ - {0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */ - {0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */ - {0xfb42, 0, 0}, /* 0xfb42 -- */ - {0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */ - {0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */ - {0xfb45, 0, 0}, /* 0xfb45 -- */ - {0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */ - {0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */ - {0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */ - {0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */ - {0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */ - {0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */ - {0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */ - {0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */ - {0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */ - {0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */ + { 0x5e2, 0, 0 }, // 0xfb20 alt ayin + { 0x5d0, 0, 0 }, // 0xfb21 alt alef + { 0x5d3, 0, 0 }, // 0xfb22 alt dalet + { 0x5d4, 0, 0 }, // 0xfb23 alt he + { 0x5db, 0, 0 }, // 0xfb24 alt kaf + { 0x5dc, 0, 0 }, // 0xfb25 alt lamed + { 0x5dd, 0, 0 }, // 0xfb26 alt mem-sofit + { 0x5e8, 0, 0 }, // 0xfb27 alt resh + { 0x5ea, 0, 0 }, // 0xfb28 alt tav + { '+', 0, 0 }, // 0xfb29 alt plus + { 0x5e9, 0x5c1, 0 }, // 0xfb2a shin+shin-dot + { 0x5e9, 0x5c2, 0 }, // 0xfb2b shin+sin-dot + { 0x5e9, 0x5c1, 0x5bc }, // 0xfb2c shin+shin-dot+dagesh + { 0x5e9, 0x5c2, 0x5bc }, // 0xfb2d shin+sin-dot+dagesh + { 0x5d0, 0x5b7, 0 }, // 0xfb2e alef+patah + { 0x5d0, 0x5b8, 0 }, // 0xfb2f alef+qamats + { 0x5d0, 0x5b4, 0 }, // 0xfb30 alef+hiriq + { 0x5d1, 0x5bc, 0 }, // 0xfb31 bet+dagesh + { 0x5d2, 0x5bc, 0 }, // 0xfb32 gimel+dagesh + { 0x5d3, 0x5bc, 0 }, // 0xfb33 dalet+dagesh + { 0x5d4, 0x5bc, 0 }, // 0xfb34 he+dagesh + { 0x5d5, 0x5bc, 0 }, // 0xfb35 vav+dagesh + { 0x5d6, 0x5bc, 0 }, // 0xfb36 zayin+dagesh + { 0xfb37, 0, 0 }, // 0xfb37 -- UNUSED + { 0x5d8, 0x5bc, 0 }, // 0xfb38 tet+dagesh + { 0x5d9, 0x5bc, 0 }, // 0xfb39 yud+dagesh + { 0x5da, 0x5bc, 0 }, // 0xfb3a kaf 
sofit+dagesh + { 0x5db, 0x5bc, 0 }, // 0xfb3b kaf+dagesh + { 0x5dc, 0x5bc, 0 }, // 0xfb3c lamed+dagesh + { 0xfb3d, 0, 0 }, // 0xfb3d -- UNUSED + { 0x5de, 0x5bc, 0 }, // 0xfb3e mem+dagesh + { 0xfb3f, 0, 0 }, // 0xfb3f -- UNUSED + { 0x5e0, 0x5bc, 0 }, // 0xfb40 nun+dagesh + { 0x5e1, 0x5bc, 0 }, // 0xfb41 samech+dagesh + { 0xfb42, 0, 0 }, // 0xfb42 -- UNUSED + { 0x5e3, 0x5bc, 0 }, // 0xfb43 pe sofit+dagesh + { 0x5e4, 0x5bc, 0 }, // 0xfb44 pe+dagesh + { 0xfb45, 0, 0 }, // 0xfb45 -- UNUSED + { 0x5e6, 0x5bc, 0 }, // 0xfb46 tsadi+dagesh + { 0x5e7, 0x5bc, 0 }, // 0xfb47 qof+dagesh + { 0x5e8, 0x5bc, 0 }, // 0xfb48 resh+dagesh + { 0x5e9, 0x5bc, 0 }, // 0xfb49 shin+dagesh + { 0x5ea, 0x5bc, 0 }, // 0xfb4a tav+dagesh + { 0x5d5, 0x5b9, 0 }, // 0xfb4b vav+holam + { 0x5d1, 0x5bf, 0 }, // 0xfb4c bet+rafe + { 0x5db, 0x5bf, 0 }, // 0xfb4d kaf+rafe + { 0x5e4, 0x5bf, 0 }, // 0xfb4e pe+rafe + { 0x5d0, 0x5dc, 0 } // 0xfb4f alef-lamed }; static void mb_decompose(int c, int *c1, int *c2, int *c3) @@ -6444,12 +1440,53 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n) return result; } +/// Wrapper around strchr which accounts for case-insensitive searches and +/// non-ASCII characters. +/// +/// This function is used a lot for simple searches, keep it fast! +/// +/// @param s string to search +/// @param c character to find in @a s +/// +/// @return NULL if no match, otherwise pointer to the position in @a s +static inline char_u *cstrchr(const char_u *const s, const int c) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL + FUNC_ATTR_ALWAYS_INLINE +{ + if (!rex.reg_ic) { + return vim_strchr(s, c); + } + + // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is + // expected to be highly optimized. 
+ if (c > 0x80) { + const int folded_c = utf_fold(c); + for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) { + if (utf_fold(utf_ptr2char(p)) == folded_c) { + return (char_u *)p; + } + } + return NULL; + } + + int cc; + if (ASCII_ISUPPER(c)) { + cc = TOLOWER_ASC(c); + } else if (ASCII_ISLOWER(c)) { + cc = TOUPPER_ASC(c); + } else { + return vim_strchr(s, c); + } + + char tofind[] = { (char)c, (char)cc, NUL }; + return (char_u *)strpbrk((const char *)s, tofind); +} + //////////////////////////////////////////////////////////////// // regsub stuff // //////////////////////////////////////////////////////////////// -/* This stuff below really confuses cc on an SGI -- webb */ - +// This stuff below really confuses cc on an SGI -- webb static fptr_T do_upper(int *d, int c) @@ -6503,13 +1540,13 @@ char_u *regtilde(char_u *source, int magic) for (p = newsub; *p; ++p) { if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) { if (reg_prev_sub != NULL) { - /* length = len(newsub) - 1 + len(prev_sub) + 1 */ + // length = len(newsub) - 1 + len(prev_sub) + 1 prevlen = (int)STRLEN(reg_prev_sub); tmpsub = xmalloc(STRLEN(newsub) + prevlen); - /* copy prefix */ - len = (int)(p - newsub); /* not including ~ */ + // copy prefix + len = (int)(p - newsub); // not including ~ memmove(tmpsub, newsub, (size_t)len); - /* interpret tilde */ + // interpret tilde memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen); // copy postfix if (!magic) { @@ -6517,15 +1554,17 @@ char_u *regtilde(char_u *source, int magic) } STRCPY(tmpsub + len + prevlen, p + 1); - if (newsub != source) /* already allocated newsub */ + if (newsub != source) { // already allocated newsub xfree(newsub); + } newsub = tmpsub; p = newsub + len + prevlen; - } else if (magic) - STRMOVE(p, p + 1); /* remove '~' */ - else - STRMOVE(p, p + 2); /* remove '\~' */ - --p; + } else if (magic) { + STRMOVE(p, p + 1); // remove '~' + } else { + STRMOVE(p, p + 2); // remove '\~' + } + p--; } else { if (*p == '\\' 
&& p[1]) { // skip escaped characters p++; @@ -6644,7 +1683,8 @@ int vim_regsub(regmatch_T *rmp, char_u *source, typval_T *expr, char_u *dest, return result; } -int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int copy, int magic, int backslash) +int vim_regsub_multi(regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, + int copy, int magic, int backslash) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; @@ -6682,8 +1722,8 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int no = -1; fptr_T func_all = (fptr_T)NULL; fptr_T func_one = (fptr_T)NULL; - linenr_T clnum = 0; /* init for GCC */ - int len = 0; /* init for GCC */ + linenr_T clnum = 0; // init for GCC + int len = 0; // init for GCC static char_u *eval_result = NULL; // We need to keep track of how many backslashes we escape, so that the byte @@ -6795,7 +1835,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } } if (had_backslash && backslash) { - /* Backslashes will be consumed, need to double them. */ + // Backslashes will be consumed, need to double them. s = vim_strsave_escaped(eval_result, (char_u *)"\\"); xfree(eval_result); eval_result = s; @@ -6835,9 +1875,9 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } } } - if (no < 0) { /* Ordinary character. */ + if (no < 0) { // Ordinary character. if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL) { - /* Copy a special key as-is. */ + // Copy a special key as-is. 
if (copy) { *dst++ = c; *dst++ = *src++; @@ -6965,14 +2005,15 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } else { c = utf_ptr2char(s); - if (func_one != (fptr_T)NULL) - /* Turbo C complains without the typecast */ + if (func_one != (fptr_T)NULL) { + // Turbo C complains without the typecast func_one = (fptr_T)(func_one(&cc, c)); - else if (func_all != (fptr_T)NULL) - /* Turbo C complains without the typecast */ + } else if (func_all != (fptr_T)NULL) { + // Turbo C complains without the typecast func_all = (fptr_T)(func_all(&cc, c)); - else /* just copy */ + } else { // just copy cc = c; + } { int l; @@ -7167,6 +2208,12 @@ list_T *reg_submatch_list(int no) return list; } +// XXX Do not allow headers generator to catch definitions from regexp_nfa.c +#ifndef DO_NOT_DEFINE_EMPTY_ATTRIBUTES +# include "nvim/regexp_bt.c" +# include "nvim/regexp_nfa.c" +#endif + static regengine_T bt_regengine = { bt_regcomp, @@ -7176,12 +2223,6 @@ static regengine_T bt_regengine = (char_u *)"" }; - -// XXX Do not allow headers generator to catch definitions from regexp_nfa.c -#ifndef DO_NOT_DEFINE_EMPTY_ATTRIBUTES -# include "nvim/regexp_nfa.c" -#endif - static regengine_T nfa_regengine = { nfa_regcomp, @@ -7217,7 +2258,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) regexp_engine = p_re; - /* Check for prefix "\%#=", that sets the regexp engine */ + // Check for prefix "\%#=", that sets the regexp engine if (STRNCMP(expr, "\\%#=", 4) == 0) { int newengine = expr[4] - '0'; @@ -7302,6 +2343,18 @@ void vim_regfree(regprog_T *prog) prog->engine->regfree(prog); } + +#if defined(EXITFREE) +void free_regexp_stuff(void) +{ + ga_clear(&regstack); + ga_clear(&backpos); + xfree(reg_tofree); + xfree(reg_prev_sub); +} + +#endif + static void report_re_switch(char_u *pat) { if (p_verbose > 0) { @@ -7324,8 +2377,7 @@ static void report_re_switch(char_u *pat) /// @param nl /// /// @return true if there is a match, false if not.
-static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, - bool nl) +static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, bool nl) { regexec_T rex_save; bool rex_in_use_save = rex_in_use; @@ -7382,8 +2434,7 @@ static bool vim_regexec_string(regmatch_T *rmp, char_u *line, colnr_T col, // Note: "*prog" may be freed and changed. // Return true if there is a match, false if not. -bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, - colnr_T col) +bool vim_regexec_prog(regprog_T **prog, bool ignore_case, char_u *line, colnr_T col) { regmatch_T regmatch = { .regprog = *prog, .rm_ic = ignore_case }; bool r = vim_regexec_string(&regmatch, line, col, false); @@ -7415,13 +2466,12 @@ bool vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) /// match otherwise. long vim_regexec_multi( regmmatch_T *rmp, - win_T *win, // window in which to search or NULL - buf_T *buf, // buffer in which to search - linenr_T lnum, // nr of line to start looking for match - colnr_T col, // column to start looking for match - proftime_T *tm, // timeout limit or NULL - int *timed_out // flag is set when timeout limit reached -) + win_T *win, // window in which to search or NULL + buf_T *buf, // buffer in which to search + linenr_T lnum, // nr of line to start looking for match + colnr_T col, // column to start looking for match + proftime_T *tm, // timeout limit or NULL + int *timed_out) // flag is set when timeout limit reached FUNC_ATTR_NONNULL_ARG(1) { regexec_T rex_save; diff --git a/src/nvim/regexp.h b/src/nvim/regexp.h index 9527afed58..6edbcf8a31 100644 --- a/src/nvim/regexp.h +++ b/src/nvim/regexp.h @@ -19,6 +19,7 @@ // regexp.c #ifdef INCLUDE_GENERATED_DECLARATIONS # include "regexp.h.generated.h" +# include "regexp_bt.h.generated.h" #endif #endif // NVIM_REGEXP_H diff --git a/src/nvim/regexp_bt.c b/src/nvim/regexp_bt.c new file mode 100644 index 0000000000..f6804e8415 --- /dev/null +++ b/src/nvim/regexp_bt.c @@ -0,0 +1,4951
@@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + +// uncrustify:off + +/* + * + * Backtracking regular expression implementation. + * + * This file is included in "regexp.c". + * + * NOTICE: + * + * This is NOT the original regular expression code as written by Henry + * Spencer. This code has been modified specifically for use with the VIM + * editor, and should not be used separately from Vim. If you want a good + * regular expression library, get the original code. The copyright notice + * that follows is from the original. + * + * END NOTICE + * + * Copyright (c) 1986 by University of Toronto. + * Written by Henry Spencer. Not derived from licensed software. + * + * Permission is granted to anyone to use this software for any + * purpose on any computer system, and to redistribute it freely, + * subject to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of + * this software, no matter how awful, even if they arise + * from defects in it. + * + * 2. The origin of this software must not be misrepresented, either + * by explicit claim or by omission. + * + * 3. Altered versions must be plainly marked as such, and must not + * be misrepresented as being the original software. + * + * Beware that some of this code is subtly aware of the way operator + * precedence is structured in regular expressions. Serious changes in + * regular-expression syntax might require a total rethink. + * + * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert + * Webb, Ciaran McCreesh and Bram Moolenaar. + * Named character class support added by Walter Briscoe (1998 Jul 01) + */ + + +/* + * The "internal use only" fields in regexp_defs.h are present to pass info from + * compile to execute that permits the execute phase to run lots faster on + * simple cases. 
They are: + * + * regstart char that must begin a match; NUL if none obvious; Can be a + * multi-byte character. + * reganch is the match anchored (at beginning-of-line only)? + * regmust string (pointer into program) that match must include, or NULL + * regmlen length of regmust string + * regflags RF_ values or'ed together + * + * Regstart and reganch permit very fast decisions on suitable starting points + * for a match, cutting down the work a lot. Regmust permits fast rejection + * of lines that cannot possibly match. The regmust tests are costly enough + * that vim_regcomp() supplies a regmust only if the r.e. contains something + * potentially expensive (at present, the only such thing detected is * or + + * at the start of the r.e., which can involve a lot of backup). Regmlen is + * supplied because the test in vim_regexec() needs it and vim_regcomp() is + * computing it anyway. + */ + +/* + * Structure for regexp "program". This is essentially a linear encoding + * of a nondeterministic finite-state machine (aka syntax charts or + * "railroad normal form" in parsing technology). Each node is an opcode + * plus a "next" pointer, possibly plus an operand. "Next" pointers of + * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next" + * pointer with a BRANCH on both ends of it is connecting two alternatives. + * (Here we have one of the subtle syntax dependencies: an individual BRANCH + * (as opposed to a collection of them) is never concatenated with anything + * because of operator precedence). The "next" pointer of a BRACE_COMPLEX + * node points to the node after the stuff to be repeated. + * The operand of some types of node is a literal string; for others, it is a + * node leading into a sub-FSM. In particular, the operand of a BRANCH node + * is the first node of the branch. + * (NB this is *not* a tree structure: the tail of the branch connects to the + * thing following the set of BRANCHes.) 
+ * + * pattern is coded like: + * + * +-----------------+ + * | V + * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END + * | ^ | ^ + * +------+ +----------+ + * + * + * +------------------+ + * V | + * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END + * | | ^ ^ + * | +---------------+ | + * +---------------------------------------------+ + * + * + * +----------------------+ + * V | + * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END + * | | ^ ^ + * | +-----------+ | + * +--------------------------------------------------+ + * + * + * +-------------------------+ + * V | + * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END + * | | ^ + * | +----------------+ + * +-----------------------------------------------+ + * + * + * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END + * | | ^ ^ + * | +----------------+ | + * +--------------------------------+ + * + * +---------+ + * | V + * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END + * | | | | ^ ^ + * | | | +-----+ | + * | | +----------------+ | + * | +---------------------------+ | + * +------------------------------------------------------+ + * + * They all start with a BRANCH for "\|" alternatives, even when there is only + * one alternative. + */ + +#include <assert.h> +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> + +#include "nvim/regexp.h" +#include "nvim/garray.h" + +/* + * The opcodes are: + */ + +// definition number opnd? meaning +#define END 0 // End of program or NOMATCH operand. +#define BOL 1 // Match "" at beginning of line. +#define EOL 2 // Match "" at end of line. +#define BRANCH 3 // node Match this alternative, or the + // next... +#define BACK 4 // Match "", "next" ptr points backward. +#define EXACTLY 5 // str Match this string. +#define NOTHING 6 // Match empty string. +#define STAR 7 // node Match this (simple) thing 0 or more + // times. +#define PLUS 8 // node Match this (simple) thing 1 or more + // times. 
+#define MATCH 9 // node match the operand zero-width +#define NOMATCH 10 // node check for no match with operand +#define BEHIND 11 // node look behind for a match with operand +#define NOBEHIND 12 // node look behind for no match with operand +#define SUBPAT 13 // node match the operand here +#define BRACE_SIMPLE 14 // node Match this (simple) thing between m and + // n times (\{m,n\}). +#define BOW 15 // Match "" after [^a-zA-Z0-9_] +#define EOW 16 // Match "" at [^a-zA-Z0-9_] +#define BRACE_LIMITS 17 // nr nr define the min & max for BRACE_SIMPLE + // and BRACE_COMPLEX. +#define NEWL 18 // Match line-break +#define BHPOS 19 // End position for BEHIND or NOBEHIND + + +// character classes: 20-48 normal, 50-78 include a line-break +#define ADD_NL 30 +#define FIRST_NL ANY + ADD_NL +#define ANY 20 // Match any one character. +#define ANYOF 21 // str Match any character in this string. +#define ANYBUT 22 // str Match any character not in this + // string. +#define IDENT 23 // Match identifier char +#define SIDENT 24 // Match identifier char but no digit +#define KWORD 25 // Match keyword char +#define SKWORD 26 // Match word char but no digit +#define FNAME 27 // Match file name char +#define SFNAME 28 // Match file name char but no digit +#define PRINT 29 // Match printable char +#define SPRINT 30 // Match printable char but no digit +#define WHITE 31 // Match whitespace char +#define NWHITE 32 // Match non-whitespace char +#define DIGIT 33 // Match digit char +#define NDIGIT 34 // Match non-digit char +#define HEX 35 // Match hex char +#define NHEX 36 // Match non-hex char +#define OCTAL 37 // Match octal char +#define NOCTAL 38 // Match non-octal char +#define WORD 39 // Match word char +#define NWORD 40 // Match non-word char +#define HEAD 41 // Match head char +#define NHEAD 42 // Match non-head char +#define ALPHA 43 // Match alpha char +#define NALPHA 44 // Match non-alpha char +#define LOWER 45 // Match lowercase char +#define NLOWER 46 // Match 
non-lowercase char +#define UPPER 47 // Match uppercase char +#define NUPPER 48 // Match non-uppercase char +#define LAST_NL NUPPER + ADD_NL +// -V:WITH_NL:560 +#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL) + +#define MOPEN 80 // -89 Mark this point in input as start of + // \( … \) subexpr. MOPEN + 0 marks start of + // match. +#define MCLOSE 90 // -99 Analogous to MOPEN. MCLOSE + 0 marks + // end of match. +#define BACKREF 100 // -109 node Match same string again \1-\9. + +# define ZOPEN 110 // -119 Mark this point in input as start of + // \z( … \) subexpr. +# define ZCLOSE 120 // -129 Analogous to ZOPEN. +# define ZREF 130 // -139 node Match external submatch \z1-\z9 + +#define BRACE_COMPLEX 140 // -149 node Match nodes between m & n times + +#define NOPEN 150 // Mark this point in input as start of + // \%( subexpr. +#define NCLOSE 151 // Analogous to NOPEN. + +#define MULTIBYTECODE 200 // mbc Match one multi-byte character +#define RE_BOF 201 // Match "" at beginning of file. +#define RE_EOF 202 // Match "" at end of file. +#define CURSOR 203 // Match location of cursor. + +#define RE_LNUM 204 // nr cmp Match line number +#define RE_COL 205 // nr cmp Match column number +#define RE_VCOL 206 // nr cmp Match virtual column number + +#define RE_MARK 207 // mark cmp Match mark position +#define RE_VISUAL 208 // Match Visual area +#define RE_COMPOSING 209 // any composing characters + +/* + * Flags to be passed up and down. + */ +#define HASWIDTH 0x1 // Known never to match null string. +#define SIMPLE 0x2 // Simple enough to be STAR/PLUS operand. +#define SPSTART 0x4 // Starts with * or +. +#define HASNL 0x8 // Contains some \n. +#define HASLOOKBH 0x10 // Contains "\@<=" or "\@<!". +#define WORST 0 // Worst case. + + +static int prevchr_len; ///< byte length of previous char +static int num_complex_braces; ///< Complex \{...} count +static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE +static long regsize; ///< Code size. 
+static int reg_toolong; ///< true when offset out of range +static char_u had_endbrace[NSUBEXP]; ///< flags, true if end of () found +static long brace_min[10]; ///< Minimums for complex brace repeats +static long brace_max[10]; ///< Maximums for complex brace repeats +static int brace_count[10]; ///< Current counts for complex brace repeats +static int one_exactly = false; ///< only do one char for EXACTLY + +// When making changes to classchars also change nfa_classcodes. +static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; +static int classcodes[] = { + ANY, IDENT, SIDENT, KWORD, SKWORD, + FNAME, SFNAME, PRINT, SPRINT, + WHITE, NWHITE, DIGIT, NDIGIT, + HEX, NHEX, OCTAL, NOCTAL, + WORD, NWORD, HEAD, NHEAD, + ALPHA, NALPHA, LOWER, NLOWER, + UPPER, NUPPER +}; + +/* + * When regcode is set to this value, code is not emitted and size is computed + * instead. + */ +#define JUST_CALC_SIZE ((char_u *) -1) + +// Values for rs_state in regitem_T. +typedef enum regstate_E { + RS_NOPEN = 0, // NOPEN and NCLOSE + RS_MOPEN, // MOPEN + [0-9] + RS_MCLOSE, // MCLOSE + [0-9] + RS_ZOPEN, // ZOPEN + [0-9] + RS_ZCLOSE, // ZCLOSE + [0-9] + RS_BRANCH, // BRANCH + RS_BRCPLX_MORE, // BRACE_COMPLEX and trying one more match + RS_BRCPLX_LONG, // BRACE_COMPLEX and trying longest match + RS_BRCPLX_SHORT, // BRACE_COMPLEX and trying shortest match + RS_NOMATCH, // NOMATCH + RS_BEHIND1, // BEHIND / NOBEHIND matching rest + RS_BEHIND2, // BEHIND / NOBEHIND matching behind part + RS_STAR_LONG, // STAR/PLUS/BRACE_SIMPLE longest match + RS_STAR_SHORT // STAR/PLUS/BRACE_SIMPLE shortest match +} regstate_T; + +/* + * Structure used to save the current input state, when it needs to be + * restored after trying a match. Used by reg_save() and reg_restore(). + * Also stores the length of "backpos". 
+ */ +typedef struct +{ + union + { + char_u *ptr; // rex.input pointer, for single-line regexp + lpos_T pos; // rex.input pos, for multi-line regexp + } rs_u; + int rs_len; +} regsave_T; + +// struct to save start/end pointer/position in for \(\) +typedef struct +{ + union + { + char_u *ptr; + lpos_T pos; + } se_u; +} save_se_T; + +// used for BEHIND and NOBEHIND matching +typedef struct regbehind_S +{ + regsave_T save_after; + regsave_T save_behind; + int save_need_clear_subexpr; + save_se_T save_start[NSUBEXP]; + save_se_T save_end[NSUBEXP]; +} regbehind_T; + +/* + * When there are alternatives a regstate_T is put on the regstack to remember + * what we are doing. + * Before it may be another type of item, depending on rs_state, to remember + * more things. + */ +typedef struct regitem_S +{ + regstate_T rs_state; // what we are doing, one of RS_ above + short rs_no; // submatch nr or BEHIND/NOBEHIND + char_u *rs_scan; // current node in program + union + { + save_se_T sesave; + regsave_T regsave; + } rs_un; // room for saving rex.input +} regitem_T; + + +// used for STAR, PLUS and BRACE_SIMPLE matching +typedef struct regstar_S +{ + int nextb; // next byte + int nextb_ic; // next byte reverse case + long count; + long minval; + long maxval; +} regstar_T; + +// used to store input position when a BACK was encountered, so that we know if +// we made any progress since the last time. +typedef struct backpos_S +{ + char_u *bp_scan; // "scan" where BACK was encountered + regsave_T bp_pos; // last input position +} backpos_T; + +/* + * "regstack" and "backpos" are used by regmatch(). They are kept over calls + * to avoid invoking malloc() and free() often. + * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T + * or regbehind_T. 
+ * "backpos" is a table with backpos_T for BACK + */ +static garray_T regstack = GA_EMPTY_INIT_VALUE; +static garray_T backpos = GA_EMPTY_INIT_VALUE; + +static regsave_T behind_pos; + +/* + * Both for regstack and backpos tables we use the following strategy of + * allocation (to reduce malloc/free calls): + * - Initial size is fairly small. + * - When needed, the tables are grown bigger (8 times at first, double after + * that). + * - After executing the match we free the memory only if the array has grown. + * Thus the memory is kept allocated when it's at the initial size. + * This makes it fast while not keeping a lot of memory allocated. + * A three times speed increase was observed when using many simple patterns. + */ +#define REGSTACK_INITIAL 2048 +#define BACKPOS_INITIAL 64 + +/* + * Opcode notes: + * + * BRANCH The set of branches constituting a single choice are hooked + * together with their "next" pointers, since precedence prevents + * anything being concatenated to any individual branch. The + * "next" pointer of the last BRANCH in a choice points to the + * thing following the whole choice. This is also where the + * final "next" pointer of each individual branch points; each + * branch starts with the operand node of a BRANCH node. + * + * BACK Normal "next" pointers all implicitly point forward; BACK + * exists to make loop structures possible. + * + * STAR,PLUS '=', and complex '*' and '+', are implemented as circular + * BRANCH structures using BACK. Simple cases (one character + * per match) are implemented with STAR and PLUS for speed + * and to minimize recursive plunges. + * + * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX + * node, and defines the min and max limits to be used for that + * node. + * + * MOPEN,MCLOSE ...are numbered at compile time. + * ZOPEN,ZCLOSE ...ditto + */ + +/* + * A node is one char of opcode followed by two chars of "next" pointer. 
+ * "Next" pointers are stored as two 8-bit bytes, high order first. The + * value is a positive offset from the opcode of the node containing it. + * An operand, if any, simply follows the node. (Note that much of the + * code generation knows about this implicit relationship.) + * + * Using two bytes for the "next" pointer is vast overkill for most things, + * but allows patterns to get big without disasters. + */ +#define OP(p) ((int)(*(p))) +#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) +#define OPERAND(p) ((p) + 3) +// Obtain an operand that was stored as four bytes, MSB first. +#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \ + + ((long)(p)[5] << 8) + (long)(p)[6]) +// Obtain a second operand stored as four bytes. +#define OPERAND_MAX(p) OPERAND_MIN((p) + 4) +// Obtain a second single-byte operand stored after a four bytes operand. +#define OPERAND_CMP(p) (p)[7] + +static char_u *reg(int paren, int *flagp); + +#ifdef BT_REGEXP_DUMP +static void regdump(char_u *, bt_regprog_T *); +#endif + + +#ifdef REGEXP_DEBUG +static char_u *regprop(char_u *); + +static int regnarrate = 0; +#endif + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "regexp_bt.c.generated.h" +#endif + + +/* + * Setup to parse the regexp. Used once to get the length and once to do it. + */ +static void regcomp_start( + char_u *expr, + int re_flags) // see vim_regcomp() +{ + initchr(expr); + if (re_flags & RE_MAGIC) + reg_magic = MAGIC_ON; + else + reg_magic = MAGIC_OFF; + reg_string = (re_flags & RE_STRING); + reg_strict = (re_flags & RE_STRICT); + get_cpo_flags(); + + num_complex_braces = 0; + regnpar = 1; + memset(had_endbrace, 0, sizeof(had_endbrace)); + regnzpar = 1; + re_has_z = 0; + regsize = 0L; + reg_toolong = false; + regflags = 0; + had_eol = false; +} + +// Return true if MULTIBYTECODE should be used instead of EXACTLY for +// character "c". 
+static bool use_multibytecode(int c) +{ + return utf_char2len(c) > 1 + && (re_multi_type(peekchr()) != NOT_MULTI + || utf_iscomposing(c)); +} + + +/* + * Emit (if appropriate) a byte of code + */ +static void regc(int b) +{ + if (regcode == JUST_CALC_SIZE) + regsize++; + else + *regcode++ = b; +} + +/* + * Emit (if appropriate) a multi-byte character of code + */ +static void regmbc(int c) +{ + if (regcode == JUST_CALC_SIZE) { + regsize += utf_char2len(c); + } else { + regcode += utf_char2bytes(c, regcode); + } +} + +#define REGMBC(x) regmbc(x); +#define CASEMBC(x) case x: + +/* + * Produce the bytes for equivalence class "c". + * Currently only handles latin1, latin9 and utf-8. + * NOTE: When changing this function, also change nfa_emit_equi_class() + */ +static void reg_equi_class(int c) +{ + { + switch (c) { + // Do not use '\300' style, it results in a negative number. + case 'A': case 0xc0: case 0xc1: case 0xc2: + case 0xc3: case 0xc4: case 0xc5: + CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd) + CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2) + regmbc('A'); regmbc(0xc0); regmbc(0xc1); + regmbc(0xc2); regmbc(0xc3); regmbc(0xc4); + regmbc(0xc5); + REGMBC(0x100) REGMBC(0x102) REGMBC(0x104) + REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0) + REGMBC(0x1ea2) + return; + case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06) + regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06) + return; + case 'C': case 0xc7: + CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c) + regmbc('C'); regmbc(0xc7); + REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a) + REGMBC(0x10c) + return; + case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a) + CASEMBC(0x1e0e) CASEMBC(0x1e10) + regmbc('D'); REGMBC(0x10e) REGMBC(0x110) + REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10) + return; + case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb: + CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118) + CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc) + regmbc('E'); regmbc(0xc8); regmbc(0xc9); + regmbc(0xca); 
regmbc(0xcb); + REGMBC(0x112) REGMBC(0x114) REGMBC(0x116) + REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba) + REGMBC(0x1ebc) + return; + case 'F': CASEMBC(0x1e1e) + regmbc('F'); REGMBC(0x1e1e) + return; + case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120) + CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4) + CASEMBC(0x1e20) + regmbc('G'); REGMBC(0x11c) REGMBC(0x11e) + REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4) + REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20) + return; + case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22) + CASEMBC(0x1e26) CASEMBC(0x1e28) + regmbc('H'); REGMBC(0x124) REGMBC(0x126) + REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28) + return; + case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf: + CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e) + CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8) + regmbc('I'); regmbc(0xcc); regmbc(0xcd); + regmbc(0xce); regmbc(0xcf); + REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c) + REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf) + REGMBC(0x1ec8) + return; + case 'J': CASEMBC(0x134) + regmbc('J'); REGMBC(0x134) + return; + case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30) + CASEMBC(0x1e34) + regmbc('K'); REGMBC(0x136) REGMBC(0x1e8) + REGMBC(0x1e30) REGMBC(0x1e34) + return; + case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d) + CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a) + regmbc('L'); REGMBC(0x139) REGMBC(0x13b) + REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141) + REGMBC(0x1e3a) + return; + case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40) + regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40) + return; + case 'N': case 0xd1: + CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44) + CASEMBC(0x1e48) + regmbc('N'); regmbc(0xd1); + REGMBC(0x143) REGMBC(0x145) REGMBC(0x147) + REGMBC(0x1e44) REGMBC(0x1e48) + return; + case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5: + case 0xd6: case 0xd8: + CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0) + CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece) + 
regmbc('O'); regmbc(0xd2); regmbc(0xd3); + regmbc(0xd4); regmbc(0xd5); regmbc(0xd6); + regmbc(0xd8); + REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150) + REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea) + REGMBC(0x1ec) REGMBC(0x1ece) + return; + case 'P': case 0x1e54: case 0x1e56: + regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56) + return; + case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158) + CASEMBC(0x1e58) CASEMBC(0x1e5e) + regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158) + REGMBC(0x1e58) REGMBC(0x1e5e) + return; + case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e) + CASEMBC(0x160) CASEMBC(0x1e60) + regmbc('S'); REGMBC(0x15a) REGMBC(0x15c) + REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60) + return; + case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166) + CASEMBC(0x1e6a) CASEMBC(0x1e6e) + regmbc('T'); REGMBC(0x162) REGMBC(0x164) + REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e) + return; + case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc: + CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e) + CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) + CASEMBC(0x1ee6) + regmbc('U'); regmbc(0xd9); regmbc(0xda); + regmbc(0xdb); regmbc(0xdc); + REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c) + REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172) + REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6) + return; + case 'V': CASEMBC(0x1e7c) + regmbc('V'); REGMBC(0x1e7c) + return; + case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82) + CASEMBC(0x1e84) CASEMBC(0x1e86) + regmbc('W'); REGMBC(0x174) REGMBC(0x1e80) + REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86) + return; + case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c) + regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c) + return; + case 'Y': case 0xdd: + CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2) + CASEMBC(0x1ef6) CASEMBC(0x1ef8) + regmbc('Y'); regmbc(0xdd); + REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e) + REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8) + return; + case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d) + CASEMBC(0x1b5) 
CASEMBC(0x1e90) CASEMBC(0x1e94) + regmbc('Z'); REGMBC(0x179) REGMBC(0x17b) + REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90) + REGMBC(0x1e94) + return; + case 'a': case 0xe0: case 0xe1: case 0xe2: + case 0xe3: case 0xe4: case 0xe5: + CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce) + CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3) + regmbc('a'); regmbc(0xe0); regmbc(0xe1); + regmbc(0xe2); regmbc(0xe3); regmbc(0xe4); + regmbc(0xe5); + REGMBC(0x101) REGMBC(0x103) REGMBC(0x105) + REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1) + REGMBC(0x1ea3) + return; + case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07) + regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07) + return; + case 'c': case 0xe7: + CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d) + regmbc('c'); regmbc(0xe7); + REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b) + REGMBC(0x10d) + return; + case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b) + CASEMBC(0x1e0f) CASEMBC(0x1e11) + regmbc('d'); REGMBC(0x10f) REGMBC(0x111) + REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11) + return; + case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb: + CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119) + CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd) + regmbc('e'); regmbc(0xe8); regmbc(0xe9); + regmbc(0xea); regmbc(0xeb); + REGMBC(0x113) REGMBC(0x115) REGMBC(0x117) + REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb) + REGMBC(0x1ebd) + return; + case 'f': CASEMBC(0x1e1f) + regmbc('f'); REGMBC(0x1e1f) + return; + case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121) + CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5) + CASEMBC(0x1e21) + regmbc('g'); REGMBC(0x11d) REGMBC(0x11f) + REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5) + REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21) + return; + case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23) + CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96) + regmbc('h'); REGMBC(0x125) REGMBC(0x127) + REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29) + REGMBC(0x1e96) + return; + case 'i': case 0xec: case 0xed: case 
0xee: case 0xef: + CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f) + CASEMBC(0x1d0) CASEMBC(0x1ec9) + regmbc('i'); regmbc(0xec); regmbc(0xed); + regmbc(0xee); regmbc(0xef); + REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d) + REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9) + return; + case 'j': CASEMBC(0x135) CASEMBC(0x1f0) + regmbc('j'); REGMBC(0x135) REGMBC(0x1f0) + return; + case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31) + CASEMBC(0x1e35) + regmbc('k'); REGMBC(0x137) REGMBC(0x1e9) + REGMBC(0x1e31) REGMBC(0x1e35) + return; + case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e) + CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b) + regmbc('l'); REGMBC(0x13a) REGMBC(0x13c) + REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142) + REGMBC(0x1e3b) + return; + case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41) + regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41) + return; + case 'n': case 0xf1: + CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149) + CASEMBC(0x1e45) CASEMBC(0x1e49) + regmbc('n'); regmbc(0xf1); + REGMBC(0x144) REGMBC(0x146) REGMBC(0x148) + REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49) + return; + case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5: + case 0xf6: case 0xf8: + CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1) + CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf) + regmbc('o'); regmbc(0xf2); regmbc(0xf3); + regmbc(0xf4); regmbc(0xf5); regmbc(0xf6); + regmbc(0xf8); + REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151) + REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb) + REGMBC(0x1ed) REGMBC(0x1ecf) + return; + case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57) + regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57) + return; + case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159) + CASEMBC(0x1e59) CASEMBC(0x1e5f) + regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159) + REGMBC(0x1e59) REGMBC(0x1e5f) + return; + case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f) + CASEMBC(0x161) CASEMBC(0x1e61) + regmbc('s'); REGMBC(0x15b) REGMBC(0x15d) + REGMBC(0x15f) REGMBC(0x161) 
REGMBC(0x1e61) + return; + case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167) + CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97) + regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167) + REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97) + return; + case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc: + CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f) + CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) + CASEMBC(0x1ee7) + regmbc('u'); regmbc(0xf9); regmbc(0xfa); + regmbc(0xfb); regmbc(0xfc); + REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d) + REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173) + REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7) + return; + case 'v': CASEMBC(0x1e7d) + regmbc('v'); REGMBC(0x1e7d) + return; + case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83) + CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98) + regmbc('w'); REGMBC(0x175) REGMBC(0x1e81) + REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87) + REGMBC(0x1e98) + return; + case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d) + regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d) + return; + case 'y': case 0xfd: case 0xff: + CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99) + CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9) + regmbc('y'); regmbc(0xfd); regmbc(0xff); + REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99) + REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9) + return; + case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e) + CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95) + regmbc('z'); REGMBC(0x17a) REGMBC(0x17c) + REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91) + REGMBC(0x1e95) + return; + } + } + regmbc(c); +} + + + +/* + * Emit a node. + * Return pointer to generated code. + */ +static char_u *regnode(int op) +{ + char_u *ret; + + ret = regcode; + if (ret == JUST_CALC_SIZE) + regsize += 3; + else { + *regcode++ = op; + *regcode++ = NUL; // Null "next" pointer. + *regcode++ = NUL; + } + return ret; +} + +/* + * Write a four bytes number at "p" and return pointer to the next char. 
+ */ +static char_u *re_put_uint32(char_u *p, uint32_t val) +{ + *p++ = (char_u)((val >> 24) & 0377); + *p++ = (char_u)((val >> 16) & 0377); + *p++ = (char_u)((val >> 8) & 0377); + *p++ = (char_u)(val & 0377); + return p; +} + +/* + * regnext - dig the "next" pointer out of a node + * Returns NULL when calculating size, when there is no next item and when + * there is an error. + */ +static char_u *regnext(char_u *p) + FUNC_ATTR_NONNULL_ALL +{ + int offset; + + if (p == JUST_CALC_SIZE || reg_toolong) + return NULL; + + offset = NEXT(p); + if (offset == 0) + return NULL; + + if (OP(p) == BACK) + return p - offset; + else + return p + offset; +} + +// Set the next-pointer at the end of a node chain. +static void regtail(char_u *p, char_u *val) +{ + int offset; + + if (p == JUST_CALC_SIZE) { + return; + } + + // Find last node. + char_u *scan = p; + for (;; ) { + char_u *temp = regnext(scan); + if (temp == NULL) { + break; + } + scan = temp; + } + + if (OP(scan) == BACK) { + offset = (int)(scan - val); + } else { + offset = (int)(val - scan); + } + // When the offset uses more than 16 bits it can no longer fit in the two + // bytes available. Use a global flag to avoid having to check return + // values in too many places. + if (offset > 0xffff) { + reg_toolong = true; + } else { + *(scan + 1) = (char_u)(((unsigned)offset >> 8) & 0377); + *(scan + 2) = (char_u)(offset & 0377); + } +} + +/* + * Like regtail, on item after a BRANCH; nop if none. + */ +static void regoptail(char_u *p, char_u *val) +{ + // When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" + if (p == NULL || p == JUST_CALC_SIZE + || (OP(p) != BRANCH + && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9))) + return; + regtail(OPERAND(p), val); +} + + +/* + * Insert an operator in front of already-emitted operand + * + * Means relocating the operand. 
+ */ +static void reginsert(int op, char_u *opnd) +{ + char_u *src; + char_u *dst; + char_u *place; + + if (regcode == JUST_CALC_SIZE) { + regsize += 3; + return; + } + src = regcode; + regcode += 3; + dst = regcode; + while (src > opnd) + *--dst = *--src; + + place = opnd; // Op node, where operand used to be. + *place++ = op; + *place++ = NUL; + *place = NUL; +} + +/* + * Insert an operator in front of already-emitted operand. + * Add a number to the operator. + */ +static void reginsert_nr(int op, long val, char_u *opnd) +{ + char_u *src; + char_u *dst; + char_u *place; + + if (regcode == JUST_CALC_SIZE) { + regsize += 7; + return; + } + src = regcode; + regcode += 7; + dst = regcode; + while (src > opnd) + *--dst = *--src; + + place = opnd; // Op node, where operand used to be. + *place++ = op; + *place++ = NUL; + *place++ = NUL; + assert(val >= 0 && (uintmax_t)val <= UINT32_MAX); + re_put_uint32(place, (uint32_t)val); +} + +/* + * Insert an operator in front of already-emitted operand. + * The operator has the given limit values as operands. Also set next pointer. + * + * Means relocating the operand. + */ +static void reginsert_limits(int op, long minval, long maxval, char_u *opnd) +{ + char_u *src; + char_u *dst; + char_u *place; + + if (regcode == JUST_CALC_SIZE) { + regsize += 11; + return; + } + src = regcode; + regcode += 11; + dst = regcode; + while (src > opnd) + *--dst = *--src; + + place = opnd; // Op node, where operand used to be. + *place++ = op; + *place++ = NUL; + *place++ = NUL; + assert(minval >= 0 && (uintmax_t)minval <= UINT32_MAX); + place = re_put_uint32(place, (uint32_t)minval); + assert(maxval >= 0 && (uintmax_t)maxval <= UINT32_MAX); + place = re_put_uint32(place, (uint32_t)maxval); + regtail(opnd, place); +} + +/// Return true if the back reference is legal. We must have seen the close +/// brace. +/// TODO(vim): Should also check that we don't refer to something repeated +/// (+*=): what instance of the repetition should we match? 
+static int seen_endbrace(int refnum) +{ + if (!had_endbrace[refnum]) { + char_u *p; + + // Trick: check if "@<=" or "@<!" follows, in which case + // the \1 can appear before the referenced match. + for (p = regparse; *p != NUL; p++) { + if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '=')) { + break; + } + } + + if (*p == NUL) { + emsg(_("E65: Illegal back reference")); + rc_did_emsg = true; + return false; + } + } + return true; +} + +/* + * Parse the lowest level. + * + * Optimization: gobbles an entire sequence of ordinary characters so that + * it can turn them into a single node, which is smaller to store and + * faster to run. Don't do this when one_exactly is set. + */ +static char_u *regatom(int *flagp) +{ + char_u *ret; + int flags; + int c; + char_u *p; + int extra = 0; + int save_prev_at_start = prev_at_start; + + *flagp = WORST; // Tentatively. + + c = getchr(); + switch (c) { + case Magic('^'): + ret = regnode(BOL); + break; + + case Magic('$'): + ret = regnode(EOL); + had_eol = true; + break; + + case Magic('<'): + ret = regnode(BOW); + break; + + case Magic('>'): + ret = regnode(EOW); + break; + + case Magic('_'): + c = no_Magic(getchr()); + if (c == '^') { // "\_^" is start-of-line + ret = regnode(BOL); + break; + } + if (c == '$') { // "\_$" is end-of-line + ret = regnode(EOL); + had_eol = true; + break; + } + + extra = ADD_NL; + *flagp |= HASNL; + + // "\_[" is character range plus newline + if (c == '[') + goto collection; + + // "\_x" is character class plus newline + FALLTHROUGH; + + // Character classes. 
+ case Magic('.'): + case Magic('i'): + case Magic('I'): + case Magic('k'): + case Magic('K'): + case Magic('f'): + case Magic('F'): + case Magic('p'): + case Magic('P'): + case Magic('s'): + case Magic('S'): + case Magic('d'): + case Magic('D'): + case Magic('x'): + case Magic('X'): + case Magic('o'): + case Magic('O'): + case Magic('w'): + case Magic('W'): + case Magic('h'): + case Magic('H'): + case Magic('a'): + case Magic('A'): + case Magic('l'): + case Magic('L'): + case Magic('u'): + case Magic('U'): + p = vim_strchr(classchars, no_Magic(c)); + if (p == NULL) + EMSG_RET_NULL(_("E63: invalid use of \\_")); + // When '.' is followed by a composing char ignore the dot, so that + // the composing char is matched here. + if (c == Magic('.') && utf_iscomposing(peekchr())) { + c = getchr(); + goto do_multibyte; + } + ret = regnode(classcodes[p - classchars] + extra); + *flagp |= HASWIDTH | SIMPLE; + break; + + case Magic('n'): + if (reg_string) { + // In a string "\n" matches a newline character. + ret = regnode(EXACTLY); + regc(NL); + regc(NUL); + *flagp |= HASWIDTH | SIMPLE; + } else { + // In buffer text "\n" matches the end of a line. + ret = regnode(NEWL); + *flagp |= HASWIDTH | HASNL; + } + break; + + case Magic('('): + if (one_exactly) + EMSG_ONE_RET_NULL; + ret = reg(REG_PAREN, &flags); + if (ret == NULL) + return NULL; + *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); + break; + + case NUL: + case Magic('|'): + case Magic('&'): + case Magic(')'): + if (one_exactly) + EMSG_ONE_RET_NULL; + IEMSG_RET_NULL(_(e_internal)); // Supposed to be caught earlier. + // NOTREACHED + + case Magic('='): + case Magic('?'): + case Magic('+'): + case Magic('@'): + case Magic('{'): + case Magic('*'): + c = no_Magic(c); + EMSG3_RET_NULL(_("E64: %s%c follows nothing"), + (c == '*' ? 
reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c); + // NOTREACHED + + case Magic('~'): // previous substitute pattern + if (reg_prev_sub != NULL) { + char_u *lp; + + ret = regnode(EXACTLY); + lp = reg_prev_sub; + while (*lp != NUL) + regc(*lp++); + regc(NUL); + if (*reg_prev_sub != NUL) { + *flagp |= HASWIDTH; + if ((lp - reg_prev_sub) == 1) + *flagp |= SIMPLE; + } + } else + EMSG_RET_NULL(_(e_nopresub)); + break; + + case Magic('1'): + case Magic('2'): + case Magic('3'): + case Magic('4'): + case Magic('5'): + case Magic('6'): + case Magic('7'): + case Magic('8'): + case Magic('9'): + { + int refnum; + + refnum = c - Magic('0'); + if (!seen_endbrace(refnum)) { + return NULL; + } + ret = regnode(BACKREF + refnum); + } + break; + + case Magic('z'): + { + c = no_Magic(getchr()); + switch (c) { + case '(': if ((reg_do_extmatch & REX_SET) == 0) + EMSG_RET_NULL(_(e_z_not_allowed)); + if (one_exactly) + EMSG_ONE_RET_NULL; + ret = reg(REG_ZPAREN, &flags); + if (ret == NULL) + return NULL; + *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH); + re_has_z = REX_SET; + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': if ((reg_do_extmatch & REX_USE) == 0) + EMSG_RET_NULL(_(e_z1_not_allowed)); + ret = regnode(ZREF + c - '0'); + re_has_z = REX_USE; + break; + + case 's': ret = regnode(MOPEN + 0); + if (!re_mult_next("\\zs")) { + return NULL; + } + break; + + case 'e': ret = regnode(MCLOSE + 0); + if (!re_mult_next("\\ze")) { + return NULL; + } + break; + + default: EMSG_RET_NULL(_("E68: Invalid character after \\z")); + } + } + break; + + case Magic('%'): + { + c = no_Magic(getchr()); + switch (c) { + // () without a back reference + case '(': + if (one_exactly) + EMSG_ONE_RET_NULL; + ret = reg(REG_NPAREN, &flags); + if (ret == NULL) + return NULL; + *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); + break; + + // Catch \%^ and \%$ regardless of where they appear in the + // pattern -- regardless 
of whether or not it makes sense. + case '^': + ret = regnode(RE_BOF); + break; + + case '$': + ret = regnode(RE_EOF); + break; + + case '#': + ret = regnode(CURSOR); + break; + + case 'V': + ret = regnode(RE_VISUAL); + break; + + case 'C': + ret = regnode(RE_COMPOSING); + break; + + // \%[abc]: Emit as a list of branches, all ending at the last + // branch which matches nothing. + case '[': + if (one_exactly) // doesn't nest + EMSG_ONE_RET_NULL; + { + char_u *lastbranch; + char_u *lastnode = NULL; + char_u *br; + + ret = NULL; + while ((c = getchr()) != ']') { + if (c == NUL) + EMSG2_RET_NULL(_(e_missing_sb), + reg_magic == MAGIC_ALL); + br = regnode(BRANCH); + if (ret == NULL) { + ret = br; + } else { + regtail(lastnode, br); + if (reg_toolong) { + return NULL; + } + } + + ungetchr(); + one_exactly = true; + lastnode = regatom(flagp); + one_exactly = false; + if (lastnode == NULL) { + return NULL; + } + } + if (ret == NULL) + EMSG2_RET_NULL(_(e_empty_sb), + reg_magic == MAGIC_ALL); + lastbranch = regnode(BRANCH); + br = regnode(NOTHING); + if (ret != JUST_CALC_SIZE) { + regtail(lastnode, br); + regtail(lastbranch, br); + // connect all branches to the NOTHING + // branch at the end + for (br = ret; br != lastnode; ) { + if (OP(br) == BRANCH) { + regtail(br, lastbranch); + if (reg_toolong) { + return NULL; + } + br = OPERAND(br); + } else + br = regnext(br); + } + } + *flagp &= ~(HASWIDTH | SIMPLE); + break; + } + + case 'd': // %d123 decimal + case 'o': // %o123 octal + case 'x': // %xab hex 2 + case 'u': // %uabcd hex 4 + case 'U': // %U1234abcd hex 8 + { + int64_t i; + + switch (c) { + case 'd': i = getdecchrs(); break; + case 'o': i = getoctchrs(); break; + case 'x': i = gethexchrs(2); break; + case 'u': i = gethexchrs(4); break; + case 'U': i = gethexchrs(8); break; + default: i = -1; break; + } + + if (i < 0 || i > INT_MAX) { + EMSG2_RET_NULL(_("E678: Invalid character after %s%%[dxouU]"), + reg_magic == MAGIC_ALL); + } + if (use_multibytecode(i)) { + ret = 
regnode(MULTIBYTECODE); + } else { + ret = regnode(EXACTLY); + } + if (i == 0) { + regc(0x0a); + } else { + regmbc(i); + } + regc(NUL); + *flagp |= HASWIDTH; + break; + } + + default: + if (ascii_isdigit(c) || c == '<' || c == '>' + || c == '\'') { + uint32_t n = 0; + int cmp; + + cmp = c; + if (cmp == '<' || cmp == '>') + c = getchr(); + while (ascii_isdigit(c)) { + n = n * 10 + (uint32_t)(c - '0'); + c = getchr(); + } + if (c == '\'' && n == 0) { + // "\%'m", "\%<'m" and "\%>'m": Mark + c = getchr(); + ret = regnode(RE_MARK); + if (ret == JUST_CALC_SIZE) + regsize += 2; + else { + *regcode++ = c; + *regcode++ = cmp; + } + break; + } else if (c == 'l' || c == 'c' || c == 'v') { + if (c == 'l') { + ret = regnode(RE_LNUM); + if (save_prev_at_start) { + at_start = true; + } + } else if (c == 'c') { + ret = regnode(RE_COL); + } else { + ret = regnode(RE_VCOL); + } + if (ret == JUST_CALC_SIZE) { + regsize += 5; + } else { + // put the number and the optional + // comparator after the opcode + regcode = re_put_uint32(regcode, n); + *regcode++ = cmp; + } + break; + } + } + + EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), + reg_magic == MAGIC_ALL); + } + } + break; + + case Magic('['): +collection: + { + char_u *lp; + + // If there is no matching ']', we assume the '[' is a normal + // character. This makes 'incsearch' and ":help [" work. + lp = skip_anyof(regparse); + if (*lp == ']') { // there is a matching ']' + int startc = -1; // > 0 when next '-' is a range + int endc; + + // In a character class, different parsing rules apply. + // Not even \ is special anymore, nothing is. + if (*regparse == '^') { // Complement of range. + ret = regnode(ANYBUT + extra); + regparse++; + } else + ret = regnode(ANYOF + extra); + + // At the start ']' and '-' mean the literal character. 
+ if (*regparse == ']' || *regparse == '-') { + startc = *regparse; + regc(*regparse++); + } + + while (*regparse != NUL && *regparse != ']') { + if (*regparse == '-') { + ++regparse; + // The '-' is not used for a range at the end and + // after or before a '\n'. + if (*regparse == ']' || *regparse == NUL + || startc == -1 + || (regparse[0] == '\\' && regparse[1] == 'n')) { + regc('-'); + startc = '-'; // [--x] is a range + } else { + // Also accept "a-[.z.]" + endc = 0; + if (*regparse == '[') + endc = get_coll_element(&regparse); + if (endc == 0) { + endc = mb_ptr2char_adv((const char_u **)&regparse); + } + + // Handle \o40, \x20 and \u20AC style sequences + if (endc == '\\' && !reg_cpo_lit) + endc = coll_get_char(); + + if (startc > endc) { + EMSG_RET_NULL(_(e_reverse_range)); + } + if (utf_char2len(startc) > 1 + || utf_char2len(endc) > 1) { + // Limit to a range of 256 chars + if (endc > startc + 256) { + EMSG_RET_NULL(_(e_large_class)); + } + while (++startc <= endc) { + regmbc(startc); + } + } else { + while (++startc <= endc) + regc(startc); + } + startc = -1; + } + } + // Only "\]", "\^", "\]" and "\\" are special in Vi. Vim + // accepts "\t", "\e", etc., but only when the 'l' flag in + // 'cpoptions' is not included. + else if (*regparse == '\\' + && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL + || (!reg_cpo_lit + && vim_strchr(REGEXP_ABBR, + regparse[1]) != NULL))) { + regparse++; + if (*regparse == 'n') { + // '\n' in range: also match NL + if (ret != JUST_CALC_SIZE) { + // Using \n inside [^] does not change what + // matches. "[^\n]" is the same as ".". 
+ if (*ret == ANYOF) { + *ret = ANYOF + ADD_NL; + *flagp |= HASNL; + } + // else: must have had a \n already + } + regparse++; + startc = -1; + } else if (*regparse == 'd' + || *regparse == 'o' + || *regparse == 'x' + || *regparse == 'u' + || *regparse == 'U') { + startc = coll_get_char(); + if (startc == 0) + regc(0x0a); + else + regmbc(startc); + } else { + startc = backslash_trans(*regparse++); + regc(startc); + } + } else if (*regparse == '[') { + int c_class; + int cu; + + c_class = get_char_class(&regparse); + startc = -1; + // Characters assumed to be 8 bits! + switch (c_class) { + case CLASS_NONE: + c_class = get_equi_class(&regparse); + if (c_class != 0) { + // produce equivalence class + reg_equi_class(c_class); + } else if ((c_class = + get_coll_element(&regparse)) != 0) { + // produce a collating element + regmbc(c_class); + } else { + // literal '[', allow [[-x] as a range + startc = *regparse++; + regc(startc); + } + break; + case CLASS_ALNUM: + for (cu = 1; cu < 128; cu++) { + if (isalnum(cu)) { + regmbc(cu); + } + } + break; + case CLASS_ALPHA: + for (cu = 1; cu < 128; cu++) { + if (isalpha(cu)) { + regmbc(cu); + } + } + break; + case CLASS_BLANK: + regc(' '); + regc('\t'); + break; + case CLASS_CNTRL: + for (cu = 1; cu <= 127; cu++) { + if (iscntrl(cu)) { + regmbc(cu); + } + } + break; + case CLASS_DIGIT: + for (cu = 1; cu <= 127; cu++) { + if (ascii_isdigit(cu)) { + regmbc(cu); + } + } + break; + case CLASS_GRAPH: + for (cu = 1; cu <= 127; cu++) { + if (isgraph(cu)) { + regmbc(cu); + } + } + break; + case CLASS_LOWER: + for (cu = 1; cu <= 255; cu++) { + if (mb_islower(cu) && cu != 170 && cu != 186) { + regmbc(cu); + } + } + break; + case CLASS_PRINT: + for (cu = 1; cu <= 255; cu++) { + if (vim_isprintc(cu)) { + regmbc(cu); + } + } + break; + case CLASS_PUNCT: + for (cu = 1; cu < 128; cu++) { + if (ispunct(cu)) { + regmbc(cu); + } + } + break; + case CLASS_SPACE: + for (cu = 9; cu <= 13; cu++) + regc(cu); + regc(' '); + break; + case CLASS_UPPER: + for (cu 
= 1; cu <= 255; cu++) { + if (mb_isupper(cu)) { + regmbc(cu); + } + } + break; + case CLASS_XDIGIT: + for (cu = 1; cu <= 255; cu++) { + if (ascii_isxdigit(cu)) { + regmbc(cu); + } + } + break; + case CLASS_TAB: + regc('\t'); + break; + case CLASS_RETURN: + regc('\r'); + break; + case CLASS_BACKSPACE: + regc('\b'); + break; + case CLASS_ESCAPE: + regc(ESC); + break; + case CLASS_IDENT: + for (cu = 1; cu <= 255; cu++) { + if (vim_isIDc(cu)) { + regmbc(cu); + } + } + break; + case CLASS_KEYWORD: + for (cu = 1; cu <= 255; cu++) { + if (reg_iswordc(cu)) { + regmbc(cu); + } + } + break; + case CLASS_FNAME: + for (cu = 1; cu <= 255; cu++) { + if (vim_isfilec(cu)) { + regmbc(cu); + } + } + break; + } + } else { + // produce a multibyte character, including any + // following composing characters. + startc = utf_ptr2char(regparse); + int len = utfc_ptr2len(regparse); + if (utf_char2len(startc) != len) { + // composing chars + startc = -1; + } + while (--len >= 0) { + regc(*regparse++); + } + } + } + regc(NUL); + prevchr_len = 1; // last char was the ']' + if (*regparse != ']') + EMSG_RET_NULL(_(e_toomsbra)); // Cannot happen? + skipchr(); // let's be friends with the lexer again + *flagp |= HASWIDTH | SIMPLE; + break; + } else if (reg_strict) + EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); + } + FALLTHROUGH; + + default: + { + int len; + + // A multi-byte character is handled as a separate atom if it's + // before a multi and when it's a composing char. + if (use_multibytecode(c)) { +do_multibyte: + ret = regnode(MULTIBYTECODE); + regmbc(c); + *flagp |= HASWIDTH | SIMPLE; + break; + } + + ret = regnode(EXACTLY); + + // Append characters as long as: + // - there is no following multi, we then need the character in + // front of it as a single character operand + // - not running into a Magic character + // - "one_exactly" is not set + // But always emit at least one character. Might be a Multi, + // e.g., a "[" without matching "]". 
+ for (len = 0; c != NUL && (len == 0 + || (re_multi_type(peekchr()) == NOT_MULTI + && !one_exactly + && !is_Magic(c))); ++len) { + c = no_Magic(c); + { + regmbc(c); + { + int l; + + // Need to get composing character too. + for (;; ) { + l = utf_ptr2len(regparse); + if (!utf_composinglike(regparse, regparse + l)) { + break; + } + regmbc(utf_ptr2char(regparse)); + skipchr(); + } + } + } + c = getchr(); + } + ungetchr(); + + regc(NUL); + *flagp |= HASWIDTH; + if (len == 1) + *flagp |= SIMPLE; + } + break; + } + + return ret; +} + +/* + * Parse something followed by possible [*+=]. + * + * Note that the branching code sequences used for = and the general cases + * of * and + are somewhat optimized: they use the same NOTHING node as + * both the endmarker for their branch list and the body of the last branch. + * It might seem that this node could be dispensed with entirely, but the + * endmarker role is not redundant. + */ +static char_u *regpiece(int *flagp) +{ + char_u *ret; + int op; + char_u *next; + int flags; + long minval; + long maxval; + + ret = regatom(&flags); + if (ret == NULL) + return NULL; + + op = peekchr(); + if (re_multi_type(op) == NOT_MULTI) { + *flagp = flags; + return ret; + } + // default flags + *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH))); + + skipchr(); + switch (op) { + case Magic('*'): + if (flags & SIMPLE) + reginsert(STAR, ret); + else { + // Emit x* as (x&|), where & means "self". + reginsert(BRANCH, ret); // Either x + regoptail(ret, regnode(BACK)); // and loop + regoptail(ret, ret); // back + regtail(ret, regnode(BRANCH)); // or + regtail(ret, regnode(NOTHING)); // null. + } + break; + + case Magic('+'): + if (flags & SIMPLE) + reginsert(PLUS, ret); + else { + // Emit x+ as x(&|), where & means "self". + next = regnode(BRANCH); // Either + regtail(ret, next); + regtail(regnode(BACK), ret); // loop back + regtail(next, regnode(BRANCH)); // or + regtail(ret, regnode(NOTHING)); // null. 
+ } + *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH))); + break; + + case Magic('@'): + { + int lop = END; + int64_t nr = getdecchrs(); + + switch (no_Magic(getchr())) { + case '=': lop = MATCH; break; // \@= + case '!': lop = NOMATCH; break; // \@! + case '>': lop = SUBPAT; break; // \@> + case '<': switch (no_Magic(getchr())) { + case '=': lop = BEHIND; break; // \@<= + case '!': lop = NOBEHIND; break; // \@<! + } + } + if (lop == END) + EMSG2_RET_NULL(_("E59: invalid character after %s@"), + reg_magic == MAGIC_ALL); + // Look behind must match with behind_pos. + if (lop == BEHIND || lop == NOBEHIND) { + regtail(ret, regnode(BHPOS)); + *flagp |= HASLOOKBH; + } + regtail(ret, regnode(END)); // operand ends + if (lop == BEHIND || lop == NOBEHIND) { + if (nr < 0) + nr = 0; // no limit is same as zero limit + reginsert_nr(lop, (uint32_t)nr, ret); + } else + reginsert(lop, ret); + break; + } + + case Magic('?'): + case Magic('='): + // Emit x= as (x|) + reginsert(BRANCH, ret); // Either x + regtail(ret, regnode(BRANCH)); // or + next = regnode(NOTHING); // null. + regtail(ret, next); + regoptail(ret, next); + break; + + case Magic('{'): + if (!read_limits(&minval, &maxval)) + return NULL; + if (flags & SIMPLE) { + reginsert(BRACE_SIMPLE, ret); + reginsert_limits(BRACE_LIMITS, minval, maxval, ret); + } else { + if (num_complex_braces >= 10) + EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), + reg_magic == MAGIC_ALL); + reginsert(BRACE_COMPLEX + num_complex_braces, ret); + regoptail(ret, regnode(BACK)); + regoptail(ret, ret); + reginsert_limits(BRACE_LIMITS, minval, maxval, ret); + ++num_complex_braces; + } + if (minval > 0 && maxval > 0) + *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH))); + break; + } + if (re_multi_type(peekchr()) != NOT_MULTI) { + // Can't have a multi follow a multi. 
+ if (peekchr() == Magic('*')) { + EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON); + } + EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL, no_Magic(peekchr())); + } + + return ret; +} + +/* + * Parse one alternative of an | or & operator. + * Implements the concatenation operator. + */ +static char_u *regconcat(int *flagp) +{ + char_u *first = NULL; + char_u *chain = NULL; + char_u *latest; + int flags; + int cont = true; + + *flagp = WORST; // Tentatively. + + while (cont) { + switch (peekchr()) { + case NUL: + case Magic('|'): + case Magic('&'): + case Magic(')'): + cont = false; + break; + case Magic('Z'): + regflags |= RF_ICOMBINE; + skipchr_keepstart(); + break; + case Magic('c'): + regflags |= RF_ICASE; + skipchr_keepstart(); + break; + case Magic('C'): + regflags |= RF_NOICASE; + skipchr_keepstart(); + break; + case Magic('v'): + reg_magic = MAGIC_ALL; + skipchr_keepstart(); + curchr = -1; + break; + case Magic('m'): + reg_magic = MAGIC_ON; + skipchr_keepstart(); + curchr = -1; + break; + case Magic('M'): + reg_magic = MAGIC_OFF; + skipchr_keepstart(); + curchr = -1; + break; + case Magic('V'): + reg_magic = MAGIC_NONE; + skipchr_keepstart(); + curchr = -1; + break; + default: + latest = regpiece(&flags); + if (latest == NULL || reg_toolong) + return NULL; + *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH); + if (chain == NULL) // First piece. + *flagp |= flags & SPSTART; + else + regtail(chain, latest); + chain = latest; + if (first == NULL) + first = latest; + break; + } + } + if (first == NULL) // Loop ran zero times. + first = regnode(NOTHING); + return first; +} + +/* + * Parse one alternative of an | operator. + * Implements the & operator. + */ +static char_u *regbranch(int *flagp) +{ + char_u *ret; + char_u *chain = NULL; + char_u *latest; + int flags; + + *flagp = WORST | HASNL; // Tentatively. 
+ + ret = regnode(BRANCH); + for (;; ) { + latest = regconcat(&flags); + if (latest == NULL) + return NULL; + // If one of the branches has width, the whole thing has. If one of + // the branches anchors at start-of-line, the whole thing does. + // If one of the branches uses look-behind, the whole thing does. + *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH); + // If one of the branches doesn't match a line-break, the whole thing + // doesn't. + *flagp &= ~HASNL | (flags & HASNL); + if (chain != NULL) + regtail(chain, latest); + if (peekchr() != Magic('&')) + break; + skipchr(); + regtail(latest, regnode(END)); // operand ends + if (reg_toolong) + break; + reginsert(MATCH, latest); + chain = latest; + } + + return ret; +} + +/* + * Parse regular expression, i.e. main body or parenthesized thing. + * + * Caller must absorb opening parenthesis. + * + * Combining parenthesis handling with the base level of regular expression + * is a trifle forced, but the need to tie the tails of the branches to what + * follows makes it hard to avoid. + */ +static char_u *reg( + int paren, // REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN + int *flagp) +{ + char_u *ret; + char_u *br; + char_u *ender; + int parno = 0; + int flags; + + *flagp = HASWIDTH; // Tentatively. + + if (paren == REG_ZPAREN) { + // Make a ZOPEN node. + if (regnzpar >= NSUBEXP) + EMSG_RET_NULL(_("E50: Too many \\z(")); + parno = regnzpar; + regnzpar++; + ret = regnode(ZOPEN + parno); + } else if (paren == REG_PAREN) { + // Make a MOPEN node. + if (regnpar >= NSUBEXP) + EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); + parno = regnpar; + ++regnpar; + ret = regnode(MOPEN + parno); + } else if (paren == REG_NPAREN) { + // Make a NOPEN node. + ret = regnode(NOPEN); + } else + ret = NULL; + + // Pick up the branches, linking them together. + br = regbranch(&flags); + if (br == NULL) + return NULL; + if (ret != NULL) + regtail(ret, br); // [MZ]OPEN -> first. 
+ else + ret = br; + // If one of the branches can be zero-width, the whole thing can. + // If one of the branches has * at start or matches a line-break, the + // whole thing can. + if (!(flags & HASWIDTH)) + *flagp &= ~HASWIDTH; + *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); + while (peekchr() == Magic('|')) { + skipchr(); + br = regbranch(&flags); + if (br == NULL || reg_toolong) + return NULL; + regtail(ret, br); // BRANCH -> BRANCH. + if (!(flags & HASWIDTH)) + *flagp &= ~HASWIDTH; + *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); + } + + // Make a closing node, and hook it on the end. + ender = regnode( + paren == REG_ZPAREN ? ZCLOSE + parno : + paren == REG_PAREN ? MCLOSE + parno : + paren == REG_NPAREN ? NCLOSE : END); + regtail(ret, ender); + + // Hook the tails of the branches to the closing node. + for (br = ret; br != NULL; br = regnext(br)) + regoptail(br, ender); + + // Check for proper termination. + if (paren != REG_NOPAREN && getchr() != Magic(')')) { + if (paren == REG_ZPAREN) + EMSG_RET_NULL(_("E52: Unmatched \\z(")); + else if (paren == REG_NPAREN) + EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); + else + EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); + } else if (paren == REG_NOPAREN && peekchr() != NUL) { + if (curchr == Magic(')')) + EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); + else + EMSG_RET_NULL(_(e_trailing)); // "Can't happen". + // NOTREACHED + } + // Here we set the flag allowing back references to this set of + // parentheses. + if (paren == REG_PAREN) { + had_endbrace[parno] = true; // have seen the close paren + } + return ret; +} + + +/* + * bt_regcomp() - compile a regular expression into internal code for the + * traditional back track matcher. + * Returns the program in allocated space. Returns NULL for an error. 
 *
 * We can't allocate space until we know how big the compiled form will be,
 * but we can't compile it (and thus know how big it is) until we've got a
 * place to put the code. So we cheat: we compile it twice, once with code
 * generation turned off and size counting turned on, and once "for real".
 * This also means that we don't allocate space until we are sure that the
 * thing really will compile successfully, and we never have to move the
 * code and thus invalidate pointers into it. (Note that it has to be in
 * one piece because free() must be able to free it all.)
 *
 * Whether upper/lower case is to be ignored is decided when executing the
 * program, it does not matter here.
 *
 * Beware that the optimization-preparation code in here knows about some
 * of the structure of the compiled regexp.
 * "re_flags": RE_MAGIC and/or RE_STRING.
 */
static regprog_T *bt_regcomp(char_u *expr, int re_flags)
{
  char_u *scan;
  char_u *longest;
  int len;
  int flags;

  if (expr == NULL) {
    IEMSG_RET_NULL(_(e_null));
  }

  init_class_tab();

  // First pass: determine size, legality.
  // With regcode set to JUST_CALC_SIZE nothing is emitted.
  regcomp_start(expr, re_flags);
  regcode = JUST_CALC_SIZE;
  regc(REGMAGIC);
  if (reg(REG_NOPAREN, &flags) == NULL)
    return NULL;

  // Allocate space.
  // The program is stored directly after the bt_regprog_T header, using the
  // size counted in "regsize".
  bt_regprog_T *r = xmalloc(sizeof(bt_regprog_T) + regsize);
  r->re_in_use = false;

  // Second pass: emit code.
  regcomp_start(expr, re_flags);
  regcode = r->program;
  regc(REGMAGIC);
  if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong) {
    // Parsing failed or a link offset did not fit (reg_toolong): release
    // the program again before returning.
    xfree(r);
    if (reg_toolong)
      EMSG_RET_NULL(_("E339: Pattern too long"));
    return NULL;
  }

  // Dig out information for optimizations.
  r->regstart = NUL; // Worst-case defaults.
  r->reganch = 0;
  r->regmust = NULL;
  r->regmlen = 0;
  r->regflags = regflags;
  // Copy the HASNL and HASLOOKBH flags reported by reg() into the program
  // flags.
  if (flags & HASNL)
    r->regflags |= RF_HASNL;
  if (flags & HASLOOKBH)
    r->regflags |= RF_LOOKBH;
  // Remember whether this pattern has any \z specials in it.
  r->reghasz = re_has_z;
  scan = r->program + 1; // First BRANCH.
  if (OP(regnext(scan)) == END) { // Only one top-level choice.
    scan = OPERAND(scan);

    // Starting-point info.
    if (OP(scan) == BOL || OP(scan) == RE_BOF) {
      r->reganch++;
      scan = regnext(scan);
    }

    // Use the first character of a leading EXACTLY node as "regstart".
    // One node of the kinds listed below may precede that EXACTLY node.
    if (OP(scan) == EXACTLY) {
      r->regstart = utf_ptr2char(OPERAND(scan));
    } else if (OP(scan) == BOW
               || OP(scan) == EOW
               || OP(scan) == NOTHING
               || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
               || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) {
      char_u *regnext_scan = regnext(scan);
      if (OP(regnext_scan) == EXACTLY) {
        r->regstart = utf_ptr2char(OPERAND(regnext_scan));
      }
    }

    // If there's something expensive in the r.e., find the longest
    // literal string that must appear and make it the regmust. Resolve
    // ties in favor of later strings, since the regstart check works
    // with the beginning of the r.e. and avoiding duplication
    // strengthens checking. Not a strong reason, but sufficient in the
    // absence of others.

    // When the r.e. starts with BOW, it is faster to look for a regmust
    // first. Used a lot for "#" and "*" commands. (Added by mool).
    if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
        && !(flags & HASNL)) {
      longest = NULL;
      len = 0;
      // ">=" rather than ">" makes a later string of equal length win.
      for (; scan != NULL; scan = regnext(scan))
        if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len) {
          longest = OPERAND(scan);
          len = (int)STRLEN(OPERAND(scan));
        }
      r->regmust = longest;
      r->regmlen = len;
    }
  }
#ifdef BT_REGEXP_DUMP
  regdump(expr, r);
#endif
  r->engine = &bt_regengine;
  return (regprog_T *)r;
}

/*
 * Check if during the previous call to vim_regcomp the EOL item "$" has been
 * found. This is messy, but it works fine.
 * Returns the current value of the file-level "had_eol" flag.
 */
int vim_regcomp_had_eol(void)
{
  return had_eol;
}

/*
 * Get a number after a backslash that is inside [].
 * When nothing is recognized return a backslash.
+ */ +static int coll_get_char(void) +{ + int64_t nr = -1; + + switch (*regparse++) { + case 'd': nr = getdecchrs(); break; + case 'o': nr = getoctchrs(); break; + case 'x': nr = gethexchrs(2); break; + case 'u': nr = gethexchrs(4); break; + case 'U': nr = gethexchrs(8); break; + } + if (nr < 0 || nr > INT_MAX) { + // If getting the number fails be backwards compatible: the character + // is a backslash. + regparse--; + nr = '\\'; + } + return nr; +} + +/* + * Free a compiled regexp program, returned by bt_regcomp(). + */ +static void bt_regfree(regprog_T *prog) +{ + xfree(prog); +} + +#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) + +/* + * The arguments from BRACE_LIMITS are stored here. They are actually local + * to regmatch(), but they are here to reduce the amount of stack space used + * (it can be called recursively many times). + */ +static long bl_minval; +static long bl_maxval; + +// Save the input line and position in a regsave_T. +static void reg_save(regsave_T *save, garray_T *gap) + FUNC_ATTR_NONNULL_ALL +{ + if (REG_MULTI) { + save->rs_u.pos.col = (colnr_T)(rex.input - rex.line); + save->rs_u.pos.lnum = rex.lnum; + } else { + save->rs_u.ptr = rex.input; + } + save->rs_len = gap->ga_len; +} + +// Restore the input line and position from a regsave_T. +static void reg_restore(regsave_T *save, garray_T *gap) + FUNC_ATTR_NONNULL_ALL +{ + if (REG_MULTI) { + if (rex.lnum != save->rs_u.pos.lnum) { + // only call reg_getline() when the line number changed to save + // a bit of time + rex.lnum = save->rs_u.pos.lnum; + rex.line = reg_getline(rex.lnum); + } + rex.input = rex.line + save->rs_u.pos.col; + } else { + rex.input = save->rs_u.ptr; + } + gap->ga_len = save->rs_len; +} + +// Return true if current position is equal to saved position. 
+static bool reg_save_equal(const regsave_T *save) + FUNC_ATTR_NONNULL_ALL +{ + if (REG_MULTI) { + return rex.lnum == save->rs_u.pos.lnum + && rex.input == rex.line + save->rs_u.pos.col; + } + return rex.input == save->rs_u.ptr; +} + +// Save the sub-expressions before attempting a match. +#define save_se(savep, posp, pp) \ + REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)) + +// After a failed match restore the sub-expressions. +#define restore_se(savep, posp, pp) { \ + if (REG_MULTI) \ + *(posp) = (savep)->se_u.pos; \ + else \ + *(pp) = (savep)->se_u.ptr; } + +/* + * Tentatively set the sub-expression start to the current position (after + * calling regmatch() they will have changed). Need to save the existing + * values for when there is no match. + * Use save_se() to use position (save_se_multi()) or pointer (save_se_one()), + * depending on REG_MULTI. + */ +static void save_se_multi(save_se_T *savep, lpos_T *posp) +{ + savep->se_u.pos = *posp; + posp->lnum = rex.lnum; + posp->col = (colnr_T)(rex.input - rex.line); +} + +static void save_se_one(save_se_T *savep, char_u **pp) +{ + savep->se_u.ptr = *pp; + *pp = rex.input; +} + +/* + * regrepeat - repeatedly match something simple, return how many. + * Advances rex.input (and rex.lnum) to just after the matched chars. + */ + static int +regrepeat( + char_u *p, + long maxcount) // maximum number of matches allowed +{ + long count = 0; + char_u *opnd; + int mask; + int testval = 0; + + char_u *scan = rex.input; // Make local copy of rex.input for speed. + opnd = OPERAND(p); + switch (OP(p)) { + case ANY: + case ANY + ADD_NL: + while (count < maxcount) { + // Matching anything means we continue until end-of-line (or + // end-of-file for ANY + ADD_NL), only limited by maxcount. 
+ while (*scan != NUL && count < maxcount) { + count++; + MB_PTR_ADV(scan); + } + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr || count == maxcount) { + break; + } + count++; // count the line-break + reg_nextline(); + scan = rex.input; + if (got_int) { + break; + } + } + break; + + case IDENT: + case IDENT + ADD_NL: + testval = 1; + FALLTHROUGH; + case SIDENT: + case SIDENT + ADD_NL: + while (count < maxcount) { + if (vim_isIDc(utf_ptr2char(scan)) && (testval || !ascii_isdigit(*scan))) { + MB_PTR_ADV(scan); + } else if (*scan == NUL) { + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr) { + break; + } + reg_nextline(); + scan = rex.input; + if (got_int) { + break; + } + } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { + scan++; + } else { + break; + } + ++count; + } + break; + + case KWORD: + case KWORD + ADD_NL: + testval = 1; + FALLTHROUGH; + case SKWORD: + case SKWORD + ADD_NL: + while (count < maxcount) { + if (vim_iswordp_buf(scan, rex.reg_buf) + && (testval || !ascii_isdigit(*scan))) { + MB_PTR_ADV(scan); + } else if (*scan == NUL) { + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr) { + break; + } + reg_nextline(); + scan = rex.input; + if (got_int) { + break; + } + } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { + scan++; + } else { + break; + } + count++; + } + break; + + case FNAME: + case FNAME + ADD_NL: + testval = 1; + FALLTHROUGH; + case SFNAME: + case SFNAME + ADD_NL: + while (count < maxcount) { + if (vim_isfilec(utf_ptr2char(scan)) && (testval || !ascii_isdigit(*scan))) { + MB_PTR_ADV(scan); + } else if (*scan == NUL) { + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr) { + break; + } + reg_nextline(); + scan = rex.input; + if (got_int) { + break; + } + } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { + scan++; + } else { + break; + } + count++; 
+ } + break; + + case PRINT: + case PRINT + ADD_NL: + testval = 1; + FALLTHROUGH; + case SPRINT: + case SPRINT + ADD_NL: + while (count < maxcount) { + if (*scan == NUL) { + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr) { + break; + } + reg_nextline(); + scan = rex.input; + if (got_int) { + break; + } + } else if (vim_isprintc(utf_ptr2char(scan)) == 1 + && (testval || !ascii_isdigit(*scan))) { + MB_PTR_ADV(scan); + } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { + scan++; + } else { + break; + } + count++; + } + break; + + case WHITE: + case WHITE + ADD_NL: + testval = mask = RI_WHITE; +do_class: + while (count < maxcount) { + int l; + if (*scan == NUL) { + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr) { + break; + } + reg_nextline(); + scan = rex.input; + if (got_int) { + break; + } + } else if ((l = utfc_ptr2len(scan)) > 1) { + if (testval != 0) { + break; + } + scan += l; + } else if ((class_tab[*scan] & mask) == testval) { + scan++; + } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { + scan++; + } else { + break; + } + ++count; + } + break; + + case NWHITE: + case NWHITE + ADD_NL: + mask = RI_WHITE; + goto do_class; + case DIGIT: + case DIGIT + ADD_NL: + testval = mask = RI_DIGIT; + goto do_class; + case NDIGIT: + case NDIGIT + ADD_NL: + mask = RI_DIGIT; + goto do_class; + case HEX: + case HEX + ADD_NL: + testval = mask = RI_HEX; + goto do_class; + case NHEX: + case NHEX + ADD_NL: + mask = RI_HEX; + goto do_class; + case OCTAL: + case OCTAL + ADD_NL: + testval = mask = RI_OCTAL; + goto do_class; + case NOCTAL: + case NOCTAL + ADD_NL: + mask = RI_OCTAL; + goto do_class; + case WORD: + case WORD + ADD_NL: + testval = mask = RI_WORD; + goto do_class; + case NWORD: + case NWORD + ADD_NL: + mask = RI_WORD; + goto do_class; + case HEAD: + case HEAD + ADD_NL: + testval = mask = RI_HEAD; + goto do_class; + case NHEAD: + case NHEAD + ADD_NL: + mask = 
RI_HEAD; + goto do_class; + case ALPHA: + case ALPHA + ADD_NL: + testval = mask = RI_ALPHA; + goto do_class; + case NALPHA: + case NALPHA + ADD_NL: + mask = RI_ALPHA; + goto do_class; + case LOWER: + case LOWER + ADD_NL: + testval = mask = RI_LOWER; + goto do_class; + case NLOWER: + case NLOWER + ADD_NL: + mask = RI_LOWER; + goto do_class; + case UPPER: + case UPPER + ADD_NL: + testval = mask = RI_UPPER; + goto do_class; + case NUPPER: + case NUPPER + ADD_NL: + mask = RI_UPPER; + goto do_class; + + case EXACTLY: + { + int cu, cl; + + // This doesn't do a multi-byte character, because a MULTIBYTECODE + // would have been used for it. It does handle single-byte + // characters, such as latin1. + if (rex.reg_ic) { + cu = mb_toupper(*opnd); + cl = mb_tolower(*opnd); + while (count < maxcount && (*scan == cu || *scan == cl)) { + count++; + scan++; + } + } else { + cu = *opnd; + while (count < maxcount && *scan == cu) { + count++; + scan++; + } + } + break; + } + + case MULTIBYTECODE: + { + int i, len, cf = 0; + + // Safety check (just in case 'encoding' was changed since + // compiling the program). 
+ if ((len = utfc_ptr2len(opnd)) > 1) { + if (rex.reg_ic) { + cf = utf_fold(utf_ptr2char(opnd)); + } + while (count < maxcount && utfc_ptr2len(scan) >= len) { + for (i = 0; i < len; i++) { + if (opnd[i] != scan[i]) { + break; + } + } + if (i < len && (!rex.reg_ic + || utf_fold(utf_ptr2char(scan)) != cf)) { + break; + } + scan += len; + ++count; + } + } + } + break; + + case ANYOF: + case ANYOF + ADD_NL: + testval = 1; + FALLTHROUGH; + + case ANYBUT: + case ANYBUT + ADD_NL: + while (count < maxcount) { + int len; + if (*scan == NUL) { + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr) { + break; + } + reg_nextline(); + scan = rex.input; + if (got_int) { + break; + } + } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { + scan++; + } else if ((len = utfc_ptr2len(scan)) > 1) { + if ((cstrchr(opnd, utf_ptr2char(scan)) == NULL) == testval) { + break; + } + scan += len; + } else { + if ((cstrchr(opnd, *scan) == NULL) == testval) + break; + ++scan; + } + ++count; + } + break; + + case NEWL: + while (count < maxcount + && ((*scan == NUL && rex.lnum <= rex.reg_maxline && !rex.reg_line_lbr + && REG_MULTI) || (*scan == '\n' && rex.reg_line_lbr))) { + count++; + if (rex.reg_line_lbr) { + ADVANCE_REGINPUT(); + } else { + reg_nextline(); + } + scan = rex.input; + if (got_int) { + break; + } + } + break; + + default: // Oh dear. Called inappropriately. + iemsg(_(e_re_corr)); +#ifdef REGEXP_DEBUG + printf("Called regrepeat with op code %d\n", OP(p)); +#endif + break; + } + + rex.input = scan; + + return (int)count; +} + +/* + * Push an item onto the regstack. + * Returns pointer to new item. Returns NULL when out of memory. 
+ */
+static regitem_T *regstack_push(regstate_T state, char_u *scan)
+{
+  regitem_T *rp;
+
+  // "regstack.ga_len" is used as a byte count here (it is advanced by
+  // sizeof(regitem_T) below), so ">> 10" converts it to KiB before it is
+  // compared with "p_mmp".  Over the limit: report e_maxmempat and give up.
+  // NOTE(review): p_mmp is presumably the 'maxmempattern' option -- confirm.
+  if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) {
+    emsg(_(e_maxmempat));
+    return NULL;
+  }
+  // Ask the growarray for room for one more regitem_T.
+  ga_grow(&regstack, sizeof(regitem_T));
+
+  // The new item lives at the current end of the used area.
+  rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
+  rp->rs_state = state;
+  rp->rs_scan = scan;
+
+  regstack.ga_len += sizeof(regitem_T);
+  return rp;
+}
+
+/*
+ * Pop an item from the regstack.
+ * Stores the popped item's "rs_scan" in "*scan" and shrinks the stack by one
+ * regitem_T.  There is no emptiness check here.
+ * NOTE(review): callers are expected to make sure the stack is not empty
+ * first -- confirm for every call site.
+ */
+static void regstack_pop(char_u **scan)
+{
+  regitem_T *rp;
+
+  // The topmost item occupies the last sizeof(regitem_T) bytes in use.
+  rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
+  *scan = rp->rs_scan;
+
+  regstack.ga_len -= sizeof(regitem_T);
+}
+
+// Save the current subexpr to "bp", so that they can be restored
+// later by restore_subexpr().
+static void save_subexpr(regbehind_T *bp)
+  FUNC_ATTR_NONNULL_ALL
+{
+  // When "rex.need_clear_subexpr" is set we don't need to save the values, only
+  // remember that this flag needs to be set again when restoring.
+  bp->save_need_clear_subexpr = rex.need_clear_subexpr;
+  if (!rex.need_clear_subexpr) {
+    for (int i = 0; i < NSUBEXP; i++) {
+      // Multi-line matching keeps positions, single-line matching keeps
+      // pointers; copy whichever representation is in use.
+      if (REG_MULTI) {
+        bp->save_start[i].se_u.pos = rex.reg_startpos[i];
+        bp->save_end[i].se_u.pos = rex.reg_endpos[i];
+      } else {
+        bp->save_start[i].se_u.ptr = rex.reg_startp[i];
+        bp->save_end[i].se_u.ptr = rex.reg_endp[i];
+      }
+    }
+  }
+}
+
+// Restore the subexpr from "bp".
+static void restore_subexpr(regbehind_T *bp)
+  FUNC_ATTR_NONNULL_ALL
+{
+  // Only need to restore saved values when they are not to be cleared.
+ rex.need_clear_subexpr = bp->save_need_clear_subexpr; + if (!rex.need_clear_subexpr) { + for (int i = 0; i < NSUBEXP; i++) { + if (REG_MULTI) { + rex.reg_startpos[i] = bp->save_start[i].se_u.pos; + rex.reg_endpos[i] = bp->save_end[i].se_u.pos; + } else { + rex.reg_startp[i] = bp->save_start[i].se_u.ptr; + rex.reg_endp[i] = bp->save_end[i].se_u.ptr; + } + } + } +} +/// Main matching routine +/// +/// Conceptually the strategy is simple: Check to see whether the current node +/// matches, push an item onto the regstack and loop to see whether the rest +/// matches, and then act accordingly. In practice we make some effort to +/// avoid using the regstack, in particular by going through "ordinary" nodes +/// (that don't need to know whether the rest of the match failed) by a nested +/// loop. +/// +/// Returns true when there is a match. Leaves rex.input and rex.lnum +/// just after the last matched character. +/// Returns false when there is no match. Leaves rex.input and rex.lnum in an +/// undefined state! +static bool regmatch( + char_u *scan, // Current node. + proftime_T *tm, // timeout limit or NULL + int *timed_out) // flag set on timeout or NULL +{ + char_u *next; // Next node. + int op; + int c; + regitem_T *rp; + int no; + int status; // one of the RA_ values: + int tm_count = 0; + + // Make "regstack" and "backpos" empty. They are allocated and freed in + // bt_regexec_both() to reduce malloc()/free() calls. + regstack.ga_len = 0; + backpos.ga_len = 0; + + // Repeat until "regstack" is empty. + for (;; ) { + // Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q". + // Allow interrupting them with CTRL-C. + fast_breakcheck(); + +#ifdef REGEXP_DEBUG + if (scan != NULL && regnarrate) { + mch_errmsg((char *)regprop(scan)); + mch_errmsg("(\n"); + } +#endif + + // Repeat for items that can be matched sequentially, without using the + // regstack. 
+ for (;; ) { + if (got_int || scan == NULL) { + status = RA_FAIL; + break; + } + // Check for timeout once in a 100 times to avoid overhead. + if (tm != NULL && ++tm_count == 100) { + tm_count = 0; + if (profile_passed_limit(*tm)) { + if (timed_out != NULL) { + *timed_out = true; + } + status = RA_FAIL; + break; + } + } + status = RA_CONT; + +#ifdef REGEXP_DEBUG + if (regnarrate) { + mch_errmsg((char *)regprop(scan)); + mch_errmsg("...\n"); + if (re_extmatch_in != NULL) { + int i; + + mch_errmsg(_("External submatches:\n")); + for (i = 0; i < NSUBEXP; i++) { + mch_errmsg(" \""); + if (re_extmatch_in->matches[i] != NULL) + mch_errmsg((char *)re_extmatch_in->matches[i]); + mch_errmsg("\"\n"); + } + } + } +#endif + next = regnext(scan); + + op = OP(scan); + // Check for character class with NL added. + if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI + && *rex.input == NUL && rex.lnum <= rex.reg_maxline) { + reg_nextline(); + } else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n') { + ADVANCE_REGINPUT(); + } else { + if (WITH_NL(op)) { + op -= ADD_NL; + } + c = utf_ptr2char(rex.input); + switch (op) { + case BOL: + if (rex.input != rex.line) { + status = RA_NOMATCH; + } + break; + + case EOL: + if (c != NUL) { + status = RA_NOMATCH; + } + break; + + case RE_BOF: + // We're not at the beginning of the file when below the first + // line where we started, not at the start of the line or we + // didn't start at the first line of the buffer. + if (rex.lnum != 0 || rex.input != rex.line + || (REG_MULTI && rex.reg_firstlnum > 1)) { + status = RA_NOMATCH; + } + break; + + case RE_EOF: + if (rex.lnum != rex.reg_maxline || c != NUL) { + status = RA_NOMATCH; + } + break; + + case CURSOR: + // Check if the buffer is in a window and compare the + // rex.reg_win->w_cursor position to the match position. 
+ if (rex.reg_win == NULL + || (rex.lnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) + || ((colnr_T)(rex.input - rex.line) != + rex.reg_win->w_cursor.col)) { + status = RA_NOMATCH; + } + break; + + case RE_MARK: + // Compare the mark position to the match position. + { + int mark = OPERAND(scan)[0]; + int cmp = OPERAND(scan)[1]; + pos_T *pos; + + pos = getmark_buf(rex.reg_buf, mark, false); + if (pos == NULL // mark doesn't exist + || pos->lnum <= 0) { // mark isn't set in reg_buf + status = RA_NOMATCH; + } else { + const colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum + && pos->col == MAXCOL + ? (colnr_T)STRLEN(reg_getline(pos->lnum - rex.reg_firstlnum)) + : pos->col; + + if (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos_col == (colnr_T)(rex.input - rex.line) + ? (cmp == '<' || cmp == '>') + : (pos_col < (colnr_T)(rex.input - rex.line) + ? cmp != '>' + : cmp != '<')) + : (pos->lnum < rex.lnum + rex.reg_firstlnum + ? cmp != '>' + : cmp != '<')) { + status = RA_NOMATCH; + } + } + } + break; + + case RE_VISUAL: + if (!reg_match_visual()) + status = RA_NOMATCH; + break; + + case RE_LNUM: + assert(rex.lnum + rex.reg_firstlnum >= 0 + && (uintmax_t)(rex.lnum + rex.reg_firstlnum) <= UINT32_MAX); + if (!REG_MULTI + || !re_num_cmp((uint32_t)(rex.lnum + rex.reg_firstlnum), scan)) { + status = RA_NOMATCH; + } + break; + + case RE_COL: + assert(rex.input - rex.line + 1 >= 0 + && (uintmax_t)(rex.input - rex.line + 1) <= UINT32_MAX); + if (!re_num_cmp((uint32_t)(rex.input - rex.line + 1), scan)) { + status = RA_NOMATCH; + } + break; + + case RE_VCOL: + if (!re_num_cmp(win_linetabsize(rex.reg_win == NULL + ? curwin : rex.reg_win, + rex.line, + (colnr_T)(rex.input - rex.line)) + 1, + scan)) { + status = RA_NOMATCH; + } + break; + + case BOW: // \<word; rex.input points to w + if (c == NUL) { // Can't match at end of line + status = RA_NOMATCH; + } else { + // Get class of current and previous char (if it exists). 
+ const int this_class = + mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); + if (this_class <= 1) { + status = RA_NOMATCH; // Not on a word at all. + } else if (reg_prev_class() == this_class) { + status = RA_NOMATCH; // Previous char is in same word. + } + } + break; + + case EOW: // word\>; rex.input points after d + if (rex.input == rex.line) { // Can't match at start of line + status = RA_NOMATCH; + } else { + int this_class, prev_class; + + // Get class of current and previous char (if it exists). + this_class = mb_get_class_tab(rex.input, rex.reg_buf->b_chartab); + prev_class = reg_prev_class(); + if (this_class == prev_class + || prev_class == 0 || prev_class == 1) { + status = RA_NOMATCH; + } + } + break; // Matched with EOW + + case ANY: + // ANY does not match new lines. + if (c == NUL) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case IDENT: + if (!vim_isIDc(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case SIDENT: + if (ascii_isdigit(*rex.input) || !vim_isIDc(c)) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case KWORD: + if (!vim_iswordp_buf(rex.input, rex.reg_buf)) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case SKWORD: + if (ascii_isdigit(*rex.input) + || !vim_iswordp_buf(rex.input, rex.reg_buf)) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case FNAME: + if (!vim_isfilec(c)) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case SFNAME: + if (ascii_isdigit(*rex.input) || !vim_isfilec(c)) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case PRINT: + if (!vim_isprintc(utf_ptr2char(rex.input))) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case SPRINT: + if (ascii_isdigit(*rex.input) || !vim_isprintc(utf_ptr2char(rex.input))) { + status = RA_NOMATCH; + } else { + ADVANCE_REGINPUT(); + } + break; + + case WHITE: + if 
(!ascii_iswhite(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NWHITE: + if (c == NUL || ascii_iswhite(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case DIGIT: + if (!ri_digit(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NDIGIT: + if (c == NUL || ri_digit(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case HEX: + if (!ri_hex(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NHEX: + if (c == NUL || ri_hex(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case OCTAL: + if (!ri_octal(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NOCTAL: + if (c == NUL || ri_octal(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case WORD: + if (!ri_word(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NWORD: + if (c == NUL || ri_word(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case HEAD: + if (!ri_head(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NHEAD: + if (c == NUL || ri_head(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case ALPHA: + if (!ri_alpha(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NALPHA: + if (c == NUL || ri_alpha(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case LOWER: + if (!ri_lower(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NLOWER: + if (c == NUL || ri_lower(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case UPPER: + if (!ri_upper(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case NUPPER: + if (c == NUL || ri_upper(c)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case EXACTLY: + { + int len; + char_u *opnd; + + opnd = OPERAND(scan); + // Inline the first byte, for speed. 
+ if (*opnd != *rex.input + && (!rex.reg_ic)) { + status = RA_NOMATCH; + } else if (*opnd == NUL) { + // match empty string always works; happens when "~" is + // empty. + } else { + if (opnd[1] == NUL && !rex.reg_ic) { + len = 1; // matched a single byte above + } else { + // Need to match first byte again for multi-byte. + len = (int)STRLEN(opnd); + if (cstrncmp(opnd, rex.input, &len) != 0) { + status = RA_NOMATCH; + } + } + // Check for following composing character, unless %C + // follows (skips over all composing chars). + if (status != RA_NOMATCH + && utf_composinglike(rex.input, rex.input + len) + && !rex.reg_icombine + && OP(next) != RE_COMPOSING) { + // raaron: This code makes a composing character get + // ignored, which is the correct behavior (sometimes) + // for voweled Hebrew texts. + status = RA_NOMATCH; + } + if (status != RA_NOMATCH) { + rex.input += len; + } + } + } + break; + + case ANYOF: + case ANYBUT: + if (c == NUL) + status = RA_NOMATCH; + else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; + + case MULTIBYTECODE: + { + int i, len; + + const char_u *opnd = OPERAND(scan); + // Safety check (just in case 'encoding' was changed since + // compiling the program). + if ((len = utfc_ptr2len(opnd)) < 2) { + status = RA_NOMATCH; + break; + } + const int opndc = utf_ptr2char(opnd); + if (utf_iscomposing(opndc)) { + // When only a composing char is given match at any + // position where that composing char appears. + status = RA_NOMATCH; + for (i = 0; rex.input[i] != NUL; + i += utf_ptr2len(rex.input + i)) { + const int inpc = utf_ptr2char(rex.input + i); + if (!utf_iscomposing(inpc)) { + if (i > 0) { + break; + } + } else if (opndc == inpc) { + // Include all following composing chars. 
+ len = i + utfc_ptr2len(rex.input + i); + status = RA_MATCH; + break; + } + } + } else { + for (i = 0; i < len; i++) { + if (opnd[i] != rex.input[i]) { + status = RA_NOMATCH; + break; + } + } + } + rex.input += len; + } + break; + + case RE_COMPOSING: + { + // Skip composing characters. + while (utf_iscomposing(utf_ptr2char(rex.input))) { + MB_CPTR_ADV(rex.input); + } + } + break; + + case NOTHING: + break; + + case BACK: + { + int i; + + // When we run into BACK we need to check if we don't keep + // looping without matching any input. The second and later + // times a BACK is encountered it fails if the input is still + // at the same position as the previous time. + // The positions are stored in "backpos" and found by the + // current value of "scan", the position in the RE program. + backpos_T *bp = (backpos_T *)backpos.ga_data; + for (i = 0; i < backpos.ga_len; ++i) + if (bp[i].bp_scan == scan) + break; + if (i == backpos.ga_len) { + backpos_T *p = GA_APPEND_VIA_PTR(backpos_T, &backpos); + p->bp_scan = scan; + } else if (reg_save_equal(&bp[i].bp_pos)) + // Still at same position as last time, fail. + status = RA_NOMATCH; + + assert(status != RA_FAIL); + if (status != RA_NOMATCH) { + reg_save(&bp[i].bp_pos, &backpos); + } + } + break; + + case MOPEN + 0: // Match start: \zs + case MOPEN + 1: // \( + case MOPEN + 2: + case MOPEN + 3: + case MOPEN + 4: + case MOPEN + 5: + case MOPEN + 6: + case MOPEN + 7: + case MOPEN + 8: + case MOPEN + 9: + { + no = op - MOPEN; + cleanup_subexpr(); + rp = regstack_push(RS_MOPEN, scan); + if (rp == NULL) + status = RA_FAIL; + else { + rp->rs_no = no; + save_se(&rp->rs_un.sesave, &rex.reg_startpos[no], + &rex.reg_startp[no]); + // We simply continue and handle the result when done. + } + } + break; + + case NOPEN: // \%( + case NCLOSE: // \) after \%( + if (regstack_push(RS_NOPEN, scan) == NULL) + status = RA_FAIL; + // We simply continue and handle the result when done. 
+ break; + + case ZOPEN + 1: + case ZOPEN + 2: + case ZOPEN + 3: + case ZOPEN + 4: + case ZOPEN + 5: + case ZOPEN + 6: + case ZOPEN + 7: + case ZOPEN + 8: + case ZOPEN + 9: + { + no = op - ZOPEN; + cleanup_zsubexpr(); + rp = regstack_push(RS_ZOPEN, scan); + if (rp == NULL) + status = RA_FAIL; + else { + rp->rs_no = no; + save_se(&rp->rs_un.sesave, ®_startzpos[no], + ®_startzp[no]); + // We simply continue and handle the result when done. + } + } + break; + + case MCLOSE + 0: // Match end: \ze + case MCLOSE + 1: // \) + case MCLOSE + 2: + case MCLOSE + 3: + case MCLOSE + 4: + case MCLOSE + 5: + case MCLOSE + 6: + case MCLOSE + 7: + case MCLOSE + 8: + case MCLOSE + 9: + { + no = op - MCLOSE; + cleanup_subexpr(); + rp = regstack_push(RS_MCLOSE, scan); + if (rp == NULL) { + status = RA_FAIL; + } else { + rp->rs_no = no; + save_se(&rp->rs_un.sesave, &rex.reg_endpos[no], &rex.reg_endp[no]); + // We simply continue and handle the result when done. + } + } + break; + + case ZCLOSE + 1: // \) after \z( + case ZCLOSE + 2: + case ZCLOSE + 3: + case ZCLOSE + 4: + case ZCLOSE + 5: + case ZCLOSE + 6: + case ZCLOSE + 7: + case ZCLOSE + 8: + case ZCLOSE + 9: + { + no = op - ZCLOSE; + cleanup_zsubexpr(); + rp = regstack_push(RS_ZCLOSE, scan); + if (rp == NULL) + status = RA_FAIL; + else { + rp->rs_no = no; + save_se(&rp->rs_un.sesave, ®_endzpos[no], + ®_endzp[no]); + // We simply continue and handle the result when done. + } + } + break; + + case BACKREF + 1: + case BACKREF + 2: + case BACKREF + 3: + case BACKREF + 4: + case BACKREF + 5: + case BACKREF + 6: + case BACKREF + 7: + case BACKREF + 8: + case BACKREF + 9: + { + int len; + + no = op - BACKREF; + cleanup_subexpr(); + if (!REG_MULTI) { // Single-line regexp + if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL) { + // Backref was not set: Match an empty string. + len = 0; + } else { + // Compare current input with back-ref in the same line. 
+ len = (int)(rex.reg_endp[no] - rex.reg_startp[no]); + if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0) { + status = RA_NOMATCH; + } + } + } else { // Multi-line regexp + if (rex.reg_startpos[no].lnum < 0 || rex.reg_endpos[no].lnum < 0) { + // Backref was not set: Match an empty string. + len = 0; + } else { + if (rex.reg_startpos[no].lnum == rex.lnum + && rex.reg_endpos[no].lnum == rex.lnum) { + // Compare back-ref within the current line. + len = rex.reg_endpos[no].col - rex.reg_startpos[no].col; + if (cstrncmp(rex.line + rex.reg_startpos[no].col, + rex.input, &len) != 0) { + status = RA_NOMATCH; + } + } else { + // Messy situation: Need to compare between two lines. + int r = match_with_backref(rex.reg_startpos[no].lnum, + rex.reg_startpos[no].col, + rex.reg_endpos[no].lnum, + rex.reg_endpos[no].col, + &len); + if (r != RA_MATCH) { + status = r; + } + } + } + } + + // Matched the backref, skip over it. + rex.input += len; + } + break; + + case ZREF + 1: + case ZREF + 2: + case ZREF + 3: + case ZREF + 4: + case ZREF + 5: + case ZREF + 6: + case ZREF + 7: + case ZREF + 8: + case ZREF + 9: + { + cleanup_zsubexpr(); + no = op - ZREF; + if (re_extmatch_in != NULL + && re_extmatch_in->matches[no] != NULL) { + int len = (int)STRLEN(re_extmatch_in->matches[no]); + if (cstrncmp(re_extmatch_in->matches[no], rex.input, &len) != 0) { + status = RA_NOMATCH; + } else { + rex.input += len; + } + } else { + // Backref was not set: Match an empty string. + } + } + break; + + case BRANCH: + { + if (OP(next) != BRANCH) // No choice. + next = OPERAND(scan); // Avoid recursion. 
+ else { + rp = regstack_push(RS_BRANCH, scan); + if (rp == NULL) + status = RA_FAIL; + else + status = RA_BREAK; // rest is below + } + } + break; + + case BRACE_LIMITS: + { + if (OP(next) == BRACE_SIMPLE) { + bl_minval = OPERAND_MIN(scan); + bl_maxval = OPERAND_MAX(scan); + } else if (OP(next) >= BRACE_COMPLEX + && OP(next) < BRACE_COMPLEX + 10) { + no = OP(next) - BRACE_COMPLEX; + brace_min[no] = OPERAND_MIN(scan); + brace_max[no] = OPERAND_MAX(scan); + brace_count[no] = 0; + } else { + internal_error("BRACE_LIMITS"); + status = RA_FAIL; + } + } + break; + + case BRACE_COMPLEX + 0: + case BRACE_COMPLEX + 1: + case BRACE_COMPLEX + 2: + case BRACE_COMPLEX + 3: + case BRACE_COMPLEX + 4: + case BRACE_COMPLEX + 5: + case BRACE_COMPLEX + 6: + case BRACE_COMPLEX + 7: + case BRACE_COMPLEX + 8: + case BRACE_COMPLEX + 9: + { + no = op - BRACE_COMPLEX; + ++brace_count[no]; + + // If not matched enough times yet, try one more + if (brace_count[no] <= (brace_min[no] <= brace_max[no] + ? brace_min[no] : brace_max[no])) { + rp = regstack_push(RS_BRCPLX_MORE, scan); + if (rp == NULL) + status = RA_FAIL; + else { + rp->rs_no = no; + reg_save(&rp->rs_un.regsave, &backpos); + next = OPERAND(scan); + // We continue and handle the result when done. + } + break; + } + + // If matched enough times, may try matching some more + if (brace_min[no] <= brace_max[no]) { + // Range is the normal way around, use longest match + if (brace_count[no] <= brace_max[no]) { + rp = regstack_push(RS_BRCPLX_LONG, scan); + if (rp == NULL) + status = RA_FAIL; + else { + rp->rs_no = no; + reg_save(&rp->rs_un.regsave, &backpos); + next = OPERAND(scan); + // We continue and handle the result when done. + } + } + } else { + // Range is backwards, use shortest match first + if (brace_count[no] <= brace_min[no]) { + rp = regstack_push(RS_BRCPLX_SHORT, scan); + if (rp == NULL) + status = RA_FAIL; + else { + reg_save(&rp->rs_un.regsave, &backpos); + // We continue and handle the result when done. 
+ } + } + } + } + break; + + case BRACE_SIMPLE: + case STAR: + case PLUS: + { + regstar_T rst; + + // Lookahead to avoid useless match attempts when we know + // what character comes next. + if (OP(next) == EXACTLY) { + rst.nextb = *OPERAND(next); + if (rex.reg_ic) { + if (mb_isupper(rst.nextb)) { + rst.nextb_ic = mb_tolower(rst.nextb); + } else { + rst.nextb_ic = mb_toupper(rst.nextb); + } + } else { + rst.nextb_ic = rst.nextb; + } + } else { + rst.nextb = NUL; + rst.nextb_ic = NUL; + } + if (op != BRACE_SIMPLE) { + rst.minval = (op == STAR) ? 0 : 1; + rst.maxval = MAX_LIMIT; + } else { + rst.minval = bl_minval; + rst.maxval = bl_maxval; + } + + // When maxval > minval, try matching as much as possible, up + // to maxval. When maxval < minval, try matching at least the + // minimal number (since the range is backwards, that's also + // maxval!). + rst.count = regrepeat(OPERAND(scan), rst.maxval); + if (got_int) { + status = RA_FAIL; + break; + } + if (rst.minval <= rst.maxval + ? rst.count >= rst.minval : rst.count >= rst.maxval) { + // It could match. Prepare for trying to match what + // follows. The code is below. Parameters are stored in + // a regstar_T on the regstack. + if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { + emsg(_(e_maxmempat)); + status = RA_FAIL; + } else { + ga_grow(®stack, sizeof(regstar_T)); + regstack.ga_len += sizeof(regstar_T); + rp = regstack_push(rst.minval <= rst.maxval + ? RS_STAR_LONG : RS_STAR_SHORT, scan); + if (rp == NULL) + status = RA_FAIL; + else { + *(((regstar_T *)rp) - 1) = rst; + status = RA_BREAK; // skip the restore bits + } + } + } else + status = RA_NOMATCH; + + } + break; + + case NOMATCH: + case MATCH: + case SUBPAT: + rp = regstack_push(RS_NOMATCH, scan); + if (rp == NULL) + status = RA_FAIL; + else { + rp->rs_no = op; + reg_save(&rp->rs_un.regsave, &backpos); + next = OPERAND(scan); + // We continue and handle the result when done. 
+ } + break; + + case BEHIND: + case NOBEHIND: + // Need a bit of room to store extra positions. + if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) { + emsg(_(e_maxmempat)); + status = RA_FAIL; + } else { + ga_grow(®stack, sizeof(regbehind_T)); + regstack.ga_len += sizeof(regbehind_T); + rp = regstack_push(RS_BEHIND1, scan); + if (rp == NULL) + status = RA_FAIL; + else { + // Need to save the subexpr to be able to restore them + // when there is a match but we don't use it. + save_subexpr(((regbehind_T *)rp) - 1); + + rp->rs_no = op; + reg_save(&rp->rs_un.regsave, &backpos); + // First try if what follows matches. If it does then we + // check the behind match by looping. + } + } + break; + + case BHPOS: + if (REG_MULTI) { + if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line) + || behind_pos.rs_u.pos.lnum != rex.lnum) { + status = RA_NOMATCH; + } + } else if (behind_pos.rs_u.ptr != rex.input) { + status = RA_NOMATCH; + } + break; + + case NEWL: + if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline + || rex.reg_line_lbr) && (c != '\n' || !rex.reg_line_lbr)) { + status = RA_NOMATCH; + } else if (rex.reg_line_lbr) { + ADVANCE_REGINPUT(); + } else { + reg_nextline(); + } + break; + + case END: + status = RA_MATCH; // Success! + break; + + default: + iemsg(_(e_re_corr)); +#ifdef REGEXP_DEBUG + printf("Illegal op code %d\n", op); +#endif + status = RA_FAIL; + break; + } + } + + // If we can't continue sequentially, break the inner loop. + if (status != RA_CONT) + break; + + // Continue in inner loop, advance to next item. + scan = next; + + } // end of inner loop + + // If there is something on the regstack execute the code for the state. + // If the state is popped then loop and use the older state. + while (!GA_EMPTY(®stack) && status != RA_FAIL) { + rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1; + switch (rp->rs_state) { + case RS_NOPEN: + // Result is passed on as-is, simply pop the state. 
+ regstack_pop(&scan); + break; + + case RS_MOPEN: + // Pop the state. Restore pointers when there is no match. + if (status == RA_NOMATCH) { + restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no], + &rex.reg_startp[rp->rs_no]); + } + regstack_pop(&scan); + break; + + case RS_ZOPEN: + // Pop the state. Restore pointers when there is no match. + if (status == RA_NOMATCH) + restore_se(&rp->rs_un.sesave, ®_startzpos[rp->rs_no], + ®_startzp[rp->rs_no]); + regstack_pop(&scan); + break; + + case RS_MCLOSE: + // Pop the state. Restore pointers when there is no match. + if (status == RA_NOMATCH) { + restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no], + &rex.reg_endp[rp->rs_no]); + } + regstack_pop(&scan); + break; + + case RS_ZCLOSE: + // Pop the state. Restore pointers when there is no match. + if (status == RA_NOMATCH) + restore_se(&rp->rs_un.sesave, ®_endzpos[rp->rs_no], + ®_endzp[rp->rs_no]); + regstack_pop(&scan); + break; + + case RS_BRANCH: + if (status == RA_MATCH) + // this branch matched, use it + regstack_pop(&scan); + else { + if (status != RA_BREAK) { + // After a non-matching branch: try next one. + reg_restore(&rp->rs_un.regsave, &backpos); + scan = rp->rs_scan; + } + if (scan == NULL || OP(scan) != BRANCH) { + // no more branches, didn't find a match + status = RA_NOMATCH; + regstack_pop(&scan); + } else { + // Prepare to try a branch. + rp->rs_scan = regnext(scan); + reg_save(&rp->rs_un.regsave, &backpos); + scan = OPERAND(scan); + } + } + break; + + case RS_BRCPLX_MORE: + // Pop the state. Restore pointers when there is no match. + if (status == RA_NOMATCH) { + reg_restore(&rp->rs_un.regsave, &backpos); + --brace_count[rp->rs_no]; // decrement match count + } + regstack_pop(&scan); + break; + + case RS_BRCPLX_LONG: + // Pop the state. Restore pointers when there is no match. + if (status == RA_NOMATCH) { + // There was no match, but we did find enough matches. 
+ reg_restore(&rp->rs_un.regsave, &backpos); + --brace_count[rp->rs_no]; + // continue with the items after "\{}" + status = RA_CONT; + } + regstack_pop(&scan); + if (status == RA_CONT) + scan = regnext(scan); + break; + + case RS_BRCPLX_SHORT: + // Pop the state. Restore pointers when there is no match. + if (status == RA_NOMATCH) + // There was no match, try to match one more item. + reg_restore(&rp->rs_un.regsave, &backpos); + regstack_pop(&scan); + if (status == RA_NOMATCH) { + scan = OPERAND(scan); + status = RA_CONT; + } + break; + + case RS_NOMATCH: + // Pop the state. If the operand matches for NOMATCH or + // doesn't match for MATCH/SUBPAT, we fail. Otherwise backup, + // except for SUBPAT, and continue with the next item. + if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH)) + status = RA_NOMATCH; + else { + status = RA_CONT; + if (rp->rs_no != SUBPAT) // zero-width + reg_restore(&rp->rs_un.regsave, &backpos); + } + regstack_pop(&scan); + if (status == RA_CONT) + scan = regnext(scan); + break; + + case RS_BEHIND1: + if (status == RA_NOMATCH) { + regstack_pop(&scan); + regstack.ga_len -= sizeof(regbehind_T); + } else { + // The stuff after BEHIND/NOBEHIND matches. Now try if + // the behind part does (not) match before the current + // position in the input. This must be done at every + // position in the input and checking if the match ends at + // the current position. + + // save the position after the found match for next + reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); + + // Start looking for a match with operand at the current + // position. Go back one character until we find the + // result, hitting the start of the line or the previous + // line (for multi-line matching). + // Set behind_pos to where the match should end, BHPOS + // will match it. Save the current value. 
+ (((regbehind_T *)rp) - 1)->save_behind = behind_pos; + behind_pos = rp->rs_un.regsave; + + rp->rs_state = RS_BEHIND2; + + reg_restore(&rp->rs_un.regsave, &backpos); + scan = OPERAND(rp->rs_scan) + 4; + } + break; + + case RS_BEHIND2: + // Looping for BEHIND / NOBEHIND match. + if (status == RA_MATCH && reg_save_equal(&behind_pos)) { + // found a match that ends where "next" started + behind_pos = (((regbehind_T *)rp) - 1)->save_behind; + if (rp->rs_no == BEHIND) + reg_restore(&(((regbehind_T *)rp) - 1)->save_after, + &backpos); + else { + // But we didn't want a match. Need to restore the + // subexpr, because what follows matched, so they have + // been set. + status = RA_NOMATCH; + restore_subexpr(((regbehind_T *)rp) - 1); + } + regstack_pop(&scan); + regstack.ga_len -= sizeof(regbehind_T); + } else { + long limit; + + // No match or a match that doesn't end where we want it: Go + // back one character. May go to previous line once. + no = OK; + limit = OPERAND_MIN(rp->rs_scan); + if (REG_MULTI) { + if (limit > 0 + && ((rp->rs_un.regsave.rs_u.pos.lnum + < behind_pos.rs_u.pos.lnum + ? 
(colnr_T)STRLEN(rex.line) + : behind_pos.rs_u.pos.col) + - rp->rs_un.regsave.rs_u.pos.col >= limit)) + no = FAIL; + else if (rp->rs_un.regsave.rs_u.pos.col == 0) { + if (rp->rs_un.regsave.rs_u.pos.lnum + < behind_pos.rs_u.pos.lnum + || reg_getline( + --rp->rs_un.regsave.rs_u.pos.lnum) + == NULL) + no = FAIL; + else { + reg_restore(&rp->rs_un.regsave, &backpos); + rp->rs_un.regsave.rs_u.pos.col = + (colnr_T)STRLEN(rex.line); + } + } else { + const char_u *const line = + reg_getline(rp->rs_un.regsave.rs_u.pos.lnum); + + rp->rs_un.regsave.rs_u.pos.col -= + utf_head_off(line, + line + rp->rs_un.regsave.rs_u.pos.col - 1) + + 1; + } + } else { + if (rp->rs_un.regsave.rs_u.ptr == rex.line) { + no = FAIL; + } else { + MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr); + if (limit > 0 + && (behind_pos.rs_u.ptr - rp->rs_un.regsave.rs_u.ptr) > (ptrdiff_t)limit) { + no = FAIL; + } + } + } + if (no == OK) { + // Advanced, prepare for finding match again. + reg_restore(&rp->rs_un.regsave, &backpos); + scan = OPERAND(rp->rs_scan) + 4; + if (status == RA_MATCH) { + // We did match, so subexpr may have been changed, + // need to restore them for the next try. + status = RA_NOMATCH; + restore_subexpr(((regbehind_T *)rp) - 1); + } + } else { + // Can't advance. For NOBEHIND that's a match. + behind_pos = (((regbehind_T *)rp) - 1)->save_behind; + if (rp->rs_no == NOBEHIND) { + reg_restore(&(((regbehind_T *)rp) - 1)->save_after, + &backpos); + status = RA_MATCH; + } else { + // We do want a proper match. Need to restore the + // subexpr if we had a match, because they may have + // been set. 
+ if (status == RA_MATCH) { + status = RA_NOMATCH; + restore_subexpr(((regbehind_T *)rp) - 1); + } + } + regstack_pop(&scan); + regstack.ga_len -= sizeof(regbehind_T); + } + } + break; + + case RS_STAR_LONG: + case RS_STAR_SHORT: + { + regstar_T *rst = ((regstar_T *)rp) - 1; + + if (status == RA_MATCH) { + regstack_pop(&scan); + regstack.ga_len -= sizeof(regstar_T); + break; + } + + // Tried once already, restore input pointers. + if (status != RA_BREAK) + reg_restore(&rp->rs_un.regsave, &backpos); + + // Repeat until we found a position where it could match. + for (;; ) { + if (status != RA_BREAK) { + // Tried first position already, advance. + if (rp->rs_state == RS_STAR_LONG) { + // Trying for longest match, but couldn't or + // didn't match -- back up one char. + if (--rst->count < rst->minval) + break; + if (rex.input == rex.line) { + // backup to last char of previous line + rex.lnum--; + rex.line = reg_getline(rex.lnum); + // Just in case regrepeat() didn't count right. + if (rex.line == NULL) { + break; + } + rex.input = rex.line + STRLEN(rex.line); + fast_breakcheck(); + } else { + MB_PTR_BACK(rex.line, rex.input); + } + } else { + // Range is backwards, use shortest match first. + // Careful: maxval and minval are exchanged! + // Couldn't or didn't match: try advancing one + // char. + if (rst->count == rst->minval + || regrepeat(OPERAND(rp->rs_scan), 1L) == 0) + break; + ++rst->count; + } + if (got_int) + break; + } else + status = RA_NOMATCH; + + // If it could match, try it. + if (rst->nextb == NUL || *rex.input == rst->nextb + || *rex.input == rst->nextb_ic) { + reg_save(&rp->rs_un.regsave, &backpos); + scan = regnext(rp->rs_scan); + status = RA_CONT; + break; + } + } + if (status != RA_CONT) { + // Failed. 
+ regstack_pop(&scan); + regstack.ga_len -= sizeof(regstar_T); + status = RA_NOMATCH; + } + } + break; + } + + // If we want to continue the inner loop or didn't pop a state + // continue matching loop + if (status == RA_CONT || rp == (regitem_T *) + ((char *)regstack.ga_data + regstack.ga_len) - 1) + break; + } + + // May need to continue with the inner loop, starting at "scan". + if (status == RA_CONT) + continue; + + // If the regstack is empty or something failed we are done. + if (GA_EMPTY(&regstack) || status == RA_FAIL) { + if (scan == NULL) { + // We get here only if there's trouble -- normally "case END" is + // the terminating point. + iemsg(_(e_re_corr)); +#ifdef REGEXP_DEBUG + printf("Premature EOL\n"); +#endif + } + return status == RA_MATCH; + } + + } // End of loop until the regstack is empty. + + // NOTREACHED +} + +/// Try match of "prog" with at rex.line["col"]. +/// @returns 0 for failure, or number of lines contained in the match. +static long regtry(bt_regprog_T *prog, + colnr_T col, + proftime_T *tm, // timeout limit or NULL + int *timed_out) // flag set on timeout or NULL +{ + rex.input = rex.line + col; + rex.need_clear_subexpr = true; + // Clear the external match subpointers if necessaey. + rex.need_clear_zsubexpr = (prog->reghasz == REX_SET); + + if (regmatch(prog->program + 1, tm, timed_out) == 0) { + return 0; + } + + cleanup_subexpr(); + if (REG_MULTI) { + if (rex.reg_startpos[0].lnum < 0) { + rex.reg_startpos[0].lnum = 0; + rex.reg_startpos[0].col = col; + } + if (rex.reg_endpos[0].lnum < 0) { + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = (int)(rex.input - rex.line); + } else { + // Use line number of "\ze". + rex.lnum = rex.reg_endpos[0].lnum; + } + } else { + if (rex.reg_startp[0] == NULL) { + rex.reg_startp[0] = rex.line + col; + } + if (rex.reg_endp[0] == NULL) { + rex.reg_endp[0] = rex.input; + } + } + // Package any found \z(...\) matches for export. Default is none.
+ unref_extmatch(re_extmatch_out); + re_extmatch_out = NULL; + + if (prog->reghasz == REX_SET) { + int i; + + cleanup_zsubexpr(); + re_extmatch_out = make_extmatch(); + for (i = 0; i < NSUBEXP; i++) { + if (REG_MULTI) { + // Only accept single line matches. + if (reg_startzpos[i].lnum >= 0 + && reg_endzpos[i].lnum == reg_startzpos[i].lnum + && reg_endzpos[i].col >= reg_startzpos[i].col) { + re_extmatch_out->matches[i] = + vim_strnsave(reg_getline(reg_startzpos[i].lnum) + + reg_startzpos[i].col, + reg_endzpos[i].col + - reg_startzpos[i].col); + } + } else { + if (reg_startzp[i] != NULL && reg_endzp[i] != NULL) + re_extmatch_out->matches[i] = + vim_strnsave(reg_startzp[i], reg_endzp[i] - reg_startzp[i]); + } + } + } + return 1 + rex.lnum; +} + +/// Match a regexp against a string ("line" points to the string) or multiple +/// lines (if "line" is NULL, use reg_getline()). +/// @return 0 for failure, or number of lines contained in the match. +static long bt_regexec_both(char_u *line, + colnr_T col, // column to start search + proftime_T *tm, // timeout limit or NULL + int *timed_out) // flag set on timeout or NULL +{ + bt_regprog_T *prog; + char_u *s; + long retval = 0L; + + // Create "regstack" and "backpos" if they are not allocated yet. + // We allocate *_INITIAL amount of bytes first and then set the grow size + // to much bigger value to avoid many malloc calls in case of deep regular + // expressions. + if (regstack.ga_data == NULL) { + // Use an item size of 1 byte, since we push different things + // onto the regstack. 
+ ga_init(&regstack, 1, REGSTACK_INITIAL); + ga_grow(&regstack, REGSTACK_INITIAL); + ga_set_growsize(&regstack, REGSTACK_INITIAL * 8); + } + + if (backpos.ga_data == NULL) { + ga_init(&backpos, sizeof(backpos_T), BACKPOS_INITIAL); + ga_grow(&backpos, BACKPOS_INITIAL); + ga_set_growsize(&backpos, BACKPOS_INITIAL * 8); + } + + if (REG_MULTI) { + prog = (bt_regprog_T *)rex.reg_mmatch->regprog; + line = reg_getline((linenr_T)0); + rex.reg_startpos = rex.reg_mmatch->startpos; + rex.reg_endpos = rex.reg_mmatch->endpos; + } else { + prog = (bt_regprog_T *)rex.reg_match->regprog; + rex.reg_startp = rex.reg_match->startp; + rex.reg_endp = rex.reg_match->endp; + } + + // Be paranoid... + if (prog == NULL || line == NULL) { + iemsg(_(e_null)); + goto theend; + } + + // Check validity of program. + if (prog_magic_wrong()) + goto theend; + + // If the start column is past the maximum column: no need to try. + if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { + goto theend; + } + + // If pattern contains "\c" or "\C": overrule value of rex.reg_ic + if (prog->regflags & RF_ICASE) { + rex.reg_ic = true; + } else if (prog->regflags & RF_NOICASE) { + rex.reg_ic = false; + } + + // If pattern contains "\Z" overrule value of rex.reg_icombine + if (prog->regflags & RF_ICOMBINE) { + rex.reg_icombine = true; + } + + // If there is a "must appear" string, look for it. + if (prog->regmust != NULL) { + int c = utf_ptr2char(prog->regmust); + s = line + col; + + // This is used very often, esp. for ":global". Use two versions of + // the loop to avoid overhead of conditions. + if (!rex.reg_ic) { + while ((s = vim_strchr(s, c)) != NULL) { + if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { + break; // Found it. + } + MB_PTR_ADV(s); + } + } else { + while ((s = cstrchr(s, c)) != NULL) { + if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { + break; // Found it. + } + MB_PTR_ADV(s); + } + } + if (s == NULL) { // Not present.
+ goto theend; + } + } + + rex.line = line; + rex.lnum = 0; + reg_toolong = false; + + // Simplest case: Anchored match need be tried only once. + if (prog->reganch) { + int c = utf_ptr2char(rex.line + col); + if (prog->regstart == NUL + || prog->regstart == c + || (rex.reg_ic + && (utf_fold(prog->regstart) == utf_fold(c) + || (c < 255 && prog->regstart < 255 + && mb_tolower(prog->regstart) == mb_tolower(c))))) { + retval = regtry(prog, col, tm, timed_out); + } else { + retval = 0; + } + } else { + int tm_count = 0; + // Messy cases: unanchored match. + while (!got_int) { + if (prog->regstart != NUL) { + // Skip until the char we know it must start with. + s = cstrchr(rex.line + col, prog->regstart); + if (s == NULL) { + retval = 0; + break; + } + col = (int)(s - rex.line); + } + + // Check for maximum column to try. + if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) { + retval = 0; + break; + } + + retval = regtry(prog, col, tm, timed_out); + if (retval > 0) { + break; + } + + // if not currently on the first line, get it again + if (rex.lnum != 0) { + rex.lnum = 0; + rex.line = reg_getline((linenr_T)0); + } + if (rex.line[col] == NUL) { + break; + } + col += utfc_ptr2len(rex.line + col); + // Check for timeout once in a twenty times to avoid overhead. + if (tm != NULL && ++tm_count == 20) { + tm_count = 0; + if (profile_passed_limit(*tm)) { + if (timed_out != NULL) { + *timed_out = true; + } + break; + } + } + } + } + +theend: + // Free "reg_tofree" when it's a bit big. + // Free regstack and backpos if they are bigger than their initial size. + if (reg_tofreelen > 400) { + XFREE_CLEAR(reg_tofree); + } + if (regstack.ga_maxlen > REGSTACK_INITIAL) + ga_clear(&regstack); + if (backpos.ga_maxlen > BACKPOS_INITIAL) + ga_clear(&backpos); + + if (retval > 0) { + // Make sure the end is never before the start. Can happen when \zs + // and \ze are used.
+ if (REG_MULTI) { + const lpos_T *const start = &rex.reg_mmatch->startpos[0]; + const lpos_T *const end = &rex.reg_mmatch->endpos[0]; + + if (end->lnum < start->lnum + || (end->lnum == start->lnum && end->col < start->col)) { + rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0]; + } + } else { + if (rex.reg_match->endp[0] < rex.reg_match->startp[0]) { + rex.reg_match->endp[0] = rex.reg_match->startp[0]; + } + } + } + + return retval; +} + +/* + * Match a regexp against a string. + * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). + * Uses curbuf for line count and 'iskeyword'. + * If "line_lbr" is true, consider a "\n" in "line" to be a line break. + * + * Returns 0 for failure, number of lines contained in the match otherwise. + */ +static int bt_regexec_nl( + regmatch_T *rmp, + char_u *line, // string to match against + colnr_T col, // column to start looking for match + bool line_lbr) +{ + rex.reg_match = rmp; + rex.reg_mmatch = NULL; + rex.reg_maxline = 0; + rex.reg_line_lbr = line_lbr; + rex.reg_buf = curbuf; + rex.reg_win = NULL; + rex.reg_ic = rmp->rm_ic; + rex.reg_icombine = false; + rex.reg_maxcol = 0; + + long r = bt_regexec_both(line, col, NULL, NULL); + assert(r <= INT_MAX); + return (int)r; +} + + +/// Matches a regexp against multiple lines. +/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). +/// Uses curbuf for line count and 'iskeyword'. +/// +/// @param win Window in which to search or NULL +/// @param buf Buffer in which to search +/// @param lnum Number of line to start looking for match +/// @param col Column to start looking for match +/// @param tm Timeout limit or NULL +/// +/// @return zero if there is no match and number of lines contained in the match +/// otherwise. 
+static long bt_regexec_multi(regmmatch_T *rmp, win_T *win, buf_T *buf, + linenr_T lnum, colnr_T col, + proftime_T *tm, int *timed_out) +{ + rex.reg_match = NULL; + rex.reg_mmatch = rmp; + rex.reg_buf = buf; + rex.reg_win = win; + rex.reg_firstlnum = lnum; + rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; + rex.reg_line_lbr = false; + rex.reg_ic = rmp->rmm_ic; + rex.reg_icombine = false; + rex.reg_maxcol = rmp->rmm_maxcol; + + return bt_regexec_both(NULL, col, tm, timed_out); +} + +/* + * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. + */ +static int re_num_cmp(uint32_t val, char_u *scan) +{ + uint32_t n = (uint32_t)OPERAND_MIN(scan); + + if (OPERAND_CMP(scan) == '>') + return val > n; + if (OPERAND_CMP(scan) == '<') + return val < n; + return val == n; +} + +#ifdef BT_REGEXP_DUMP + +/* + * regdump - dump a regexp onto stdout in vaguely comprehensible form + */ +static void regdump(char_u *pattern, bt_regprog_T *r) +{ + char_u *s; + int op = EXACTLY; // Arbitrary non-END op. + char_u *next; + char_u *end = NULL; + FILE *f; + +#ifdef BT_REGEXP_LOG + f = fopen("bt_regexp_log.log", "a"); +#else + f = stdout; +#endif + if (f == NULL) + return; + fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", + pattern); + + s = r->program + 1; + // Loop until we find the END that isn't before a referred next (an END + // can also appear in a NOMATCH operand). + while (op != END || s <= end) { + op = OP(s); + fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); // Where, what. + next = regnext(s); + if (next == NULL) // Next ptr. 
+ fprintf(f, "(0)"); + else + fprintf(f, "(%d)", (int)((s - r->program) + (next - s))); + if (end < next) + end = next; + if (op == BRACE_LIMITS) { + // Two ints + fprintf(f, " minval %" PRId64 ", maxval %" PRId64, + (int64_t)OPERAND_MIN(s), (int64_t)OPERAND_MAX(s)); + s += 8; + } else if (op == BEHIND || op == NOBEHIND) { + // one int + fprintf(f, " count %" PRId64, (int64_t)OPERAND_MIN(s)); + s += 4; + } else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL) { + // one int plus comparator + fprintf(f, " count %" PRId64, (int64_t)OPERAND_MIN(s)); + s += 5; + } + s += 3; + if (op == ANYOF || op == ANYOF + ADD_NL + || op == ANYBUT || op == ANYBUT + ADD_NL + || op == EXACTLY) { + // Literal string, where present. + fprintf(f, "\nxxxxxxxxx\n"); + while (*s != NUL) + fprintf(f, "%c", *s++); + fprintf(f, "\nxxxxxxxxx\n"); + s++; + } + fprintf(f, "\r\n"); + } + + // Header fields of interest. + if (r->regstart != NUL) + fprintf(f, "start `%s' 0x%x; ", r->regstart < 256 + ? (char *)transchar(r->regstart) + : "multibyte", r->regstart); + if (r->reganch) + fprintf(f, "anchored; "); + if (r->regmust != NULL) + fprintf(f, "must have \"%s\"", r->regmust); + fprintf(f, "\r\n"); + +#ifdef BT_REGEXP_LOG + fclose(f); +#endif +} +#endif // BT_REGEXP_DUMP + +#ifdef REGEXP_DEBUG + +/* + * regprop - printable representation of opcode + */ +static char_u *regprop(char_u *op) +{ + char *p; + static char buf[50]; + + STRCPY(buf, ":"); + + switch ((int)OP(op)) { + case BOL: + p = "BOL"; + break; + case EOL: + p = "EOL"; + break; + case RE_BOF: + p = "BOF"; + break; + case RE_EOF: + p = "EOF"; + break; + case CURSOR: + p = "CURSOR"; + break; + case RE_VISUAL: + p = "RE_VISUAL"; + break; + case RE_LNUM: + p = "RE_LNUM"; + break; + case RE_MARK: + p = "RE_MARK"; + break; + case RE_COL: + p = "RE_COL"; + break; + case RE_VCOL: + p = "RE_VCOL"; + break; + case BOW: + p = "BOW"; + break; + case EOW: + p = "EOW"; + break; + case ANY: + p = "ANY"; + break; + case ANY + ADD_NL: + p = "ANY+NL"; + 
break; + case ANYOF: + p = "ANYOF"; + break; + case ANYOF + ADD_NL: + p = "ANYOF+NL"; + break; + case ANYBUT: + p = "ANYBUT"; + break; + case ANYBUT + ADD_NL: + p = "ANYBUT+NL"; + break; + case IDENT: + p = "IDENT"; + break; + case IDENT + ADD_NL: + p = "IDENT+NL"; + break; + case SIDENT: + p = "SIDENT"; + break; + case SIDENT + ADD_NL: + p = "SIDENT+NL"; + break; + case KWORD: + p = "KWORD"; + break; + case KWORD + ADD_NL: + p = "KWORD+NL"; + break; + case SKWORD: + p = "SKWORD"; + break; + case SKWORD + ADD_NL: + p = "SKWORD+NL"; + break; + case FNAME: + p = "FNAME"; + break; + case FNAME + ADD_NL: + p = "FNAME+NL"; + break; + case SFNAME: + p = "SFNAME"; + break; + case SFNAME + ADD_NL: + p = "SFNAME+NL"; + break; + case PRINT: + p = "PRINT"; + break; + case PRINT + ADD_NL: + p = "PRINT+NL"; + break; + case SPRINT: + p = "SPRINT"; + break; + case SPRINT + ADD_NL: + p = "SPRINT+NL"; + break; + case WHITE: + p = "WHITE"; + break; + case WHITE + ADD_NL: + p = "WHITE+NL"; + break; + case NWHITE: + p = "NWHITE"; + break; + case NWHITE + ADD_NL: + p = "NWHITE+NL"; + break; + case DIGIT: + p = "DIGIT"; + break; + case DIGIT + ADD_NL: + p = "DIGIT+NL"; + break; + case NDIGIT: + p = "NDIGIT"; + break; + case NDIGIT + ADD_NL: + p = "NDIGIT+NL"; + break; + case HEX: + p = "HEX"; + break; + case HEX + ADD_NL: + p = "HEX+NL"; + break; + case NHEX: + p = "NHEX"; + break; + case NHEX + ADD_NL: + p = "NHEX+NL"; + break; + case OCTAL: + p = "OCTAL"; + break; + case OCTAL + ADD_NL: + p = "OCTAL+NL"; + break; + case NOCTAL: + p = "NOCTAL"; + break; + case NOCTAL + ADD_NL: + p = "NOCTAL+NL"; + break; + case WORD: + p = "WORD"; + break; + case WORD + ADD_NL: + p = "WORD+NL"; + break; + case NWORD: + p = "NWORD"; + break; + case NWORD + ADD_NL: + p = "NWORD+NL"; + break; + case HEAD: + p = "HEAD"; + break; + case HEAD + ADD_NL: + p = "HEAD+NL"; + break; + case NHEAD: + p = "NHEAD"; + break; + case NHEAD + ADD_NL: + p = "NHEAD+NL"; + break; + case ALPHA: + p = "ALPHA"; + break; + case 
ALPHA + ADD_NL: + p = "ALPHA+NL"; + break; + case NALPHA: + p = "NALPHA"; + break; + case NALPHA + ADD_NL: + p = "NALPHA+NL"; + break; + case LOWER: + p = "LOWER"; + break; + case LOWER + ADD_NL: + p = "LOWER+NL"; + break; + case NLOWER: + p = "NLOWER"; + break; + case NLOWER + ADD_NL: + p = "NLOWER+NL"; + break; + case UPPER: + p = "UPPER"; + break; + case UPPER + ADD_NL: + p = "UPPER+NL"; + break; + case NUPPER: + p = "NUPPER"; + break; + case NUPPER + ADD_NL: + p = "NUPPER+NL"; + break; + case BRANCH: + p = "BRANCH"; + break; + case EXACTLY: + p = "EXACTLY"; + break; + case NOTHING: + p = "NOTHING"; + break; + case BACK: + p = "BACK"; + break; + case END: + p = "END"; + break; + case MOPEN + 0: + p = "MATCH START"; + break; + case MOPEN + 1: + case MOPEN + 2: + case MOPEN + 3: + case MOPEN + 4: + case MOPEN + 5: + case MOPEN + 6: + case MOPEN + 7: + case MOPEN + 8: + case MOPEN + 9: + sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN); + p = NULL; + break; + case MCLOSE + 0: + p = "MATCH END"; + break; + case MCLOSE + 1: + case MCLOSE + 2: + case MCLOSE + 3: + case MCLOSE + 4: + case MCLOSE + 5: + case MCLOSE + 6: + case MCLOSE + 7: + case MCLOSE + 8: + case MCLOSE + 9: + sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE); + p = NULL; + break; + case BACKREF + 1: + case BACKREF + 2: + case BACKREF + 3: + case BACKREF + 4: + case BACKREF + 5: + case BACKREF + 6: + case BACKREF + 7: + case BACKREF + 8: + case BACKREF + 9: + sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF); + p = NULL; + break; + case NOPEN: + p = "NOPEN"; + break; + case NCLOSE: + p = "NCLOSE"; + break; + case ZOPEN + 1: + case ZOPEN + 2: + case ZOPEN + 3: + case ZOPEN + 4: + case ZOPEN + 5: + case ZOPEN + 6: + case ZOPEN + 7: + case ZOPEN + 8: + case ZOPEN + 9: + sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN); + p = NULL; + break; + case ZCLOSE + 1: + case ZCLOSE + 2: + case ZCLOSE + 3: + case ZCLOSE + 4: + case ZCLOSE + 5: + case ZCLOSE + 6: + case ZCLOSE + 7: + case 
ZCLOSE + 8: + case ZCLOSE + 9: + sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE); + p = NULL; + break; + case ZREF + 1: + case ZREF + 2: + case ZREF + 3: + case ZREF + 4: + case ZREF + 5: + case ZREF + 6: + case ZREF + 7: + case ZREF + 8: + case ZREF + 9: + sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF); + p = NULL; + break; + case STAR: + p = "STAR"; + break; + case PLUS: + p = "PLUS"; + break; + case NOMATCH: + p = "NOMATCH"; + break; + case MATCH: + p = "MATCH"; + break; + case BEHIND: + p = "BEHIND"; + break; + case NOBEHIND: + p = "NOBEHIND"; + break; + case SUBPAT: + p = "SUBPAT"; + break; + case BRACE_LIMITS: + p = "BRACE_LIMITS"; + break; + case BRACE_SIMPLE: + p = "BRACE_SIMPLE"; + break; + case BRACE_COMPLEX + 0: + case BRACE_COMPLEX + 1: + case BRACE_COMPLEX + 2: + case BRACE_COMPLEX + 3: + case BRACE_COMPLEX + 4: + case BRACE_COMPLEX + 5: + case BRACE_COMPLEX + 6: + case BRACE_COMPLEX + 7: + case BRACE_COMPLEX + 8: + case BRACE_COMPLEX + 9: + sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX); + p = NULL; + break; + case MULTIBYTECODE: + p = "MULTIBYTECODE"; + break; + case NEWL: + p = "NEWL"; + break; + default: + sprintf(buf + STRLEN(buf), "corrupt %d", OP(op)); + p = NULL; + break; + } + if (p != NULL) + STRCAT(buf, p); + return (char_u *)buf; +} +#endif // REGEXP_DEBUG diff --git a/src/nvim/screen.c b/src/nvim/screen.c index 1cdee7c972..3a87cba7f9 100644 --- a/src/nvim/screen.c +++ b/src/nvim/screen.c @@ -2208,8 +2208,6 @@ static int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool noc buf_T *buf = wp->w_buffer; bool end_fill = (lnum == buf->b_ml.ml_line_count+1); - has_decor = decor_redraw_line(buf, lnum-1, &decor_state); - if (!number_only) { // To speed up the loop below, set extra_check when there is linebreak, // trailing white space and/or syntax processing to be done. 
@@ -2231,6 +2229,8 @@ static int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool noc } } + has_decor = decor_redraw_line(buf, lnum-1, &decor_state); + for (size_t k = 0; k < kv_size(*providers); k++) { DecorProvider *p = kv_A(*providers, k); if (p && p->redraw_line != LUA_NOREF) { @@ -2459,9 +2459,7 @@ static int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, bool noc memset(sattrs, 0, sizeof(sattrs)); num_signs = buf_get_signattrs(wp->w_buffer, lnum, sattrs); - if (decor_state.has_sign_decor) { - decor_redraw_signs(buf, &decor_state, lnum-1, &num_signs, sattrs); - } + decor_redraw_signs(buf, lnum-1, &num_signs, sattrs); // If this line has a sign with line highlighting set line_attr. // TODO(bfredl, vigoux): this should not take priority over decoration! diff --git a/src/nvim/spell.c b/src/nvim/spell.c index 1296d410f6..fb644daa39 100644 --- a/src/nvim/spell.c +++ b/src/nvim/spell.c @@ -3824,7 +3824,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so // At end of a prefix or at start of prefixtree: check for // following word. - if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) { + if (byts[arridx] == 0 || n == STATE_NOPREFIX) { // Set su->su_badflags to the caps type at this position. // Use the caps type until here for the prefix itself. n = nofold_len(fword, sp->ts_fidx, su->su_badptr); diff --git a/src/nvim/testdir/test_regexp_latin.vim b/src/nvim/testdir/test_regexp_latin.vim index a92f7e1192..a0f5ebfb9f 100644 --- a/src/nvim/testdir/test_regexp_latin.vim +++ b/src/nvim/testdir/test_regexp_latin.vim @@ -795,4 +795,20 @@ func Test_using_mark_position() bwipe! endfunc +func Test_using_visual_position() + " this was using freed memory + new + exe "norm 0o\<Esc>\<C-V>k\<C-X>o0" + /\%V + bwipe! +endfunc + +func Test_using_invalid_visual_position() + " this was going beyond the end of the line + new + exe "norm 0o000\<Esc>0\<C-V>$s0" + /\%V + bwipe! 
+endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/nvim/testdir/test_registers.vim b/src/nvim/testdir/test_registers.vim index 23e39eba35..f78b748d71 100644 --- a/src/nvim/testdir/test_registers.vim +++ b/src/nvim/testdir/test_registers.vim @@ -62,7 +62,6 @@ func Test_display_registers() call assert_match('^\nType Name Content\n' \ . ' c "" a\n' \ . ' c "0 ba\n' - \ . ' c "1 b\n' \ . ' c "a b\n' \ . '.*' \ . ' c "- a\n' @@ -85,6 +84,90 @@ func Test_display_registers() let g:clipboard = save_clipboard endfunc +func Test_register_one() + " delete a line goes into register one + new + call setline(1, "one") + normal dd + call assert_equal("one\n", @1) + + " delete a word does not change register one, does change "- + call setline(1, "two") + normal de + call assert_equal("one\n", @1) + call assert_equal("two", @-) + + " delete a word with a register does not change register one + call setline(1, "three") + normal "ade + call assert_equal("three", @a) + call assert_equal("one\n", @1) + + " delete a word with register DOES change register one with one of a list of + " operators + " % + call setline(1, ["(12)3"]) + normal "ad% + call assert_equal("(12)", @a) + call assert_equal("(12)", @1) + + " ( + call setline(1, ["first second"]) + normal $"ad( + call assert_equal("first secon", @a) + call assert_equal("first secon", @1) + + " ) + call setline(1, ["First Second."]) + normal gg0"ad) + call assert_equal("First Second.", @a) + call assert_equal("First Second.", @1) + + " ` + call setline(1, ["start here."]) + normal gg0fhmx0"ad`x + call assert_equal("start ", @a) + call assert_equal("start ", @1) + + " / + call setline(1, ["searchX"]) + exe "normal gg0\"ad/X\<CR>" + call assert_equal("search", @a) + call assert_equal("search", @1) + + " ? 
+ call setline(1, ["Ysearch"]) + exe "normal gg$\"ad?Y\<CR>" + call assert_equal("Ysearc", @a) + call assert_equal("Ysearc", @1) + + " n + call setline(1, ["Ynext"]) + normal gg$"adn + call assert_equal("Ynex", @a) + call assert_equal("Ynex", @1) + + " N + call setline(1, ["prevY"]) + normal gg0"adN + call assert_equal("prev", @a) + call assert_equal("prev", @1) + + " } + call setline(1, ["one", ""]) + normal gg0"ad} + call assert_equal("one\n", @a) + call assert_equal("one\n", @1) + + " { + call setline(1, ["", "two"]) + normal 2G$"ad{ + call assert_equal("\ntw", @a) + call assert_equal("\ntw", @1) + + bwipe! +endfunc + func Test_recording_status_in_ex_line() norm qx redraw! @@ -482,6 +565,82 @@ func Test_v_register() bwipe! endfunc +" Test for executing the contents of a register as an Ex command with line +" continuation. +func Test_execute_reg_as_ex_cmd() + " Line continuation with just two lines + let code =<< trim END + let l = [ + \ 1] + END + let @r = code->join("\n") + let l = [] + @r + call assert_equal([1], l) + + " Line continuation with more than two lines + let code =<< trim END + let l = [ + \ 1, + \ 2, + \ 3] + END + let @r = code->join("\n") + let l = [] + @r + call assert_equal([1, 2, 3], l) + + " use comments interspersed with code + let code =<< trim END + let l = [ + "\ one + \ 1, + "\ two + \ 2, + "\ three + \ 3] + END + let @r = code->join("\n") + let l = [] + @r + call assert_equal([1, 2, 3], l) + + " use line continuation in the middle + let code =<< trim END + let a = "one" + let l = [ + \ 1, + \ 2] + let b = "two" + END + let @r = code->join("\n") + let l = [] + @r + call assert_equal([1, 2], l) + call assert_equal("one", a) + call assert_equal("two", b) + + " only one line with a \ + let @r = "\\let l = 1" + call assert_fails('@r', 'E10:') + + " only one line with a "\ + let @r = ' "\ let i = 1' + @r + call assert_false(exists('i')) + + " first line also begins with a \ + let @r = "\\let l = [\n\\ 1]" + call assert_fails('@r', 'E10:') + 
+ " Test with a large number of lines + let @r = "let str = \n" + let @r ..= repeat(" \\ 'abcdefghijklmnopqrstuvwxyz' ..\n", 312) + let @r ..= ' \ ""' + @r + call assert_equal(repeat('abcdefghijklmnopqrstuvwxyz', 312), str) +endfunc + func Test_ve_blockpaste() new set ve=all diff --git a/src/nvim/ui.c b/src/nvim/ui.c index 31b9614c34..7c67c058b0 100644 --- a/src/nvim/ui.c +++ b/src/nvim/ui.c @@ -639,8 +639,8 @@ void ui_grid_resize(handle_T grid_handle, int width, int height, Error *error) } } else { // non-positive indicates no request - wp->w_height_request = (int)MAX(height, 0); - wp->w_width_request = (int)MAX(width, 0); + wp->w_height_request = MAX(height, 0); + wp->w_width_request = MAX(width, 0); win_set_inner_size(wp); } } diff --git a/test/functional/api/vim_spec.lua b/test/functional/api/vim_spec.lua index 71cd055e08..e945a6c706 100644 --- a/test/functional/api/vim_spec.lua +++ b/test/functional/api/vim_spec.lua @@ -2621,24 +2621,24 @@ describe('API', function() eq({ str = 'a━━━b', width = 5 }, meths.eval_statusline('a%=b', { fillchar = '━', maxwidth = 5 })) end) - it('rejects double-width fillchar', function() - eq('fillchar must be a single-width character', - pcall_err(meths.eval_statusline, '', { fillchar = '哦' })) + it('treats double-width fillchar as single-width', function() + eq({ str = 'a哦哦哦b', width = 5 }, + meths.eval_statusline('a%=b', { fillchar = '哦', maxwidth = 5 })) end) - it('rejects control character fillchar', function() - eq('fillchar must be a single-width character', - pcall_err(meths.eval_statusline, '', { fillchar = '\a' })) + it('treats control character fillchar as single-width', function() + eq({ str = 'a\031\031\031b', width = 5 }, + meths.eval_statusline('a%=b', { fillchar = '\031', maxwidth = 5 })) end) it('rejects multiple-character fillchar', function() - eq('fillchar must be a single-width character', + eq('fillchar must be a single character', pcall_err(meths.eval_statusline, '', { fillchar = 'aa' })) end) it('rejects 
empty string fillchar', function() - eq('fillchar must be a single-width character', + eq('fillchar must be a single character', pcall_err(meths.eval_statusline, '', { fillchar = '' })) end) it('rejects non-string fillchar', function() - eq('fillchar must be a single-width character', + eq('fillchar must be a single character', pcall_err(meths.eval_statusline, '', { fillchar = 1 })) end) describe('highlight parsing', function() diff --git a/test/functional/editor/completion_spec.lua b/test/functional/editor/completion_spec.lua index befad29922..1a0ee54505 100644 --- a/test/functional/editor/completion_spec.lua +++ b/test/functional/editor/completion_spec.lua @@ -1193,4 +1193,23 @@ describe('completion', function() eq('foobar', eval('g:word')) feed('<esc>') end) + + it('does not crash if text is changed by first call to complete function #17489', function() + source([[ + func Complete(findstart, base) abort + if a:findstart + let col = col('.') + call complete_add('#') + return col - 1 + else + return [] + endif + endfunc + + set completeopt=longest + set completefunc=Complete + ]]) + feed('ifoo#<C-X><C-U>') + assert_alive() + end) end) diff --git a/test/functional/editor/put_spec.lua b/test/functional/editor/put_spec.lua index c367f8fdd0..cc9fce8f67 100644 --- a/test/functional/editor/put_spec.lua +++ b/test/functional/editor/put_spec.lua @@ -138,9 +138,9 @@ describe('put command', function() end -- create_test_defs() }}} local function find_cursor_position(expect_string) -- {{{ - -- There must only be one occurance of the character 'x' in + -- There must only be one occurrence of the character 'x' in -- expect_string. - -- This function removes that occurance, and returns the position that + -- This function removes that occurrence, and returns the position that -- it was in. -- This returns the cursor position that would leave the 'x' in that -- place if we feed 'ix<esc>' and the string existed before it. 
diff --git a/test/functional/provider/clipboard_spec.lua b/test/functional/provider/clipboard_spec.lua index 986db96a18..5bdfec574e 100644 --- a/test/functional/provider/clipboard_spec.lua +++ b/test/functional/provider/clipboard_spec.lua @@ -50,29 +50,33 @@ local function basic_register_test(noblock) text, stuff and some more some some text, stuff and some more]]) - -- deleting a word to named ("a) updates "1 (and not "-) + -- deleting a word to named ("a) doesn't update "1 or "- feed('gg"adwj"1P^"-P') expect([[ , stuff and some more - some textsome some text, stuff and some more]]) + some some random text + some some text, stuff and some more]]) -- deleting a line does update "" feed('ggdd""P') expect([[ , stuff and some more - some textsome some text, stuff and some more]]) + some some random text + some some text, stuff and some more]]) feed('ggw<c-v>jwyggP') if noblock then expect([[ stuf - me t + me s , stuff and some more - some textsome some text, stuff and some more]]) + some some random text + some some text, stuff and some more]]) else expect([[ stuf, stuff and some more - me tsome textsome some text, stuff and some more]]) + me ssome some random text + some some text, stuff and some more]]) end -- pasting in visual does unnamed delete of visual selection diff --git a/test/functional/ui/decorations_spec.lua b/test/functional/ui/decorations_spec.lua index ae5d34fa7a..7e71ada02d 100644 --- a/test/functional/ui/decorations_spec.lua +++ b/test/functional/ui/decorations_spec.lua @@ -1372,6 +1372,7 @@ describe('decorations: signs', function() screen:set_default_attr_ids { [1] = {foreground = Screen.colors.Blue4, background = Screen.colors.Grey}; [2] = {foreground = Screen.colors.Blue1, bold = true}; + [3] = {background = Screen.colors.Yellow1, foreground = Screen.colors.Blue1}; } ns = meths.create_namespace 'test' @@ -1634,4 +1635,99 @@ l5 end) + it('can add lots of signs', function() + screen:try_resize(40, 10) + command 'normal 10oa b c d e f g h' + + for i 
= 1, 10 do + meths.buf_set_extmark(0, ns, i, 0, { end_col = 1, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, 2, { end_col = 3, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, 4, { end_col = 5, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, 6, { end_col = 7, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, 8, { end_col = 9, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, 10, { end_col = 11, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, 12, { end_col = 13, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, 14, { end_col = 15, hl_group='Todo' }) + meths.buf_set_extmark(0, ns, i, -1, { sign_text='W' }) + meths.buf_set_extmark(0, ns, i, -1, { sign_text='X' }) + meths.buf_set_extmark(0, ns, i, -1, { sign_text='Y' }) + meths.buf_set_extmark(0, ns, i, -1, { sign_text='Z' }) + end + + screen:expect{grid=[[ + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:h} | + X Y Z W {3:a} {3:b} {3:c} {3:d} {3:e} {3:f} {3:g} {3:^h} | + | + ]]} + end) + +end) + +describe('decorations: virt_text', function() + local screen + + before_each(function() + clear() + screen = Screen.new(50, 10) + screen:attach() + screen:set_default_attr_ids { + [1] = {foreground = Screen.colors.Brown}; + [2] = {foreground = Screen.colors.Fuchsia}; + [3] = {bold = true, foreground = Screen.colors.Blue1}; + } + end) + + it('avoids regression in #17638', function() + exec_lua[[ + vim.wo.number = true + vim.wo.relativenumber = true + ]] + + command 'normal 4ohello' + command 'normal aVIRTUAL' + + local ns = meths.create_namespace('test') + + 
meths.buf_set_extmark(0, ns, 2, 0, { + virt_text = {{"hello", "String"}}, + virt_text_win_col = 20, + }) + + screen:expect{grid=[[ + {1: 4 } | + {1: 3 }hello | + {1: 2 }hello {2:hello} | + {1: 1 }hello | + {1:5 }helloVIRTUA^L | + {3:~ }| + {3:~ }| + {3:~ }| + {3:~ }| + | + ]]} + + -- Trigger a screen update + feed('k') + + screen:expect{grid=[[ + {1: 3 } | + {1: 2 }hello | + {1: 1 }hello {2:hello} | + {1:4 }hell^o | + {1: 1 }helloVIRTUAL | + {3:~ }| + {3:~ }| + {3:~ }| + {3:~ }| + | + ]]} + end) + end) diff --git a/test/functional/ui/sign_spec.lua b/test/functional/ui/sign_spec.lua index f718786557..dbc92ca222 100644 --- a/test/functional/ui/sign_spec.lua +++ b/test/functional/ui/sign_spec.lua @@ -482,7 +482,7 @@ describe('Signs', function() ]]) end) - it('ignores signs with no icon and text when calculting the signcolumn width', function() + it('ignores signs with no icon and text when calculating the signcolumn width', function() feed('ia<cr>b<cr>c<cr><esc>') command('set number') command('set signcolumn=auto:2') |