diff options
87 files changed, 8100 insertions, 7198 deletions
@@ -40,6 +40,17 @@ Install from package Packages are in [Homebrew], [Debian], [Ubuntu], [Fedora], [Arch Linux], and [more](https://github.com/neovim/neovim/wiki/Installing-Neovim). +Project layout +-------------- + +- `.ci/`: Build server scripts +- `cmake/`: Build scripts +- `runtime/`: Application files +- [`src/`](src/nvim/README.md): Application source code +- `third-party/`: CMake sub-project to build third-party dependencies (if the + `USE_BUNDLED_DEPS` flag is undefined or `USE_BUNDLED` CMake option is false). +- [`test/`](test/README.md): Test files + What's been done so far ----------------------- diff --git a/contrib/YouCompleteMe/README.md b/contrib/YouCompleteMe/README.md index 0c07440a44..345a9d8d12 100644 --- a/contrib/YouCompleteMe/README.md +++ b/contrib/YouCompleteMe/README.md @@ -2,7 +2,9 @@ ## What is this? -This provides the code necessary to configure vim's YCM plugin to provide C semantic support (completion, go-to-definition, etc) for developers working on the Neovim project. +This provides the code necessary to configure vim's YCM plugin to provide C +semantic support (completion, go-to-definition, etc) for developers working on +the Neovim project. ## Installation @@ -13,10 +15,17 @@ Install [YouCompleteMe](https://github.com/Valloric/YouCompleteMe). ### Step 2 ```bash -cp contrib/YouCompleteMe/ycm_extra_conf.py src/.ycm_extra_conf.py +cp contrib/YouCompleteMe/ycm_extra_conf.py .ycm_extra_conf.py echo .ycm_extra_conf.py >> .git/info/exclude make +``` + +Tip: to improve source code navigation, add something like this to your nvim +configuration: -(Add the following somewhere in your vimrc) -autocmd FileType c nnoremap <buffer> <silent> <C-]> :YcmCompleter GoTo<cr> +```vim +au FileType c,cpp nnoremap <buffer> <c-]> :YcmCompleter GoTo<CR> ``` + +And use `ctrl+]` when the cursor is positioned in a symbol to quickly jump to a +definition or declaration. 
diff --git a/contrib/YouCompleteMe/ycm_extra_conf.py b/contrib/YouCompleteMe/ycm_extra_conf.py index 7c54677c8f..e436609ce2 100644 --- a/contrib/YouCompleteMe/ycm_extra_conf.py +++ b/contrib/YouCompleteMe/ycm_extra_conf.py @@ -9,47 +9,57 @@ def DirectoryOfThisScript(): def GetDatabase(): compilation_database_folder = os.path.join(DirectoryOfThisScript(), - '..', 'build') + 'build') if os.path.exists(compilation_database_folder): return ycm_core.CompilationDatabase(compilation_database_folder) return None -def IsHeaderFile(filename): - extension = os.path.splitext(filename)[1] - return extension == '.h' - - def GetCompilationInfoForFile(filename): database = GetDatabase() if not database: return None - if IsHeaderFile(filename): - basename = os.path.splitext(filename)[0] - c_file = basename + '.c' - # for pure headers (no c file), default to main.c - if not os.path.exists(c_file): - c_file = os.path.join(DirectoryOfThisScript(), 'nvim', 'main.c') - if os.path.exists(c_file): - compilation_info = database.GetCompilationInfoForFile(c_file) - if compilation_info.compiler_flags_: - return compilation_info - return None return database.GetCompilationInfoForFile(filename) +# It seems YCM does not resolve directories correctly. 
This function will +# adjust paths in the compiler flags to be absolute +def FixDirectories(args, compiler_working_dir): + def adjust_path(path): + return os.path.abspath(os.path.join(compiler_working_dir, path)) + + adjust_next_arg = False + new_args = [] + for arg in args: + if adjust_next_arg: + arg = adjust_path(arg) + adjust_next_arg = False + else: + for dir_flag in ['-I', '-isystem', '-o', '-c']: + if arg.startswith(dir_flag): + if arg != dir_flag: + # flag and path are concatenated in same arg + path = arg[len(dir_flag):] + new_path = adjust_path(path) + arg = '{0}{1}'.format(dir_flag, new_path) + else: + # path is specified in next argument + adjust_next_arg = True + new_args.append(arg) + return new_args + + def FlagsForFile(filename): compilation_info = GetCompilationInfoForFile(filename) if not compilation_info: return None # Add flags not needed for clang-the-binary, # but needed for libclang-the-library (YCM uses this last one). - flags = (list(compilation_info.compiler_flags_) - if compilation_info.compiler_flags_ - else []) + flags = FixDirectories((list(compilation_info.compiler_flags_) + if compilation_info.compiler_flags_ + else []), compilation_info.compiler_working_dir_) extra_flags = ['-Wno-newline-eof'] - final_flags = flags + extra_flags return { - 'flags': final_flags, + 'flags': flags + extra_flags, 'do_cache': True } diff --git a/runtime/autoload/remote/host.vim b/runtime/autoload/remote/host.vim index 51f7e5886f..e695fb7df7 100644 --- a/runtime/autoload/remote/host.vim +++ b/runtime/autoload/remote/host.vim @@ -2,7 +2,6 @@ let s:hosts = {} let s:plugin_patterns = {} let s:plugins_for_host = {} - " Register a host by associating it with a factory(funcref) function! remote#host#Register(name, pattern, factory) abort let s:hosts[a:name] = {'factory': a:factory, 'channel': 0, 'initialized': 0} @@ -13,7 +12,6 @@ function! remote#host#Register(name, pattern, factory) abort endif endfunction - " Register a clone to an existing host. 
The new host will use the same factory " as `source`, but it will run as a different process. This can be used by " plugins that should run isolated from other plugins created for the same host @@ -31,12 +29,8 @@ function! remote#host#RegisterClone(name, orig_name) abort \ } endfunction - " Get a host channel, bootstrapping it if necessary function! remote#host#Require(name) abort - if empty(s:plugins_for_host) - call remote#host#LoadRemotePlugins() - endif if !has_key(s:hosts, a:name) throw 'No host named "'.a:name.'" is registered' endif @@ -52,7 +46,6 @@ function! remote#host#Require(name) abort return host.channel endfunction - function! remote#host#IsRunning(name) abort if !has_key(s:hosts, a:name) throw 'No host named "'.a:name.'" is registered' @@ -60,7 +53,6 @@ function! remote#host#IsRunning(name) abort return s:hosts[a:name].channel != 0 endfunction - " Example of registering a Python plugin with two commands (one async), one " autocmd (async) and one function (sync): " @@ -117,73 +109,6 @@ function! remote#host#RegisterPlugin(host, path, specs) abort call add(plugins, {'path': a:path, 'specs': a:specs}) endfunction - -" Get the path to the rplugin manifest file. -function! s:GetManifestPath() abort - let manifest_base = '' - - if exists('$NVIM_RPLUGIN_MANIFEST') - return fnamemodify($NVIM_RPLUGIN_MANIFEST, ':p') - endif - - let dest = has('win32') ? '$LOCALAPPDATA' : '$XDG_DATA_HOME' - if !exists(dest) - let dest = has('win32') ? '~/AppData/Local' : '~/.local/share' - endif - - let dest = fnamemodify(expand(dest), ':p') - if !empty(dest) && !filereadable(dest) - let dest .= ('/' ==# dest[-1:] ? '' : '/') . 'nvim' - call mkdir(dest, 'p', 0700) - let manifest_base = dest - endif - - return manifest_base.'/rplugin.vim' -endfunction - - -" Old manifest file based on known script locations. -function! s:GetOldManifestPath() abort - let prefix = exists('$MYVIMRC') - \ ? 
$MYVIMRC - \ : matchstr(get(split(execute('scriptnames'), '\n'), 0, ''), '\f\+$') - return fnamemodify(expand(prefix, 1), ':h') - \.'/.'.fnamemodify(prefix, ':t').'-rplugin~' -endfunction - - -function! s:GetManifest() abort - let manifest = s:GetManifestPath() - - if !filereadable(manifest) - " Check if an old manifest file exists and move it to the new location. - let old_manifest = s:GetOldManifestPath() - if filereadable(old_manifest) - call rename(old_manifest, manifest) - endif - endif - - return manifest -endfunction - - -function! remote#host#LoadRemotePlugins() abort - let manifest = s:GetManifest() - if filereadable(manifest) - execute 'source' fnameescape(manifest) - endif -endfunction - - -function! remote#host#LoadRemotePluginsEvent(event, pattern) abort - autocmd! nvim-rplugin - call remote#host#LoadRemotePlugins() - if exists('#'.a:event.'#'.a:pattern) " Avoid 'No matching autocommands'. - execute 'silent doautocmd <nomodeline>' a:event a:pattern - endif -endfunction - - function! s:RegistrationCommands(host) abort " Register a temporary host clone for discovering specs let host_id = a:host.'-registration-clone' @@ -228,7 +153,6 @@ function! s:RegistrationCommands(host) abort return lines endfunction - function! remote#host#UpdateRemotePlugins() abort let commands = [] let hosts = keys(s:hosts) @@ -245,12 +169,11 @@ function! remote#host#UpdateRemotePlugins() abort endtry endif endfor - call writefile(commands, s:GetManifest()) + call writefile(commands, g:loaded_remote_plugins) echomsg printf('remote/host: generated rplugin manifest: %s', - \ s:GetManifest()) + \ g:loaded_remote_plugins) endfunction - function! remote#host#PluginsForHost(host) abort if !has_key(s:plugins_for_host, a:host) let s:plugins_for_host[a:host] = [] @@ -258,7 +181,6 @@ function! remote#host#PluginsForHost(host) abort return s:plugins_for_host[a:host] endfunction - function! remote#host#LoadErrorForHost(host, log) abort return 'Failed to load '. a:host . ' host. '. 
\ 'You can try to see what happened by starting nvim with '. @@ -266,7 +188,6 @@ function! remote#host#LoadErrorForHost(host, log) abort \ ' Also, the host stderr is available in messages.' endfunction - " Registration of standard hosts " Python/Python3 diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index 7ce47179b8..d94b3b7a2e 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -832,7 +832,7 @@ and the comparison is done on Numbers. This means that: > echo 0 == 'x' 1 because 'x' converted to a Number is zero. However: > - echo 0 == 'x' + echo [0] == ['x'] 0 Inside a List or Dictionary this conversion is not used. @@ -1503,7 +1503,7 @@ v:dying Normally zero. When a deadly signal is caught it's set to VimLeave autocommands will not be executed. *v:exiting* *exiting-variable* -v:exiting The exit value Nvim will use. Before exiting, it is |v:null|. +v:exiting Exit code, or |v:null| if not exiting. |VimLeave| *v:errmsg* *errmsg-variable* v:errmsg Last given error message. It's allowed to set this variable. @@ -1756,7 +1756,7 @@ v:profiling Normally zero. Set to one after using ":profile start". *v:progname* *progname-variable* v:progname Contains the name (with path removed) with which Nvim was invoked. Allows you to do special initialisations for any - other name you might symlink to Nvim. + name you might symlink to Nvim. Read-only. *v:progpath* *progpath-variable* @@ -1916,7 +1916,7 @@ v:vim_did_enter Zero until most of startup is done. It is set to one just *v:warningmsg* *warningmsg-variable* v:warningmsg Last given warning message. It's allowed to set this variable. - *v:windowid* *windowid-variable* {Nvim} + *v:windowid* *windowid-variable* v:windowid Application-specific window ID ("window handle" in MS-Windows) which may be set by any attached UI. Defaults to zero. Note: for windows inside Vim use |winnr()| or |win_getid()|. 
@@ -2056,13 +2056,13 @@ getcmdwintype() String return current command-line window type getcompletion({pat}, {type} [, {filtered}]) List list of cmdline completion matches getcurpos() List position of the cursor -getcwd([{winnr} [, {tabnr}]]) String the current working directory +getcwd([{winnr} [, {tabnr}]]) String get the current working directory getfontname([{name}]) String name of font being used getfperm({fname}) String file permissions of file {fname} getfsize({fname}) Number size in bytes of file {fname} getftime({fname}) Number last modification time of file getftype({fname}) String description of type of file {fname} -getline({lnum}) String line {lnum} of current buffer +getline({lnum}) String line {lnum} of current buffer getline({lnum}, {end}) List lines {lnum} to {end} of current buffer getloclist({nr}[, {what}]) List list of location list items getmatches() List list of current matches @@ -2071,20 +2071,20 @@ getpos({expr}) List position of cursor, mark, etc. getqflist([{what}]) List list of quickfix items getreg([{regname} [, 1 [, {list}]]]) String or List contents of register -getregtype([{regname}]) String type of register -gettabinfo( [{expr}]) List list of tab pages +getregtype([{regname}]) String type of register +gettabinfo([{expr}]) List list of tab pages gettabvar({nr}, {varname} [, {def}]) any variable {varname} in tab {nr} or {def} gettabwinvar({tabnr}, {winnr}, {name} [, {def}]) any {name} in {winnr} in tab page {tabnr} -getwininfo( [{winid}]) List list of windows +getwininfo([{winid}]) List list of windows getwinposx() Number X coord in pixels of GUI Vim window getwinposy() Number Y coord in pixels of GUI Vim window getwinvar({nr}, {varname} [, {def}]) any variable {varname} in window {nr} glob({expr} [, {nosuf} [, {list} [, {alllinks}]]]) any expand file wildcards in {expr} -glob2regpat({expr}) String convert a glob pat into a search pat +glob2regpat({expr}) String convert a glob pat into a search pat globpath({path}, {expr} [, {nosuf} [, 
{list} [, {alllinks}]]]) String do glob({expr}) for all dirs in {path} has({feature}) Number |TRUE| if feature {feature} supported @@ -2160,7 +2160,7 @@ matchlist({expr}, {pat}[, {start}[, {count}]]) List match and submatches of {pat} in {expr} matchstr({expr}, {pat}[, {start}[, {count}]]) String {count}'th match of {pat} in {expr} -matchstrpos( {expr}, {pat}[, {start}[, {count}]]) +matchstrpos({expr}, {pat}[, {start}[, {count}]]) List {count}'th match of {pat} in {expr} max({list}) Number maximum value of items in {list} min({list}) Number minimum value of items in {list} @@ -2179,7 +2179,7 @@ prevnonblank({lnum}) Number line nr of non-blank line <= {lnum} printf({fmt}, {expr1}...) String format text pumvisible() Number whether popup menu is visible pyeval({expr}) any evaluate |Python| expression -py3eval({expr}) any evaluate |python3| expression +py3eval({expr}) any evaluate |python3| expression range({expr} [, {max} [, {stride}]]) List items from {expr} to {max} readfile({fname} [, {binary} [, {max}]]) @@ -2192,15 +2192,15 @@ remote_expr({server}, {string} [, {idvar}]) remote_foreground({server}) Number bring Vim server to the foreground remote_peek({serverid} [, {retvar}]) Number check for reply string -remote_read({serverid}) String read reply string +remote_read({serverid}) String read reply string remote_send({server}, {string} [, {idvar}]) String send key sequence remove({list}, {idx} [, {end}]) any remove items {idx}-{end} from {list} remove({dict}, {key}) any remove entry {key} from {dict} rename({from}, {to}) Number rename (move) file from {from} to {to} -repeat({expr}, {count}) String repeat {expr} {count} times +repeat({expr}, {count}) String repeat {expr} {count} times resolve({filename}) String get filename a shortcut points to -reverse({list}) List reverse {list} in-place +reverse({list}) List reverse {list} in-place round({expr}) Float round off {expr} rpcnotify({channel}, {event}[, {args}...]) Sends an |RPC| notification to {channel} @@ -2228,7 
+2228,7 @@ setbufvar({expr}, {varname}, {val}) set {varname} in buffer {expr} to {val} setcharsearch({dict}) Dict set character search from {dict} setcmdpos({pos}) Number set cursor position in command-line setfperm({fname}, {mode} Number set {fname} file permissions to {mode} -setline({lnum}, {line}) Number set line {lnum} to {line} +setline({lnum}, {line}) Number set line {lnum} to {line} setloclist({nr}, {list}[, {action}[, {what}]]) Number modify location list using {list} setmatches({list}) Number restore a list of matches @@ -2300,8 +2300,8 @@ test_garbagecollect_now() none free memory right now for testing timer_start({time}, {callback} [, {options}]) Number create a timer timer_stop({timer}) none stop a timer -tolower({expr}) String the String {expr} switched to lowercase -toupper({expr}) String the String {expr} switched to uppercase +tolower({expr}) String the String {expr} switched to lowercase +toupper({expr}) String the String {expr} switched to uppercase tr({src}, {fromstr}, {tostr}) String translate chars of {src} in {fromstr} to chars in {tostr} trunc({expr}) Float truncate Float {expr} @@ -2311,19 +2311,19 @@ undotree() List undo file tree uniq({list} [, {func} [, {dict}]]) List remove adjacent duplicates from a list values({dict}) List values in {dict} -virtcol({expr}) Number screen column of cursor or mark +virtcol({expr}) Number screen column of cursor or mark visualmode([expr]) String last visual mode used wildmenumode() Number whether 'wildmenu' mode is active -win_findbuf( {bufnr}) List find windows containing {bufnr} -win_getid( [{win} [, {tab}]]) Number get window ID for {win} in {tab} -win_gotoid( {expr}) Number go to window with ID {expr} -win_id2tabwin( {expr}) List get tab window nr from window ID -win_id2win( {expr}) Number get window nr from window ID +win_findbuf({bufnr}) List find windows containing {bufnr} +win_getid([{win} [, {tab}]]) Number get window ID for {win} in {tab} +win_gotoid({expr}) Number go to window with ID {expr} 
+win_id2tabwin({expr}) List get tab and window nr from window ID +win_id2win({expr}) Number get window nr from window ID winbufnr({nr}) Number buffer number of window {nr} wincol() Number window column of the cursor -winheight({nr}) Number height of window {nr} +winheight({nr}) Number height of window {nr} winline() Number window line of the cursor -winnr([{expr}]) Number number of current window +winnr([{expr}]) Number number of current window winrestcmd() String returns command to restore window sizes winrestview({dict}) none restore view of current window winsaveview() Dict save view of current window @@ -2331,7 +2331,7 @@ winwidth({nr}) Number width of window {nr} wordcount() Dict get byte/char/word statistics writefile({list}, {fname} [, {flags}]) Number write list of lines to file {fname} -xor({expr}, {expr}) Number bitwise XOR +xor({expr}, {expr}) Number bitwise XOR abs({expr}) *abs()* diff --git a/runtime/doc/vim_diff.txt b/runtime/doc/vim_diff.txt index bea69eb924..14bb934607 100644 --- a/runtime/doc/vim_diff.txt +++ b/runtime/doc/vim_diff.txt @@ -115,6 +115,7 @@ Options: Variables: |v:event| + |v:exiting| |v:progpath| is always absolute ("full") |v:windowid| is always available (for use by external UIs) diff --git a/runtime/plugin/gui_shim.vim b/runtime/plugin/gui_shim.vim index 28d82eb1c7..575b826b5e 100644 --- a/runtime/plugin/gui_shim.vim +++ b/runtime/plugin/gui_shim.vim @@ -4,6 +4,14 @@ if !has('win32') || !has('nvim') || exists('g:GuiLoaded') endif let g:GuiLoaded = 1 +" Close the GUI +function! GuiClose() abort + call rpcnotify(0, 'Gui', 'Close') +endfunction + +" Notify the GUI when exiting Neovim +autocmd VimLeave * call GuiClose() + " A replacement for foreground() function! 
GuiForeground() abort call rpcnotify(0, 'Gui', 'Foreground') diff --git a/runtime/plugin/rplugin.vim b/runtime/plugin/rplugin.vim index b4b03032b3..7d83668a30 100644 --- a/runtime/plugin/rplugin.vim +++ b/runtime/plugin/rplugin.vim @@ -1,16 +1,59 @@ if exists('g:loaded_remote_plugins') finish endif -let g:loaded_remote_plugins = 1 +let g:loaded_remote_plugins = '/path/to/manifest' + +" Get the path to the rplugin manifest file. +function! s:GetManifestPath() abort + let manifest_base = '' + + if exists('$NVIM_RPLUGIN_MANIFEST') + return fnamemodify($NVIM_RPLUGIN_MANIFEST, ':p') + endif + + let dest = has('win32') ? '$LOCALAPPDATA' : '$XDG_DATA_HOME' + if !exists(dest) + let dest = has('win32') ? '~/AppData/Local' : '~/.local/share' + endif + + let dest = fnamemodify(expand(dest), ':p') + if !empty(dest) && !filereadable(dest) + let dest .= ('/' ==# dest[-1:] ? '' : '/') . 'nvim' + call mkdir(dest, 'p', 0700) + let manifest_base = dest + endif + + return manifest_base.'/rplugin.vim' +endfunction + +" Old manifest file based on known script locations. +function! s:GetOldManifestPath() abort + let prefix = exists('$MYVIMRC') + \ ? $MYVIMRC + \ : matchstr(get(split(execute('scriptnames'), '\n'), 0, ''), '\f\+$') + return fnamemodify(expand(prefix, 1), ':h') + \.'/.'.fnamemodify(prefix, ':t').'-rplugin~' +endfunction + +function! s:GetManifest() abort + let manifest = s:GetManifestPath() + if !filereadable(manifest) + " Check if an old manifest file exists and move it to the new location. + let old_manifest = s:GetOldManifestPath() + if filereadable(old_manifest) + call rename(old_manifest, manifest) + endif + endif + return manifest +endfunction + +function! s:LoadRemotePlugins() abort + let g:loaded_remote_plugins = s:GetManifest() + if filereadable(g:loaded_remote_plugins) + execute 'source' fnameescape(g:loaded_remote_plugins) + endif +endfunction command! UpdateRemotePlugins call remote#host#UpdateRemotePlugins() -augroup nvim-rplugin - autocmd! 
- autocmd FuncUndefined * - \ call remote#host#LoadRemotePluginsEvent( - \ 'FuncUndefined', expand('<amatch>')) - autocmd CmdUndefined * - \ call remote#host#LoadRemotePluginsEvent( - \ 'CmdUndefined', expand('<amatch>')) -augroup END +call s:LoadRemotePlugins() diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt index c21ec262b6..22cf1f3a3d 100644 --- a/src/nvim/CMakeLists.txt +++ b/src/nvim/CMakeLists.txt @@ -104,6 +104,7 @@ set(CONV_SOURCES screen.c search.c spell.c + spellfile.c syntax.c tag.c window.c) diff --git a/src/nvim/buffer_defs.h b/src/nvim/buffer_defs.h index a1b5633c32..3e9767adde 100644 --- a/src/nvim/buffer_defs.h +++ b/src/nvim/buffer_defs.h @@ -825,8 +825,7 @@ struct tabpage_S { frame_T *(tp_snapshot[SNAP_COUNT]); ///< window layout snapshots dictitem_T tp_winvar; ///< variable for "t:" Dictionary dict_T *tp_vars; ///< internal variables, local to tab page - char_u *localdir; ///< Absolute path of local directory or - ///< NULL + char_u *tp_localdir; ///< Absolute path of local CWD or NULL }; /* diff --git a/src/nvim/eval.c b/src/nvim/eval.c index 6dc7e5606e..7ca3d89665 100644 --- a/src/nvim/eval.c +++ b/src/nvim/eval.c @@ -10901,7 +10901,7 @@ static void f_getcwd(typval_T *argvars, typval_T *rettv, FunPtr fptr) } case kCdScopeTab: // FALLTHROUGH assert(tp); - from = tp->localdir; + from = tp->tp_localdir; if (from) { break; } @@ -12015,7 +12015,7 @@ static void f_haslocaldir(typval_T *argvars, typval_T *rettv, FunPtr fptr) break; case kCdScopeTab: assert(tp); - rettv->vval.v_number = tp->localdir ? 1 : 0; + rettv->vval.v_number = tp->tp_localdir ? 
1 : 0; break; case kCdScopeGlobal: // The global scope never has a local directory @@ -16932,7 +16932,7 @@ static void f_strgetchar(typval_T *argvars, typval_T *rettv, FunPtr fptr) break; } charidx--; - byteidx += mb_cptr2len(str + byteidx); + byteidx += MB_CPTR2LEN(str + byteidx); } } } @@ -17054,7 +17054,7 @@ static void f_strcharpart(typval_T *argvars, typval_T *rettv, FunPtr fptr) { if (!error) { if (nchar > 0) { while (nchar > 0 && nbyte < slen) { - nbyte += mb_cptr2len(p + nbyte); + nbyte += MB_CPTR2LEN(p + nbyte); nchar--; } } else { @@ -17069,7 +17069,7 @@ static void f_strcharpart(typval_T *argvars, typval_T *rettv, FunPtr fptr) { if (off < 0) { len += 1; } else { - len += mb_cptr2len(p + off); + len += MB_CPTR2LEN(p + off); } charlen--; } diff --git a/src/nvim/event/libuv_process.h b/src/nvim/event/libuv_process.h index aaaa896e10..1132ce79ca 100644 --- a/src/nvim/event/libuv_process.h +++ b/src/nvim/event/libuv_process.h @@ -14,8 +14,9 @@ typedef struct libuv_process { static inline LibuvProcess libuv_process_init(Loop *loop, void *data) { - LibuvProcess rv; - rv.process = process_init(loop, kProcessTypeUv, data); + LibuvProcess rv = { + .process = process_init(loop, kProcessTypeUv, data) + }; return rv; } diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 159e027793..1b83677807 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -4363,19 +4363,22 @@ void ex_help(exarg_T *eap) if (!curwin->w_buffer->b_help || cmdmod.tab != 0 ) { - if (cmdmod.tab != 0) + if (cmdmod.tab != 0) { wp = NULL; - else - for (wp = firstwin; wp != NULL; wp = wp->w_next) - if (wp->w_buffer != NULL && wp->w_buffer->b_help) + } else { + wp = NULL; + FOR_ALL_WINDOWS_IN_TAB(wp2, curtab) { + if (wp2->w_buffer != NULL && wp2->w_buffer->b_help) { + wp = wp2; break; - if (wp != NULL && wp->w_buffer->b_nwindows > 0) + } + } + } + if (wp != NULL && wp->w_buffer->b_nwindows > 0) { win_enter(wp, true); - else { - /* - * There is no help window yet. 
- * Try to open the file specified by the "helpfile" option. - */ + } else { + // There is no help window yet. + // Try to open the file specified by the "helpfile" option. if ((helpfd = mch_fopen((char *)p_hf, READBIN)) == NULL) { smsg(_("Sorry, help file \"%s\" not found"), p_hf); goto erret; diff --git a/src/nvim/ex_docmd.c b/src/nvim/ex_docmd.c index 87b6959101..c7cb875b88 100644 --- a/src/nvim/ex_docmd.c +++ b/src/nvim/ex_docmd.c @@ -50,6 +50,7 @@ #include "nvim/screen.h" #include "nvim/search.h" #include "nvim/spell.h" +#include "nvim/spellfile.h" #include "nvim/strings.h" #include "nvim/syntax.h" #include "nvim/tag.h" @@ -5850,18 +5851,20 @@ static void ex_quit_all(exarg_T *eap) */ static void ex_close(exarg_T *eap) { - win_T *win; + win_T *win = NULL; int winnr = 0; - if (cmdwin_type != 0) + if (cmdwin_type != 0) { cmdwin_result = Ctrl_C; - else if (!text_locked() && !curbuf_locked()) { - if (eap->addr_count == 0) + } else if (!text_locked() && !curbuf_locked()) { + if (eap->addr_count == 0) { ex_win_close(eap->forceit, curwin, NULL); - else { - for (win = firstwin; win != NULL; win = win->w_next) { + } else { + FOR_ALL_WINDOWS_IN_TAB(wp, curtab) { winnr++; - if (winnr == eap->line2) + if (winnr == eap->line2) { + win = wp; break; + } } if (win == NULL) win = lastwin; @@ -6073,12 +6076,14 @@ static void ex_hide(exarg_T *eap) win_close(curwin, FALSE); /* don't free buffer */ else { int winnr = 0; - win_T *win; + win_T *win = NULL; - for (win = firstwin; win != NULL; win = win->w_next) { + FOR_ALL_WINDOWS_IN_TAB(wp, curtab) { winnr++; - if (winnr == eap->line2) + if (winnr == eap->line2) { + win = wp; break; + } } if (win == NULL) win = lastwin; @@ -6845,7 +6850,8 @@ static void ex_syncbind(exarg_T *eap) /* * Set all scrollbind windows to the same topline. 
*/ - for (curwin = firstwin; curwin; curwin = curwin->w_next) { + FOR_ALL_WINDOWS_IN_TAB(wp, curtab) { + curwin = wp; if (curwin->w_p_scb) { curbuf = curwin->w_buffer; y = topline - curwin->w_topline; @@ -6943,24 +6949,27 @@ void free_cd_dir(void) /// @param scope Scope of the function call (global, tab or window). void post_chdir(CdScope scope) { - // The local directory of the current window is always overwritten. + // Always overwrite the window-local CWD. xfree(curwin->w_localdir); curwin->w_localdir = NULL; - // Overwrite the local directory of the current tab page for `cd` and `tcd` + // Overwrite the tab-local CWD for :cd, :tcd. if (scope >= kCdScopeTab) { - xfree(curtab->localdir); - curtab->localdir = NULL; + xfree(curtab->tp_localdir); + curtab->tp_localdir = NULL; } if (scope < kCdScopeGlobal) { - // If still in global directory, need to remember current directory as - // global directory. + // If still in global directory, set CWD as the global directory. if (globaldir == NULL && prev_dir != NULL) { globaldir = vim_strsave(prev_dir); } } + char cwd[MAXPATHL]; + if (os_dirname((char_u *)cwd, MAXPATHL) != OK) { + return; + } switch (scope) { case kCdScopeGlobal: // We are now in the global directory, no need to remember its name. @@ -6968,23 +6977,17 @@ void post_chdir(CdScope scope) globaldir = NULL; break; case kCdScopeTab: - // Remember this local directory for the tab page. - if (os_dirname(NameBuff, MAXPATHL) == OK) { - curtab->localdir = vim_strsave(NameBuff); - } + curtab->tp_localdir = (char_u *)xstrdup(cwd); break; case kCdScopeWindow: - // Remember this local directory for the window. - if (os_dirname(NameBuff, MAXPATHL) == OK) { - curwin->w_localdir = vim_strsave(NameBuff); - } + curwin->w_localdir = (char_u *)xstrdup(cwd); break; case kCdScopeInvalid: - // We should never get here assert(false); } - shorten_fnames(TRUE); + shorten_fnames(true); + do_autocmd_dirchanged(cwd, scope); } /// `:cd`, `:tcd`, `:lcd`, `:chdir`, `:tchdir` and `:lchdir`. 
diff --git a/src/nvim/file_search.c b/src/nvim/file_search.c index 79a39c6503..b73d9944ce 100644 --- a/src/nvim/file_search.c +++ b/src/nvim/file_search.c @@ -1519,7 +1519,7 @@ theend: return file_name; } -static void do_autocmd_dirchanged(char_u *new_dir, CdScope scope) +void do_autocmd_dirchanged(char *new_dir, CdScope scope) { static bool recursive = false; @@ -1550,10 +1550,11 @@ static void do_autocmd_dirchanged(char_u *new_dir, CdScope scope) } dict_add_nr_str(dict, "scope", 0L, (char_u *)buf); - dict_add_nr_str(dict, "cwd", 0L, new_dir); + dict_add_nr_str(dict, "cwd", 0L, (char_u *)new_dir); dict_set_keys_readonly(dict); - apply_autocmds(EVENT_DIRCHANGED, (char_u *)buf, new_dir, false, NULL); + apply_autocmds(EVENT_DIRCHANGED, (char_u *)buf, (char_u *)new_dir, false, + NULL); dict_clear(dict); @@ -1565,14 +1566,25 @@ static void do_autocmd_dirchanged(char_u *new_dir, CdScope scope) /// @return OK or FAIL int vim_chdirfile(char_u *fname) { - char_u dir[MAXPATHL]; + char dir[MAXPATHL]; STRLCPY(dir, fname, MAXPATHL); - *path_tail_with_sep(dir) = NUL; - if (os_chdir((char *)dir) != 0) { + *path_tail_with_sep((char_u *)dir) = NUL; + + if (os_dirname(NameBuff, sizeof(NameBuff)) != OK) { + NameBuff[0] = NUL; + } + + if (os_chdir(dir) != 0) { return FAIL; } - do_autocmd_dirchanged(dir, kCdScopeWindow); + +#ifdef BACKSLASH_IN_FILENAME + slash_adjust(dir); +#endif + if (!strequal(dir, (char *)NameBuff)) { + do_autocmd_dirchanged(dir, kCdScopeWindow); + } return OK; } @@ -1587,10 +1599,6 @@ int vim_chdir(char_u *new_dir, CdScope scope) } int r = os_chdir((char *)dir_name); - if (r == 0) { - do_autocmd_dirchanged(dir_name, scope); - } - xfree(dir_name); return r; } diff --git a/src/nvim/fileio.c b/src/nvim/fileio.c index 4ea5121a91..040df707de 100644 --- a/src/nvim/fileio.c +++ b/src/nvim/fileio.c @@ -4733,7 +4733,6 @@ check_timestamps ( int focus /* called for GUI focus event */ ) { - buf_T *buf; int didit = 0; int n; @@ -4752,14 +4751,14 @@ check_timestamps ( if 
(!stuff_empty() || global_busy || !typebuf_typed() || autocmd_busy || curbuf_lock > 0 || allbuf_lock > 0 - ) - need_check_timestamps = TRUE; /* check later */ - else { - ++no_wait_return; - did_check_timestamps = TRUE; - already_warned = FALSE; - for (buf = firstbuf; buf != NULL; ) { - /* Only check buffers in a window. */ + ) { + need_check_timestamps = true; // check later + } else { + no_wait_return++; + did_check_timestamps = true; + already_warned = false; + FOR_ALL_BUFFERS(buf) { + // Only check buffers in a window. if (buf->b_nwindows > 0) { bufref_T bufref; set_bufref(&bufref, buf); @@ -4773,7 +4772,6 @@ check_timestamps ( continue; } } - buf = buf->b_next; } --no_wait_return; need_check_timestamps = FALSE; diff --git a/src/nvim/fold.c b/src/nvim/fold.c index 1423463800..7c0283971e 100644 --- a/src/nvim/fold.c +++ b/src/nvim/fold.c @@ -29,6 +29,7 @@ #include "nvim/strings.h" #include "nvim/syntax.h" #include "nvim/undo.h" +#include "nvim/ops.h" /* local declarations. {{{1 */ /* typedef fold_T {{{2 */ @@ -1593,29 +1594,32 @@ static void foldCreateMarkers(linenr_T start, linenr_T end) /* * Add "marker[markerlen]" in 'commentstring' to line "lnum". 
*/ -static void foldAddMarker(linenr_T lnum, char_u *marker, size_t markerlen) +static void foldAddMarker(linenr_T lnum, const char_u *marker, size_t markerlen) { char_u *cms = curbuf->b_p_cms; char_u *line; char_u *newline; char_u *p = (char_u *)strstr((char *)curbuf->b_p_cms, "%s"); + bool line_is_comment = false; - /* Allocate a new line: old-line + 'cms'-start + marker + 'cms'-end */ + // Allocate a new line: old-line + 'cms'-start + marker + 'cms'-end line = ml_get(lnum); size_t line_len = STRLEN(line); if (u_save(lnum - 1, lnum + 1) == OK) { + // Check if the line ends with an unclosed comment + skip_comment(line, false, false, &line_is_comment); newline = xmalloc(line_len + markerlen + STRLEN(cms) + 1); STRCPY(newline, line); - if (p == NULL) + // Append the marker to the end of the line + if (p == NULL || line_is_comment) { STRLCPY(newline + line_len, marker, markerlen + 1); - else { + } else { STRCPY(newline + line_len, cms); memcpy(newline + line_len + (p - cms), marker, markerlen); STRCPY(newline + line_len + (p - cms) + markerlen, p + 2); } - - ml_replace(lnum, newline, FALSE); + ml_replace(lnum, newline, false); } } @@ -2535,10 +2539,10 @@ static void foldSplit(garray_T *gap, int i, linenr_T top, linenr_T bot) * 1 2 3 * 1 2 3 * top 2 3 4 5 - * 2 3 4 5 - * bot 2 3 4 5 - * 3 5 6 - * 3 5 6 + * 2 3 4 5 + * bot 2 3 4 5 + * 3 5 6 + * 3 5 6 * * 1: not changed * 2: truncate to stop above "top" diff --git a/src/nvim/getchar.c b/src/nvim/getchar.c index b7c6fd41f2..bae8ae6d91 100644 --- a/src/nvim/getchar.c +++ b/src/nvim/getchar.c @@ -1913,59 +1913,30 @@ static int vgetorpeek(int advance) if ((mp == NULL || max_mlen >= mp_match_len) && keylen != KEYLEN_PART_MAP) { - // When no matching mapping found or found a non-matching mapping - // that matches at least what the matching mapping matched: - // Check if we have a terminal code, when: - // mapping is allowed, - // keys have not been mapped, - // and not an ESC sequence, not in insert mode, - // and when not 
timed out. - if ((no_mapping == 0 || allow_keys != 0) - && (typebuf.tb_maplen == 0 - || (p_remap && typebuf.tb_noremap[ - typebuf.tb_off] == RM_YES)) - && !timedout) { - keylen = 0; - } else - keylen = 0; - if (keylen == 0) { /* no matching terminal code */ - /* When there was a matching mapping and no - * termcode could be replaced after another one, - * use that mapping (loop around). If there was - * no mapping use the character from the - * typeahead buffer right here. */ - if (mp == NULL) { - /* - * get a character: 2. from the typeahead buffer - */ - c = typebuf.tb_buf[typebuf.tb_off] & 255; - if (advance) { /* remove chars from tb_buf */ - cmd_silent = (typebuf.tb_silent > 0); - if (typebuf.tb_maplen > 0) - KeyTyped = FALSE; - else { - KeyTyped = TRUE; - /* write char to script file(s) */ - gotchars(typebuf.tb_buf - + typebuf.tb_off, 1); - } - KeyNoremap = typebuf.tb_noremap[ - typebuf.tb_off]; - del_typebuf(1, 0); + // No matching mapping found or found a non-matching mapping that + // matches at least what the matching mapping matched + keylen = 0; + // If there was no mapping, use the character from the typeahead + // buffer right here. Otherwise, use the mapping (loop around). + if (mp == NULL) { + // get a character: 2. from the typeahead buffer + c = typebuf.tb_buf[typebuf.tb_off] & 255; + if (advance) { // remove chars from tb_buf + cmd_silent = (typebuf.tb_silent > 0); + if (typebuf.tb_maplen > 0) { + KeyTyped = false; + } else { + KeyTyped = true; + // write char to script file(s) + gotchars(typebuf.tb_buf + typebuf.tb_off, 1); } - break; /* got character, break for loop */ + KeyNoremap = typebuf.tb_noremap[typebuf.tb_off]; + del_typebuf(1, 0); } - } - if (keylen > 0) { /* full matching terminal code */ - continue; /* try mapping again */ - } - - /* Partial match: get some more characters. When a - * matching mapping was found use that one. 
*/ - if (mp == NULL || keylen < 0) - keylen = KEYLEN_PART_KEY; - else + break; // got character, break for loop + } else { keylen = mp_match_len; + } } /* complete match */ @@ -2460,7 +2431,7 @@ inchar ( if (typebuf_changed(tb_change_cnt)) return 0; - return fix_input_buffer(buf, len, script_char >= 0); + return fix_input_buffer(buf, len); } /* @@ -2468,12 +2439,7 @@ inchar ( * buf[] must have room to triple the number of bytes! * Returns the new length. */ -int -fix_input_buffer ( - char_u *buf, - int len, - int script /* TRUE when reading from a script */ -) +int fix_input_buffer(char_u *buf, int len) { if (!using_script()) { // Should not escape K_SPECIAL/CSI reading input from the user because vim @@ -2490,12 +2456,10 @@ fix_input_buffer ( // Replace NUL by K_SPECIAL KS_ZERO KE_FILLER // Replace K_SPECIAL by K_SPECIAL KS_SPECIAL KE_FILLER // Replace CSI by K_SPECIAL KS_EXTRA KE_CSI - // Don't replace K_SPECIAL when reading a script file. for (i = len; --i >= 0; ++p) { if (p[0] == NUL || (p[0] == K_SPECIAL - && !script - && (i < 2 || p[1] != KS_EXTRA))) { + && (i < 2 || p[1] != KS_EXTRA))) { memmove(p + 3, p + 1, (size_t)i); p[2] = (char_u)K_THIRD(p[0]); p[1] = (char_u)K_SECOND(p[0]); @@ -3753,8 +3717,10 @@ eval_map_expr ( */ char_u *vim_strsave_escape_csi(char_u *p) { - /* Need a buffer to hold up to three times as much. */ - char_u *res = xmalloc(STRLEN(p) * 3 + 1); + // Need a buffer to hold up to three times as much. Four in case of an + // illegal utf-8 byte: + // 0xc0 -> 0xc3 - 0x80 -> 0xc3 K_SPECIAL KS_SPECIAL KE_FILLER + char_u *res = xmalloc(STRLEN(p) * 4 + 1); char_u *d = res; for (char_u *s = p; *s != NUL; ) { if (s[0] == K_SPECIAL && s[1] != NUL && s[2] != NUL) { @@ -3763,17 +3729,10 @@ char_u *vim_strsave_escape_csi(char_u *p) *d++ = *s++; *d++ = *s++; } else { - int len = mb_char2len(PTR2CHAR(s)); - int len2 = mb_ptr2len(s); - /* Add character, possibly multi-byte to destination, escaping - * CSI and K_SPECIAL. 
*/ + // Add character, possibly multi-byte to destination, escaping + // CSI and K_SPECIAL. Be careful, it can be an illegal byte! d = add_char2buf(PTR2CHAR(s), d); - while (len < len2) { - /* add following combining char */ - d = add_char2buf(PTR2CHAR(s + len), d); - len += mb_char2len(PTR2CHAR(s + len)); - } - mb_ptr_adv(s); + s += MB_CPTR2LEN(s); } } *d = NUL; diff --git a/src/nvim/globals.h b/src/nvim/globals.h index 07ea045c13..f8c7c9d330 100644 --- a/src/nvim/globals.h +++ b/src/nvim/globals.h @@ -556,7 +556,6 @@ EXTERN win_T *prevwin INIT(= NULL); /* previous window */ FOR_ALL_TABS(tp) \ FOR_ALL_WINDOWS_IN_TAB(wp, tp) -# define FOR_ALL_WINDOWS(wp) for (wp = firstwin; wp != NULL; wp = wp->w_next) # define FOR_ALL_WINDOWS_IN_TAB(wp, tp) \ for (win_T *wp = ((tp) == curtab) \ ? firstwin : (tp)->tp_firstwin; wp != NULL; wp = wp->w_next) diff --git a/src/nvim/if_cscope.c b/src/nvim/if_cscope.c index a05ac5f877..550d256de5 100644 --- a/src/nvim/if_cscope.c +++ b/src/nvim/if_cscope.c @@ -994,11 +994,12 @@ static int cs_find_common(char *opt, char *pat, int forceit, int verbose, return FALSE; } - if (*qfpos != '0') { - apply_autocmds(EVENT_QUICKFIXCMDPRE, (char_u *)"cscope", - curbuf->b_fname, TRUE, curbuf); - if (did_throw || force_abort) - return FALSE; + if (*qfpos != '0' + && apply_autocmds(EVENT_QUICKFIXCMDPRE, (char_u *)"cscope", + curbuf->b_fname, true, curbuf)) { + if (aborting()) { + return false; + } } } diff --git a/src/nvim/macros.h b/src/nvim/macros.h index df2b431e92..650bf76156 100644 --- a/src/nvim/macros.h +++ b/src/nvim/macros.h @@ -138,7 +138,7 @@ // Backup multi-byte pointer. Only use with "p" > "s" ! 
# define mb_ptr_back(s, p) (p -= mb_head_off((char_u *)s, (char_u *)p - 1) + 1) // get length of multi-byte char, not including composing chars -# define mb_cptr2len(p) utf_ptr2len(p) +# define MB_CPTR2LEN(p) utf_ptr2len(p) # define MB_COPY_CHAR(f, t) mb_copy_char((const char_u **)(&f), &t); diff --git a/src/nvim/memline.c b/src/nvim/memline.c index b67f550358..1a315fce8b 100644 --- a/src/nvim/memline.c +++ b/src/nvim/memline.c @@ -2318,7 +2318,7 @@ ml_append_int ( * * return FAIL for failure, OK otherwise */ -int ml_replace(linenr_T lnum, char_u *line, int copy) +int ml_replace(linenr_T lnum, char_u *line, bool copy) { if (line == NULL) /* just checking... */ return FAIL; diff --git a/src/nvim/memory.c b/src/nvim/memory.c index b593936d7b..58c01fbe7a 100644 --- a/src/nvim/memory.c +++ b/src/nvim/memory.c @@ -475,6 +475,13 @@ void *xmemdup(const void *data, size_t len) return memcpy(xmalloc(len), data, len); } +/// Returns true if strings `a` and `b` are equal. Arguments may be NULL. +bool strequal(const char *a, const char *b) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (a == NULL && b == NULL) || (a && b && strcmp(a, b) == 0); +} + /* * Avoid repeating the error message many times (they take 1 second each). * Did_outofmem_msg is reset when a character is read. diff --git a/src/nvim/misc1.c b/src/nvim/misc1.c index d49d1d8a21..0bb5a8468d 100644 --- a/src/nvim/misc1.c +++ b/src/nvim/misc1.c @@ -2321,8 +2321,8 @@ int get_keystroke(void) * terminal code to complete. */ n = os_inchar(buf + len, maxlen, len == 0 ? -1L : 100L, 0); if (n > 0) { - /* Replace zero and CSI by a special key code. */ - n = fix_input_buffer(buf + len, n, FALSE); + // Replace zero and CSI by a special key code. 
+ n = fix_input_buffer(buf + len, n); len += n; waited = 0; } else if (len > 0) diff --git a/src/nvim/move.c b/src/nvim/move.c index bb6c032db1..4c1b8a8411 100644 --- a/src/nvim/move.c +++ b/src/nvim/move.c @@ -2137,7 +2137,8 @@ void do_check_cursorbind(void) * loop through the cursorbound windows */ VIsual_select = VIsual_active = 0; - for (curwin = firstwin; curwin; curwin = curwin->w_next) { + FOR_ALL_WINDOWS_IN_TAB(wp, curtab) { + curwin = wp; curbuf = curwin->w_buffer; /* skip original window and windows with 'noscrollbind' */ if (curwin != old_curwin && curwin->w_p_crb) { diff --git a/src/nvim/normal.c b/src/nvim/normal.c index d010b4b3a7..ee3c3f9f11 100644 --- a/src/nvim/normal.c +++ b/src/nvim/normal.c @@ -45,6 +45,7 @@ #include "nvim/screen.h" #include "nvim/search.h" #include "nvim/spell.h" +#include "nvim/spellfile.h" #include "nvim/strings.h" #include "nvim/syntax.h" #include "nvim/tag.h" @@ -3511,7 +3512,8 @@ void check_scrollbind(linenr_T topline_diff, long leftcol_diff) * loop through the scrollbound windows and scroll accordingly */ VIsual_select = VIsual_active = 0; - for (curwin = firstwin; curwin; curwin = curwin->w_next) { + FOR_ALL_WINDOWS_IN_TAB(wp, curtab) { + curwin = wp; curbuf = curwin->w_buffer; /* skip original window and windows with 'noscrollbind' */ if (curwin == old_curwin || !curwin->w_p_scb) { diff --git a/src/nvim/ops.c b/src/nvim/ops.c index 530193bd41..1e4d392754 100644 --- a/src/nvim/ops.c +++ b/src/nvim/ops.c @@ -3438,43 +3438,47 @@ dis_msg ( os_breakcheck(); } -/* - * If "process" is TRUE and the line begins with a comment leader (possibly - * after some white space), return a pointer to the text after it. Put a boolean - * value indicating whether the line ends with an unclosed comment in - * "is_comment". 
- * line - line to be processed, - * process - if FALSE, will only check whether the line ends with an unclosed - * comment, - * include_space - whether to also skip space following the comment leader, - * is_comment - will indicate whether the current line ends with an unclosed - * comment. - */ -static char_u *skip_comment(char_u *line, int process, int include_space, int *is_comment) +/// If \p "process" is true and the line begins with a comment leader (possibly +/// after some white space), return a pointer to the text after it. +/// Put a boolean value indicating whether the line ends with an unclosed +/// comment in "is_comment". +/// +/// @param line - line to be processed +/// @param process - if false, will only check whether the line ends +/// with an unclosed comment, +/// @param include_space - whether to skip space following the comment leader +/// @param[out] is_comment - whether the current line ends with an unclosed +/// comment. +char_u *skip_comment( + char_u *line, bool process, bool include_space, bool *is_comment +) { char_u *comment_flags = NULL; int lead_len; int leader_offset = get_last_leader_offset(line, &comment_flags); - *is_comment = FALSE; + *is_comment = false; if (leader_offset != -1) { /* Let's check whether the line ends with an unclosed comment. * If the last comment leader has COM_END in flags, there's no comment. 
*/ while (*comment_flags) { if (*comment_flags == COM_END - || *comment_flags == ':') + || *comment_flags == ':') { break; - ++comment_flags; + } + comment_flags++; + } + if (*comment_flags != COM_END) { + *is_comment = true; } - if (*comment_flags != COM_END) - *is_comment = TRUE; } - if (process == FALSE) + if (process == false) { return line; + } - lead_len = get_leader_len(line, &comment_flags, FALSE, include_space); + lead_len = get_leader_len(line, &comment_flags, false, include_space); if (lead_len == 0) return line; @@ -3496,8 +3500,9 @@ static char_u *skip_comment(char_u *line, int process, int include_space, int *i * starting with a closing part of a three-part comment. That's good, * because we don't want to remove those as this would be annoying. */ - if (*comment_flags == ':' || *comment_flags == NUL) + if (*comment_flags == ':' || *comment_flags == NUL) { line += lead_len; + } return line; } @@ -3531,7 +3536,7 @@ int do_join(size_t count, int *comments = NULL; int remove_comments = (use_formatoptions == TRUE) && has_format_option(FO_REMOVE_COMS); - int prev_was_comment; + bool prev_was_comment; if (save_undo && u_save(curwin->w_cursor.lnum - 1, curwin->w_cursor.lnum + (linenr_T)count) == FAIL) { diff --git a/src/nvim/option.c b/src/nvim/option.c index 8990b59f57..2fae4aa848 100644 --- a/src/nvim/option.c +++ b/src/nvim/option.c @@ -59,6 +59,7 @@ #include "nvim/regexp.h" #include "nvim/screen.h" #include "nvim/spell.h" +#include "nvim/spellfile.h" #include "nvim/strings.h" #include "nvim/syntax.h" #include "nvim/ui.h" @@ -1490,7 +1491,7 @@ do_set ( new_value_alloced = true; if (newval == NULL) { newval = empty_option; - } else if (!(options[opt_idx].flags | P_NO_DEF_EXP)) { + } else if (!(options[opt_idx].flags & P_NO_DEF_EXP)) { s = option_expand(opt_idx, newval); if (s == NULL) { s = newval; @@ -3622,11 +3623,12 @@ set_bool_option ( char_u hash[UNDO_HASH_SIZE]; buf_T *save_curbuf = curbuf; - for (curbuf = firstbuf; curbuf != NULL; curbuf = 
curbuf->b_next) { - /* When 'undofile' is set globally: for every buffer, otherwise - * only for the current buffer: Try to read in the undofile, - * if one exists, the buffer wasn't changed and the buffer was - * loaded */ + FOR_ALL_BUFFERS(bp) { + curbuf = bp; + // When 'undofile' is set globally: for every buffer, otherwise + // only for the current buffer: Try to read in the undofile, + // if one exists, the buffer wasn't changed and the buffer was + // loaded if ((curbuf == save_curbuf || (opt_flags & OPT_GLOBAL) || opt_flags == 0) && !curbufIsChanged() && curbuf->b_ml.ml_mfp != NULL) { diff --git a/src/nvim/popupmnu.c b/src/nvim/popupmnu.c index 89180f76de..ea00afbd86 100644 --- a/src/nvim/popupmnu.c +++ b/src/nvim/popupmnu.c @@ -73,7 +73,6 @@ void pum_display(pumitem_T *array, int size, int selected, bool array_changed) int above_row; int below_row; int redo_count = 0; - win_T *pvwin; if (!pum_is_visible) { // To keep the code simple, we only allow changing the @@ -126,8 +125,10 @@ redo: kind_width = 0; extra_width = 0; - FOR_ALL_WINDOWS(pvwin) { - if (pvwin->w_p_pvw) { + win_T *pvwin = NULL; + FOR_ALL_WINDOWS_IN_TAB(wp, curtab) { + if (wp->w_p_pvw) { + pvwin = wp; break; } } diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c index 8406dfc157..3f7975051f 100644 --- a/src/nvim/quickfix.c +++ b/src/nvim/quickfix.c @@ -565,7 +565,7 @@ static int qf_get_next_file_line(qfstate_T *state) bool discard = false; state->linelen = STRLEN(IObuff); if (state->linelen == IOSIZE - 1 - && !(IObuff[state->linelen - 1] == '\n')) { // NOLINT(whitespace/parens) + && !(IObuff[state->linelen - 1] == '\n')) { // The current line exceeds IObuff, continue reading using growbuf // until EOL or LINE_MAXLEN bytes is read. 
if (state->growbuf == NULL) { @@ -3012,11 +3012,11 @@ void ex_make(exarg_T *eap) case CMD_lgrepadd: au_name = (char_u *)"lgrepadd"; break; default: break; } - if (au_name != NULL) { - apply_autocmds(EVENT_QUICKFIXCMDPRE, au_name, - curbuf->b_fname, TRUE, curbuf); - if (did_throw || force_abort) + if (au_name != NULL && apply_autocmds(EVENT_QUICKFIXCMDPRE, au_name, + curbuf->b_fname, true, curbuf)) { + if (aborting()) { return; + } } if (eap->cmdidx == CMD_lmake || eap->cmdidx == CMD_lgrep @@ -3476,11 +3476,11 @@ void ex_vimgrep(exarg_T *eap) case CMD_lgrepadd: au_name = (char_u *)"lgrepadd"; break; default: break; } - if (au_name != NULL) { - apply_autocmds(EVENT_QUICKFIXCMDPRE, au_name, - curbuf->b_fname, TRUE, curbuf); - if (did_throw || force_abort) + if (au_name != NULL && apply_autocmds(EVENT_QUICKFIXCMDPRE, au_name, + curbuf->b_fname, true, curbuf)) { + if (aborting()) { return; + } } if (eap->cmdidx == CMD_lgrep @@ -4203,11 +4203,15 @@ static int qf_add_entries(qf_info_T *qi, list_T *list, char_u *title, return retval; } -static int qf_set_properties(qf_info_T *qi, dict_T *what) +static int qf_set_properties(qf_info_T *qi, dict_T *what, int action) { dictitem_T *di; int retval = FAIL; + int newlist = false; + if (action == ' ' || qi->qf_curlist == qi->qf_listcount) { + newlist = true; + } int qf_idx = qi->qf_curlist; // default is the current list if ((di = dict_find(what, (char_u *)"nr", -1)) != NULL) { // Use the specified quickfix/location list @@ -4219,6 +4223,12 @@ static int qf_set_properties(qf_info_T *qi, dict_T *what) } else { return FAIL; } + newlist = false; // use the specified list + } + + if (newlist) { + qf_new_list(qi, NULL); + qf_idx = qi->qf_curlist; } if ((di = dict_find(what, (char_u *)"title", -1)) != NULL) { @@ -4249,7 +4259,7 @@ int set_errorlist(win_T *wp, list_T *list, int action, char_u *title, } if (what != NULL) { - retval = qf_set_properties(qi, what); + retval = qf_set_properties(qi, what, action); } else { retval = 
qf_add_entries(qi, list, title, action); } @@ -4299,10 +4309,9 @@ void ex_cbuffer(exarg_T *eap) break; } - if (au_name != NULL) { - apply_autocmds(EVENT_QUICKFIXCMDPRE, (char_u *)au_name, - curbuf->b_fname, true, curbuf); - if (did_throw || force_abort) { + if (au_name != NULL && apply_autocmds(EVENT_QUICKFIXCMDPRE, (char_u *)au_name, + curbuf->b_fname, true, curbuf)) { + if (aborting()) { return; } } @@ -4385,10 +4394,9 @@ void ex_cexpr(exarg_T *eap) default: break; } - if (au_name != NULL) { - apply_autocmds(EVENT_QUICKFIXCMDPRE, (char_u *)au_name, - curbuf->b_fname, true, curbuf); - if (did_throw || force_abort) { + if (au_name != NULL && apply_autocmds(EVENT_QUICKFIXCMDPRE, (char_u *)au_name, + curbuf->b_fname, true, curbuf)) { + if (aborting()) { return; } } @@ -4444,11 +4452,11 @@ void ex_helpgrep(exarg_T *eap) case CMD_lhelpgrep: au_name = (char_u *)"lhelpgrep"; break; default: break; } - if (au_name != NULL) { - apply_autocmds(EVENT_QUICKFIXCMDPRE, au_name, - curbuf->b_fname, TRUE, curbuf); - if (did_throw || force_abort) + if (au_name != NULL && apply_autocmds(EVENT_QUICKFIXCMDPRE, au_name, + curbuf->b_fname, true, curbuf)) { + if (aborting()) { return; + } } /* Make 'cpoptions' empty, the 'l' flag should not be used here. */ diff --git a/src/nvim/spell.c b/src/nvim/spell.c index dea09cd633..12f982106a 100644 --- a/src/nvim/spell.c +++ b/src/nvim/spell.c @@ -1,5 +1,7 @@ // spell.c: code for spell checking // +// See spellfile.c for the Vim spell file format. +// // The spell checking mechanism uses a tree (aka trie). Each node in the tree // has a list of bytes that can appear (siblings). For each byte there is a // pointer to the node with the byte that follows in the word (child). @@ -63,226 +65,6 @@ // compute the maximum word score that can be used. 
#define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) -// Vim spell file format: <HEADER> -// <SECTIONS> -// <LWORDTREE> -// <KWORDTREE> -// <PREFIXTREE> -// -// <HEADER>: <fileID> <versionnr> -// -// <fileID> 8 bytes "VIMspell" -// <versionnr> 1 byte VIMSPELLVERSION -// -// -// Sections make it possible to add information to the .spl file without -// making it incompatible with previous versions. There are two kinds of -// sections: -// 1. Not essential for correct spell checking. E.g. for making suggestions. -// These are skipped when not supported. -// 2. Optional information, but essential for spell checking when present. -// E.g. conditions for affixes. When this section is present but not -// supported an error message is given. -// -// <SECTIONS>: <section> ... <sectionend> -// -// <section>: <sectionID> <sectionflags> <sectionlen> (section contents) -// -// <sectionID> 1 byte number from 0 to 254 identifying the section -// -// <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct -// spell checking -// -// <sectionlen> 4 bytes length of section contents, MSB first -// -// <sectionend> 1 byte SN_END -// -// -// sectionID == SN_INFO: <infotext> -// <infotext> N bytes free format text with spell file info (version, -// website, etc) -// -// sectionID == SN_REGION: <regionname> ... -// <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. -// First <regionname> is region 1. -// -// sectionID == SN_CHARFLAGS: <charflagslen> <charflags> -// <folcharslen> <folchars> -// <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). -// <charflags> N bytes List of flags (first one is for character 128): -// 0x01 word character CF_WORD -// 0x02 upper-case character CF_UPPER -// <folcharslen> 2 bytes Number of bytes in <folchars>. -// <folchars> N bytes Folded characters, first one is for character 128. 
-// -// sectionID == SN_MIDWORD: <midword> -// <midword> N bytes Characters that are word characters only when used -// in the middle of a word. -// -// sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... -// <prefcondcnt> 2 bytes Number of <prefcond> items following. -// <prefcond> : <condlen> <condstr> -// <condlen> 1 byte Length of <condstr>. -// <condstr> N bytes Condition for the prefix. -// -// sectionID == SN_REP: <repcount> <rep> ... -// <repcount> 2 bytes number of <rep> items, MSB first. -// <rep> : <repfromlen> <repfrom> <reptolen> <repto> -// <repfromlen> 1 byte length of <repfrom> -// <repfrom> N bytes "from" part of replacement -// <reptolen> 1 byte length of <repto> -// <repto> N bytes "to" part of replacement -// -// sectionID == SN_REPSAL: <repcount> <rep> ... -// just like SN_REP but for soundfolded words -// -// sectionID == SN_SAL: <salflags> <salcount> <sal> ... -// <salflags> 1 byte flags for soundsalike conversion: -// SAL_F0LLOWUP -// SAL_COLLAPSE -// SAL_REM_ACCENTS -// <salcount> 2 bytes number of <sal> items following -// <sal> : <salfromlen> <salfrom> <saltolen> <salto> -// <salfromlen> 1 byte length of <salfrom> -// <salfrom> N bytes "from" part of soundsalike -// <saltolen> 1 byte length of <salto> -// <salto> N bytes "to" part of soundsalike -// -// sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> -// <sofofromlen> 2 bytes length of <sofofrom> -// <sofofrom> N bytes "from" part of soundfold -// <sofotolen> 2 bytes length of <sofoto> -// <sofoto> N bytes "to" part of soundfold -// -// sectionID == SN_SUGFILE: <timestamp> -// <timestamp> 8 bytes time in seconds that must match with .sug file -// -// sectionID == SN_NOSPLITSUGS: nothing -// -// sectionID == SN_NOCOMPOUNDSUGS: nothing -// -// sectionID == SN_WORDS: <word> ... -// <word> N bytes NUL terminated common word -// -// sectionID == SN_MAP: <mapstr> -// <mapstr> N bytes String with sequences of similar characters, -// separated by slashes. 
-// -// sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> -// <comppatcount> <comppattern> ... <compflags> -// <compmax> 1 byte Maximum nr of words in compound word. -// <compminlen> 1 byte Minimal word length for compounding. -// <compsylmax> 1 byte Maximum nr of syllables in compound word. -// <compoptions> 2 bytes COMP_ flags. -// <comppatcount> 2 bytes number of <comppattern> following -// <compflags> N bytes Flags from COMPOUNDRULE items, separated by -// slashes. -// -// <comppattern>: <comppatlen> <comppattext> -// <comppatlen> 1 byte length of <comppattext> -// <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN -// -// sectionID == SN_NOBREAK: (empty, its presence is what matters) -// -// sectionID == SN_SYLLABLE: <syllable> -// <syllable> N bytes String from SYLLABLE item. -// -// <LWORDTREE>: <wordtree> -// -// <KWORDTREE>: <wordtree> -// -// <PREFIXTREE>: <wordtree> -// -// -// <wordtree>: <nodecount> <nodedata> ... -// -// <nodecount> 4 bytes Number of nodes following. MSB first. -// -// <nodedata>: <siblingcount> <sibling> ... -// -// <siblingcount> 1 byte Number of siblings in this node. The siblings -// follow in sorted order. -// -// <sibling>: <byte> [ <nodeidx> <xbyte> -// | <flags> [<flags2>] [<region>] [<affixID>] -// | [<pflags>] <affixID> <prefcondnr> ] -// -// <byte> 1 byte Byte value of the sibling. Special cases: -// BY_NOFLAGS: End of word without flags and for all -// regions. -// For PREFIXTREE <affixID> and -// <prefcondnr> follow. -// BY_FLAGS: End of word, <flags> follow. -// For PREFIXTREE <pflags>, <affixID> -// and <prefcondnr> follow. -// BY_FLAGS2: End of word, <flags> and <flags2> -// follow. Not used in PREFIXTREE. -// BY_INDEX: Child of sibling is shared, <nodeidx> -// and <xbyte> follow. -// -// <nodeidx> 3 bytes Index of child for this sibling, MSB first. -// -// <xbyte> 1 byte Byte value of the sibling. 
-// -// <flags> 1 byte Bitmask of: -// WF_ALLCAP word must have only capitals -// WF_ONECAP first char of word must be capital -// WF_KEEPCAP keep-case word -// WF_FIXCAP keep-case word, all caps not allowed -// WF_RARE rare word -// WF_BANNED bad word -// WF_REGION <region> follows -// WF_AFX <affixID> follows -// -// <flags2> 1 byte Bitmask of: -// WF_HAS_AFF >> 8 word includes affix -// WF_NEEDCOMP >> 8 word only valid in compound -// WF_NOSUGGEST >> 8 word not used for suggestions -// WF_COMPROOT >> 8 word already a compound -// WF_NOCOMPBEF >> 8 no compounding before this word -// WF_NOCOMPAFT >> 8 no compounding after this word -// -// <pflags> 1 byte Bitmask of: -// WFP_RARE rare prefix -// WFP_NC non-combining prefix -// WFP_UP letter after prefix made upper case -// -// <region> 1 byte Bitmask for regions in which word is valid. When -// omitted it's valid in all regions. -// Lowest bit is for region 1. -// -// <affixID> 1 byte ID of affix that can be used with this word. In -// PREFIXTREE used for the required prefix ID. -// -// <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list -// from HEADER. -// -// All text characters are in 'encoding', but stored as single bytes. - -// Vim .sug file format: <SUGHEADER> -// <SUGWORDTREE> -// <SUGTABLE> -// -// <SUGHEADER>: <fileID> <versionnr> <timestamp> -// -// <fileID> 6 bytes "VIMsug" -// <versionnr> 1 byte VIMSUGVERSION -// <timestamp> 8 bytes timestamp that must match with .spl file -// -// -// <SUGWORDTREE>: <wordtree> (see above, no flags or region used) -// -// -// <SUGTABLE>: <sugwcount> <sugline> ... -// -// <sugwcount> 4 bytes number of <sugline> following -// -// <sugline>: <sugnr> ... 
NUL -// -// <sugnr>: X bytes word number that results in this soundfolded word, -// stored as an offset to the previous number in as -// few bytes as possible, see offset2bytes()) - #include <assert.h> #include <inttypes.h> #include <limits.h> @@ -323,254 +105,18 @@ #include "nvim/regexp.h" #include "nvim/screen.h" #include "nvim/search.h" +#include "nvim/spellfile.h" #include "nvim/strings.h" #include "nvim/syntax.h" -#include "nvim/ui.h" #include "nvim/undo.h" #include "nvim/os/os.h" #include "nvim/os/input.h" -#ifndef UNIX // it's in os/unix_defs.h for Unix -# include <time.h> // for time_t -#endif - -#define MAXWLEN 254 // Assume max. word len is this many bytes. - // Some places assume a word length fits in a - // byte, thus it can't be above 255. - -// Type used for indexes in the word tree need to be at least 4 bytes. If int -// is 8 bytes we could use something smaller, but what? -typedef int idx_T; - -# define SPL_FNAME_TMPL "%s.%s.spl" -# define SPL_FNAME_ADD ".add." -# define SPL_FNAME_ASCII ".ascii." - -// Flags used for a word. Only the lowest byte can be used, the region byte -// comes above it. 
-#define WF_REGION 0x01 // region byte follows -#define WF_ONECAP 0x02 // word with one capital (or all capitals) -#define WF_ALLCAP 0x04 // word must be all capitals -#define WF_RARE 0x08 // rare word -#define WF_BANNED 0x10 // bad word -#define WF_AFX 0x20 // affix ID follows -#define WF_FIXCAP 0x40 // keep-case word, allcap not allowed -#define WF_KEEPCAP 0x80 // keep-case word - -// for <flags2>, shifted up one byte to be used in wn_flags -#define WF_HAS_AFF 0x0100 // word includes affix -#define WF_NEEDCOMP 0x0200 // word only valid in compound -#define WF_NOSUGGEST 0x0400 // word not to be suggested -#define WF_COMPROOT 0x0800 // already compounded word, COMPOUNDROOT -#define WF_NOCOMPBEF 0x1000 // no compounding before this word -#define WF_NOCOMPAFT 0x2000 // no compounding after this word - // only used for su_badflags #define WF_MIXCAP 0x20 // mix of upper and lower case: macaRONI #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) -// flags for <pflags> -#define WFP_RARE 0x01 // rare prefix -#define WFP_NC 0x02 // prefix is not combining -#define WFP_UP 0x04 // to-upper prefix -#define WFP_COMPPERMIT 0x08 // prefix with COMPOUNDPERMITFLAG -#define WFP_COMPFORBID 0x10 // prefix with COMPOUNDFORBIDFLAG - -// Flags for postponed prefixes in "sl_pidxs". Must be above affixID (one -// byte) and prefcondnr (two bytes). 
-#define WF_RAREPFX (WFP_RARE << 24) // rare postponed prefix -#define WF_PFX_NC (WFP_NC << 24) // non-combining postponed prefix -#define WF_PFX_UP (WFP_UP << 24) // to-upper postponed prefix -#define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) // postponed prefix with - // COMPOUNDPERMITFLAG -#define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) // postponed prefix with - // COMPOUNDFORBIDFLAG - - -// flags for <compoptions> -#define COMP_CHECKDUP 1 // CHECKCOMPOUNDDUP -#define COMP_CHECKREP 2 // CHECKCOMPOUNDREP -#define COMP_CHECKCASE 4 // CHECKCOMPOUNDCASE -#define COMP_CHECKTRIPLE 8 // CHECKCOMPOUNDTRIPLE - -// Special byte values for <byte>. Some are only used in the tree for -// postponed prefixes, some only in the other trees. This is a bit messy... -#define BY_NOFLAGS 0 // end of word without flags or region; for - // postponed prefix: no <pflags> -#define BY_INDEX 1 // child is shared, index follows -#define BY_FLAGS 2 // end of word, <flags> byte follows; for - // postponed prefix: <pflags> follows -#define BY_FLAGS2 3 // end of word, <flags> and <flags2> bytes - // follow; never used in prefix tree -#define BY_SPECIAL BY_FLAGS2 // highest special byte value - -// Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, -// si_repsal, sl_rep, and si_sal. Not for sl_sal! -// One replacement: from "ft_from" to "ft_to". -typedef struct fromto_S { - char_u *ft_from; - char_u *ft_to; -} fromto_T; - -// Info from "SAL" entries in ".aff" file used in sl_sal. -// The info is split for quick processing by spell_soundfold(). -// Note that "sm_oneof" and "sm_rules" point into sm_lead. -typedef struct salitem_S { - char_u *sm_lead; // leading letters - int sm_leadlen; // length of "sm_lead" - char_u *sm_oneof; // letters from () or NULL - char_u *sm_rules; // rules like ^, $, priority - char_u *sm_to; // replacement. 
- int *sm_lead_w; // wide character copy of "sm_lead" - int *sm_oneof_w; // wide character copy of "sm_oneof" - int *sm_to_w; // wide character copy of "sm_to" -} salitem_T; - -typedef int salfirst_T; - -// Values for SP_*ERROR are negative, positive values are used by -// read_cnt_string(). -#define SP_TRUNCERROR -1 // spell file truncated error -#define SP_FORMERROR -2 // format error in spell file -#define SP_OTHERERROR -3 // other error while reading spell file - -// Structure used to store words and other info for one language, loaded from -// a .spl file. -// The main access is through the tree in "sl_fbyts/sl_fidxs", storing the -// case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words. -// -// The "byts" array stores the possible bytes in each tree node, preceded by -// the number of possible bytes, sorted on byte value: -// <len> <byte1> <byte2> ... -// The "idxs" array stores the index of the child node corresponding to the -// byte in "byts". -// Exception: when the byte is zero, the word may end here and "idxs" holds -// the flags, region mask and affixID for the word. There may be several -// zeros in sequence for alternative flag/region/affixID combinations. -typedef struct slang_S slang_T; - -struct slang_S { - slang_T *sl_next; // next language - char_u *sl_name; // language name "en", "en.rare", "nl", etc. - char_u *sl_fname; // name of .spl file - bool sl_add; // true if it's a .add file. 
- - char_u *sl_fbyts; // case-folded word bytes - idx_T *sl_fidxs; // case-folded word indexes - char_u *sl_kbyts; // keep-case word bytes - idx_T *sl_kidxs; // keep-case word indexes - char_u *sl_pbyts; // prefix tree word bytes - idx_T *sl_pidxs; // prefix tree word indexes - - char_u *sl_info; // infotext string or NULL - - char_u sl_regions[17]; // table with up to 8 region names plus NUL - - char_u *sl_midword; // MIDWORD string or NULL - - hashtab_T sl_wordcount; // hashtable with word count, wordcount_T - - int sl_compmax; // COMPOUNDWORDMAX (default: MAXWLEN) - int sl_compminlen; // COMPOUNDMIN (default: 0) - int sl_compsylmax; // COMPOUNDSYLMAX (default: MAXWLEN) - int sl_compoptions; // COMP_* flags - garray_T sl_comppat; // CHECKCOMPOUNDPATTERN items - regprog_T *sl_compprog; // COMPOUNDRULE turned into a regexp progrm - // (NULL when no compounding) - char_u *sl_comprules; // all COMPOUNDRULE concatenated (or NULL) - char_u *sl_compstartflags; // flags for first compound word - char_u *sl_compallflags; // all flags for compound words - bool sl_nobreak; // When true: no spaces between words - char_u *sl_syllable; // SYLLABLE repeatable chars or NULL - garray_T sl_syl_items; // syllable items - - int sl_prefixcnt; // number of items in "sl_prefprog" - regprog_T **sl_prefprog; // table with regprogs for prefixes - - garray_T sl_rep; // list of fromto_T entries from REP lines - int16_t sl_rep_first[256]; // indexes where byte first appears, -1 if - // there is none - garray_T sl_sal; // list of salitem_T entries from SAL lines - salfirst_T sl_sal_first[256]; // indexes where byte first appears, -1 if - // there is none - bool sl_followup; // SAL followup - bool sl_collapse; // SAL collapse_result - bool sl_rem_accents; // SAL remove_accents - bool sl_sofo; // SOFOFROM and SOFOTO instead of SAL items: - // "sl_sal_first" maps chars, when has_mbyte - // "sl_sal" is a list of wide char lists. 
- garray_T sl_repsal; // list of fromto_T entries from REPSAL lines - int16_t sl_repsal_first[256]; // sl_rep_first for REPSAL lines - bool sl_nosplitsugs; // don't suggest splitting a word - bool sl_nocompoundsugs; // don't suggest compounding - - // Info from the .sug file. Loaded on demand. - time_t sl_sugtime; // timestamp for .sug file - char_u *sl_sbyts; // soundfolded word bytes - idx_T *sl_sidxs; // soundfolded word indexes - buf_T *sl_sugbuf; // buffer with word number table - bool sl_sugloaded; // true when .sug file was loaded or failed to - // load - - bool sl_has_map; // true, if there is a MAP line - hashtab_T sl_map_hash; // MAP for multi-byte chars - int sl_map_array[256]; // MAP for first 256 chars - hashtab_T sl_sounddone; // table with soundfolded words that have - // handled, see add_sound_suggest() -}; - -// First language that is loaded, start of the linked list of loaded -// languages. -static slang_T *first_lang = NULL; - -// Flags used in .spl file for soundsalike flags. -#define SAL_F0LLOWUP 1 -#define SAL_COLLAPSE 2 -#define SAL_REM_ACCENTS 4 - -// Structure used in "b_langp", filled from 'spelllang'. -typedef struct langp_S { - slang_T *lp_slang; // info for this language - slang_T *lp_sallang; // language used for sound folding or NULL - slang_T *lp_replang; // language used for REP items or NULL - int lp_region; // bitmask for region or REGION_ALL -} langp_T; - -#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i)) - -#define REGION_ALL 0xff // word valid in all regions - -#define VIMSPELLMAGIC "VIMspell" // string at start of Vim spell file -#define VIMSPELLMAGICL 8 -#define VIMSPELLVERSION 50 - -#define VIMSUGMAGIC "VIMsug" // string at start of Vim .sug file -#define VIMSUGMAGICL 6 -#define VIMSUGVERSION 1 - -// Section IDs. Only renumber them when VIMSPELLVERSION changes! 
-#define SN_REGION 0 // <regionname> section -#define SN_CHARFLAGS 1 // charflags section -#define SN_MIDWORD 2 // <midword> section -#define SN_PREFCOND 3 // <prefcond> section -#define SN_REP 4 // REP items section -#define SN_SAL 5 // SAL items section -#define SN_SOFO 6 // soundfolding section -#define SN_MAP 7 // MAP items section -#define SN_COMPOUND 8 // compound words section -#define SN_SYLLABLE 9 // syllable section -#define SN_NOBREAK 10 // NOBREAK section -#define SN_SUGFILE 11 // timestamp for .sug file -#define SN_REPSAL 12 // REPSAL items section -#define SN_WORDS 13 // common words -#define SN_NOSPLITSUGS 14 // don't split word for suggestions -#define SN_INFO 15 // info section -#define SN_NOCOMPOUNDSUGS 16 // don't compound for suggestions -#define SN_END 255 // end of sections - -#define SNF_REQUIRED 1 // <sectionflags>: required section - // Result values. Lower number is accepted over higher one. #define SP_BANNED -1 #define SP_RARE 0 @@ -578,8 +124,12 @@ typedef struct langp_S { #define SP_LOCAL 2 #define SP_BAD 3 +// First language that is loaded, start of the linked list of loaded +// languages. +slang_T *first_lang = NULL; + // file used for "zG" and "zW" -static char_u *int_wordlist = NULL; +char_u *int_wordlist = NULL; typedef struct wordcount_S { uint16_t wc_count; // nr of times word was seen @@ -718,65 +268,6 @@ typedef struct matchinf_S { char_u *mi_end2; // "mi_end" without following word } matchinf_T; -// The tables used for recognizing word characters according to spelling. -// These are only used for the first 256 characters of 'encoding'. 
-typedef struct { - bool st_isw[256]; // flags: is word char - bool st_isu[256]; // flags: is uppercase char - char_u st_fold[256]; // chars: folded case - char_u st_upper[256]; // chars: upper case -} spelltab_T; - -// For finding suggestions: At each node in the tree these states are tried: -typedef enum { - STATE_START = 0, // At start of node check for NUL bytes (goodword - // ends); if badword ends there is a match, otherwise - // try splitting word. - STATE_NOPREFIX, // try without prefix - STATE_SPLITUNDO, // Undo splitting. - STATE_ENDNUL, // Past NUL bytes at start of the node. - STATE_PLAIN, // Use each byte of the node. - STATE_DEL, // Delete a byte from the bad word. - STATE_INS_PREP, // Prepare for inserting bytes. - STATE_INS, // Insert a byte in the bad word. - STATE_SWAP, // Swap two bytes. - STATE_UNSWAP, // Undo swap two characters. - STATE_SWAP3, // Swap two characters over three. - STATE_UNSWAP3, // Undo Swap two characters over three. - STATE_UNROT3L, // Undo rotate three characters left - STATE_UNROT3R, // Undo rotate three characters right - STATE_REP_INI, // Prepare for using REP items. - STATE_REP, // Use matching REP items from the .aff file. - STATE_REP_UNDO, // Undo a REP item replacement. - STATE_FINAL // End of this node. -} state_T; - -// Struct to keep the state at each level in suggest_try_change(). 
-typedef struct trystate_S { - state_T ts_state; // state at this level, STATE_ - int ts_score; // score - idx_T ts_arridx; // index in tree array, start of node - short ts_curi; // index in list of child nodes - char_u ts_fidx; // index in fword[], case-folded bad word - char_u ts_fidxtry; // ts_fidx at which bytes may be changed - char_u ts_twordlen; // valid length of tword[] - char_u ts_prefixdepth; // stack depth for end of prefix or - // PFD_PREFIXTREE or PFD_NOPREFIX - char_u ts_flags; // TSF_ flags - char_u ts_tcharlen; // number of bytes in tword character - char_u ts_tcharidx; // current byte index in tword character - char_u ts_isdiff; // DIFF_ values - char_u ts_fcharstart; // index in fword where badword char started - char_u ts_prewordlen; // length of word in "preword[]" - char_u ts_splitoff; // index in "tword" after last split - char_u ts_splitfidx; // "ts_fidx" at word split - char_u ts_complen; // nr of compound words used - char_u ts_compsplit; // index for "compflags" where word was spit - char_u ts_save_badflags; // su_badflags saved here - char_u ts_delidx; // index in fword for char that was deleted, - // valid when "ts_flags" has TSF_DIDDEL -} trystate_T; - // Structure used for the cookie argument of do_in_runtimepath(). typedef struct spelload_S { char_u sl_lang[MAXWLEN + 1]; // language name @@ -790,200 +281,8 @@ typedef struct syl_item_S { int sy_len; } syl_item_T; -#define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff - // and .dic file. -// Main structure to store the contents of a ".aff" file. 
-typedef struct afffile_S { - char_u *af_enc; // "SET", normalized, alloc'ed string or NULL - int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG - unsigned af_rare; // RARE ID for rare word - unsigned af_keepcase; // KEEPCASE ID for keep-case word - unsigned af_bad; // BAD ID for banned word - unsigned af_needaffix; // NEEDAFFIX ID - unsigned af_circumfix; // CIRCUMFIX ID - unsigned af_needcomp; // NEEDCOMPOUND ID - unsigned af_comproot; // COMPOUNDROOT ID - unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID - unsigned af_comppermit; // COMPOUNDPERMITFLAG ID - unsigned af_nosuggest; // NOSUGGEST ID - int af_pfxpostpone; // postpone prefixes without chop string and - // without flags - bool af_ignoreextra; // IGNOREEXTRA present - hashtab_T af_pref; // hashtable for prefixes, affheader_T - hashtab_T af_suff; // hashtable for suffixes, affheader_T - hashtab_T af_comp; // hashtable for compound flags, compitem_T -} afffile_T; - -#define AFT_CHAR 0 // flags are one character -#define AFT_LONG 1 // flags are two characters -#define AFT_CAPLONG 2 // flags are one or two characters -#define AFT_NUM 3 // flags are numbers, comma separated - -typedef struct affentry_S affentry_T; -// Affix entry from ".aff" file. Used for prefixes and suffixes. -struct affentry_S { - affentry_T *ae_next; // next affix with same name/number - char_u *ae_chop; // text to chop off basic word (can be NULL) - char_u *ae_add; // text to add to basic word (can be NULL) - char_u *ae_flags; // flags on the affix (can be NULL) - char_u *ae_cond; // condition (NULL for ".") - regprog_T *ae_prog; // regexp program for ae_cond or NULL - char ae_compforbid; // COMPOUNDFORBIDFLAG found - char ae_comppermit; // COMPOUNDPERMITFLAG found -}; - -# define AH_KEY_LEN 17 // 2 x 8 bytes + NUL - -// Affix header from ".aff" file. Used for af_pref and af_suff. 
-typedef struct affheader_S { - char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix - unsigned ah_flag; // affix name as number, uses "af_flagtype" - int ah_newID; // prefix ID after renumbering; 0 if not used - int ah_combine; // suffix may combine with prefix - int ah_follows; // another affix block should be following - affentry_T *ah_first; // first affix entry -} affheader_T; - -#define HI2AH(hi) ((affheader_T *)(hi)->hi_key) - -// Flag used in compound items. -typedef struct compitem_S { - char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound - unsigned ci_flag; // affix name as number, uses "af_flagtype" - int ci_newID; // affix ID after renumbering. -} compitem_T; - -#define HI2CI(hi) ((compitem_T *)(hi)->hi_key) - -// Structure that is used to store the items in the word tree. This avoids -// the need to keep track of each allocated thing, everything is freed all at -// once after ":mkspell" is done. -// Note: "sb_next" must be just before "sb_data" to make sure the alignment of -// "sb_data" is correct for systems where pointers must be aligned on -// pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). -#define SBLOCKSIZE 16000 // size of sb_data -typedef struct sblock_S sblock_T; -struct sblock_S { - int sb_used; // nr of bytes already in use - sblock_T *sb_next; // next block in list - char_u sb_data[1]; // data, actually longer -}; - -// A node in the tree. 
-typedef struct wordnode_S wordnode_T; -struct wordnode_S { - union // shared to save space - { - char_u hashkey[6]; // the hash key, only used while compressing - int index; // index in written nodes (valid after first - // round) - } wn_u1; - union // shared to save space - { - wordnode_T *next; // next node with same hash key - wordnode_T *wnode; // parent node that will write this node - } wn_u2; - wordnode_T *wn_child; // child (next byte in word) - wordnode_T *wn_sibling; // next sibling (alternate byte in word, - // always sorted) - int wn_refs; // Nr. of references to this node. Only - // relevant for first node in a list of - // siblings, in following siblings it is - // always one. - char_u wn_byte; // Byte for this node. NUL for word end - - // Info for when "wn_byte" is NUL. - // In PREFIXTREE "wn_region" is used for the prefcondnr. - // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and - // "wn_region" the LSW of the wordnr. - char_u wn_affixID; // supported/required prefix ID or 0 - uint16_t wn_flags; // WF_ flags - short wn_region; // region mask - -#ifdef SPELL_PRINTTREE - int wn_nr; // sequence nr for printing -#endif -}; - -#define WN_MASK 0xffff // mask relevant bits of "wn_flags" - -#define HI2WN(hi) (wordnode_T *)((hi)->hi_key) - -// Info used while reading the spell files. 
-typedef struct spellinfo_S { - wordnode_T *si_foldroot; // tree with case-folded words - long si_foldwcount; // nr of words in si_foldroot - - wordnode_T *si_keeproot; // tree with keep-case words - long si_keepwcount; // nr of words in si_keeproot - - wordnode_T *si_prefroot; // tree with postponed prefixes - - long si_sugtree; // creating the soundfolding trie - - sblock_T *si_blocks; // memory blocks used - long si_blocks_cnt; // memory blocks allocated - int si_did_emsg; // TRUE when ran out of memory - - long si_compress_cnt; // words to add before lowering - // compression limit - wordnode_T *si_first_free; // List of nodes that have been freed during - // compression, linked by "wn_child" field. - long si_free_count; // number of nodes in si_first_free -#ifdef SPELL_PRINTTREE - int si_wordnode_nr; // sequence nr for nodes -#endif - buf_T *si_spellbuf; // buffer used to store soundfold word table - - int si_ascii; // handling only ASCII words - int si_add; // addition file - int si_clear_chartab; // when TRUE clear char tables - int si_region; // region mask - vimconv_T si_conv; // for conversion to 'encoding' - int si_memtot; // runtime memory used - int si_verbose; // verbose messages - int si_msg_count; // number of words added since last message - char_u *si_info; // info text chars or NULL - int si_region_count; // number of regions supported (1 when there - // are no regions) - char_u si_region_name[17]; // region names; used only if - // si_region_count > 1) - - garray_T si_rep; // list of fromto_T entries from REP lines - garray_T si_repsal; // list of fromto_T entries from REPSAL lines - garray_T si_sal; // list of fromto_T entries from SAL lines - char_u *si_sofofr; // SOFOFROM text - char_u *si_sofoto; // SOFOTO text - int si_nosugfile; // NOSUGFILE item found - int si_nosplitsugs; // NOSPLITSUGS item found - int si_nocompoundsugs; // NOCOMPOUNDSUGS item found - int si_followup; // soundsalike: ? - int si_collapse; // soundsalike: ? 
- hashtab_T si_commonwords; // hashtable for common words - time_t si_sugtime; // timestamp for .sug file - int si_rem_accents; // soundsalike: remove accents - garray_T si_map; // MAP info concatenated - char_u *si_midword; // MIDWORD chars or NULL - int si_compmax; // max nr of words for compounding - int si_compminlen; // minimal length for compounding - int si_compsylmax; // max nr of syllables for compounding - int si_compoptions; // COMP_ flags - garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as - // a string - char_u *si_compflags; // flags used for compounding - char_u si_nobreak; // NOBREAK - char_u *si_syllable; // syllable string - garray_T si_prefcond; // table with conditions for postponed - // prefixes, each stored as a string - int si_newprefID; // current value for ah_newID - int si_newcompID; // current value for compound ID -} spellinfo_T; - -static spelltab_T spelltab; -static int did_set_spelltab; - -#define CF_WORD 0x01 -#define CF_UPPER 0x02 +spelltab_T spelltab; +int did_set_spelltab; // structure used to store soundfolded words that add_sound_suggest() has // handled already. @@ -1025,34 +324,7 @@ typedef struct { #define FIND_COMPOUND 3 // find case-folded compound word #define FIND_KEEPCOMPOUND 4 // find keep-case compound word - -// Use our own character-case definitions, because the current locale may -// differ from what the .spl file uses. -// These must not be called with negative number! -#include <wchar.h> // for towupper() and towlower() -// Multi-byte implementation. For Unicode we can call utf_*(), but don't do -// that for ASCII, because we don't want to use 'casemap' here. Otherwise use -// the "w" library function for characters above 255. -#define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ - : (c) < \ - 256 ? (int)spelltab.st_fold[c] : (int)towlower(c)) - -#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ - : (c) < \ - 256 ? 
(int)spelltab.st_upper[c] : (int)towupper(c)) - -#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ - : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) - - -static char *e_format = N_("E759: Format error in spell file"); -static char *e_spell_trunc = N_("E758: Truncated spell file"); -static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); -static char *e_affname = N_("Affix name too long in %s line %d: %s"); -static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); -static char *e_affrange = N_( - "E762: Character in FOL, LOW or UPP is out of range"); -static char *msg_compressing = N_("Compressing word tree..."); +char *e_format = N_("E759: Format error in spell file"); // Remember what "z?" replaced. static char_u *repl_from = NULL; @@ -2359,7 +1631,7 @@ static void spell_load_lang(char_u *lang) // Return the encoding used for spell checking: Use 'encoding', except that we // use "latin1" for "latin9". And limit to 60 characters (just in case). -static char_u *spell_enc(void) +char_u *spell_enc(void) { if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) @@ -2377,7 +1649,7 @@ static void int_wordlist_spl(char_u *fname) // Allocate a new slang_T for language "lang". "lang" can be NULL. // Caller must fill "sl_next". -static slang_T *slang_alloc(char_u *lang) +slang_T *slang_alloc(char_u *lang) { slang_T *lp = xcalloc(1, sizeof(slang_T)); @@ -2393,7 +1665,7 @@ static slang_T *slang_alloc(char_u *lang) } // Free the contents of an slang_T and the structure itself. -static void slang_free(slang_T *lp) +void slang_free(slang_T *lp) { xfree(lp->sl_name); xfree(lp->sl_fname); @@ -2418,7 +1690,7 @@ static void free_fromto(fromto_T *ftp) { } // Clear an slang_T so that the file can be reloaded. -static void slang_clear(slang_T *lp) +void slang_clear(slang_T *lp) { garray_T *gap; @@ -2491,7 +1763,7 @@ static void slang_clear(slang_T *lp) } // Clear the info from the .sug file in "lp". 
-static void slang_clear_sug(slang_T *lp) +void slang_clear_sug(slang_T *lp) { xfree(lp->sl_sbyts); lp->sl_sbyts = NULL; @@ -2523,562 +1795,14 @@ static void spell_load_cb(char_u *fname, void *cookie) } } -// Load one spell file and store the info into a slang_T. -// -// This is invoked in three ways: -// - From spell_load_cb() to load a spell file for the first time. "lang" is -// the language name, "old_lp" is NULL. Will allocate an slang_T. -// - To reload a spell file that was changed. "lang" is NULL and "old_lp" -// points to the existing slang_T. -// - Just after writing a .spl file; it's read back to produce the .sug file. -// "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. -// -// Returns the slang_T the spell file was loaded into. NULL for error. -static slang_T * -spell_load_file ( - char_u *fname, - char_u *lang, - slang_T *old_lp, - bool silent // no error if file doesn't exist -) -{ - FILE *fd; - char_u buf[VIMSPELLMAGICL]; - char_u *p; - int i; - int n; - int len; - char_u *save_sourcing_name = sourcing_name; - linenr_T save_sourcing_lnum = sourcing_lnum; - slang_T *lp = NULL; - int c = 0; - int res; - - fd = mch_fopen((char *)fname, "r"); - if (fd == NULL) { - if (!silent) - EMSG2(_(e_notopen), fname); - else if (p_verbose > 2) { - verbose_enter(); - smsg((char *)e_notopen, fname); - verbose_leave(); - } - goto endFAIL; - } - if (p_verbose > 2) { - verbose_enter(); - smsg(_("Reading spell file \"%s\""), fname); - verbose_leave(); - } - - if (old_lp == NULL) { - lp = slang_alloc(lang); - - // Remember the file name, used to reload the file when it's updated. - lp->sl_fname = vim_strsave(fname); - - // Check for .add.spl. - lp->sl_add = strstr((char *)path_tail(fname), SPL_FNAME_ADD) != NULL; - } else - lp = old_lp; - - // Set sourcing_name, so that error messages mention the file name. 
- sourcing_name = fname; - sourcing_lnum = 0; - - // <HEADER>: <fileID> - for (i = 0; i < VIMSPELLMAGICL; ++i) - buf[i] = getc(fd); // <fileID> - if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) { - EMSG(_("E757: This does not look like a spell file")); - goto endFAIL; - } - c = getc(fd); // <versionnr> - if (c < VIMSPELLVERSION) { - EMSG(_("E771: Old spell file, needs to be updated")); - goto endFAIL; - } else if (c > VIMSPELLVERSION) { - EMSG(_("E772: Spell file is for newer version of Vim")); - goto endFAIL; - } - - - // <SECTIONS>: <section> ... <sectionend> - // <section>: <sectionID> <sectionflags> <sectionlen> (section contents) - for (;; ) { - n = getc(fd); // <sectionID> or <sectionend> - if (n == SN_END) - break; - c = getc(fd); // <sectionflags> - len = get4c(fd); // <sectionlen> - if (len < 0) - goto truncerr; - - res = 0; - switch (n) { - case SN_INFO: - lp->sl_info = READ_STRING(fd, len); // <infotext> - if (lp->sl_info == NULL) - goto endFAIL; - break; - - case SN_REGION: - res = read_region_section(fd, lp, len); - break; - - case SN_CHARFLAGS: - res = read_charflags_section(fd); - break; - - case SN_MIDWORD: - lp->sl_midword = READ_STRING(fd, len); // <midword> - if (lp->sl_midword == NULL) - goto endFAIL; - break; - - case SN_PREFCOND: - res = read_prefcond_section(fd, lp); - break; - - case SN_REP: - res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); - break; - - case SN_REPSAL: - res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); - break; - - case SN_SAL: - res = read_sal_section(fd, lp); - break; - - case SN_SOFO: - res = read_sofo_section(fd, lp); - break; - - case SN_MAP: - p = READ_STRING(fd, len); // <mapstr> - if (p == NULL) - goto endFAIL; - set_map_str(lp, p); - xfree(p); - break; - - case SN_WORDS: - res = read_words_section(fd, lp, len); - break; - - case SN_SUGFILE: - lp->sl_sugtime = get8ctime(fd); // <timestamp> - break; - - case SN_NOSPLITSUGS: - lp->sl_nosplitsugs = true; - break; - - case 
SN_NOCOMPOUNDSUGS: - lp->sl_nocompoundsugs = true; - break; - - case SN_COMPOUND: - res = read_compound(fd, lp, len); - break; - - case SN_NOBREAK: - lp->sl_nobreak = true; - break; - - case SN_SYLLABLE: - lp->sl_syllable = READ_STRING(fd, len); // <syllable> - if (lp->sl_syllable == NULL) - goto endFAIL; - if (init_syl_tab(lp) == FAIL) - goto endFAIL; - break; - - default: - // Unsupported section. When it's required give an error - // message. When it's not required skip the contents. - if (c & SNF_REQUIRED) { - EMSG(_("E770: Unsupported section in spell file")); - goto endFAIL; - } - while (--len >= 0) - if (getc(fd) < 0) - goto truncerr; - break; - } -someerror: - if (res == SP_FORMERROR) { - EMSG(_(e_format)); - goto endFAIL; - } - if (res == SP_TRUNCERROR) { -truncerr: - EMSG(_(e_spell_trunc)); - goto endFAIL; - } - if (res == SP_OTHERERROR) - goto endFAIL; - } - - // <LWORDTREE> - res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, false, 0); - if (res != 0) - goto someerror; - - // <KWORDTREE> - res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, false, 0); - if (res != 0) - goto someerror; - - // <PREFIXTREE> - res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, true, - lp->sl_prefixcnt); - if (res != 0) - goto someerror; - - // For a new file link it in the list of spell files. - if (old_lp == NULL && lang != NULL) { - lp->sl_next = first_lang; - first_lang = lp; - } - - goto endOK; - -endFAIL: - if (lang != NULL) - // truncating the name signals the error to spell_load_lang() - *lang = NUL; - if (lp != NULL && old_lp == NULL) - slang_free(lp); - lp = NULL; - -endOK: - if (fd != NULL) - fclose(fd); - sourcing_name = save_sourcing_name; - sourcing_lnum = save_sourcing_lnum; - - return lp; -} - -// Read a length field from "fd" in "cnt_bytes" bytes. -// Allocate memory, read the string into it and add a NUL at the end. -// Returns NULL when the count is zero. 
-// Sets "*cntp" to SP_*ERROR when there is an error, length of the result -// otherwise. -static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) -{ - int cnt = 0; - int i; - char_u *str; - - // read the length bytes, MSB first - for (i = 0; i < cnt_bytes; ++i) - cnt = (cnt << 8) + getc(fd); - if (cnt < 0) { - *cntp = SP_TRUNCERROR; - return NULL; - } - *cntp = cnt; - if (cnt == 0) - return NULL; // nothing to read, return NULL - - str = READ_STRING(fd, cnt); - if (str == NULL) - *cntp = SP_OTHERERROR; - return str; -} - -// Read SN_REGION: <regionname> ... -// Return SP_*ERROR flags. -static int read_region_section(FILE *fd, slang_T *lp, int len) -{ - int i; - - if (len > 16) - return SP_FORMERROR; - for (i = 0; i < len; ++i) - lp->sl_regions[i] = getc(fd); // <regionname> - lp->sl_regions[len] = NUL; - return 0; -} - -// Read SN_CHARFLAGS section: <charflagslen> <charflags> -// <folcharslen> <folchars> -// Return SP_*ERROR flags. -static int read_charflags_section(FILE *fd) -{ - char_u *flags; - char_u *fol; - int flagslen, follen; - - // <charflagslen> <charflags> - flags = read_cnt_string(fd, 1, &flagslen); - if (flagslen < 0) - return flagslen; - - // <folcharslen> <folchars> - fol = read_cnt_string(fd, 2, &follen); - if (follen < 0) { - xfree(flags); - return follen; - } - - // Set the word-char flags and fill SPELL_ISUPPER() table. - if (flags != NULL && fol != NULL) - set_spell_charflags(flags, flagslen, fol); - - xfree(flags); - xfree(fol); - - // When <charflagslen> is zero then <fcharlen> must also be zero. - if ((flags == NULL) != (fol == NULL)) - return SP_FORMERROR; - return 0; -} - -// Read SN_PREFCOND section. -// Return SP_*ERROR flags. -static int read_prefcond_section(FILE *fd, slang_T *lp) -{ - int cnt; - int i; - int n; - char_u *p; - char_u buf[MAXWLEN + 1]; - - // <prefcondcnt> <prefcond> ... 
- cnt = get2c(fd); // <prefcondcnt> - if (cnt <= 0) - return SP_FORMERROR; - - lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *)); - lp->sl_prefixcnt = cnt; - - for (i = 0; i < cnt; ++i) { - // <prefcond> : <condlen> <condstr> - n = getc(fd); // <condlen> - if (n < 0 || n >= MAXWLEN) - return SP_FORMERROR; - - // When <condlen> is zero we have an empty condition. Otherwise - // compile the regexp program used to check for the condition. - if (n > 0) { - buf[0] = '^'; // always match at one position only - p = buf + 1; - while (n-- > 0) - *p++ = getc(fd); // <condstr> - *p = NUL; - lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); - } - } - return 0; -} - -// Read REP or REPSAL items section from "fd": <repcount> <rep> ... -// Return SP_*ERROR flags. -static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first) -{ - int cnt; - fromto_T *ftp; - - cnt = get2c(fd); // <repcount> - if (cnt < 0) - return SP_TRUNCERROR; - - ga_grow(gap, cnt); - - // <rep> : <repfromlen> <repfrom> <reptolen> <repto> - for (; gap->ga_len < cnt; ++gap->ga_len) { - int c; - ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; - ftp->ft_from = read_cnt_string(fd, 1, &c); - if (c < 0) - return c; - if (c == 0) - return SP_FORMERROR; - ftp->ft_to = read_cnt_string(fd, 1, &c); - if (c <= 0) { - xfree(ftp->ft_from); - if (c < 0) - return c; - return SP_FORMERROR; - } - } - - // Fill the first-index table. - for (int i = 0; i < 256; ++i) { - first[i] = -1; - } - for (int i = 0; i < gap->ga_len; ++i) { - ftp = &((fromto_T *)gap->ga_data)[i]; - if (first[*ftp->ft_from] == -1) - first[*ftp->ft_from] = i; - } - return 0; -} - -// Read SN_SAL section: <salflags> <salcount> <sal> ... -// Return SP_*ERROR flags. 
-static int read_sal_section(FILE *fd, slang_T *slang) -{ - int i; - int cnt; - garray_T *gap; - salitem_T *smp; - int ccnt; - char_u *p; - int c = NUL; - - slang->sl_sofo = false; - - i = getc(fd); // <salflags> - if (i & SAL_F0LLOWUP) - slang->sl_followup = true; - if (i & SAL_COLLAPSE) - slang->sl_collapse = true; - if (i & SAL_REM_ACCENTS) - slang->sl_rem_accents = true; - - cnt = get2c(fd); // <salcount> - if (cnt < 0) - return SP_TRUNCERROR; - - gap = &slang->sl_sal; - ga_init(gap, sizeof(salitem_T), 10); - ga_grow(gap, cnt + 1); - - // <sal> : <salfromlen> <salfrom> <saltolen> <salto> - for (; gap->ga_len < cnt; ++gap->ga_len) { - smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; - ccnt = getc(fd); // <salfromlen> - if (ccnt < 0) - return SP_TRUNCERROR; - p = xmalloc(ccnt + 2); - smp->sm_lead = p; - - // Read up to the first special char into sm_lead. - for (i = 0; i < ccnt; ++i) { - c = getc(fd); // <salfrom> - if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) - break; - *p++ = c; - } - smp->sm_leadlen = (int)(p - smp->sm_lead); - *p++ = NUL; - - // Put (abc) chars in sm_oneof, if any. - if (c == '(') { - smp->sm_oneof = p; - for (++i; i < ccnt; ++i) { - c = getc(fd); // <salfrom> - if (c == ')') - break; - *p++ = c; - } - *p++ = NUL; - if (++i < ccnt) - c = getc(fd); - } else - smp->sm_oneof = NULL; - - // Any following chars go in sm_rules. 
- smp->sm_rules = p; - if (i < ccnt) - // store the char we got while checking for end of sm_lead - *p++ = c; - for (++i; i < ccnt; ++i) - *p++ = getc(fd); // <salfrom> - *p++ = NUL; - - // <saltolen> <salto> - smp->sm_to = read_cnt_string(fd, 1, &ccnt); - if (ccnt < 0) { - xfree(smp->sm_lead); - return ccnt; - } - - if (has_mbyte) { - // convert the multi-byte strings to wide char strings - smp->sm_lead_w = mb_str2wide(smp->sm_lead); - smp->sm_leadlen = mb_charlen(smp->sm_lead); - if (smp->sm_oneof == NULL) - smp->sm_oneof_w = NULL; - else - smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); - if (smp->sm_to == NULL) - smp->sm_to_w = NULL; - else - smp->sm_to_w = mb_str2wide(smp->sm_to); - } - } - - if (!GA_EMPTY(gap)) { - // Add one extra entry to mark the end with an empty sm_lead. Avoids - // that we need to check the index every time. - smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; - p = xmalloc(1); - p[0] = NUL; - smp->sm_lead = p; - smp->sm_leadlen = 0; - smp->sm_oneof = NULL; - smp->sm_rules = p; - smp->sm_to = NULL; - if (has_mbyte) { - smp->sm_lead_w = mb_str2wide(smp->sm_lead); - smp->sm_leadlen = 0; - smp->sm_oneof_w = NULL; - smp->sm_to_w = NULL; - } - ++gap->ga_len; - } - - // Fill the first-index table. - set_sal_first(slang); - - return 0; -} - -// Read SN_WORDS: <word> ... -// Return SP_*ERROR flags. -static int read_words_section(FILE *fd, slang_T *lp, int len) -{ - int done = 0; - int i; - int c; - char_u word[MAXWLEN]; - - while (done < len) { - // Read one word at a time. - for (i = 0;; ++i) { - c = getc(fd); - if (c == EOF) - return SP_TRUNCERROR; - word[i] = c; - if (word[i] == NUL) - break; - if (i == MAXWLEN - 1) - return SP_FORMERROR; - } - - // Init the count to 10. - count_common_word(lp, word, -1, 10); - done += i + 1; - } - return 0; -} - -// Add a word to the hashtable of common words. -// If it's already there then the counter is increased. 
-static void -count_common_word ( - slang_T *lp, - char_u *word, - int len, // word length, -1 for upto NUL - int count // 1 to count once, 10 to init -) +/// Add a word to the hashtable of common words. +/// If it's already there then the counter is increased. +/// +/// @param[in] lp +/// @param[in] word added to common words hashtable +/// @param[in] len length of word or -1 for NUL terminated +/// @param[in] count 1 to count once, 10 to init +void count_common_word(slang_T *lp, char_u *word, int len, int count) { hash_T hash; hashitem_T *hi; @@ -3142,209 +1866,9 @@ score_wordcount_adj ( return score; } -// SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> -// Return SP_*ERROR flags. -static int read_sofo_section(FILE *fd, slang_T *slang) -{ - int cnt; - char_u *from, *to; - int res; - - slang->sl_sofo = true; - - // <sofofromlen> <sofofrom> - from = read_cnt_string(fd, 2, &cnt); - if (cnt < 0) - return cnt; - - // <sofotolen> <sofoto> - to = read_cnt_string(fd, 2, &cnt); - if (cnt < 0) { - xfree(from); - return cnt; - } - - // Store the info in slang->sl_sal and/or slang->sl_sal_first. - if (from != NULL && to != NULL) - res = set_sofo(slang, from, to); - else if (from != NULL || to != NULL) - res = SP_FORMERROR; // only one of two strings is an error - else - res = 0; - - xfree(from); - xfree(to); - return res; -} - -// Read the compound section from the .spl file: -// <compmax> <compminlen> <compsylmax> <compoptions> <compflags> -// Returns SP_*ERROR flags. 
-static int read_compound(FILE *fd, slang_T *slang, int len) -{ - int todo = len; - int c; - int atstart; - char_u *pat; - char_u *pp; - char_u *cp; - char_u *ap; - char_u *crp; - int cnt; - garray_T *gap; - - if (todo < 2) - return SP_FORMERROR; // need at least two bytes - - --todo; - c = getc(fd); // <compmax> - if (c < 2) - c = MAXWLEN; - slang->sl_compmax = c; - - --todo; - c = getc(fd); // <compminlen> - if (c < 1) - c = 0; - slang->sl_compminlen = c; - - --todo; - c = getc(fd); // <compsylmax> - if (c < 1) - c = MAXWLEN; - slang->sl_compsylmax = c; - - c = getc(fd); // <compoptions> - if (c != 0) - ungetc(c, fd); // be backwards compatible with Vim 7.0b - else { - --todo; - c = getc(fd); // only use the lower byte for now - --todo; - slang->sl_compoptions = c; - - gap = &slang->sl_comppat; - c = get2c(fd); // <comppatcount> - todo -= 2; - ga_init(gap, sizeof(char_u *), c); - ga_grow(gap, c); - while (--c >= 0) { - ((char_u **)(gap->ga_data))[gap->ga_len++] = - read_cnt_string(fd, 1, &cnt); - // <comppatlen> <comppattext> - if (cnt < 0) - return cnt; - todo -= cnt + 1; - } - } - if (todo < 0) - return SP_FORMERROR; - - // Turn the COMPOUNDRULE items into a regexp pattern: - // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". - // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. - // Conversion to utf-8 may double the size. - c = todo * 2 + 7; - if (enc_utf8) - c += todo * 2; - pat = xmalloc(c); - - // We also need a list of all flags that can appear at the start and one - // for all flags. - cp = xmalloc(todo + 1); - slang->sl_compstartflags = cp; - *cp = NUL; - - ap = xmalloc(todo + 1); - slang->sl_compallflags = ap; - *ap = NUL; - - // And a list of all patterns in their original form, for checking whether - // compounding may work in match_compoundrule(). This is freed when we - // encounter a wildcard, the check doesn't work then. 
- crp = xmalloc(todo + 1); - slang->sl_comprules = crp; - - pp = pat; - *pp++ = '^'; - *pp++ = '\\'; - *pp++ = '('; - - atstart = 1; - while (todo-- > 0) { - c = getc(fd); // <compflags> - if (c == EOF) { - xfree(pat); - return SP_TRUNCERROR; - } - - // Add all flags to "sl_compallflags". - if (vim_strchr((char_u *)"?*+[]/", c) == NULL - && !byte_in_str(slang->sl_compallflags, c)) { - *ap++ = c; - *ap = NUL; - } - - if (atstart != 0) { - // At start of item: copy flags to "sl_compstartflags". For a - // [abc] item set "atstart" to 2 and copy up to the ']'. - if (c == '[') - atstart = 2; - else if (c == ']') - atstart = 0; - else { - if (!byte_in_str(slang->sl_compstartflags, c)) { - *cp++ = c; - *cp = NUL; - } - if (atstart == 1) - atstart = 0; - } - } - - // Copy flag to "sl_comprules", unless we run into a wildcard. - if (crp != NULL) { - if (c == '?' || c == '+' || c == '*') { - xfree(slang->sl_comprules); - slang->sl_comprules = NULL; - crp = NULL; - } else - *crp++ = c; - } - - if (c == '/') { // slash separates two items - *pp++ = '\\'; - *pp++ = '|'; - atstart = 1; - } else { // normal char, "[abc]" and '*' are copied as-is - if (c == '?' || c == '+' || c == '~') - *pp++ = '\\'; // "a?" becomes "a\?", "a+" becomes "a\+" - if (enc_utf8) - pp += mb_char2bytes(c, pp); - else - *pp++ = c; - } - } - - *pp++ = '\\'; - *pp++ = ')'; - *pp++ = '$'; - *pp = NUL; - - if (crp != NULL) - *crp = NUL; - - slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); - xfree(pat); - if (slang->sl_compprog == NULL) - return SP_FORMERROR; - - return 0; -} - // Returns true if byte "n" appears in "str". // Like strchr() but independent of locale. -static bool byte_in_str(char_u *str, int n) +bool byte_in_str(char_u *str, int n) { char_u *p; @@ -3356,7 +1880,7 @@ static bool byte_in_str(char_u *str, int n) // Truncate "slang->sl_syllable" at the first slash and put the following items // in "slang->sl_syl_items". 
-static int init_syl_tab(slang_T *slang) +int init_syl_tab(slang_T *slang) { char_u *p; char_u *s; @@ -3433,294 +1957,6 @@ static int count_syllables(slang_T *slang, char_u *word) return cnt; } -// Set the SOFOFROM and SOFOTO items in language "lp". -// Returns SP_*ERROR flags when there is something wrong. -static int set_sofo(slang_T *lp, char_u *from, char_u *to) -{ - int i; - - garray_T *gap; - char_u *s; - char_u *p; - int c; - int *inp; - - if (has_mbyte) { - // Use "sl_sal" as an array with 256 pointers to a list of wide - // characters. The index is the low byte of the character. - // The list contains from-to pairs with a terminating NUL. - // sl_sal_first[] is used for latin1 "from" characters. - gap = &lp->sl_sal; - ga_init(gap, sizeof(int *), 1); - ga_grow(gap, 256); - memset(gap->ga_data, 0, sizeof(int *) * 256); - gap->ga_len = 256; - - // First count the number of items for each list. Temporarily use - // sl_sal_first[] for this. - for (p = from, s = to; *p != NUL && *s != NUL; ) { - c = mb_cptr2char_adv(&p); - mb_cptr_adv(s); - if (c >= 256) - ++lp->sl_sal_first[c & 0xff]; - } - if (*p != NUL || *s != NUL) // lengths differ - return SP_FORMERROR; - - // Allocate the lists. - for (i = 0; i < 256; ++i) - if (lp->sl_sal_first[i] > 0) { - p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); - ((int **)gap->ga_data)[i] = (int *)p; - *(int *)p = 0; - } - - // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal - // list. - memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); - for (p = from, s = to; *p != NUL && *s != NUL; ) { - c = mb_cptr2char_adv(&p); - i = mb_cptr2char_adv(&s); - if (c >= 256) { - // Append the from-to chars at the end of the list with - // the low byte. 
- inp = ((int **)gap->ga_data)[c & 0xff]; - while (*inp != 0) - ++inp; - *inp++ = c; // from char - *inp++ = i; // to char - *inp++ = NUL; // NUL at the end - } else - // mapping byte to char is done in sl_sal_first[] - lp->sl_sal_first[c] = i; - } - } else { - // mapping bytes to bytes is done in sl_sal_first[] - if (STRLEN(from) != STRLEN(to)) - return SP_FORMERROR; - - for (i = 0; to[i] != NUL; ++i) - lp->sl_sal_first[from[i]] = to[i]; - lp->sl_sal.ga_len = 1; // indicates we have soundfolding - } - - return 0; -} - -// Fill the first-index table for "lp". -static void set_sal_first(slang_T *lp) -{ - salfirst_T *sfirst; - salitem_T *smp; - int c; - garray_T *gap = &lp->sl_sal; - - sfirst = lp->sl_sal_first; - for (int i = 0; i < 256; ++i) { - sfirst[i] = -1; - } - smp = (salitem_T *)gap->ga_data; - for (int i = 0; i < gap->ga_len; ++i) { - if (has_mbyte) - // Use the lowest byte of the first character. For latin1 it's - // the character, for other encodings it should differ for most - // characters. - c = *smp[i].sm_lead_w & 0xff; - else - c = *smp[i].sm_lead; - if (sfirst[c] == -1) { - sfirst[c] = i; - if (has_mbyte) { - int n; - - // Make sure all entries with this byte are following each - // other. Move the ones that are in the wrong position. Do - // keep the same ordering! - while (i + 1 < gap->ga_len - && (*smp[i + 1].sm_lead_w & 0xff) == c) - // Skip over entry with same index byte. - ++i; - - for (n = 1; i + n < gap->ga_len; ++n) - if ((*smp[i + n].sm_lead_w & 0xff) == c) { - salitem_T tsal; - - // Move entry with same index byte after the entries - // we already found. - ++i; - --n; - tsal = smp[i + n]; - memmove(smp + i + 1, smp + i, - sizeof(salitem_T) * n); - smp[i] = tsal; - } - } - } - } -} - -// Turn a multi-byte string into a wide character string. -// Return it in allocated memory. 
-static int *mb_str2wide(char_u *s) -{ - int i = 0; - - int *res = xmalloc((mb_charlen(s) + 1) * sizeof(int)); - for (char_u *p = s; *p != NUL; ) - res[i++] = mb_ptr2char_adv(&p); - res[i] = NUL; - - return res; -} - -// Reads a tree from the .spl or .sug file. -// Allocates the memory and stores pointers in "bytsp" and "idxsp". -// This is skipped when the tree has zero length. -// Returns zero when OK, SP_ value for an error. -static int -spell_read_tree ( - FILE *fd, - char_u **bytsp, - idx_T **idxsp, - bool prefixtree, // true for the prefix tree - int prefixcnt // when "prefixtree" is true: prefix count -) -{ - int idx; - char_u *bp; - idx_T *ip; - - // The tree size was computed when writing the file, so that we can - // allocate it as one long block. <nodecount> - int len = get4c(fd); - if (len < 0) - return SP_TRUNCERROR; - if (len > 0) { - // Allocate the byte array. - bp = xmalloc(len); - *bytsp = bp; - - // Allocate the index array. - ip = xcalloc(len, sizeof(*ip)); - *idxsp = ip; - - // Recursively read the tree and store it in the array. - idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); - if (idx < 0) - return idx; - } - return 0; -} - -// Read one row of siblings from the spell file and store it in the byte array -// "byts" and index array "idxs". Recursively read the children. -// -// NOTE: The code here must match put_node()! -// -// Returns the index (>= 0) following the siblings. -// Returns SP_TRUNCERROR if the file is shorter than expected. -// Returns SP_FORMERROR if there is a format error. 
-static idx_T -read_tree_node ( - FILE *fd, - char_u *byts, - idx_T *idxs, - int maxidx, // size of arrays - idx_T startidx, // current index in "byts" and "idxs" - bool prefixtree, // true for reading PREFIXTREE - int maxprefcondnr // maximum for <prefcondnr> -) -{ - int len; - int i; - int n; - idx_T idx = startidx; - int c; - int c2; -#define SHARED_MASK 0x8000000 - - len = getc(fd); // <siblingcount> - if (len <= 0) - return SP_TRUNCERROR; - - if (startidx + len >= maxidx) - return SP_FORMERROR; - byts[idx++] = len; - - // Read the byte values, flag/region bytes and shared indexes. - for (i = 1; i <= len; ++i) { - c = getc(fd); // <byte> - if (c < 0) - return SP_TRUNCERROR; - if (c <= BY_SPECIAL) { - if (c == BY_NOFLAGS && !prefixtree) { - // No flags, all regions. - idxs[idx] = 0; - c = 0; - } else if (c != BY_INDEX) { - if (prefixtree) { - // Read the optional pflags byte, the prefix ID and the - // condition nr. In idxs[] store the prefix ID in the low - // byte, the condition index shifted up 8 bits, the flags - // shifted up 24 bits. - if (c == BY_FLAGS) - c = getc(fd) << 24; // <pflags> - else - c = 0; - - c |= getc(fd); // <affixID> - - n = get2c(fd); // <prefcondnr> - if (n >= maxprefcondnr) - return SP_FORMERROR; - c |= (n << 8); - } else { // c must be BY_FLAGS or BY_FLAGS2 - // Read flags and optional region and prefix ID. In - // idxs[] the flags go in the low two bytes, region above - // that and prefix ID above the region. - c2 = c; - c = getc(fd); // <flags> - if (c2 == BY_FLAGS2) - c = (getc(fd) << 8) + c; // <flags2> - if (c & WF_REGION) - c = (getc(fd) << 16) + c; // <region> - if (c & WF_AFX) - c = (getc(fd) << 24) + c; // <affixID> - } - - idxs[idx] = c; - c = 0; - } else { // c == BY_INDEX - // <nodeidx> - n = get3c(fd); - if (n < 0 || n >= maxidx) - return SP_FORMERROR; - idxs[idx] = n + SHARED_MASK; - c = getc(fd); // <xbyte> - } - } - byts[idx++] = c; - } - - // Recursively read the children for non-shared siblings. 
- // Skip the end-of-word ones (zero byte value) and the shared ones (and - // remove SHARED_MASK) - for (i = 1; i <= len; ++i) - if (byts[startidx + i] != 0) { - if (idxs[startidx + i] & SHARED_MASK) - idxs[startidx + i] &= ~SHARED_MASK; - else { - idxs[startidx + i] = idx; - idx = read_tree_node(fd, byts, idxs, maxidx, idx, - prefixtree, maxprefcondnr); - if (idx < 0) - break; - } - } - - return idx; -} - // Parse 'spelllang' and set w_s->b_langp accordingly. // Returns NULL if it's OK, an error message otherwise. char_u *did_set_spelllang(win_T *wp) @@ -4051,16 +2287,17 @@ static int find_region(char_u *rp, char_u *region) return i / 2; } -// Return case type of word: -// w word 0 -// Word WF_ONECAP -// W WORD WF_ALLCAP -// WoRd wOrd WF_KEEPCAP -static int -captype ( - char_u *word, - char_u *end // When NULL use up to NUL byte. -) +/// Return case type of word: +/// w word 0 +/// Word WF_ONECAP +/// W WORD WF_ALLCAP +/// WoRd wOrd WF_KEEPCAP +/// +/// @param[in] word +/// @param[in] end End of word or NULL for NUL delimited string +/// +/// @returns Case type of word +int captype(char_u *word, char_u *end) { char_u *p; int c; @@ -4202,3222 +2439,6 @@ void spell_reload(void) } } -// Reload the spell file "fname" if it's loaded. -static void -spell_reload_one ( - char_u *fname, - bool added_word // invoked through "zg" -) -{ - slang_T *slang; - bool didit = false; - - for (slang = first_lang; slang != NULL; slang = slang->sl_next) { - if (path_full_compare(fname, slang->sl_fname, FALSE) == kEqualFiles) { - slang_clear(slang); - if (spell_load_file(fname, NULL, slang, false) == NULL) - // reloading failed, clear the language - slang_clear(slang); - redraw_all_later(SOME_VALID); - didit = true; - } - } - - // When "zg" was used and the file wasn't loaded yet, should redo - // 'spelllang' to load it now. - if (added_word && !didit) - did_set_spelllang(curwin); -} - - -// Functions for ":mkspell". 
- - -// In the postponed prefixes tree wn_flags is used to store the WFP_ flags, -// but it must be negative to indicate the prefix tree to tree_add_word(). -// Use a negative number with the lower 8 bits zero. -#define PFX_FLAGS -256 - -// flags for "condit" argument of store_aff_word() -#define CONDIT_COMB 1 // affix must combine -#define CONDIT_CFIX 2 // affix must have CIRCUMFIX flag -#define CONDIT_SUF 4 // add a suffix for matching flags -#define CONDIT_AFF 8 // word already has an affix - -// Tunable parameters for when the tree is compressed. See 'mkspellmem'. -static long compress_start = 30000; // memory / SBLOCKSIZE -static long compress_inc = 100; // memory / SBLOCKSIZE -static long compress_added = 500000; // word count - -#ifdef SPELL_PRINTTREE -// For debugging the tree code: print the current tree in a (more or less) -// readable format, so that we can see what happens when adding a word and/or -// compressing the tree. -// Based on code from Olaf Seibert. -#define PRINTLINESIZE 1000 -#define PRINTWIDTH 6 - -#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ - PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) - -static char line1[PRINTLINESIZE]; -static char line2[PRINTLINESIZE]; -static char line3[PRINTLINESIZE]; - -static void spell_clear_flags(wordnode_T *node) -{ - wordnode_T *np; - - for (np = node; np != NULL; np = np->wn_sibling) { - np->wn_u1.index = FALSE; - spell_clear_flags(np->wn_child); - } -} - -static void spell_print_node(wordnode_T *node, int depth) -{ - if (node->wn_u1.index) { - // Done this node before, print the reference. - PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); - PRINTSOME(line2, depth, " ", 0, 0); - PRINTSOME(line3, depth, " ", 0, 0); - msg((char_u *)line1); - msg((char_u *)line2); - msg((char_u *)line3); - } else { - node->wn_u1.index = TRUE; - - if (node->wn_byte != NUL) { - if (node->wn_child != NULL) - PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); - else - // Cannot happen? 
- PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); - } else - PRINTSOME(line1, depth, " $ ", 0, 0); - - PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); - - if (node->wn_sibling != NULL) - PRINTSOME(line3, depth, " | ", 0, 0); - else - PRINTSOME(line3, depth, " ", 0, 0); - - if (node->wn_byte == NUL) { - msg((char_u *)line1); - msg((char_u *)line2); - msg((char_u *)line3); - } - - // do the children - if (node->wn_byte != NUL && node->wn_child != NULL) - spell_print_node(node->wn_child, depth + 1); - - // do the siblings - if (node->wn_sibling != NULL) { - // get rid of all parent details except | - STRCPY(line1, line3); - STRCPY(line2, line3); - spell_print_node(node->wn_sibling, depth); - } - } -} - -static void spell_print_tree(wordnode_T *root) -{ - if (root != NULL) { - // Clear the "wn_u1.index" fields, used to remember what has been - // done. - spell_clear_flags(root); - - // Recursively print the tree. - spell_print_node(root, 0); - } -} - -#endif // SPELL_PRINTTREE - -// Reads the affix file "fname". -// Returns an afffile_T, NULL for complete failure. 
-static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) -{ - FILE *fd; - afffile_T *aff; - char_u rline[MAXLINELEN]; - char_u *line; - char_u *pc = NULL; -#define MAXITEMCNT 30 - char_u *(items[MAXITEMCNT]); - int itemcnt; - char_u *p; - int lnum = 0; - affheader_T *cur_aff = NULL; - bool did_postpone_prefix = false; - int aff_todo = 0; - hashtab_T *tp; - char_u *low = NULL; - char_u *fol = NULL; - char_u *upp = NULL; - int do_rep; - int do_repsal; - int do_sal; - int do_mapline; - bool found_map = false; - hashitem_T *hi; - int l; - int compminlen = 0; // COMPOUNDMIN value - int compsylmax = 0; // COMPOUNDSYLMAX value - int compoptions = 0; // COMP_ flags - int compmax = 0; // COMPOUNDWORDMAX value - char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE - // concatenated - char_u *midword = NULL; // MIDWORD value - char_u *syllable = NULL; // SYLLABLE value - char_u *sofofrom = NULL; // SOFOFROM value - char_u *sofoto = NULL; // SOFOTO value - - // Open the file. - fd = mch_fopen((char *)fname, "r"); - if (fd == NULL) { - EMSG2(_(e_notopen), fname); - return NULL; - } - - vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); - spell_message(spin, IObuff); - - // Only do REP lines when not done in another .aff file already. - do_rep = GA_EMPTY(&spin->si_rep); - - // Only do REPSAL lines when not done in another .aff file already. - do_repsal = GA_EMPTY(&spin->si_repsal); - - // Only do SAL lines when not done in another .aff file already. - do_sal = GA_EMPTY(&spin->si_sal); - - // Only do MAP lines when not done in another .aff file already. - do_mapline = GA_EMPTY(&spin->si_map); - - // Allocate and init the afffile_T structure. - aff = (afffile_T *)getroom(spin, sizeof(afffile_T), true); - if (aff == NULL) { - fclose(fd); - return NULL; - } - hash_init(&aff->af_pref); - hash_init(&aff->af_suff); - hash_init(&aff->af_comp); - - // Read all the lines in the file one by one. 
- while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { - line_breakcheck(); - ++lnum; - - // Skip comment lines. - if (*rline == '#') - continue; - - // Convert from "SET" to 'encoding' when needed. - xfree(pc); - if (spin->si_conv.vc_type != CONV_NONE) { - pc = string_convert(&spin->si_conv, rline, NULL); - if (pc == NULL) { - smsg(_("Conversion failure for word in %s line %d: %s"), - fname, lnum, rline); - continue; - } - line = pc; - } else { - pc = NULL; - line = rline; - } - - // Split the line up in white separated items. Put a NUL after each - // item. - itemcnt = 0; - for (p = line;; ) { - while (*p != NUL && *p <= ' ') // skip white space and CR/NL - ++p; - if (*p == NUL) - break; - if (itemcnt == MAXITEMCNT) // too many items - break; - items[itemcnt++] = p; - // A few items have arbitrary text argument, don't split them. - if (itemcnt == 2 && spell_info_item(items[0])) - while (*p >= ' ' || *p == TAB) // skip until CR/NL - ++p; - else - while (*p > ' ') // skip until white space or CR/NL - ++p; - if (*p == NUL) - break; - *p++ = NUL; - } - - // Handle non-empty lines. - if (itemcnt > 0) { - if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) { - // Setup for conversion from "ENC" to 'encoding'. 
- aff->af_enc = enc_canonize(items[1]); - if (!spin->si_ascii - && convert_setup(&spin->si_conv, aff->af_enc, - p_enc) == FAIL) - smsg(_("Conversion in %s not supported: from %s to %s"), - fname, aff->af_enc, p_enc); - spin->si_conv.vc_fail = true; - } else if (is_aff_rule(items, itemcnt, "FLAG", 2) - && aff->af_flagtype == AFT_CHAR) { - if (STRCMP(items[1], "long") == 0) - aff->af_flagtype = AFT_LONG; - else if (STRCMP(items[1], "num") == 0) - aff->af_flagtype = AFT_NUM; - else if (STRCMP(items[1], "caplong") == 0) - aff->af_flagtype = AFT_CAPLONG; - else - smsg(_("Invalid value for FLAG in %s line %d: %s"), - fname, lnum, items[1]); - if (aff->af_rare != 0 - || aff->af_keepcase != 0 - || aff->af_bad != 0 - || aff->af_needaffix != 0 - || aff->af_circumfix != 0 - || aff->af_needcomp != 0 - || aff->af_comproot != 0 - || aff->af_nosuggest != 0 - || compflags != NULL - || aff->af_suff.ht_used > 0 - || aff->af_pref.ht_used > 0) - smsg(_("FLAG after using flags in %s line %d: %s"), - fname, lnum, items[1]); - } else if (spell_info_item(items[0]) && itemcnt > 1) { - p = (char_u *)getroom(spin, - (spin->si_info == NULL ? 
0 : STRLEN(spin->si_info)) - + STRLEN(items[0]) - + STRLEN(items[1]) + 3, false); - if (p != NULL) { - if (spin->si_info != NULL) { - STRCPY(p, spin->si_info); - STRCAT(p, "\n"); - } - STRCAT(p, items[0]); - STRCAT(p, " "); - STRCAT(p, items[1]); - spin->si_info = p; - } - } else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) - && midword == NULL) { - midword = getroom_save(spin, items[1]); - } else if (is_aff_rule(items, itemcnt, "TRY", 2)) { - // ignored, we look in the tree for what chars may appear - } - // TODO: remove "RAR" later - else if ((is_aff_rule(items, itemcnt, "RAR", 2) - || is_aff_rule(items, itemcnt, "RARE", 2)) - && aff->af_rare == 0) { - aff->af_rare = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } - // TODO: remove "KEP" later - else if ((is_aff_rule(items, itemcnt, "KEP", 2) - || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) - && aff->af_keepcase == 0) { - aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } else if ((is_aff_rule(items, itemcnt, "BAD", 2) - || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) - && aff->af_bad == 0) { - aff->af_bad = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) - && aff->af_needaffix == 0) { - aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) - && aff->af_circumfix == 0) { - aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) - && aff->af_nosuggest == 0) { - aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) - || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) - && aff->af_needcomp == 0) { - aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) - && aff->af_comproot == 0) { - 
aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - } else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) - && aff->af_compforbid == 0) { - aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - if (aff->af_pref.ht_used > 0) - smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), - fname, lnum); - } else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) - && aff->af_comppermit == 0) { - aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - if (aff->af_pref.ht_used > 0) - smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), - fname, lnum); - } else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) - && compflags == NULL) { - // Turn flag "c" into COMPOUNDRULE compatible string "c+", - // "Na" into "Na+", "1234" into "1234+". - p = getroom(spin, STRLEN(items[1]) + 2, false); - STRCPY(p, items[1]); - STRCAT(p, "+"); - compflags = p; - } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) { - // We don't use the count, but do check that it's a number and - // not COMPOUNDRULE mistyped. - if (atoi((char *)items[1]) == 0) - smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"), - fname, lnum, items[1]); - } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) { - // Don't use the first rule if it is a number. - if (compflags != NULL || *skipdigits(items[1]) != NUL) { - // Concatenate this string to previously defined ones, - // using a slash to separate them. 
- l = (int)STRLEN(items[1]) + 1; - if (compflags != NULL) - l += (int)STRLEN(compflags) + 1; - p = getroom(spin, l, false); - if (compflags != NULL) { - STRCPY(p, compflags); - STRCAT(p, "/"); - } - STRCAT(p, items[1]); - compflags = p; - } - } else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) - && compmax == 0) { - compmax = atoi((char *)items[1]); - if (compmax == 0) - smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), - fname, lnum, items[1]); - } else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) - && compminlen == 0) { - compminlen = atoi((char *)items[1]); - if (compminlen == 0) - smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"), - fname, lnum, items[1]); - } else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) - && compsylmax == 0) { - compsylmax = atoi((char *)items[1]); - if (compsylmax == 0) - smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), - fname, lnum, items[1]); - } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) { - compoptions |= COMP_CHECKDUP; - } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) { - compoptions |= COMP_CHECKREP; - } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) { - compoptions |= COMP_CHECKCASE; - } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) { - compoptions |= COMP_CHECKTRIPLE; - } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) { - if (atoi((char *)items[1]) == 0) - smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), - fname, lnum, items[1]); - } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) { - garray_T *gap = &spin->si_comppat; - int i; - - // Only add the couple if it isn't already there. 
- for (i = 0; i < gap->ga_len - 1; i += 2) - if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 - && STRCMP(((char_u **)(gap->ga_data))[i + 1], - items[2]) == 0) - break; - if (i >= gap->ga_len) { - ga_grow(gap, 2); - ((char_u **)(gap->ga_data))[gap->ga_len++] - = getroom_save(spin, items[1]); - ((char_u **)(gap->ga_data))[gap->ga_len++] - = getroom_save(spin, items[2]); - } - } else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) - && syllable == NULL) { - syllable = getroom_save(spin, items[1]); - } else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) { - spin->si_nobreak = true; - } else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) { - spin->si_nosplitsugs = true; - } else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) { - spin->si_nocompoundsugs = true; - } else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) { - spin->si_nosugfile = true; - } else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) { - aff->af_pfxpostpone = true; - } else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) { - aff->af_ignoreextra = true; - } else if ((STRCMP(items[0], "PFX") == 0 - || STRCMP(items[0], "SFX") == 0) - && aff_todo == 0 - && itemcnt >= 4) { - int lasti = 4; - char_u key[AH_KEY_LEN]; - - if (*items[0] == 'P') - tp = &aff->af_pref; - else - tp = &aff->af_suff; - - // Myspell allows the same affix name to be used multiple - // times. The affix files that do this have an undocumented - // "S" flag on all but the last block, thus we check for that - // and store it in ah_follows. - STRLCPY(key, items[1], AH_KEY_LEN); - hi = hash_find(tp, key); - if (!HASHITEM_EMPTY(hi)) { - cur_aff = HI2AH(hi); - if (cur_aff->ah_combine != (*items[2] == 'Y')) - smsg(_("Different combining flag in continued affix block in %s line %d: %s"), - fname, lnum, items[1]); - if (!cur_aff->ah_follows) - smsg(_("Duplicate affix in %s line %d: %s"), - fname, lnum, items[1]); - } else { - // New affix letter. 
- cur_aff = (affheader_T *)getroom(spin, - sizeof(affheader_T), true); - if (cur_aff == NULL) - break; - cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], - fname, lnum); - if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) - break; - if (cur_aff->ah_flag == aff->af_bad - || cur_aff->ah_flag == aff->af_rare - || cur_aff->ah_flag == aff->af_keepcase - || cur_aff->ah_flag == aff->af_needaffix - || cur_aff->ah_flag == aff->af_circumfix - || cur_aff->ah_flag == aff->af_nosuggest - || cur_aff->ah_flag == aff->af_needcomp - || cur_aff->ah_flag == aff->af_comproot) - smsg(_("Affix also used for " - "BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST" - "in %s line %d: %s"), - fname, lnum, items[1]); - STRCPY(cur_aff->ah_key, items[1]); - hash_add(tp, cur_aff->ah_key); - - cur_aff->ah_combine = (*items[2] == 'Y'); - } - - // Check for the "S" flag, which apparently means that another - // block with the same affix name is following. - if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) { - ++lasti; - cur_aff->ah_follows = true; - } else - cur_aff->ah_follows = false; - - // Myspell allows extra text after the item, but that might - // mean mistakes go unnoticed. Require a comment-starter, - // unless IGNOREEXTRA is used. Hunspell uses a "-" item. - if (itemcnt > lasti - && !aff->af_ignoreextra - && *items[lasti] != '#') - smsg(_(e_afftrailing), fname, lnum, items[lasti]); - - if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) - smsg(_("Expected Y or N in %s line %d: %s"), - fname, lnum, items[2]); - - if (*items[0] == 'P' && aff->af_pfxpostpone) { - if (cur_aff->ah_newID == 0) { - // Use a new number in the .spl file later, to be able - // to handle multiple .aff files. - check_renumber(spin); - cur_aff->ah_newID = ++spin->si_newprefID; - - // We only really use ah_newID if the prefix is - // postponed. We know that only after handling all - // the items. - did_postpone_prefix = false; - } else - // Did use the ID in a previous block. 
- did_postpone_prefix = true; - } - - aff_todo = atoi((char *)items[3]); - } else if ((STRCMP(items[0], "PFX") == 0 - || STRCMP(items[0], "SFX") == 0) - && aff_todo > 0 - && STRCMP(cur_aff->ah_key, items[1]) == 0 - && itemcnt >= 5) { - affentry_T *aff_entry; - bool upper = false; - int lasti = 5; - - // Myspell allows extra text after the item, but that might - // mean mistakes go unnoticed. Require a comment-starter. - // Hunspell uses a "-" item. - if (itemcnt > lasti && *items[lasti] != '#' - && (STRCMP(items[lasti], "-") != 0 - || itemcnt != lasti + 1)) - smsg(_(e_afftrailing), fname, lnum, items[lasti]); - - // New item for an affix letter. - --aff_todo; - aff_entry = (affentry_T *)getroom(spin, - sizeof(affentry_T), true); - if (aff_entry == NULL) - break; - - if (STRCMP(items[2], "0") != 0) - aff_entry->ae_chop = getroom_save(spin, items[2]); - if (STRCMP(items[3], "0") != 0) { - aff_entry->ae_add = getroom_save(spin, items[3]); - - // Recognize flags on the affix: abcd/XYZ - aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); - if (aff_entry->ae_flags != NULL) { - *aff_entry->ae_flags++ = NUL; - aff_process_flags(aff, aff_entry); - } - } - - // Don't use an affix entry with non-ASCII characters when - // "spin->si_ascii" is true. - if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) - || has_non_ascii(aff_entry->ae_add))) { - aff_entry->ae_next = cur_aff->ah_first; - cur_aff->ah_first = aff_entry; - - if (STRCMP(items[4], ".") != 0) { - char_u buf[MAXLINELEN]; - - aff_entry->ae_cond = getroom_save(spin, items[4]); - if (*items[0] == 'P') - sprintf((char *)buf, "^%s", items[4]); - else - sprintf((char *)buf, "%s$", items[4]); - aff_entry->ae_prog = vim_regcomp(buf, - RE_MAGIC + RE_STRING + RE_STRICT); - if (aff_entry->ae_prog == NULL) - smsg(_("Broken condition in %s line %d: %s"), - fname, lnum, items[4]); - } - - // For postponed prefixes we need an entry in si_prefcond - // for the condition. Use an existing one if possible. 
- // Can't be done for an affix with flags, ignoring - // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. - if (*items[0] == 'P' && aff->af_pfxpostpone - && aff_entry->ae_flags == NULL) { - // When the chop string is one lower-case letter and - // the add string ends in the upper-case letter we set - // the "upper" flag, clear "ae_chop" and remove the - // letters from "ae_add". The condition must either - // be empty or start with the same letter. - if (aff_entry->ae_chop != NULL - && aff_entry->ae_add != NULL - && aff_entry->ae_chop[(*mb_ptr2len)( - aff_entry->ae_chop)] == NUL - ) { - int c, c_up; - - c = PTR2CHAR(aff_entry->ae_chop); - c_up = SPELL_TOUPPER(c); - if (c_up != c - && (aff_entry->ae_cond == NULL - || PTR2CHAR(aff_entry->ae_cond) == c)) { - p = aff_entry->ae_add - + STRLEN(aff_entry->ae_add); - mb_ptr_back(aff_entry->ae_add, p); - if (PTR2CHAR(p) == c_up) { - upper = true; - aff_entry->ae_chop = NULL; - *p = NUL; - - // The condition is matched with the - // actual word, thus must check for the - // upper-case letter. - if (aff_entry->ae_cond != NULL) { - char_u buf[MAXLINELEN]; - if (has_mbyte) { - onecap_copy(items[4], buf, true); - aff_entry->ae_cond = getroom_save( - spin, buf); - } else - *aff_entry->ae_cond = c_up; - if (aff_entry->ae_cond != NULL) { - sprintf((char *)buf, "^%s", - aff_entry->ae_cond); - vim_regfree(aff_entry->ae_prog); - aff_entry->ae_prog = vim_regcomp( - buf, RE_MAGIC + RE_STRING); - } - } - } - } - } - - if (aff_entry->ae_chop == NULL - && aff_entry->ae_flags == NULL) { - int idx; - char_u **pp; - int n; - - // Find a previously used condition. - for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; - --idx) { - p = ((char_u **)spin->si_prefcond.ga_data)[idx]; - if (str_equal(p, aff_entry->ae_cond)) - break; - } - if (idx < 0) { - // Not found, add a new condition. - idx = spin->si_prefcond.ga_len; - pp = GA_APPEND_VIA_PTR(char_u *, &spin->si_prefcond); - *pp = (aff_entry->ae_cond == NULL) ? 
- NULL : getroom_save(spin, aff_entry->ae_cond); - } - - // Add the prefix to the prefix tree. - if (aff_entry->ae_add == NULL) - p = (char_u *)""; - else - p = aff_entry->ae_add; - - // PFX_FLAGS is a negative number, so that - // tree_add_word() knows this is the prefix tree. - n = PFX_FLAGS; - if (!cur_aff->ah_combine) - n |= WFP_NC; - if (upper) - n |= WFP_UP; - if (aff_entry->ae_comppermit) - n |= WFP_COMPPERMIT; - if (aff_entry->ae_compforbid) - n |= WFP_COMPFORBID; - tree_add_word(spin, p, spin->si_prefroot, n, - idx, cur_aff->ah_newID); - did_postpone_prefix = true; - } - - // Didn't actually use ah_newID, backup si_newprefID. - if (aff_todo == 0 && !did_postpone_prefix) { - --spin->si_newprefID; - cur_aff->ah_newID = 0; - } - } - } - } else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) { - fol = vim_strsave(items[1]); - } else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) { - low = vim_strsave(items[1]); - } else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) { - upp = vim_strsave(items[1]); - } else if (is_aff_rule(items, itemcnt, "REP", 2) - || is_aff_rule(items, itemcnt, "REPSAL", 2)) { - /* Ignore REP/REPSAL count */; - if (!isdigit(*items[1])) - smsg(_("Expected REP(SAL) count in %s line %d"), - fname, lnum); - } else if ((STRCMP(items[0], "REP") == 0 - || STRCMP(items[0], "REPSAL") == 0) - && itemcnt >= 3) { - // REP/REPSAL item - // Myspell ignores extra arguments, we require it starts with - // # to detect mistakes. - if (itemcnt > 3 && items[3][0] != '#') - smsg(_(e_afftrailing), fname, lnum, items[3]); - if (items[0][3] == 'S' ? do_repsal : do_rep) { - // Replace underscore with space (can't include a space - // directly). - for (p = items[1]; *p != NUL; mb_ptr_adv(p)) - if (*p == '_') - *p = ' '; - for (p = items[2]; *p != NUL; mb_ptr_adv(p)) - if (*p == '_') - *p = ' '; - add_fromto(spin, items[0][3] == 'S' - ? 
&spin->si_repsal - : &spin->si_rep, items[1], items[2]); - } - } else if (is_aff_rule(items, itemcnt, "MAP", 2)) { - // MAP item or count - if (!found_map) { - // First line contains the count. - found_map = true; - if (!isdigit(*items[1])) - smsg(_("Expected MAP count in %s line %d"), - fname, lnum); - } else if (do_mapline) { - int c; - - // Check that every character appears only once. - for (p = items[1]; *p != NUL; ) { - c = mb_ptr2char_adv(&p); - if ((!GA_EMPTY(&spin->si_map) - && vim_strchr(spin->si_map.ga_data, c) - != NULL) - || vim_strchr(p, c) != NULL) - smsg(_("Duplicate character in MAP in %s line %d"), - fname, lnum); - } - - // We simply concatenate all the MAP strings, separated by - // slashes. - ga_concat(&spin->si_map, items[1]); - ga_append(&spin->si_map, '/'); - } - } - // Accept "SAL from to" and "SAL from to #comment". - else if (is_aff_rule(items, itemcnt, "SAL", 3)) { - if (do_sal) { - // SAL item (sounds-a-like) - // Either one of the known keys or a from-to pair. - if (STRCMP(items[1], "followup") == 0) - spin->si_followup = sal_to_bool(items[2]); - else if (STRCMP(items[1], "collapse_result") == 0) - spin->si_collapse = sal_to_bool(items[2]); - else if (STRCMP(items[1], "remove_accents") == 0) - spin->si_rem_accents = sal_to_bool(items[2]); - else - // when "to" is "_" it means empty - add_fromto(spin, &spin->si_sal, items[1], - STRCMP(items[2], "_") == 0 ? 
(char_u *)"" - : items[2]); - } - } else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) - && sofofrom == NULL) { - sofofrom = getroom_save(spin, items[1]); - } else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) - && sofoto == NULL) { - sofoto = getroom_save(spin, items[1]); - } else if (STRCMP(items[0], "COMMON") == 0) { - int i; - - for (i = 1; i < itemcnt; ++i) { - if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, - items[i]))) { - p = vim_strsave(items[i]); - hash_add(&spin->si_commonwords, p); - } - } - } else - smsg(_("Unrecognized or duplicate item in %s line %d: %s"), - fname, lnum, items[0]); - } - } - - if (fol != NULL || low != NULL || upp != NULL) { - if (spin->si_clear_chartab) { - // Clear the char type tables, don't want to use any of the - // currently used spell properties. - init_spell_chartab(); - spin->si_clear_chartab = false; - } - - // Don't write a word table for an ASCII file, so that we don't check - // for conflicts with a word table that matches 'encoding'. - // Don't write one for utf-8 either, we use utf_*() and - // mb_get_class(), the list of chars in the file will be incomplete. - if (!spin->si_ascii - && !enc_utf8 - ) { - if (fol == NULL || low == NULL || upp == NULL) - smsg(_("Missing FOL/LOW/UPP line in %s"), fname); - else - (void)set_spell_chartab(fol, low, upp); - } - - xfree(fol); - xfree(low); - xfree(upp); - } - - // Use compound specifications of the .aff file for the spell info. 
- if (compmax != 0) { - aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); - spin->si_compmax = compmax; - } - - if (compminlen != 0) { - aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); - spin->si_compminlen = compminlen; - } - - if (compsylmax != 0) { - if (syllable == NULL) - smsg(_("COMPOUNDSYLMAX used without SYLLABLE")); - aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); - spin->si_compsylmax = compsylmax; - } - - if (compoptions != 0) { - aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); - spin->si_compoptions |= compoptions; - } - - if (compflags != NULL) - process_compflags(spin, aff, compflags); - - // Check that we didn't use too many renumbered flags. - if (spin->si_newcompID < spin->si_newprefID) { - if (spin->si_newcompID == 127 || spin->si_newcompID == 255) - MSG(_("Too many postponed prefixes")); - else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) - MSG(_("Too many compound flags")); - else - MSG(_("Too many postponed prefixes and/or compound flags")); - } - - if (syllable != NULL) { - aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); - spin->si_syllable = syllable; - } - - if (sofofrom != NULL || sofoto != NULL) { - if (sofofrom == NULL || sofoto == NULL) - smsg(_("Missing SOFO%s line in %s"), - sofofrom == NULL ? "FROM" : "TO", fname); - else if (!GA_EMPTY(&spin->si_sal)) - smsg(_("Both SAL and SOFO lines in %s"), fname); - else { - aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); - aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); - spin->si_sofofr = sofofrom; - spin->si_sofoto = sofoto; - } - } - - if (midword != NULL) { - aff_check_string(spin->si_midword, midword, "MIDWORD"); - spin->si_midword = midword; - } - - xfree(pc); - fclose(fd); - return aff; -} - -// Returns true when items[0] equals "rulename", there are "mincount" items or -// a comment is following after item "mincount". 
-static bool is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount) -{ - return STRCMP(items[0], rulename) == 0 - && (itemcnt == mincount - || (itemcnt > mincount && items[mincount][0] == '#')); -} - -// For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from -// ae_flags to ae_comppermit and ae_compforbid. -static void aff_process_flags(afffile_T *affile, affentry_T *entry) -{ - char_u *p; - char_u *prevp; - unsigned flag; - - if (entry->ae_flags != NULL - && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) { - for (p = entry->ae_flags; *p != NUL; ) { - prevp = p; - flag = get_affitem(affile->af_flagtype, &p); - if (flag == affile->af_comppermit || flag == affile->af_compforbid) { - STRMOVE(prevp, p); - p = prevp; - if (flag == affile->af_comppermit) - entry->ae_comppermit = true; - else - entry->ae_compforbid = true; - } - if (affile->af_flagtype == AFT_NUM && *p == ',') - ++p; - } - if (*entry->ae_flags == NUL) - entry->ae_flags = NULL; // nothing left - } -} - -// Returns true if "s" is the name of an info item in the affix file. -static bool spell_info_item(char_u *s) -{ - return STRCMP(s, "NAME") == 0 - || STRCMP(s, "HOME") == 0 - || STRCMP(s, "VERSION") == 0 - || STRCMP(s, "AUTHOR") == 0 - || STRCMP(s, "EMAIL") == 0 - || STRCMP(s, "COPYRIGHT") == 0; -} - -// Turn an affix flag name into a number, according to the FLAG type. -// returns zero for failure. -static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum) -{ - unsigned res; - char_u *p = item; - - res = get_affitem(flagtype, &p); - if (res == 0) { - if (flagtype == AFT_NUM) - smsg(_("Flag is not a number in %s line %d: %s"), - fname, lnum, item); - else - smsg(_("Illegal flag in %s line %d: %s"), - fname, lnum, item); - } - if (*p != NUL) { - smsg(_(e_affname), fname, lnum, item); - return 0; - } - - return res; -} - -// Get one affix name from "*pp" and advance the pointer. -// Returns zero for an error, still advances the pointer then. 
-static unsigned get_affitem(int flagtype, char_u **pp) -{ - int res; - - if (flagtype == AFT_NUM) { - if (!ascii_isdigit(**pp)) { - ++*pp; // always advance, avoid getting stuck - return 0; - } - res = getdigits_int(pp); - } else { - res = mb_ptr2char_adv(pp); - if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG - && res >= 'A' && res <= 'Z')) { - if (**pp == NUL) - return 0; - res = mb_ptr2char_adv(pp) + (res << 16); - } - } - return res; -} - -// Process the "compflags" string used in an affix file and append it to -// spin->si_compflags. -// The processing involves changing the affix names to ID numbers, so that -// they fit in one byte. -static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags) -{ - char_u *p; - char_u *prevp; - unsigned flag; - compitem_T *ci; - int id; - int len; - char_u *tp; - char_u key[AH_KEY_LEN]; - hashitem_T *hi; - - // Make room for the old and the new compflags, concatenated with a / in - // between. Processing it makes it shorter, but we don't know by how - // much, thus allocate the maximum. - len = (int)STRLEN(compflags) + 1; - if (spin->si_compflags != NULL) - len += (int)STRLEN(spin->si_compflags) + 1; - p = getroom(spin, len, false); - if (spin->si_compflags != NULL) { - STRCPY(p, spin->si_compflags); - STRCAT(p, "/"); - } - spin->si_compflags = p; - tp = p + STRLEN(p); - - for (p = compflags; *p != NUL; ) { - if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) - // Copy non-flag characters directly. - *tp++ = *p++; - else { - // First get the flag number, also checks validity. - prevp = p; - flag = get_affitem(aff->af_flagtype, &p); - if (flag != 0) { - // Find the flag in the hashtable. If it was used before, use - // the existing ID. Otherwise add a new entry. 
- STRLCPY(key, prevp, p - prevp + 1); - hi = hash_find(&aff->af_comp, key); - if (!HASHITEM_EMPTY(hi)) - id = HI2CI(hi)->ci_newID; - else { - ci = (compitem_T *)getroom(spin, sizeof(compitem_T), true); - if (ci == NULL) - break; - STRCPY(ci->ci_key, key); - ci->ci_flag = flag; - // Avoid using a flag ID that has a special meaning in a - // regexp (also inside []). - do { - check_renumber(spin); - id = spin->si_newcompID--; - } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); - ci->ci_newID = id; - hash_add(&aff->af_comp, ci->ci_key); - } - *tp++ = id; - } - if (aff->af_flagtype == AFT_NUM && *p == ',') - ++p; - } - } - - *tp = NUL; -} - -// Check that the new IDs for postponed affixes and compounding don't overrun -// each other. We have almost 255 available, but start at 0-127 to avoid -// using two bytes for utf-8. When the 0-127 range is used up go to 128-255. -// When that is used up an error message is given. -static void check_renumber(spellinfo_T *spin) -{ - if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) { - spin->si_newprefID = 127; - spin->si_newcompID = 255; - } -} - -// Returns true if flag "flag" appears in affix list "afflist". -static bool flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) -{ - char_u *p; - unsigned n; - - switch (flagtype) { - case AFT_CHAR: - return vim_strchr(afflist, flag) != NULL; - - case AFT_CAPLONG: - case AFT_LONG: - for (p = afflist; *p != NUL; ) { - n = mb_ptr2char_adv(&p); - if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) - && *p != NUL) - n = mb_ptr2char_adv(&p) + (n << 16); - if (n == flag) - return true; - } - break; - - case AFT_NUM: - for (p = afflist; *p != NUL; ) { - int digits = getdigits_int(&p); - assert(digits >= 0); - n = (unsigned int)digits; - if (n == flag) - return true; - if (*p != NUL) // skip over comma - ++p; - } - break; - } - return false; -} - -// Give a warning when "spinval" and "affval" numbers are set and not the same. 
-static void aff_check_number(int spinval, int affval, char *name) -{ - if (spinval != 0 && spinval != affval) - smsg(_("%s value differs from what is used in another .aff file"), - name); -} - -// Give a warning when "spinval" and "affval" strings are set and not the same. -static void aff_check_string(char_u *spinval, char_u *affval, char *name) -{ - if (spinval != NULL && STRCMP(spinval, affval) != 0) - smsg(_("%s value differs from what is used in another .aff file"), - name); -} - -// Returns true if strings "s1" and "s2" are equal. Also consider both being -// NULL as equal. -static bool str_equal(char_u *s1, char_u *s2) -{ - if (s1 == NULL || s2 == NULL) - return s1 == s2; - return STRCMP(s1, s2) == 0; -} - -// Add a from-to item to "gap". Used for REP and SAL items. -// They are stored case-folded. -static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to) -{ - char_u word[MAXWLEN]; - - fromto_T *ftp = GA_APPEND_VIA_PTR(fromto_T, gap); - (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); - ftp->ft_from = getroom_save(spin, word); - (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); - ftp->ft_to = getroom_save(spin, word); -} - -// Converts a boolean argument in a SAL line to true or false; -static bool sal_to_bool(char_u *s) -{ - return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; -} - -// Free the structure filled by spell_read_aff(). -static void spell_free_aff(afffile_T *aff) -{ - hashtab_T *ht; - hashitem_T *hi; - int todo; - affheader_T *ah; - affentry_T *ae; - - xfree(aff->af_enc); - - // All this trouble to free the "ae_prog" items... 
- for (ht = &aff->af_pref;; ht = &aff->af_suff) { - todo = (int)ht->ht_used; - for (hi = ht->ht_array; todo > 0; ++hi) { - if (!HASHITEM_EMPTY(hi)) { - --todo; - ah = HI2AH(hi); - for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) - vim_regfree(ae->ae_prog); - } - } - if (ht == &aff->af_suff) - break; - } - - hash_clear(&aff->af_pref); - hash_clear(&aff->af_suff); - hash_clear(&aff->af_comp); -} - -// Read dictionary file "fname". -// Returns OK or FAIL; -static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) -{ - hashtab_T ht; - char_u line[MAXLINELEN]; - char_u *p; - char_u *afflist; - char_u store_afflist[MAXWLEN]; - int pfxlen; - bool need_affix; - char_u *dw; - char_u *pc; - char_u *w; - int l; - hash_T hash; - hashitem_T *hi; - FILE *fd; - int lnum = 1; - int non_ascii = 0; - int retval = OK; - char_u message[MAXLINELEN + MAXWLEN]; - int flags; - int duplicate = 0; - - // Open the file. - fd = mch_fopen((char *)fname, "r"); - if (fd == NULL) { - EMSG2(_(e_notopen), fname); - return FAIL; - } - - // The hashtable is only used to detect duplicated words. - hash_init(&ht); - - vim_snprintf((char *)IObuff, IOSIZE, - _("Reading dictionary file %s ..."), fname); - spell_message(spin, IObuff); - - // start with a message for the first line - spin->si_msg_count = 999999; - - // Read and ignore the first line: word count. - (void)vim_fgets(line, MAXLINELEN, fd); - if (!ascii_isdigit(*skipwhite(line))) - EMSG2(_("E760: No word count in %s"), fname); - - // Read all the lines in the file one by one. - // The words are converted to 'encoding' here, before being added to - // the hashtable. - while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) { - line_breakcheck(); - ++lnum; - if (line[0] == '#' || line[0] == '/') - continue; // comment line - - // Remove CR, LF and white space from the end. White space halfway through - // the word is kept to allow multi-word terms like "et al.". 
- l = (int)STRLEN(line); - while (l > 0 && line[l - 1] <= ' ') - --l; - if (l == 0) - continue; // empty line - line[l] = NUL; - - // Convert from "SET" to 'encoding' when needed. - if (spin->si_conv.vc_type != CONV_NONE) { - pc = string_convert(&spin->si_conv, line, NULL); - if (pc == NULL) { - smsg(_("Conversion failure for word in %s line %d: %s"), - fname, lnum, line); - continue; - } - w = pc; - } else { - pc = NULL; - w = line; - } - - // Truncate the word at the "/", set "afflist" to what follows. - // Replace "\/" by "/" and "\\" by "\". - afflist = NULL; - for (p = w; *p != NUL; mb_ptr_adv(p)) { - if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) - STRMOVE(p, p + 1); - else if (*p == '/') { - *p = NUL; - afflist = p + 1; - break; - } - } - - // Skip non-ASCII words when "spin->si_ascii" is true. - if (spin->si_ascii && has_non_ascii(w)) { - ++non_ascii; - xfree(pc); - continue; - } - - // This takes time, print a message every 10000 words. - if (spin->si_verbose && spin->si_msg_count > 10000) { - spin->si_msg_count = 0; - vim_snprintf((char *)message, sizeof(message), - _("line %6d, word %6d - %s"), - lnum, spin->si_foldwcount + spin->si_keepwcount, w); - msg_start(); - msg_puts_long_attr(message, 0); - msg_clr_eos(); - msg_didout = FALSE; - msg_col = 0; - ui_flush(); - } - - // Store the word in the hashtable to be able to find duplicates. - dw = getroom_save(spin, w); - if (dw == NULL) { - retval = FAIL; - xfree(pc); - break; - } - - hash = hash_hash(dw); - hi = hash_lookup(&ht, (const char *)dw, STRLEN(dw), hash); - if (!HASHITEM_EMPTY(hi)) { - if (p_verbose > 0) - smsg(_("Duplicate word in %s line %d: %s"), - fname, lnum, dw); - else if (duplicate == 0) - smsg(_("First duplicate word in %s line %d: %s"), - fname, lnum, dw); - ++duplicate; - } else - hash_add_item(&ht, hi, dw, hash); - - flags = 0; - store_afflist[0] = NUL; - pfxlen = 0; - need_affix = false; - if (afflist != NULL) { - // Extract flags from the affix list. 
- flags |= get_affix_flags(affile, afflist); - - if (affile->af_needaffix != 0 && flag_in_afflist( - affile->af_flagtype, afflist, affile->af_needaffix)) - need_affix = true; - - if (affile->af_pfxpostpone) - // Need to store the list of prefix IDs with the word. - pfxlen = get_pfxlist(affile, afflist, store_afflist); - - if (spin->si_compflags != NULL) - // Need to store the list of compound flags with the word. - // Concatenate them to the list of prefix IDs. - get_compflags(affile, afflist, store_afflist + pfxlen); - } - - // Add the word to the word tree(s). - if (store_word(spin, dw, flags, spin->si_region, - store_afflist, need_affix) == FAIL) - retval = FAIL; - - if (afflist != NULL) { - // Find all matching suffixes and add the resulting words. - // Additionally do matching prefixes that combine. - if (store_aff_word(spin, dw, afflist, affile, - &affile->af_suff, &affile->af_pref, - CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) - retval = FAIL; - - // Find all matching prefixes and add the resulting words. - if (store_aff_word(spin, dw, afflist, affile, - &affile->af_pref, NULL, - CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) - retval = FAIL; - } - - xfree(pc); - } - - if (duplicate > 0) - smsg(_("%d duplicate word(s) in %s"), duplicate, fname); - if (spin->si_ascii && non_ascii > 0) - smsg(_("Ignored %d word(s) with non-ASCII characters in %s"), - non_ascii, fname); - hash_clear(&ht); - - fclose(fd); - return retval; -} - -// Check for affix flags in "afflist" that are turned into word flags. -// Return WF_ flags. 
-static int get_affix_flags(afffile_T *affile, char_u *afflist) -{ - int flags = 0; - - if (affile->af_keepcase != 0 && flag_in_afflist( - affile->af_flagtype, afflist, affile->af_keepcase)) - flags |= WF_KEEPCAP | WF_FIXCAP; - if (affile->af_rare != 0 && flag_in_afflist( - affile->af_flagtype, afflist, affile->af_rare)) - flags |= WF_RARE; - if (affile->af_bad != 0 && flag_in_afflist( - affile->af_flagtype, afflist, affile->af_bad)) - flags |= WF_BANNED; - if (affile->af_needcomp != 0 && flag_in_afflist( - affile->af_flagtype, afflist, affile->af_needcomp)) - flags |= WF_NEEDCOMP; - if (affile->af_comproot != 0 && flag_in_afflist( - affile->af_flagtype, afflist, affile->af_comproot)) - flags |= WF_COMPROOT; - if (affile->af_nosuggest != 0 && flag_in_afflist( - affile->af_flagtype, afflist, affile->af_nosuggest)) - flags |= WF_NOSUGGEST; - return flags; -} - -// Get the list of prefix IDs from the affix list "afflist". -// Used for PFXPOSTPONE. -// Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL -// and return the number of affixes. -static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist) -{ - char_u *p; - char_u *prevp; - int cnt = 0; - int id; - char_u key[AH_KEY_LEN]; - hashitem_T *hi; - - for (p = afflist; *p != NUL; ) { - prevp = p; - if (get_affitem(affile->af_flagtype, &p) != 0) { - // A flag is a postponed prefix flag if it appears in "af_pref" - // and it's ID is not zero. - STRLCPY(key, prevp, p - prevp + 1); - hi = hash_find(&affile->af_pref, key); - if (!HASHITEM_EMPTY(hi)) { - id = HI2AH(hi)->ah_newID; - if (id != 0) - store_afflist[cnt++] = id; - } - } - if (affile->af_flagtype == AFT_NUM && *p == ',') - ++p; - } - - store_afflist[cnt] = NUL; - return cnt; -} - -// Get the list of compound IDs from the affix list "afflist" that are used -// for compound words. -// Puts the flags in "store_afflist[]". 
-static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist) -{ - char_u *p; - char_u *prevp; - int cnt = 0; - char_u key[AH_KEY_LEN]; - hashitem_T *hi; - - for (p = afflist; *p != NUL; ) { - prevp = p; - if (get_affitem(affile->af_flagtype, &p) != 0) { - // A flag is a compound flag if it appears in "af_comp". - STRLCPY(key, prevp, p - prevp + 1); - hi = hash_find(&affile->af_comp, key); - if (!HASHITEM_EMPTY(hi)) - store_afflist[cnt++] = HI2CI(hi)->ci_newID; - } - if (affile->af_flagtype == AFT_NUM && *p == ',') - ++p; - } - - store_afflist[cnt] = NUL; -} - -// Apply affixes to a word and store the resulting words. -// "ht" is the hashtable with affentry_T that need to be applied, either -// prefixes or suffixes. -// "xht", when not NULL, is the prefix hashtable, to be used additionally on -// the resulting words for combining affixes. -// -// Returns FAIL when out of memory. -static int -store_aff_word ( - spellinfo_T *spin, // spell info - char_u *word, // basic word start - char_u *afflist, // list of names of supported affixes - afffile_T *affile, - hashtab_T *ht, - hashtab_T *xht, - int condit, // CONDIT_SUF et al. - int flags, // flags for the word - char_u *pfxlist, // list of prefix IDs - int pfxlen // nr of flags in "pfxlist" for prefixes, rest - // is compound flags -) -{ - int todo; - hashitem_T *hi; - affheader_T *ah; - affentry_T *ae; - char_u newword[MAXWLEN]; - int retval = OK; - int i, j; - char_u *p; - int use_flags; - char_u *use_pfxlist; - int use_pfxlen; - bool need_affix; - char_u store_afflist[MAXWLEN]; - char_u pfx_pfxlist[MAXWLEN]; - size_t wordlen = STRLEN(word); - int use_condit; - - todo = (int)ht->ht_used; - for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) { - if (!HASHITEM_EMPTY(hi)) { - --todo; - ah = HI2AH(hi); - - // Check that the affix combines, if required, and that the word - // supports this affix. 
- if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) - && flag_in_afflist(affile->af_flagtype, afflist, - ah->ah_flag)) { - // Loop over all affix entries with this name. - for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) { - // Check the condition. It's not logical to match case - // here, but it is required for compatibility with - // Myspell. - // Another requirement from Myspell is that the chop - // string is shorter than the word itself. - // For prefixes, when "PFXPOSTPONE" was used, only do - // prefixes with a chop string and/or flags. - // When a previously added affix had CIRCUMFIX this one - // must have it too, if it had not then this one must not - // have one either. - if ((xht != NULL || !affile->af_pfxpostpone - || ae->ae_chop != NULL - || ae->ae_flags != NULL) - && (ae->ae_chop == NULL - || STRLEN(ae->ae_chop) < wordlen) - && (ae->ae_prog == NULL - || vim_regexec_prog(&ae->ae_prog, false, word, (colnr_T)0)) - && (((condit & CONDIT_CFIX) == 0) - == ((condit & CONDIT_AFF) == 0 - || ae->ae_flags == NULL - || !flag_in_afflist(affile->af_flagtype, - ae->ae_flags, affile->af_circumfix)))) { - // Match. Remove the chop and add the affix. - if (xht == NULL) { - // prefix: chop/add at the start of the word - if (ae->ae_add == NULL) - *newword = NUL; - else - STRLCPY(newword, ae->ae_add, MAXWLEN); - p = word; - if (ae->ae_chop != NULL) { - // Skip chop string. - if (has_mbyte) { - i = mb_charlen(ae->ae_chop); - for (; i > 0; --i) - mb_ptr_adv(p); - } else - p += STRLEN(ae->ae_chop); - } - STRCAT(newword, p); - } else { - // suffix: chop/add at the end of the word - STRLCPY(newword, word, MAXWLEN); - if (ae->ae_chop != NULL) { - // Remove chop string. 
- p = newword + STRLEN(newword); - i = (int)MB_CHARLEN(ae->ae_chop); - for (; i > 0; --i) - mb_ptr_back(newword, p); - *p = NUL; - } - if (ae->ae_add != NULL) - STRCAT(newword, ae->ae_add); - } - - use_flags = flags; - use_pfxlist = pfxlist; - use_pfxlen = pfxlen; - need_affix = false; - use_condit = condit | CONDIT_COMB | CONDIT_AFF; - if (ae->ae_flags != NULL) { - // Extract flags from the affix list. - use_flags |= get_affix_flags(affile, ae->ae_flags); - - if (affile->af_needaffix != 0 && flag_in_afflist( - affile->af_flagtype, ae->ae_flags, - affile->af_needaffix)) - need_affix = true; - - // When there is a CIRCUMFIX flag the other affix - // must also have it and we don't add the word - // with one affix. - if (affile->af_circumfix != 0 && flag_in_afflist( - affile->af_flagtype, ae->ae_flags, - affile->af_circumfix)) { - use_condit |= CONDIT_CFIX; - if ((condit & CONDIT_CFIX) == 0) - need_affix = true; - } - - if (affile->af_pfxpostpone - || spin->si_compflags != NULL) { - if (affile->af_pfxpostpone) - // Get prefix IDS from the affix list. - use_pfxlen = get_pfxlist(affile, - ae->ae_flags, store_afflist); - else - use_pfxlen = 0; - use_pfxlist = store_afflist; - - // Combine the prefix IDs. Avoid adding the - // same ID twice. - for (i = 0; i < pfxlen; ++i) { - for (j = 0; j < use_pfxlen; ++j) - if (pfxlist[i] == use_pfxlist[j]) - break; - if (j == use_pfxlen) - use_pfxlist[use_pfxlen++] = pfxlist[i]; - } - - if (spin->si_compflags != NULL) - // Get compound IDS from the affix list. - get_compflags(affile, ae->ae_flags, - use_pfxlist + use_pfxlen); - else - use_pfxlist[use_pfxlen] = NUL; - - // Combine the list of compound flags. - // Concatenate them to the prefix IDs list. - // Avoid adding the same ID twice. 
- for (i = pfxlen; pfxlist[i] != NUL; ++i) { - for (j = use_pfxlen; - use_pfxlist[j] != NUL; ++j) - if (pfxlist[i] == use_pfxlist[j]) - break; - if (use_pfxlist[j] == NUL) { - use_pfxlist[j++] = pfxlist[i]; - use_pfxlist[j] = NUL; - } - } - } - } - - // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't - // use the compound flags. - if (use_pfxlist != NULL && ae->ae_compforbid) { - STRLCPY(pfx_pfxlist, use_pfxlist, use_pfxlen + 1); - use_pfxlist = pfx_pfxlist; - } - - // When there are postponed prefixes... - if (spin->si_prefroot != NULL - && spin->si_prefroot->wn_sibling != NULL) { - // ... add a flag to indicate an affix was used. - use_flags |= WF_HAS_AFF; - - // ... don't use a prefix list if combining - // affixes is not allowed. But do use the - // compound flags after them. - if (!ah->ah_combine && use_pfxlist != NULL) - use_pfxlist += use_pfxlen; - } - - // When compounding is supported and there is no - // "COMPOUNDPERMITFLAG" then forbid compounding on the - // side where the affix is applied. - if (spin->si_compflags != NULL && !ae->ae_comppermit) { - if (xht != NULL) - use_flags |= WF_NOCOMPAFT; - else - use_flags |= WF_NOCOMPBEF; - } - - // Store the modified word. - if (store_word(spin, newword, use_flags, - spin->si_region, use_pfxlist, - need_affix) == FAIL) - retval = FAIL; - - // When added a prefix or a first suffix and the affix - // has flags may add a(nother) suffix. RECURSIVE! - if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) - if (store_aff_word(spin, newword, ae->ae_flags, - affile, &affile->af_suff, xht, - use_condit & (xht == NULL - ? ~0 : ~CONDIT_SUF), - use_flags, use_pfxlist, pfxlen) == FAIL) - retval = FAIL; - - // When added a suffix and combining is allowed also - // try adding a prefix additionally. Both for the - // word flags and for the affix flags. RECURSIVE! 
- if (xht != NULL && ah->ah_combine) { - if (store_aff_word(spin, newword, - afflist, affile, - xht, NULL, use_condit, - use_flags, use_pfxlist, - pfxlen) == FAIL - || (ae->ae_flags != NULL - && store_aff_word(spin, newword, - ae->ae_flags, affile, - xht, NULL, use_condit, - use_flags, use_pfxlist, - pfxlen) == FAIL)) - retval = FAIL; - } - } - } - } - } - } - - return retval; -} - -// Read a file with a list of words. -static int spell_read_wordfile(spellinfo_T *spin, char_u *fname) -{ - FILE *fd; - long lnum = 0; - char_u rline[MAXLINELEN]; - char_u *line; - char_u *pc = NULL; - char_u *p; - int l; - int retval = OK; - bool did_word = false; - int non_ascii = 0; - int flags; - int regionmask; - - // Open the file. - fd = mch_fopen((char *)fname, "r"); - if (fd == NULL) { - EMSG2(_(e_notopen), fname); - return FAIL; - } - - vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); - spell_message(spin, IObuff); - - // Read all the lines in the file one by one. - while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { - line_breakcheck(); - ++lnum; - - // Skip comment lines. - if (*rline == '#') - continue; - - // Remove CR, LF and white space from the end. - l = (int)STRLEN(rline); - while (l > 0 && rline[l - 1] <= ' ') - --l; - if (l == 0) - continue; // empty or blank line - rline[l] = NUL; - - // Convert from "/encoding={encoding}" to 'encoding' when needed. 
- xfree(pc); - if (spin->si_conv.vc_type != CONV_NONE) { - pc = string_convert(&spin->si_conv, rline, NULL); - if (pc == NULL) { - smsg(_("Conversion failure for word in %s line %d: %s"), - fname, lnum, rline); - continue; - } - line = pc; - } else { - pc = NULL; - line = rline; - } - - if (*line == '/') { - ++line; - if (STRNCMP(line, "encoding=", 9) == 0) { - if (spin->si_conv.vc_type != CONV_NONE) - smsg(_("Duplicate /encoding= line ignored in %s line %d: %s"), - fname, lnum, line - 1); - else if (did_word) - smsg(_("/encoding= line after word ignored in %s line %d: %s"), - fname, lnum, line - 1); - else { - char_u *enc; - - // Setup for conversion to 'encoding'. - line += 9; - enc = enc_canonize(line); - if (!spin->si_ascii - && convert_setup(&spin->si_conv, enc, - p_enc) == FAIL) - smsg(_("Conversion in %s not supported: from %s to %s"), - fname, line, p_enc); - xfree(enc); - spin->si_conv.vc_fail = true; - } - continue; - } - - if (STRNCMP(line, "regions=", 8) == 0) { - if (spin->si_region_count > 1) - smsg(_("Duplicate /regions= line ignored in %s line %d: %s"), - fname, lnum, line); - else { - line += 8; - if (STRLEN(line) > 16) - smsg(_("Too many regions in %s line %d: %s"), - fname, lnum, line); - else { - spin->si_region_count = (int)STRLEN(line) / 2; - STRCPY(spin->si_region_name, line); - - // Adjust the mask for a word valid in all regions. - spin->si_region = (1 << spin->si_region_count) - 1; - } - } - continue; - } - - smsg(_("/ line ignored in %s line %d: %s"), - fname, lnum, line - 1); - continue; - } - - flags = 0; - regionmask = spin->si_region; - - // Check for flags and region after a slash. - p = vim_strchr(line, '/'); - if (p != NULL) { - *p++ = NUL; - while (*p != NUL) { - if (*p == '=') // keep-case word - flags |= WF_KEEPCAP | WF_FIXCAP; - else if (*p == '!') // Bad, bad, wicked word. - flags |= WF_BANNED; - else if (*p == '?') // Rare word. 
- flags |= WF_RARE; - else if (ascii_isdigit(*p)) { // region number(s) - if ((flags & WF_REGION) == 0) // first one - regionmask = 0; - flags |= WF_REGION; - - l = *p - '0'; - if (l > spin->si_region_count) { - smsg(_("Invalid region nr in %s line %d: %s"), - fname, lnum, p); - break; - } - regionmask |= 1 << (l - 1); - } else { - smsg(_("Unrecognized flags in %s line %d: %s"), - fname, lnum, p); - break; - } - ++p; - } - } - - // Skip non-ASCII words when "spin->si_ascii" is true. - if (spin->si_ascii && has_non_ascii(line)) { - ++non_ascii; - continue; - } - - // Normal word: store it. - if (store_word(spin, line, flags, regionmask, NULL, false) == FAIL) { - retval = FAIL; - break; - } - did_word = true; - } - - xfree(pc); - fclose(fd); - - if (spin->si_ascii && non_ascii > 0) { - vim_snprintf((char *)IObuff, IOSIZE, - _("Ignored %d words with non-ASCII characters"), non_ascii); - spell_message(spin, IObuff); - } - - return retval; -} - -/// Get part of an sblock_T, "len" bytes long. -/// This avoids calling free() for every little struct we use (and keeping -/// track of them). -/// The memory is cleared to all zeros. -/// -/// @param len Length needed (<= SBLOCKSIZE). -/// @param align Align for pointer. -/// @return Pointer into block data. -static void *getroom(spellinfo_T *spin, size_t len, bool align) - FUNC_ATTR_NONNULL_RET -{ - char_u *p; - sblock_T *bl = spin->si_blocks; - - assert(len <= SBLOCKSIZE); - - if (align && bl != NULL) - // Round size up for alignment. On some systems structures need to be - // aligned to the size of a pointer (e.g., SPARC). - bl->sb_used = (bl->sb_used + sizeof(char *) - 1) - & ~(sizeof(char *) - 1); - - if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) { - // Allocate a block of memory. It is not freed until much later. 
- bl = xcalloc(1, (sizeof(sblock_T) + SBLOCKSIZE)); - bl->sb_next = spin->si_blocks; - spin->si_blocks = bl; - bl->sb_used = 0; - ++spin->si_blocks_cnt; - } - - p = bl->sb_data + bl->sb_used; - bl->sb_used += (int)len; - - return p; -} - -// Make a copy of a string into memory allocated with getroom(). -// Returns NULL when out of memory. -static char_u *getroom_save(spellinfo_T *spin, char_u *s) -{ - char_u *sc; - - sc = (char_u *)getroom(spin, STRLEN(s) + 1, false); - if (sc != NULL) - STRCPY(sc, s); - return sc; -} - - -// Free the list of allocated sblock_T. -static void free_blocks(sblock_T *bl) -{ - sblock_T *next; - - while (bl != NULL) { - next = bl->sb_next; - xfree(bl); - bl = next; - } -} - -// Allocate the root of a word tree. -// Returns NULL when out of memory. -static wordnode_T *wordtree_alloc(spellinfo_T *spin) -{ - return (wordnode_T *)getroom(spin, sizeof(wordnode_T), true); -} - -// Store a word in the tree(s). -// Always store it in the case-folded tree. For a keep-case word this is -// useful when the word can also be used with all caps (no WF_FIXCAP flag) and -// used to find suggestions. -// For a keep-case word also store it in the keep-case tree. -// When "pfxlist" is not NULL store the word for each postponed prefix ID and -// compound flag. -static int -store_word ( - spellinfo_T *spin, - char_u *word, - int flags, // extra flags, WF_BANNED - int region, // supported region(s) - char_u *pfxlist, // list of prefix IDs or NULL - bool need_affix // only store word with affix ID -) -{ - int len = (int)STRLEN(word); - int ct = captype(word, word + len); - char_u foldword[MAXWLEN]; - int res = OK; - char_u *p; - - (void)spell_casefold(word, len, foldword, MAXWLEN); - for (p = pfxlist; res == OK; ++p) { - if (!need_affix || (p != NULL && *p != NUL)) - res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, - region, p == NULL ? 
0 : *p); - if (p == NULL || *p == NUL) - break; - } - ++spin->si_foldwcount; - - if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) { - for (p = pfxlist; res == OK; ++p) { - if (!need_affix || (p != NULL && *p != NUL)) - res = tree_add_word(spin, word, spin->si_keeproot, flags, - region, p == NULL ? 0 : *p); - if (p == NULL || *p == NUL) - break; - } - ++spin->si_keepwcount; - } - return res; -} - -// Add word "word" to a word tree at "root". -// When "flags" < 0 we are adding to the prefix tree where "flags" is used for -// "rare" and "region" is the condition nr. -// Returns FAIL when out of memory. -static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int flags, int region, int affixID) -{ - wordnode_T *node = root; - wordnode_T *np; - wordnode_T *copyp, **copyprev; - wordnode_T **prev = NULL; - int i; - - // Add each byte of the word to the tree, including the NUL at the end. - for (i = 0;; ++i) { - // When there is more than one reference to this node we need to make - // a copy, so that we can modify it. Copy the whole list of siblings - // (we don't optimize for a partly shared list of siblings). - if (node != NULL && node->wn_refs > 1) { - --node->wn_refs; - copyprev = prev; - for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) { - // Allocate a new node and copy the info. - np = get_wordnode(spin); - if (np == NULL) - return FAIL; - np->wn_child = copyp->wn_child; - if (np->wn_child != NULL) - ++np->wn_child->wn_refs; // child gets extra ref - np->wn_byte = copyp->wn_byte; - if (np->wn_byte == NUL) { - np->wn_flags = copyp->wn_flags; - np->wn_region = copyp->wn_region; - np->wn_affixID = copyp->wn_affixID; - } - - // Link the new node in the list, there will be one ref. - np->wn_refs = 1; - if (copyprev != NULL) - *copyprev = np; - copyprev = &np->wn_sibling; - - // Let "node" point to the head of the copied list. - if (copyp == node) - node = np; - } - } - - // Look for the sibling that has the same character. 
They are sorted - // on byte value, thus stop searching when a sibling is found with a - // higher byte value. For zero bytes (end of word) the sorting is - // done on flags and then on affixID. - while (node != NULL - && (node->wn_byte < word[i] - || (node->wn_byte == NUL - && (flags < 0 - ? node->wn_affixID < (unsigned)affixID - : (node->wn_flags < (unsigned)(flags & WN_MASK) - || (node->wn_flags == (flags & WN_MASK) - && (spin->si_sugtree - ? (node->wn_region & 0xffff) < region - : node->wn_affixID - < (unsigned)affixID))))))) { - prev = &node->wn_sibling; - node = *prev; - } - if (node == NULL - || node->wn_byte != word[i] - || (word[i] == NUL - && (flags < 0 - || spin->si_sugtree - || node->wn_flags != (flags & WN_MASK) - || node->wn_affixID != affixID))) { - // Allocate a new node. - np = get_wordnode(spin); - if (np == NULL) - return FAIL; - np->wn_byte = word[i]; - - // If "node" is NULL this is a new child or the end of the sibling - // list: ref count is one. Otherwise use ref count of sibling and - // make ref count of sibling one (matters when inserting in front - // of the list of siblings). - if (node == NULL) - np->wn_refs = 1; - else { - np->wn_refs = node->wn_refs; - node->wn_refs = 1; - } - if (prev != NULL) - *prev = np; - np->wn_sibling = node; - node = np; - } - - if (word[i] == NUL) { - node->wn_flags = flags; - node->wn_region |= region; - node->wn_affixID = affixID; - break; - } - prev = &node->wn_child; - node = *prev; - } -#ifdef SPELL_PRINTTREE - smsg((char_u *)"Added \"%s\"", word); - spell_print_tree(root->wn_sibling); -#endif - - // count nr of words added since last message - ++spin->si_msg_count; - - if (spin->si_compress_cnt > 1) { - if (--spin->si_compress_cnt == 1) - // Did enough words to lower the block count limit. - spin->si_blocks_cnt += compress_inc; - } - - // When we have allocated lots of memory we need to compress the word tree - // to free up some room. 
But compression is slow, and we might actually - // need that room, thus only compress in the following situations: - // 1. When not compressed before (si_compress_cnt == 0): when using - // "compress_start" blocks. - // 2. When compressed before and used "compress_inc" blocks before - // adding "compress_added" words (si_compress_cnt > 1). - // 3. When compressed before, added "compress_added" words - // (si_compress_cnt == 1) and the number of free nodes drops below the - // maximum word length. -#ifndef SPELL_COMPRESS_ALLWAYS - if (spin->si_compress_cnt == 1 // NOLINT(readability/braces) - ? spin->si_free_count < MAXWLEN - : spin->si_blocks_cnt >= compress_start) -#endif - { - // Decrement the block counter. The effect is that we compress again - // when the freed up room has been used and another "compress_inc" - // blocks have been allocated. Unless "compress_added" words have - // been added, then the limit is put back again. - spin->si_blocks_cnt -= compress_inc; - spin->si_compress_cnt = compress_added; - - if (spin->si_verbose) { - msg_start(); - msg_puts(_(msg_compressing)); - msg_clr_eos(); - msg_didout = FALSE; - msg_col = 0; - ui_flush(); - } - - // Compress both trees. Either they both have many nodes, which makes - // compression useful, or one of them is small, which means - // compression goes fast. But when filling the soundfold word tree - // there is no keep-case tree. - wordtree_compress(spin, spin->si_foldroot); - if (affixID >= 0) - wordtree_compress(spin, spin->si_keeproot); - } - - return OK; -} - -// Check the 'mkspellmem' option. Return FAIL if it's wrong. -// Sets "sps_flags". 
-int spell_check_msm(void) -{ - char_u *p = p_msm; - long start = 0; - long incr = 0; - long added = 0; - - if (!ascii_isdigit(*p)) - return FAIL; - // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow) - start = (getdigits_long(&p) * 10) / (SBLOCKSIZE / 102); - if (*p != ',') - return FAIL; - ++p; - if (!ascii_isdigit(*p)) - return FAIL; - incr = (getdigits_long(&p) * 102) / (SBLOCKSIZE / 10); - if (*p != ',') - return FAIL; - ++p; - if (!ascii_isdigit(*p)) - return FAIL; - added = getdigits_long(&p) * 1024; - if (*p != NUL) - return FAIL; - - if (start == 0 || incr == 0 || added == 0 || incr > start) - return FAIL; - - compress_start = start; - compress_inc = incr; - compress_added = added; - return OK; -} - -// Get a wordnode_T, either from the list of previously freed nodes or -// allocate a new one. -// Returns NULL when out of memory. -static wordnode_T *get_wordnode(spellinfo_T *spin) -{ - wordnode_T *n; - - if (spin->si_first_free == NULL) - n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), true); - else { - n = spin->si_first_free; - spin->si_first_free = n->wn_child; - memset(n, 0, sizeof(wordnode_T)); - --spin->si_free_count; - } -#ifdef SPELL_PRINTTREE - if (n != NULL) - n->wn_nr = ++spin->si_wordnode_nr; -#endif - return n; -} - -// Decrement the reference count on a node (which is the head of a list of -// siblings). If the reference count becomes zero free the node and its -// siblings. -// Returns the number of nodes actually freed. -static int deref_wordnode(spellinfo_T *spin, wordnode_T *node) -{ - wordnode_T *np; - int cnt = 0; - - if (--node->wn_refs == 0) { - for (np = node; np != NULL; np = np->wn_sibling) { - if (np->wn_child != NULL) - cnt += deref_wordnode(spin, np->wn_child); - free_wordnode(spin, np); - ++cnt; - } - ++cnt; // length field - } - return cnt; -} - -// Free a wordnode_T for re-use later. -// Only the "wn_child" field becomes invalid. 
-static void free_wordnode(spellinfo_T *spin, wordnode_T *n) -{ - n->wn_child = spin->si_first_free; - spin->si_first_free = n; - ++spin->si_free_count; -} - -// Compress a tree: find tails that are identical and can be shared. -static void wordtree_compress(spellinfo_T *spin, wordnode_T *root) -{ - hashtab_T ht; - int n; - int tot = 0; - int perc; - - // Skip the root itself, it's not actually used. The first sibling is the - // start of the tree. - if (root->wn_sibling != NULL) { - hash_init(&ht); - n = node_compress(spin, root->wn_sibling, &ht, &tot); - -#ifndef SPELL_PRINTTREE - if (spin->si_verbose || p_verbose > 2) -#endif - { - if (tot > 1000000) - perc = (tot - n) / (tot / 100); - else if (tot == 0) - perc = 0; - else - perc = (tot - n) * 100 / tot; - vim_snprintf((char *)IObuff, IOSIZE, - _("Compressed %d of %d nodes; %d (%d%%) remaining"), - n, tot, tot - n, perc); - spell_message(spin, IObuff); - } -#ifdef SPELL_PRINTTREE - spell_print_tree(root->wn_sibling); -#endif - hash_clear(&ht); - } -} - -// Compress a node, its siblings and its children, depth first. -// Returns the number of compressed nodes. -static int -node_compress ( - spellinfo_T *spin, - wordnode_T *node, - hashtab_T *ht, - int *tot // total count of nodes before compressing, - // incremented while going through the tree -) -{ - wordnode_T *np; - wordnode_T *tp; - wordnode_T *child; - hash_T hash; - hashitem_T *hi; - int len = 0; - unsigned nr, n; - int compressed = 0; - - // Go through the list of siblings. Compress each child and then try - // finding an identical child to replace it. - // Note that with "child" we mean not just the node that is pointed to, - // but the whole list of siblings of which the child node is the first. - for (np = node; np != NULL && !got_int; np = np->wn_sibling) { - ++len; - if ((child = np->wn_child) != NULL) { - // Compress the child first. This fills hashkey. - compressed += node_compress(spin, child, ht, tot); - - // Try to find an identical child. 
- hash = hash_hash(child->wn_u1.hashkey); - hi = hash_lookup(ht, (const char *)child->wn_u1.hashkey, - STRLEN(child->wn_u1.hashkey), hash); - if (!HASHITEM_EMPTY(hi)) { - // There are children we encountered before with a hash value - // identical to the current child. Now check if there is one - // that is really identical. - for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) - if (node_equal(child, tp)) { - // Found one! Now use that child in place of the - // current one. This means the current child and all - // its siblings is unlinked from the tree. - ++tp->wn_refs; - compressed += deref_wordnode(spin, child); - np->wn_child = tp; - break; - } - if (tp == NULL) { - // No other child with this hash value equals the child of - // the node, add it to the linked list after the first - // item. - tp = HI2WN(hi); - child->wn_u2.next = tp->wn_u2.next; - tp->wn_u2.next = child; - } - } else - // No other child has this hash value, add it to the - // hashtable. - hash_add_item(ht, hi, child->wn_u1.hashkey, hash); - } - } - *tot += len + 1; // add one for the node that stores the length - - // Make a hash key for the node and its siblings, so that we can quickly - // find a lookalike node. This must be done after compressing the sibling - // list, otherwise the hash key would become invalid by the compression. - node->wn_u1.hashkey[0] = len; - nr = 0; - for (np = node; np != NULL; np = np->wn_sibling) { - if (np->wn_byte == NUL) - // end node: use wn_flags, wn_region and wn_affixID - n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); - else - // byte node: use the byte value and the child pointer - n = (unsigned)(np->wn_byte + ((uintptr_t)np->wn_child << 8)); - nr = nr * 101 + n; - } - - // Avoid NUL bytes, it terminates the hash key. - n = nr & 0xff; - node->wn_u1.hashkey[1] = n == 0 ? 1 : n; - n = (nr >> 8) & 0xff; - node->wn_u1.hashkey[2] = n == 0 ? 1 : n; - n = (nr >> 16) & 0xff; - node->wn_u1.hashkey[3] = n == 0 ? 
1 : n; - n = (nr >> 24) & 0xff; - node->wn_u1.hashkey[4] = n == 0 ? 1 : n; - node->wn_u1.hashkey[5] = NUL; - - // Check for CTRL-C pressed now and then. - fast_breakcheck(); - - return compressed; -} - -// Returns true when two nodes have identical siblings and children. -static bool node_equal(wordnode_T *n1, wordnode_T *n2) -{ - wordnode_T *p1; - wordnode_T *p2; - - for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; - p1 = p1->wn_sibling, p2 = p2->wn_sibling) - if (p1->wn_byte != p2->wn_byte - || (p1->wn_byte == NUL - ? (p1->wn_flags != p2->wn_flags - || p1->wn_region != p2->wn_region - || p1->wn_affixID != p2->wn_affixID) - : (p1->wn_child != p2->wn_child))) - break; - - return p1 == NULL && p2 == NULL; -} - - -// Function given to qsort() to sort the REP items on "from" string. -static int rep_compare(const void *s1, const void *s2) -{ - fromto_T *p1 = (fromto_T *)s1; - fromto_T *p2 = (fromto_T *)s2; - - return STRCMP(p1->ft_from, p2->ft_from); -} - -// Write the Vim .spl file "fname". -// Return OK/FAIL. -static int write_vim_spell(spellinfo_T *spin, char_u *fname) -{ - int retval = OK; - int regionmask; - - FILE *fd = mch_fopen((char *)fname, "w"); - if (fd == NULL) { - EMSG2(_(e_notopen), fname); - return FAIL; - } - - // <HEADER>: <fileID> <versionnr> - // <fileID> - size_t fwv = fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, 1, fd); - if (fwv != (size_t)1) - // Catch first write error, don't try writing more. - goto theend; - - putc(VIMSPELLVERSION, fd); // <versionnr> - - // <SECTIONS>: <section> ... <sectionend> - - // SN_INFO: <infotext> - if (spin->si_info != NULL) { - putc(SN_INFO, fd); // <sectionID> - putc(0, fd); // <sectionflags> - size_t i = STRLEN(spin->si_info); - put_bytes(fd, i, 4); // <sectionlen> - fwv &= fwrite(spin->si_info, i, 1, fd); // <infotext> - } - - // SN_REGION: <regionname> ... - // Write the region names only if there is more than one. 
- if (spin->si_region_count > 1) { - putc(SN_REGION, fd); // <sectionID> - putc(SNF_REQUIRED, fd); // <sectionflags> - size_t l = (size_t)spin->si_region_count * 2; - put_bytes(fd, l, 4); // <sectionlen> - fwv &= fwrite(spin->si_region_name, l, 1, fd); - // <regionname> ... - regionmask = (1 << spin->si_region_count) - 1; - } else - regionmask = 0; - - // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> - // - // The table with character flags and the table for case folding. - // This makes sure the same characters are recognized as word characters - // when generating an when using a spell file. - // Skip this for ASCII, the table may conflict with the one used for - // 'encoding'. - // Also skip this for an .add.spl file, the main spell file must contain - // the table (avoids that it conflicts). File is shorter too. - if (!spin->si_ascii && !spin->si_add) { - char_u folchars[128 * 8]; - int flags; - - putc(SN_CHARFLAGS, fd); // <sectionID> - putc(SNF_REQUIRED, fd); // <sectionflags> - - // Form the <folchars> string first, we need to know its length. - size_t l = 0; - for (size_t i = 128; i < 256; ++i) { - if (has_mbyte) - l += (size_t)mb_char2bytes(spelltab.st_fold[i], folchars + l); - else - folchars[l++] = spelltab.st_fold[i]; - } - put_bytes(fd, 1 + 128 + 2 + l, 4); // <sectionlen> - - fputc(128, fd); // <charflagslen> - for (size_t i = 128; i < 256; ++i) { - flags = 0; - if (spelltab.st_isw[i]) - flags |= CF_WORD; - if (spelltab.st_isu[i]) - flags |= CF_UPPER; - fputc(flags, fd); // <charflags> - } - - put_bytes(fd, l, 2); // <folcharslen> - fwv &= fwrite(folchars, l, 1, fd); // <folchars> - } - - // SN_MIDWORD: <midword> - if (spin->si_midword != NULL) { - putc(SN_MIDWORD, fd); // <sectionID> - putc(SNF_REQUIRED, fd); // <sectionflags> - - size_t i = STRLEN(spin->si_midword); - put_bytes(fd, i, 4); // <sectionlen> - fwv &= fwrite(spin->si_midword, i, 1, fd); - // <midword> - } - - // SN_PREFCOND: <prefcondcnt> <prefcond> ... 
- if (!GA_EMPTY(&spin->si_prefcond)) { - putc(SN_PREFCOND, fd); // <sectionID> - putc(SNF_REQUIRED, fd); // <sectionflags> - - size_t l = (size_t)write_spell_prefcond(NULL, &spin->si_prefcond); - put_bytes(fd, l, 4); // <sectionlen> - - write_spell_prefcond(fd, &spin->si_prefcond); - } - - // SN_REP: <repcount> <rep> ... - // SN_SAL: <salflags> <salcount> <sal> ... - // SN_REPSAL: <repcount> <rep> ... - - // round 1: SN_REP section - // round 2: SN_SAL section (unless SN_SOFO is used) - // round 3: SN_REPSAL section - for (unsigned int round = 1; round <= 3; ++round) { - garray_T *gap; - if (round == 1) - gap = &spin->si_rep; - else if (round == 2) { - // Don't write SN_SAL when using a SN_SOFO section - if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) - continue; - gap = &spin->si_sal; - } else - gap = &spin->si_repsal; - - // Don't write the section if there are no items. - if (GA_EMPTY(gap)) - continue; - - // Sort the REP/REPSAL items. - if (round != 2) - qsort(gap->ga_data, (size_t)gap->ga_len, - sizeof(fromto_T), rep_compare); - - int i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); - putc(i, fd); // <sectionID> - - // This is for making suggestions, section is not required. - putc(0, fd); // <sectionflags> - - // Compute the length of what follows. 
- size_t l = 2; // count <repcount> or <salcount> - assert(gap->ga_len >= 0); - for (size_t i = 0; i < (size_t)gap->ga_len; ++i) { - fromto_T *ftp = &((fromto_T *)gap->ga_data)[i]; - l += 1 + STRLEN(ftp->ft_from); // count <*fromlen> and <*from> - l += 1 + STRLEN(ftp->ft_to); // count <*tolen> and <*to> - } - if (round == 2) - ++l; // count <salflags> - put_bytes(fd, l, 4); // <sectionlen> - - if (round == 2) { - int i = 0; - if (spin->si_followup) - i |= SAL_F0LLOWUP; - if (spin->si_collapse) - i |= SAL_COLLAPSE; - if (spin->si_rem_accents) - i |= SAL_REM_ACCENTS; - putc(i, fd); // <salflags> - } - - put_bytes(fd, (uintmax_t)gap->ga_len, 2); // <repcount> or <salcount> - for (size_t i = 0; i < (size_t)gap->ga_len; ++i) { - // <rep> : <repfromlen> <repfrom> <reptolen> <repto> - // <sal> : <salfromlen> <salfrom> <saltolen> <salto> - fromto_T *ftp = &((fromto_T *)gap->ga_data)[i]; - for (unsigned int rr = 1; rr <= 2; ++rr) { - char_u *p = rr == 1 ? ftp->ft_from : ftp->ft_to; - l = STRLEN(p); - assert(l < INT_MAX); - putc((int)l, fd); - if (l > 0) - fwv &= fwrite(p, l, 1, fd); - } - } - - } - - // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> - // This is for making suggestions, section is not required. - if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) { - putc(SN_SOFO, fd); // <sectionID> - putc(0, fd); // <sectionflags> - - size_t l = STRLEN(spin->si_sofofr); - put_bytes(fd, l + STRLEN(spin->si_sofoto) + 4, 4); // <sectionlen> - - put_bytes(fd, l, 2); // <sofofromlen> - fwv &= fwrite(spin->si_sofofr, l, 1, fd); // <sofofrom> - - l = STRLEN(spin->si_sofoto); - put_bytes(fd, l, 2); // <sofotolen> - fwv &= fwrite(spin->si_sofoto, l, 1, fd); // <sofoto> - } - - // SN_WORDS: <word> ... - // This is for making suggestions, section is not required. 
- if (spin->si_commonwords.ht_used > 0) { - putc(SN_WORDS, fd); // <sectionID> - putc(0, fd); // <sectionflags> - - // round 1: count the bytes - // round 2: write the bytes - for (unsigned int round = 1; round <= 2; ++round) { - size_t todo; - size_t len = 0; - hashitem_T *hi; - - todo = spin->si_commonwords.ht_used; - for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) - if (!HASHITEM_EMPTY(hi)) { - size_t l = STRLEN(hi->hi_key) + 1; - len += l; - if (round == 2) // <word> - fwv &= fwrite(hi->hi_key, l, 1, fd); - --todo; - } - if (round == 1) - put_bytes(fd, len, 4); // <sectionlen> - } - } - - // SN_MAP: <mapstr> - // This is for making suggestions, section is not required. - if (!GA_EMPTY(&spin->si_map)) { - putc(SN_MAP, fd); // <sectionID> - putc(0, fd); // <sectionflags> - size_t l = (size_t)spin->si_map.ga_len; - put_bytes(fd, l, 4); // <sectionlen> - fwv &= fwrite(spin->si_map.ga_data, l, 1, fd); // <mapstr> - } - - // SN_SUGFILE: <timestamp> - // This is used to notify that a .sug file may be available and at the - // same time allows for checking that a .sug file that is found matches - // with this .spl file. That's because the word numbers must be exactly - // right. - if (!spin->si_nosugfile - && (!GA_EMPTY(&spin->si_sal) - || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) { - putc(SN_SUGFILE, fd); // <sectionID> - putc(0, fd); // <sectionflags> - put_bytes(fd, 8, 4); // <sectionlen> - - // Set si_sugtime and write it to the file. - spin->si_sugtime = time(NULL); - put_time(fd, spin->si_sugtime); // <timestamp> - } - - // SN_NOSPLITSUGS: nothing - // This is used to notify that no suggestions with word splits are to be - // made. - if (spin->si_nosplitsugs) { - putc(SN_NOSPLITSUGS, fd); // <sectionID> - putc(0, fd); // <sectionflags> - put_bytes(fd, 0, 4); // <sectionlen> - } - - // SN_NOCOMPUNDSUGS: nothing - // This is used to notify that no suggestions with compounds are to be - // made. 
- if (spin->si_nocompoundsugs) { - putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID> - putc(0, fd); // <sectionflags> - put_bytes(fd, 0, 4); // <sectionlen> - } - - // SN_COMPOUND: compound info. - // We don't mark it required, when not supported all compound words will - // be bad words. - if (spin->si_compflags != NULL) { - putc(SN_COMPOUND, fd); // <sectionID> - putc(0, fd); // <sectionflags> - - size_t l = STRLEN(spin->si_compflags); - assert(spin->si_comppat.ga_len >= 0); - for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; ++i) { - l += STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; - } - put_bytes(fd, l + 7, 4); // <sectionlen> - - putc(spin->si_compmax, fd); // <compmax> - putc(spin->si_compminlen, fd); // <compminlen> - putc(spin->si_compsylmax, fd); // <compsylmax> - putc(0, fd); // for Vim 7.0b compatibility - putc(spin->si_compoptions, fd); // <compoptions> - put_bytes(fd, (uintmax_t)spin->si_comppat.ga_len, 2); // <comppatcount> - for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; ++i) { - char_u *p = ((char_u **)(spin->si_comppat.ga_data))[i]; - assert(STRLEN(p) < INT_MAX); - putc((int)STRLEN(p), fd); // <comppatlen> - fwv &= fwrite(p, STRLEN(p), 1, fd); // <comppattext> - } - // <compflags> - fwv &= fwrite(spin->si_compflags, STRLEN(spin->si_compflags), 1, fd); - } - - // SN_NOBREAK: NOBREAK flag - if (spin->si_nobreak) { - putc(SN_NOBREAK, fd); // <sectionID> - putc(0, fd); // <sectionflags> - - // It's empty, the presence of the section flags the feature. - put_bytes(fd, 0, 4); // <sectionlen> - } - - // SN_SYLLABLE: syllable info. - // We don't mark it required, when not supported syllables will not be - // counted. 
- if (spin->si_syllable != NULL) { - putc(SN_SYLLABLE, fd); // <sectionID> - putc(0, fd); // <sectionflags> - - size_t l = STRLEN(spin->si_syllable); - put_bytes(fd, l, 4); // <sectionlen> - fwv &= fwrite(spin->si_syllable, l, 1, fd); // <syllable> - } - - // end of <SECTIONS> - putc(SN_END, fd); // <sectionend> - - - // <LWORDTREE> <KWORDTREE> <PREFIXTREE> - spin->si_memtot = 0; - for (unsigned int round = 1; round <= 3; ++round) { - wordnode_T *tree; - if (round == 1) - tree = spin->si_foldroot->wn_sibling; - else if (round == 2) - tree = spin->si_keeproot->wn_sibling; - else - tree = spin->si_prefroot->wn_sibling; - - // Clear the index and wnode fields in the tree. - clear_node(tree); - - // Count the number of nodes. Needed to be able to allocate the - // memory when reading the nodes. Also fills in index for shared - // nodes. - size_t nodecount = (size_t)put_node(NULL, tree, 0, regionmask, round == 3); - - // number of nodes in 4 bytes - put_bytes(fd, nodecount, 4); // <nodecount> - assert(nodecount + nodecount * sizeof(int) < INT_MAX); - spin->si_memtot += (int)(nodecount + nodecount * sizeof(int)); - - // Write the nodes. - (void)put_node(fd, tree, 0, regionmask, round == 3); - } - - // Write another byte to check for errors (file system full). - if (putc(0, fd) == EOF) - retval = FAIL; -theend: - if (fclose(fd) == EOF) - retval = FAIL; - - if (fwv != (size_t)1) - retval = FAIL; - if (retval == FAIL) - EMSG(_(e_write)); - - return retval; -} - -// Clear the index and wnode fields of "node", it siblings and its -// children. This is needed because they are a union with other items to save -// space. -static void clear_node(wordnode_T *node) -{ - wordnode_T *np; - - if (node != NULL) - for (np = node; np != NULL; np = np->wn_sibling) { - np->wn_u1.index = 0; - np->wn_u2.wnode = NULL; - - if (np->wn_byte != NUL) - clear_node(np->wn_child); - } -} - - -// Dump a word tree at node "node". -// -// This first writes the list of possible bytes (siblings). 
Then for each -// byte recursively write the children. -// -// NOTE: The code here must match the code in read_tree_node(), since -// assumptions are made about the indexes (so that we don't have to write them -// in the file). -// -// Returns the number of nodes used. -static int -put_node ( - FILE *fd, // NULL when only counting - wordnode_T *node, - int idx, - int regionmask, - bool prefixtree // true for PREFIXTREE -) -{ - // If "node" is zero the tree is empty. - if (node == NULL) - return 0; - - // Store the index where this node is written. - node->wn_u1.index = idx; - - // Count the number of siblings. - int siblingcount = 0; - for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) - ++siblingcount; - - // Write the sibling count. - if (fd != NULL) - putc(siblingcount, fd); // <siblingcount> - - // Write each sibling byte and optionally extra info. - for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) { - if (np->wn_byte == 0) { - if (fd != NULL) { - // For a NUL byte (end of word) write the flags etc. - if (prefixtree) { - // In PREFIXTREE write the required affixID and the - // associated condition nr (stored in wn_region). The - // byte value is misused to store the "rare" and "not - // combining" flags - if (np->wn_flags == (uint16_t)PFX_FLAGS) - putc(BY_NOFLAGS, fd); // <byte> - else { - putc(BY_FLAGS, fd); // <byte> - putc(np->wn_flags, fd); // <pflags> - } - putc(np->wn_affixID, fd); // <affixID> - put_bytes(fd, (uintmax_t)np->wn_region, 2); // <prefcondnr> - } else { - // For word trees we write the flag/region items. 
- int flags = np->wn_flags; - if (regionmask != 0 && np->wn_region != regionmask) - flags |= WF_REGION; - if (np->wn_affixID != 0) - flags |= WF_AFX; - if (flags == 0) { - // word without flags or region - putc(BY_NOFLAGS, fd); // <byte> - } else { - if (np->wn_flags >= 0x100) { - putc(BY_FLAGS2, fd); // <byte> - putc(flags, fd); // <flags> - putc((int)((unsigned)flags >> 8), fd); // <flags2> - } else { - putc(BY_FLAGS, fd); // <byte> - putc(flags, fd); // <flags> - } - if (flags & WF_REGION) - putc(np->wn_region, fd); // <region> - if (flags & WF_AFX) - putc(np->wn_affixID, fd); // <affixID> - } - } - } - } else { - if (np->wn_child->wn_u1.index != 0 - && np->wn_child->wn_u2.wnode != node) { - // The child is written elsewhere, write the reference. - if (fd != NULL) { - putc(BY_INDEX, fd); // <byte> - put_bytes(fd, (uintmax_t)np->wn_child->wn_u1.index, 3); // <nodeidx> - } - } else if (np->wn_child->wn_u2.wnode == NULL) - // We will write the child below and give it an index. - np->wn_child->wn_u2.wnode = node; - - if (fd != NULL) - if (putc(np->wn_byte, fd) == EOF) { // <byte> or <xbyte> - EMSG(_(e_write)); - return 0; - } - } - } - - // Space used in the array when reading: one for each sibling and one for - // the count. - int newindex = idx + siblingcount + 1; - - // Recursively dump the children of each sibling. - for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) - if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) - newindex = put_node(fd, np->wn_child, newindex, regionmask, - prefixtree); - - return newindex; -} - - -// ":mkspell [-ascii] outfile infile ..." -// ":mkspell [-ascii] addfile" -void ex_mkspell(exarg_T *eap) -{ - int fcount; - char_u **fnames; - char_u *arg = eap->arg; - bool ascii = false; - - if (STRNCMP(arg, "-ascii", 6) == 0) { - ascii = true; - arg = skipwhite(arg + 6); - } - - // Expand all the remaining arguments (e.g., $VIMRUNTIME). 
- if (get_arglist_exp(arg, &fcount, &fnames, false) == OK) { - mkspell(fcount, fnames, ascii, eap->forceit, false); - FreeWild(fcount, fnames); - } -} - -// Create the .sug file. -// Uses the soundfold info in "spin". -// Writes the file with the name "wfname", with ".spl" changed to ".sug". -static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname) -{ - char_u *fname = NULL; - int len; - slang_T *slang; - bool free_slang = false; - - // Read back the .spl file that was written. This fills the required - // info for soundfolding. This also uses less memory than the - // pointer-linked version of the trie. And it avoids having two versions - // of the code for the soundfolding stuff. - // It might have been done already by spell_reload_one(). - for (slang = first_lang; slang != NULL; slang = slang->sl_next) - if (path_full_compare(wfname, slang->sl_fname, FALSE) == kEqualFiles) - break; - if (slang == NULL) { - spell_message(spin, (char_u *)_("Reading back spell file...")); - slang = spell_load_file(wfname, NULL, NULL, false); - if (slang == NULL) - return; - free_slang = true; - } - - // Clear the info in "spin" that is used. - spin->si_blocks = NULL; - spin->si_blocks_cnt = 0; - spin->si_compress_cnt = 0; // will stay at 0 all the time - spin->si_free_count = 0; - spin->si_first_free = NULL; - spin->si_foldwcount = 0; - - // Go through the trie of good words, soundfold each word and add it to - // the soundfold trie. - spell_message(spin, (char_u *)_("Performing soundfolding...")); - if (sug_filltree(spin, slang) == FAIL) - goto theend; - - // Create the table which links each soundfold word with a list of the - // good words it may come from. Creates buffer "spin->si_spellbuf". - // This also removes the wordnr from the NUL byte entries to make - // compression possible. 
- if (sug_maketable(spin) == FAIL) - goto theend; - - smsg(_("Number of words after soundfolding: %" PRId64), - (int64_t)spin->si_spellbuf->b_ml.ml_line_count); - - // Compress the soundfold trie. - spell_message(spin, (char_u *)_(msg_compressing)); - wordtree_compress(spin, spin->si_foldroot); - - // Write the .sug file. - // Make the file name by changing ".spl" to ".sug". - fname = xmalloc(MAXPATHL); - STRLCPY(fname, wfname, MAXPATHL); - len = (int)STRLEN(fname); - fname[len - 2] = 'u'; - fname[len - 1] = 'g'; - sug_write(spin, fname); - -theend: - xfree(fname); - if (free_slang) - slang_free(slang); - free_blocks(spin->si_blocks); - close_spellbuf(spin->si_spellbuf); -} - -// Build the soundfold trie for language "slang". -static int sug_filltree(spellinfo_T *spin, slang_T *slang) -{ - char_u *byts; - idx_T *idxs; - int depth; - idx_T arridx[MAXWLEN]; - int curi[MAXWLEN]; - char_u tword[MAXWLEN]; - char_u tsalword[MAXWLEN]; - int c; - idx_T n; - unsigned words_done = 0; - int wordcount[MAXWLEN]; - - // We use si_foldroot for the soundfolded trie. - spin->si_foldroot = wordtree_alloc(spin); - if (spin->si_foldroot == NULL) - return FAIL; - - // Let tree_add_word() know we're adding to the soundfolded tree - spin->si_sugtree = true; - - // Go through the whole case-folded tree, soundfold each word and put it - // in the trie. - byts = slang->sl_fbyts; - idxs = slang->sl_fidxs; - - arridx[0] = 0; - curi[0] = 1; - wordcount[0] = 0; - - depth = 0; - while (depth >= 0 && !got_int) { - if (curi[depth] > byts[arridx[depth]]) { - // Done all bytes at this node, go up one level. - idxs[arridx[depth]] = wordcount[depth]; - if (depth > 0) - wordcount[depth - 1] += wordcount[depth]; - - --depth; - line_breakcheck(); - } else { - - // Do one more byte at this node. - n = arridx[depth] + curi[depth]; - ++curi[depth]; - - c = byts[n]; - if (c == 0) { - // Sound-fold the word. 
- tword[depth] = NUL; - spell_soundfold(slang, tword, true, tsalword); - - // We use the "flags" field for the MSB of the wordnr, - // "region" for the LSB of the wordnr. - if (tree_add_word(spin, tsalword, spin->si_foldroot, - words_done >> 16, words_done & 0xffff, - 0) == FAIL) - return FAIL; - - ++words_done; - ++wordcount[depth]; - - // Reset the block count each time to avoid compression - // kicking in. - spin->si_blocks_cnt = 0; - - // Skip over any other NUL bytes (same word with different - // flags). - while (byts[n + 1] == 0) { - ++n; - ++curi[depth]; - } - } else { - // Normal char, go one level deeper. - tword[depth++] = c; - arridx[depth] = idxs[n]; - curi[depth] = 1; - wordcount[depth] = 0; - } - } - } - - smsg(_("Total number of words: %d"), words_done); - - return OK; -} - -// Make the table that links each word in the soundfold trie to the words it -// can be produced from. -// This is not unlike lines in a file, thus use a memfile to be able to access -// the table efficiently. -// Returns FAIL when out of memory. -static int sug_maketable(spellinfo_T *spin) -{ - garray_T ga; - int res = OK; - - // Allocate a buffer, open a memline for it and create the swap file - // (uses a temp file, not a .swp file). - spin->si_spellbuf = open_spellbuf(); - - // Use a buffer to store the line info, avoids allocating many small - // pieces of memory. - ga_init(&ga, 1, 100); - - // recursively go through the tree - if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) - res = FAIL; - - ga_clear(&ga); - return res; -} - -// Fill the table for one node and its children. -// Returns the wordnr at the start of the node. -// Returns -1 when out of memory. 
-static int -sug_filltable ( - spellinfo_T *spin, - wordnode_T *node, - int startwordnr, - garray_T *gap // place to store line of numbers -) -{ - wordnode_T *p, *np; - int wordnr = startwordnr; - int nr; - int prev_nr; - - for (p = node; p != NULL; p = p->wn_sibling) { - if (p->wn_byte == NUL) { - gap->ga_len = 0; - prev_nr = 0; - for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) { - ga_grow(gap, 10); - - nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); - // Compute the offset from the previous nr and store the - // offset in a way that it takes a minimum number of bytes. - // It's a bit like utf-8, but without the need to mark - // following bytes. - nr -= prev_nr; - prev_nr += nr; - gap->ga_len += offset2bytes(nr, - (char_u *)gap->ga_data + gap->ga_len); - } - - // add the NUL byte - ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; - - if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, - gap->ga_data, gap->ga_len, TRUE) == FAIL) - return -1; - ++wordnr; - - // Remove extra NUL entries, we no longer need them. We don't - // bother freeing the nodes, the won't be reused anyway. - while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) - p->wn_sibling = p->wn_sibling->wn_sibling; - - // Clear the flags on the remaining NUL node, so that compression - // works a lot better. - p->wn_flags = 0; - p->wn_region = 0; - } else { - wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); - if (wordnr == -1) - return -1; - } - } - return wordnr; -} - -// Convert an offset into a minimal number of bytes. -// Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL -// bytes. -static int offset2bytes(int nr, char_u *buf) -{ - int rem; - int b1, b2, b3, b4; - - // Split the number in parts of base 255. We need to avoid NUL bytes. 
- b1 = nr % 255 + 1; - rem = nr / 255; - b2 = rem % 255 + 1; - rem = rem / 255; - b3 = rem % 255 + 1; - b4 = rem / 255 + 1; - - if (b4 > 1 || b3 > 0x1f) { // 4 bytes - buf[0] = 0xe0 + b4; - buf[1] = b3; - buf[2] = b2; - buf[3] = b1; - return 4; - } - if (b3 > 1 || b2 > 0x3f ) { // 3 bytes - buf[0] = 0xc0 + b3; - buf[1] = b2; - buf[2] = b1; - return 3; - } - if (b2 > 1 || b1 > 0x7f ) { // 2 bytes - buf[0] = 0x80 + b2; - buf[1] = b1; - return 2; - } - // 1 byte - buf[0] = b1; - return 1; -} // Opposite of offset2bytes(). // "pp" points to the bytes and is advanced over it. @@ -7449,86 +2470,12 @@ static int bytes2offset(char_u **pp) return nr; } -// Write the .sug file in "fname". -static void sug_write(spellinfo_T *spin, char_u *fname) -{ - // Create the file. Note that an existing file is silently overwritten! - FILE *fd = mch_fopen((char *)fname, "w"); - if (fd == NULL) { - EMSG2(_(e_notopen), fname); - return; - } - - vim_snprintf((char *)IObuff, IOSIZE, - _("Writing suggestion file %s ..."), fname); - spell_message(spin, IObuff); - - // <SUGHEADER>: <fileID> <versionnr> <timestamp> - if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) { // <fileID> - EMSG(_(e_write)); - goto theend; - } - putc(VIMSUGVERSION, fd); // <versionnr> - - // Write si_sugtime to the file. - put_time(fd, spin->si_sugtime); // <timestamp> - - // <SUGWORDTREE> - spin->si_memtot = 0; - wordnode_T *tree = spin->si_foldroot->wn_sibling; - - // Clear the index and wnode fields in the tree. - clear_node(tree); - - // Count the number of nodes. Needed to be able to allocate the - // memory when reading the nodes. Also fills in index for shared - // nodes. - size_t nodecount = (size_t)put_node(NULL, tree, 0, 0, false); - - // number of nodes in 4 bytes - put_bytes(fd, nodecount, 4); // <nodecount> - assert(nodecount + nodecount * sizeof(int) < INT_MAX); - spin->si_memtot += (int)(nodecount + nodecount * sizeof(int)); - - // Write the nodes. 
- (void)put_node(fd, tree, 0, 0, false); - - // <SUGTABLE>: <sugwcount> <sugline> ... - linenr_T wcount = spin->si_spellbuf->b_ml.ml_line_count; - assert(wcount >= 0); - put_bytes(fd, (uintmax_t)wcount, 4); // <sugwcount> - - for (linenr_T lnum = 1; lnum <= wcount; ++lnum) { - // <sugline>: <sugnr> ... NUL - char_u *line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); - size_t len = STRLEN(line) + 1; - if (fwrite(line, len, 1, fd) == 0) { - EMSG(_(e_write)); - goto theend; - } - assert((size_t)spin->si_memtot + len <= INT_MAX); - spin->si_memtot += (int)len; - } - - // Write another byte to check for errors. - if (putc(0, fd) == EOF) - EMSG(_(e_write)); - - vim_snprintf((char *)IObuff, IOSIZE, - _("Estimated runtime memory use: %d bytes"), spin->si_memtot); - spell_message(spin, IObuff); - -theend: - // close the file - fclose(fd); -} - // Open a spell buffer. This is a nameless buffer that is not in the buffer // list and only contains text lines. Can use a swapfile to reduce memory // use. // Most other fields are invalid! Esp. watch out for string options being // NULL and there is no undo info. -static buf_T *open_spellbuf(void) +buf_T *open_spellbuf(void) { buf_T *buf = xcalloc(1, sizeof(buf_T)); @@ -7541,7 +2488,7 @@ static buf_T *open_spellbuf(void) } // Close the buffer used for spell info. -static void close_spellbuf(buf_T *buf) +void close_spellbuf(buf_T *buf) { if (buf != NULL) { ml_close(buf, TRUE); @@ -7549,471 +2496,8 @@ static void close_spellbuf(buf_T *buf) } } - -// Create a Vim spell file from one or more word lists. -// "fnames[0]" is the output file name. -// "fnames[fcount - 1]" is the last input file name. -// Exception: when "fnames[0]" ends in ".add" it's used as the input file name -// and ".spl" is appended to make the output file name. 
-static void -mkspell ( - int fcount, - char_u **fnames, - bool ascii, // -ascii argument given - bool over_write, // overwrite existing output file - bool added_word // invoked through "zg" -) -{ - char_u *fname = NULL; - char_u *wfname; - char_u **innames; - int incount; - afffile_T *(afile[8]); - int i; - int len; - bool error = false; - spellinfo_T spin; - - memset(&spin, 0, sizeof(spin)); - spin.si_verbose = !added_word; - spin.si_ascii = ascii; - spin.si_followup = true; - spin.si_rem_accents = true; - ga_init(&spin.si_rep, (int)sizeof(fromto_T), 20); - ga_init(&spin.si_repsal, (int)sizeof(fromto_T), 20); - ga_init(&spin.si_sal, (int)sizeof(fromto_T), 20); - ga_init(&spin.si_map, (int)sizeof(char_u), 100); - ga_init(&spin.si_comppat, (int)sizeof(char_u *), 20); - ga_init(&spin.si_prefcond, (int)sizeof(char_u *), 50); - hash_init(&spin.si_commonwords); - spin.si_newcompID = 127; // start compound ID at first maximum - - // default: fnames[0] is output file, following are input files - innames = &fnames[1]; - incount = fcount - 1; - - wfname = xmalloc(MAXPATHL); - - if (fcount >= 1) { - len = (int)STRLEN(fnames[0]); - if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) { - // For ":mkspell path/en.latin1.add" output file is - // "path/en.latin1.add.spl". - innames = &fnames[0]; - incount = 1; - vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); - } else if (fcount == 1) { - // For ":mkspell path/vim" output file is "path/vim.latin1.spl". - innames = &fnames[0]; - incount = 1; - vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, - fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); - } else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) { - // Name ends in ".spl", use as the file name. - STRLCPY(wfname, fnames[0], MAXPATHL); - } else - // Name should be language, make the file name from it. - vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, - fnames[0], spin.si_ascii ? 
(char_u *)"ascii" : spell_enc()); - - // Check for .ascii.spl. - if (strstr((char *)path_tail(wfname), SPL_FNAME_ASCII) != NULL) - spin.si_ascii = true; - - // Check for .add.spl. - if (strstr((char *)path_tail(wfname), SPL_FNAME_ADD) != NULL) - spin.si_add = true; - } - - if (incount <= 0) - EMSG(_(e_invarg)); // need at least output and input names - else if (vim_strchr(path_tail(wfname), '_') != NULL) - EMSG(_("E751: Output file name must not have region name")); - else if (incount > 8) - EMSG(_("E754: Only up to 8 regions supported")); - else { - // Check for overwriting before doing things that may take a lot of - // time. - if (!over_write && os_path_exists(wfname)) { - EMSG(_(e_exists)); - goto theend; - } - if (os_isdir(wfname)) { - EMSG2(_(e_isadir2), wfname); - goto theend; - } - - fname = xmalloc(MAXPATHL); - - // Init the aff and dic pointers. - // Get the region names if there are more than 2 arguments. - for (i = 0; i < incount; ++i) { - afile[i] = NULL; - - if (incount > 1) { - len = (int)STRLEN(innames[i]); - if (STRLEN(path_tail(innames[i])) < 5 - || innames[i][len - 3] != '_') { - EMSG2(_("E755: Invalid region in %s"), innames[i]); - goto theend; - } - spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); - spin.si_region_name[i * 2 + 1] = - TOLOWER_ASC(innames[i][len - 1]); - } - } - spin.si_region_count = incount; - - spin.si_foldroot = wordtree_alloc(&spin); - spin.si_keeproot = wordtree_alloc(&spin); - spin.si_prefroot = wordtree_alloc(&spin); - if (spin.si_foldroot == NULL - || spin.si_keeproot == NULL - || spin.si_prefroot == NULL) { - free_blocks(spin.si_blocks); - goto theend; - } - - // When not producing a .add.spl file clear the character table when - // we encounter one in the .aff file. This means we dump the current - // one in the .spl file if the .aff file doesn't define one. That's - // better than guessing the contents, the table will match a - // previously loaded spell file. 
- if (!spin.si_add) - spin.si_clear_chartab = true; - - // Read all the .aff and .dic files. - // Text is converted to 'encoding'. - // Words are stored in the case-folded and keep-case trees. - for (i = 0; i < incount && !error; ++i) { - spin.si_conv.vc_type = CONV_NONE; - spin.si_region = 1 << i; - - vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); - if (os_path_exists(fname)) { - // Read the .aff file. Will init "spin->si_conv" based on the - // "SET" line. - afile[i] = spell_read_aff(&spin, fname); - if (afile[i] == NULL) - error = true; - else { - // Read the .dic file and store the words in the trees. - vim_snprintf((char *)fname, MAXPATHL, "%s.dic", - innames[i]); - if (spell_read_dic(&spin, fname, afile[i]) == FAIL) - error = true; - } - } else { - // No .aff file, try reading the file as a word list. Store - // the words in the trees. - if (spell_read_wordfile(&spin, innames[i]) == FAIL) - error = true; - } - - // Free any conversion stuff. - convert_setup(&spin.si_conv, NULL, NULL); - } - - if (spin.si_compflags != NULL && spin.si_nobreak) - MSG(_("Warning: both compounding and NOBREAK specified")); - - if (!error && !got_int) { - // Combine tails in the tree. - spell_message(&spin, (char_u *)_(msg_compressing)); - wordtree_compress(&spin, spin.si_foldroot); - wordtree_compress(&spin, spin.si_keeproot); - wordtree_compress(&spin, spin.si_prefroot); - } - - if (!error && !got_int) { - // Write the info in the spell file. - vim_snprintf((char *)IObuff, IOSIZE, - _("Writing spell file %s ..."), wfname); - spell_message(&spin, IObuff); - - error = write_vim_spell(&spin, wfname) == FAIL; - - spell_message(&spin, (char_u *)_("Done!")); - vim_snprintf((char *)IObuff, IOSIZE, - _("Estimated runtime memory use: %d bytes"), spin.si_memtot); - spell_message(&spin, IObuff); - - // If the file is loaded need to reload it. - if (!error) - spell_reload_one(wfname, added_word); - } - - // Free the allocated memory. 
- ga_clear(&spin.si_rep); - ga_clear(&spin.si_repsal); - ga_clear(&spin.si_sal); - ga_clear(&spin.si_map); - ga_clear(&spin.si_comppat); - ga_clear(&spin.si_prefcond); - hash_clear_all(&spin.si_commonwords, 0); - - // Free the .aff file structures. - for (i = 0; i < incount; ++i) - if (afile[i] != NULL) - spell_free_aff(afile[i]); - - // Free all the bits and pieces at once. - free_blocks(spin.si_blocks); - - // If there is soundfolding info and no NOSUGFILE item create the - // .sug file with the soundfolded word trie. - if (spin.si_sugtime != 0 && !error && !got_int) - spell_make_sugfile(&spin, wfname); - - } - -theend: - xfree(fname); - xfree(wfname); -} - -// Display a message for spell file processing when 'verbose' is set or using -// ":mkspell". "str" can be IObuff. -static void spell_message(spellinfo_T *spin, char_u *str) -{ - if (spin->si_verbose || p_verbose > 2) { - if (!spin->si_verbose) - verbose_enter(); - MSG(str); - ui_flush(); - if (!spin->si_verbose) - verbose_leave(); - } -} - -// ":[count]spellgood {word}" -// ":[count]spellwrong {word}" -// ":[count]spellundo {word}" -void ex_spell(exarg_T *eap) -{ - spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, - eap->forceit ? 0 : (int)eap->line2, - eap->cmdidx == CMD_spellundo); -} - -// Add "word[len]" to 'spellfile' as a good or bad word. -void -spell_add_word ( - char_u *word, - int len, - int bad, - int idx, // "zG" and "zW": zero, otherwise index in - // 'spellfile' - bool undo // true for "zug", "zuG", "zuw" and "zuW" -) -{ - FILE *fd = NULL; - buf_T *buf = NULL; - bool new_spf = false; - char_u *fname; - char_u *fnamebuf = NULL; - char_u line[MAXWLEN * 2]; - long fpos, fpos_next = 0; - int i; - char_u *spf; - - if (idx == 0) { // use internal wordlist - if (int_wordlist == NULL) { - int_wordlist = vim_tempname(); - if (int_wordlist == NULL) - return; - } - fname = int_wordlist; - } else { - // If 'spellfile' isn't set figure out a good default value. 
- if (*curwin->w_s->b_p_spf == NUL) { - init_spellfile(); - new_spf = true; - } - - if (*curwin->w_s->b_p_spf == NUL) { - EMSG2(_(e_notset), "spellfile"); - return; - } - fnamebuf = xmalloc(MAXPATHL); - - for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) { - copy_option_part(&spf, fnamebuf, MAXPATHL, ","); - if (i == idx) - break; - if (*spf == NUL) { - EMSGN(_("E765: 'spellfile' does not have %" PRId64 " entries"), idx); - xfree(fnamebuf); - return; - } - } - - // Check that the user isn't editing the .add file somewhere. - buf = buflist_findname_exp(fnamebuf); - if (buf != NULL && buf->b_ml.ml_mfp == NULL) - buf = NULL; - if (buf != NULL && bufIsChanged(buf)) { - EMSG(_(e_bufloaded)); - xfree(fnamebuf); - return; - } - - fname = fnamebuf; - } - - if (bad || undo) { - // When the word appears as good word we need to remove that one, - // since its flags sort before the one with WF_BANNED. - fd = mch_fopen((char *)fname, "r"); - if (fd != NULL) { - while (!vim_fgets(line, MAXWLEN * 2, fd)) { - fpos = fpos_next; - fpos_next = ftell(fd); - if (STRNCMP(word, line, len) == 0 - && (line[len] == '/' || line[len] < ' ')) { - // Found duplicate word. Remove it by writing a '#' at - // the start of the line. Mixing reading and writing - // doesn't work for all systems, close the file first. - fclose(fd); - fd = mch_fopen((char *)fname, "r+"); - if (fd == NULL) - break; - if (fseek(fd, fpos, SEEK_SET) == 0) { - fputc('#', fd); - if (undo) { - home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); - smsg(_("Word '%.*s' removed from %s"), - len, word, NameBuff); - } - } - fseek(fd, fpos_next, SEEK_SET); - } - } - if (fd != NULL) - fclose(fd); - } - } - - if (!undo) { - fd = mch_fopen((char *)fname, "a"); - if (fd == NULL && new_spf) { - char_u *p; - - // We just initialized the 'spellfile' option and can't open the - // file. We may need to create the "spell" directory first. We - // already checked the runtime directory is writable in - // init_spellfile(). 
- if (!dir_of_file_exists(fname) && (p = path_tail_with_sep(fname)) != fname) { - int c = *p; - - // The directory doesn't exist. Try creating it and opening - // the file again. - *p = NUL; - os_mkdir((char *)fname, 0755); - *p = c; - fd = mch_fopen((char *)fname, "a"); - } - } - - if (fd == NULL) - EMSG2(_(e_notopen), fname); - else { - if (bad) - fprintf(fd, "%.*s/!\n", len, word); - else - fprintf(fd, "%.*s\n", len, word); - fclose(fd); - - home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); - smsg(_("Word '%.*s' added to %s"), len, word, NameBuff); - } - } - - if (fd != NULL) { - // Update the .add.spl file. - mkspell(1, &fname, false, true, true); - - // If the .add file is edited somewhere, reload it. - if (buf != NULL) - buf_reload(buf, buf->b_orig_mode); - - redraw_all_later(SOME_VALID); - } - xfree(fnamebuf); -} - -// Initialize 'spellfile' for the current buffer. -static void init_spellfile(void) -{ - char_u *buf; - int l; - char_u *fname; - char_u *rtp; - char_u *lend; - bool aspath = false; - char_u *lstart = curbuf->b_s.b_p_spl; - - if (*curwin->w_s->b_p_spl != NUL && !GA_EMPTY(&curwin->w_s->b_langp)) { - buf = xmalloc(MAXPATHL); - - // Find the end of the language name. Exclude the region. If there - // is a path separator remember the start of the tail. - for (lend = curwin->w_s->b_p_spl; *lend != NUL - && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) - if (vim_ispathsep(*lend)) { - aspath = true; - lstart = lend + 1; - } - - // Loop over all entries in 'runtimepath'. Use the first one where we - // are allowed to write. - rtp = p_rtp; - while (*rtp != NUL) { - if (aspath) - // Use directory of an entry with path, e.g., for - // "/dir/lg.utf-8.spl" use "/dir". - STRLCPY(buf, curbuf->b_s.b_p_spl, - lstart - curbuf->b_s.b_p_spl); - else - // Copy the path from 'runtimepath' to buf[]. 
- copy_option_part(&rtp, buf, MAXPATHL, ","); - if (os_file_is_writable((char *)buf) == 2) { - // Use the first language name from 'spelllang' and the - // encoding used in the first loaded .spl file. - if (aspath) - STRLCPY(buf, curbuf->b_s.b_p_spl, - lend - curbuf->b_s.b_p_spl + 1); - else { - // Create the "spell" directory if it doesn't exist yet. - l = (int)STRLEN(buf); - vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); - if (os_file_is_writable((char *)buf) != 2) { - os_mkdir((char *)buf, 0755); - } - - l = (int)STRLEN(buf); - vim_snprintf((char *)buf + l, MAXPATHL - l, - "/%.*s", (int)(lend - lstart), lstart); - } - l = (int)STRLEN(buf); - fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) - ->lp_slang->sl_fname; - vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", - fname != NULL - && strstr((char *)path_tail(fname), ".ascii.") != NULL - ? (char_u *)"ascii" : spell_enc()); - set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); - break; - } - aspath = false; - } - - xfree(buf); - } -} - // Init the chartab used for spelling for ASCII. -static void clear_spell_chartab(spelltab_T *sp) +void clear_spell_chartab(spelltab_T *sp) { int i; @@ -8085,123 +2569,6 @@ void init_spell_chartab(void) } } -// Set the spell character tables from strings in the affix file. -static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp) -{ - // We build the new tables here first, so that we can compare with the - // previous one. - spelltab_T new_st; - char_u *pf = fol, *pl = low, *pu = upp; - int f, l, u; - - clear_spell_chartab(&new_st); - - while (*pf != NUL) { - if (*pl == NUL || *pu == NUL) { - EMSG(_(e_affform)); - return FAIL; - } - f = mb_ptr2char_adv(&pf); - l = mb_ptr2char_adv(&pl); - u = mb_ptr2char_adv(&pu); - // Every character that appears is a word character. 
- if (f < 256) - new_st.st_isw[f] = true; - if (l < 256) - new_st.st_isw[l] = true; - if (u < 256) - new_st.st_isw[u] = true; - - // if "LOW" and "FOL" are not the same the "LOW" char needs - // case-folding - if (l < 256 && l != f) { - if (f >= 256) { - EMSG(_(e_affrange)); - return FAIL; - } - new_st.st_fold[l] = f; - } - - // if "UPP" and "FOL" are not the same the "UPP" char needs - // case-folding, it's upper case and the "UPP" is the upper case of - // "FOL" . - if (u < 256 && u != f) { - if (f >= 256) { - EMSG(_(e_affrange)); - return FAIL; - } - new_st.st_fold[u] = f; - new_st.st_isu[u] = true; - new_st.st_upper[f] = u; - } - } - - if (*pl != NUL || *pu != NUL) { - EMSG(_(e_affform)); - return FAIL; - } - - return set_spell_finish(&new_st); -} - -// Set the spell character tables from strings in the .spl file. -static void -set_spell_charflags ( - char_u *flags, - int cnt, // length of "flags" - char_u *fol -) -{ - // We build the new tables here first, so that we can compare with the - // previous one. 
- spelltab_T new_st; - int i; - char_u *p = fol; - int c; - - clear_spell_chartab(&new_st); - - for (i = 0; i < 128; ++i) { - if (i < cnt) { - new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; - new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; - } - - if (*p != NUL) { - c = mb_ptr2char_adv(&p); - new_st.st_fold[i + 128] = c; - if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) - new_st.st_upper[c] = i + 128; - } - } - - (void)set_spell_finish(&new_st); -} - -static int set_spell_finish(spelltab_T *new_st) -{ - int i; - - if (did_set_spelltab) { - // check that it's the same table - for (i = 0; i < 256; ++i) { - if (spelltab.st_isw[i] != new_st->st_isw[i] - || spelltab.st_isu[i] != new_st->st_isu[i] - || spelltab.st_fold[i] != new_st->st_fold[i] - || spelltab.st_upper[i] != new_st->st_upper[i]) { - EMSG(_("E763: Word characters differ between spell files")); - return FAIL; - } - } - } else { - // copy the new spelltab into the one being used - spelltab = *new_st; - did_set_spelltab = true; - } - - return OK; -} - /// Returns true if "p" points to a word character. /// As a special case we see "midword" characters as word character when it is /// followed by a word character. This finds they'there but not 'they there'. @@ -8240,7 +2607,7 @@ static bool spell_iswordp(char_u *p, win_T *wp) // Returns true if "p" points to a word character. // Unlike spell_iswordp() this doesn't check for "midword" characters. -static bool spell_iswordp_nmw(char_u *p, win_T *wp) +bool spell_iswordp_nmw(char_u *p, win_T *wp) { int c; @@ -8289,41 +2656,11 @@ static bool spell_iswordp_w(int *p, win_T *wp) return spelltab.st_isw[*s]; } -// Write the table with prefix conditions to the .spl file. -// When "fd" is NULL only count the length of what is written. 
-static int write_spell_prefcond(FILE *fd, garray_T *gap) -{ - assert(gap->ga_len >= 0); - - if (fd != NULL) - put_bytes(fd, (uintmax_t)gap->ga_len, 2); // <prefcondcnt> - - size_t totlen = 2 + (size_t)gap->ga_len; // <prefcondcnt> and <condlen> bytes - size_t x = 1; // collect return value of fwrite() - for (int i = 0; i < gap->ga_len; ++i) { - // <prefcond> : <condlen> <condstr> - char_u *p = ((char_u **)gap->ga_data)[i]; - if (p != NULL) { - size_t len = STRLEN(p); - if (fd != NULL) { - assert(len <= INT_MAX); - fputc((int)len, fd); - x &= fwrite(p, len, 1, fd); - } - totlen += len; - } else if (fd != NULL) - fputc(0, fd); - } - - assert(totlen <= INT_MAX); - return (int)totlen; -} - // Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. // Uses the character definitions from the .spl file. // When using a multi-byte 'encoding' the length may change! // Returns FAIL when something wrong. -static int spell_casefold(char_u *str, int len, char_u *buf, int buflen) +int spell_casefold(char_u *str, int len, char_u *buf, int buflen) { int i; @@ -9047,174 +3384,6 @@ static void spell_suggest_intern(suginfo_T *su, bool interactive) } } -// Load the .sug files for languages that have one and weren't loaded yet. -static void suggest_load_files(void) -{ - langp_T *lp; - slang_T *slang; - char_u *dotp; - FILE *fd; - char_u buf[MAXWLEN]; - int i; - time_t timestamp; - int wcount; - int wordnr; - garray_T ga; - int c; - - // Do this for all languages that support sound folding. - for (int lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) { - lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); - slang = lp->lp_slang; - if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) { - // Change ".spl" to ".sug" and open the file. When the file isn't - // found silently skip it. Do set "sl_sugloaded" so that we - // don't try again and again. 
- slang->sl_sugloaded = true; - - dotp = vim_strrchr(slang->sl_fname, '.'); - if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) - continue; - STRCPY(dotp, ".sug"); - fd = mch_fopen((char *)slang->sl_fname, "r"); - if (fd == NULL) - goto nextone; - - // <SUGHEADER>: <fileID> <versionnr> <timestamp> - for (i = 0; i < VIMSUGMAGICL; ++i) - buf[i] = getc(fd); // <fileID> - if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) { - EMSG2(_("E778: This does not look like a .sug file: %s"), - slang->sl_fname); - goto nextone; - } - c = getc(fd); // <versionnr> - if (c < VIMSUGVERSION) { - EMSG2(_("E779: Old .sug file, needs to be updated: %s"), - slang->sl_fname); - goto nextone; - } else if (c > VIMSUGVERSION) { - EMSG2(_("E780: .sug file is for newer version of Vim: %s"), - slang->sl_fname); - goto nextone; - } - - // Check the timestamp, it must be exactly the same as the one in - // the .spl file. Otherwise the word numbers won't match. - timestamp = get8ctime(fd); // <timestamp> - if (timestamp != slang->sl_sugtime) { - EMSG2(_("E781: .sug file doesn't match .spl file: %s"), - slang->sl_fname); - goto nextone; - } - - // <SUGWORDTREE>: <wordtree> - // Read the trie with the soundfolded words. - if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, - false, 0) != 0) { -someerror: - EMSG2(_("E782: error while reading .sug file: %s"), - slang->sl_fname); - slang_clear_sug(slang); - goto nextone; - } - - // <SUGTABLE>: <sugwcount> <sugline> ... - // - // Read the table with word numbers. We use a file buffer for - // this, because it's so much like a file with lines. Makes it - // possible to swap the info and save on memory use. - slang->sl_sugbuf = open_spellbuf(); - - // <sugwcount> - wcount = get4c(fd); - if (wcount < 0) - goto someerror; - - // Read all the wordnr lists into the buffer, one NUL terminated - // list per line. 
- ga_init(&ga, 1, 100); - for (wordnr = 0; wordnr < wcount; ++wordnr) { - ga.ga_len = 0; - for (;; ) { - c = getc(fd); // <sugline> - if (c < 0) { - goto someerror; - } - GA_APPEND(char_u, &ga, c); - if (c == NUL) - break; - } - if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, - ga.ga_data, ga.ga_len, TRUE) == FAIL) - goto someerror; - } - ga_clear(&ga); - - // Need to put word counts in the word tries, so that we can find - // a word by its number. - tree_count_words(slang->sl_fbyts, slang->sl_fidxs); - tree_count_words(slang->sl_sbyts, slang->sl_sidxs); - -nextone: - if (fd != NULL) - fclose(fd); - STRCPY(dotp, ".spl"); - } - } -} - -// Fill in the wordcount fields for a trie. -// Returns the total number of words. -static void tree_count_words(char_u *byts, idx_T *idxs) -{ - int depth; - idx_T arridx[MAXWLEN]; - int curi[MAXWLEN]; - int c; - idx_T n; - int wordcount[MAXWLEN]; - - arridx[0] = 0; - curi[0] = 1; - wordcount[0] = 0; - depth = 0; - while (depth >= 0 && !got_int) { - if (curi[depth] > byts[arridx[depth]]) { - // Done all bytes at this node, go up one level. - idxs[arridx[depth]] = wordcount[depth]; - if (depth > 0) - wordcount[depth - 1] += wordcount[depth]; - - --depth; - fast_breakcheck(); - } else { - // Do one more byte at this node. - n = arridx[depth] + curi[depth]; - ++curi[depth]; - - c = byts[n]; - if (c == 0) { - // End of word, count it. - ++wordcount[depth]; - - // Skip over any other NUL bytes (same word with different - // flags). - while (byts[n + 1] == 0) { - ++n; - ++curi[depth]; - } - } else { - // Normal char, go one level deeper to count the words. - ++depth; - arridx[depth] = idxs[n]; - curi[depth] = 1; - wordcount[depth] = 0; - } - } - } -} - // Free the info put in "*su" by spell_find_suggest(). 
static void spell_find_cleanup(suginfo_T *su) { @@ -9227,15 +3396,14 @@ static void spell_find_cleanup(suginfo_T *su) hash_clear_all(&su->su_banned, 0); } -// Make a copy of "word", with the first letter upper or lower cased, to -// "wcopy[MAXWLEN]". "word" must not be empty. -// The result is NUL terminated. -static void -onecap_copy ( - char_u *word, - char_u *wcopy, - bool upper // true: first letter made upper case -) +/// Make a copy of "word", with the first letter upper or lower cased, to +/// "wcopy[MAXWLEN]". "word" must not be empty. +/// The result is NUL terminated. +/// +/// @param[in] word source string to copy +/// @param[in,out] wcopy copied string, with case of first letter changed +/// @param[in] upper True to upper case, otherwise lower case +void onecap_copy(char_u *word, char_u *wcopy, bool upper) { char_u *p; int c; @@ -10338,7 +4506,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so } if (has_mbyte) { - n = mb_cptr2len(p); + n = MB_CPTR2LEN(p); c = mb_ptr2char(p); if (p[n] == NUL) c2 = NUL; @@ -10415,9 +4583,9 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so // "fword" here, it's changed back afterwards at STATE_UNSWAP3. 
p = fword + sp->ts_fidx; if (has_mbyte) { - n = mb_cptr2len(p); + n = MB_CPTR2LEN(p); c = mb_ptr2char(p); - fl = mb_cptr2len(p + n); + fl = MB_CPTR2LEN(p + n); c2 = mb_ptr2char(p + n); if (!soundfold && !spell_iswordp(p + n + fl, curwin)) c3 = c; // don't swap non-word char @@ -10513,10 +4681,10 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so ++depth; p = fword + sp->ts_fidx; if (has_mbyte) { - n = mb_cptr2len(p); + n = MB_CPTR2LEN(p); c = mb_ptr2char(p); - fl = mb_cptr2len(p + n); - fl += mb_cptr2len(p + n + fl); + fl = MB_CPTR2LEN(p + n); + fl += MB_CPTR2LEN(p + n + fl); memmove(p, p + n, fl); mb_char2bytes(c, p + fl); stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; @@ -10565,10 +4733,10 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so ++depth; p = fword + sp->ts_fidx; if (has_mbyte) { - n = mb_cptr2len(p); - n += mb_cptr2len(p + n); + n = MB_CPTR2LEN(p); + n += MB_CPTR2LEN(p + n); c = mb_ptr2char(p + n); - tl = mb_cptr2len(p + n); + tl = MB_CPTR2LEN(p + n); memmove(p + tl, p, n); mb_char2bytes(c, p); stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; @@ -10811,10 +4979,11 @@ static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword) // round[depth] == 1: Try using the folded-case character. // round[depth] == 2: Try using the upper-case character. if (has_mbyte) { - flen = mb_cptr2len(fword + fwordidx[depth]); - ulen = mb_cptr2len(uword + uwordidx[depth]); - } else + flen = MB_CPTR2LEN(fword + fwordidx[depth]); + ulen = MB_CPTR2LEN(uword + uwordidx[depth]); + } else { ulen = flen = 1; + } if (round[depth] == 1) { p = fword + fwordidx[depth]; l = flen; @@ -11412,67 +5581,6 @@ static void make_case_word(char_u *fword, char_u *cword, int flags) STRCPY(cword, fword); } -// Use map string "map" for languages "lp". 
-static void set_map_str(slang_T *lp, char_u *map) -{ - char_u *p; - int headc = 0; - int c; - int i; - - if (*map == NUL) { - lp->sl_has_map = false; - return; - } - lp->sl_has_map = true; - - // Init the array and hash tables empty. - for (i = 0; i < 256; ++i) - lp->sl_map_array[i] = 0; - hash_init(&lp->sl_map_hash); - - // The similar characters are stored separated with slashes: - // "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and - // before the same slash. For characters above 255 sl_map_hash is used. - for (p = map; *p != NUL; ) { - c = mb_cptr2char_adv(&p); - if (c == '/') - headc = 0; - else { - if (headc == 0) - headc = c; - - // Characters above 255 don't fit in sl_map_array[], put them in - // the hash table. Each entry is the char, a NUL the headchar and - // a NUL. - if (c >= 256) { - int cl = mb_char2len(c); - int headcl = mb_char2len(headc); - char_u *b; - hash_T hash; - hashitem_T *hi; - - b = xmalloc(cl + headcl + 2); - mb_char2bytes(c, b); - b[cl] = NUL; - mb_char2bytes(headc, b + cl + 1); - b[cl + 1 + headcl] = NUL; - hash = hash_hash(b); - hi = hash_lookup(&lp->sl_map_hash, (const char *)b, STRLEN(b), hash); - if (HASHITEM_EMPTY(hi)) { - hash_add_item(&lp->sl_map_hash, hi, b, hash); - } else { - // This should have been checked when generating the .spl - // file. - EMSG(_("E783: duplicate char in MAP entry")); - xfree(b); - } - } else - lp->sl_map_array[c] = headc; - } - } -} - // Returns true if "c1" and "c2" are similar characters according to the MAP // lines in the .aff file. static bool similar_chars(slang_T *slang, int c1, int c2) @@ -11777,23 +5885,22 @@ char_u *eval_soundfold(char_u *word) return vim_strsave(word); } -// Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". -// -// There are many ways to turn a word into a sound-a-like representation. The -// oldest is Soundex (1918!). 
A nice overview can be found in "Approximate -// swedish name matching - survey and test of different algorithms" by Klas -// Erikson. -// -// We support two methods: -// 1. SOFOFROM/SOFOTO do a simple character mapping. -// 2. SAL items define a more advanced sound-folding (and much slower). -static void -spell_soundfold ( - slang_T *slang, - char_u *inword, - bool folded, // "inword" is already case-folded - char_u *res -) +/// Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". +/// +/// There are many ways to turn a word into a sound-a-like representation. The +/// oldest is Soundex (1918!). A nice overview can be found in "Approximate +/// swedish name matching - survey and test of different algorithms" by Klas +/// Erikson. +/// +/// We support two methods: +/// 1. SOFOFROM/SOFOTO do a simple character mapping. +/// 2. SAL items define a more advanced sound-folding (and much slower). +/// +/// @param[in] slang +/// @param[in] inword word to soundfold +/// @param[in] folded whether inword is already case-folded +/// @param[in,out] res destination for soundfolded word +void spell_soundfold(slang_T *slang, char_u *inword, bool folded, char_u *res) { char_u fword[MAXWLEN]; char_u *word; diff --git a/src/nvim/spell.h b/src/nvim/spell.h index 3a03cb2ef6..e950644a6d 100644 --- a/src/nvim/spell.h +++ b/src/nvim/spell.h @@ -3,6 +3,8 @@ #include <stdbool.h> +#include "nvim/spell_defs.h" + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "spell.h.generated.h" #endif diff --git a/src/nvim/spell_defs.h b/src/nvim/spell_defs.h new file mode 100644 index 0000000000..c54a7f5390 --- /dev/null +++ b/src/nvim/spell_defs.h @@ -0,0 +1,287 @@ +#ifndef NVIM_SPELL_DEFS_H +#define NVIM_SPELL_DEFS_H + +#include <stdbool.h> +#include <stdint.h> + +#include "nvim/buffer_defs.h" +#include "nvim/garray.h" +#include "nvim/regexp_defs.h" +#include "nvim/types.h" + +#define MAXWLEN 254 // Assume max. word len is this many bytes. 
+ // Some places assume a word length fits in a + // byte, thus it can't be above 255. + +// The type used for indexes in the word tree needs to be at least 4 bytes. If int +// is 8 bytes we could use something smaller, but what? +typedef int idx_T; + +# define SPL_FNAME_TMPL "%s.%s.spl" +# define SPL_FNAME_ADD ".add." +# define SPL_FNAME_ASCII ".ascii." + +// Flags used for a word. Only the lowest byte can be used, the region byte +// comes above it. +#define WF_REGION 0x01 // region byte follows +#define WF_ONECAP 0x02 // word with one capital (or all capitals) +#define WF_ALLCAP 0x04 // word must be all capitals +#define WF_RARE 0x08 // rare word +#define WF_BANNED 0x10 // bad word +#define WF_AFX 0x20 // affix ID follows +#define WF_FIXCAP 0x40 // keep-case word, allcap not allowed +#define WF_KEEPCAP 0x80 // keep-case word + +// for <flags2>, shifted up one byte to be used in wn_flags +#define WF_HAS_AFF 0x0100 // word includes affix +#define WF_NEEDCOMP 0x0200 // word only valid in compound +#define WF_NOSUGGEST 0x0400 // word not to be suggested +#define WF_COMPROOT 0x0800 // already compounded word, COMPOUNDROOT +#define WF_NOCOMPBEF 0x1000 // no compounding before this word +#define WF_NOCOMPAFT 0x2000 // no compounding after this word + +// flags for <pflags> +#define WFP_RARE 0x01 // rare prefix +#define WFP_NC 0x02 // prefix is not combining +#define WFP_UP 0x04 // to-upper prefix +#define WFP_COMPPERMIT 0x08 // prefix with COMPOUNDPERMITFLAG +#define WFP_COMPFORBID 0x10 // prefix with COMPOUNDFORBIDFLAG + +// Flags for postponed prefixes in "sl_pidxs". Must be above affixID (one +// byte) and prefcondnr (two bytes). 
+#define WF_RAREPFX (WFP_RARE << 24) // rare postponed prefix +#define WF_PFX_NC (WFP_NC << 24) // non-combining postponed prefix +#define WF_PFX_UP (WFP_UP << 24) // to-upper postponed prefix +#define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) // postponed prefix with + // COMPOUNDPERMITFLAG +#define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) // postponed prefix with + // COMPOUNDFORBIDFLAG + + +// flags for <compoptions> +#define COMP_CHECKDUP 1 // CHECKCOMPOUNDDUP +#define COMP_CHECKREP 2 // CHECKCOMPOUNDREP +#define COMP_CHECKCASE 4 // CHECKCOMPOUNDCASE +#define COMP_CHECKTRIPLE 8 // CHECKCOMPOUNDTRIPLE + +// Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, +// si_repsal, sl_rep, and si_sal. Not for sl_sal! +// One replacement: from "ft_from" to "ft_to". +typedef struct fromto_S { + char_u *ft_from; + char_u *ft_to; +} fromto_T; + +// Info from "SAL" entries in ".aff" file used in sl_sal. +// The info is split for quick processing by spell_soundfold(). +// Note that "sm_oneof" and "sm_rules" point into sm_lead. +typedef struct salitem_S { + char_u *sm_lead; // leading letters + int sm_leadlen; // length of "sm_lead" + char_u *sm_oneof; // letters from () or NULL + char_u *sm_rules; // rules like ^, $, priority + char_u *sm_to; // replacement. + int *sm_lead_w; // wide character copy of "sm_lead" + int *sm_oneof_w; // wide character copy of "sm_oneof" + int *sm_to_w; // wide character copy of "sm_to" +} salitem_T; + +typedef int salfirst_T; + +// Values for SP_*ERROR are negative, positive values are used by +// read_cnt_string(). +#define SP_TRUNCERROR -1 // spell file truncated error +#define SP_FORMERROR -2 // format error in spell file +#define SP_OTHERERROR -3 // other error while reading spell file + +// Structure used to store words and other info for one language, loaded from +// a .spl file. +// The main access is through the tree in "sl_fbyts/sl_fidxs", storing the +// case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words. 
+// +// The "byts" array stores the possible bytes in each tree node, preceded by +// the number of possible bytes, sorted on byte value: +// <len> <byte1> <byte2> ... +// The "idxs" array stores the index of the child node corresponding to the +// byte in "byts". +// Exception: when the byte is zero, the word may end here and "idxs" holds +// the flags, region mask and affixID for the word. There may be several +// zeros in sequence for alternative flag/region/affixID combinations. +typedef struct slang_S slang_T; + +struct slang_S { + slang_T *sl_next; // next language + char_u *sl_name; // language name "en", "en.rare", "nl", etc. + char_u *sl_fname; // name of .spl file + bool sl_add; // true if it's a .add file. + + char_u *sl_fbyts; // case-folded word bytes + idx_T *sl_fidxs; // case-folded word indexes + char_u *sl_kbyts; // keep-case word bytes + idx_T *sl_kidxs; // keep-case word indexes + char_u *sl_pbyts; // prefix tree word bytes + idx_T *sl_pidxs; // prefix tree word indexes + + char_u *sl_info; // infotext string or NULL + + char_u sl_regions[17]; // table with up to 8 region names plus NUL + + char_u *sl_midword; // MIDWORD string or NULL + + hashtab_T sl_wordcount; // hashtable with word count, wordcount_T + + int sl_compmax; // COMPOUNDWORDMAX (default: MAXWLEN) + int sl_compminlen; // COMPOUNDMIN (default: 0) + int sl_compsylmax; // COMPOUNDSYLMAX (default: MAXWLEN) + int sl_compoptions; // COMP_* flags + garray_T sl_comppat; // CHECKCOMPOUNDPATTERN items + regprog_T *sl_compprog; // COMPOUNDRULE turned into a regexp program + // (NULL when no compounding) + char_u *sl_comprules; // all COMPOUNDRULE concatenated (or NULL) + char_u *sl_compstartflags; // flags for first compound word + char_u *sl_compallflags; // all flags for compound words + bool sl_nobreak; // When true: no spaces between words + char_u *sl_syllable; // SYLLABLE repeatable chars or NULL + garray_T sl_syl_items; // syllable items + + int sl_prefixcnt; // number of items in 
"sl_prefprog" + regprog_T **sl_prefprog; // table with regprogs for prefixes + + garray_T sl_rep; // list of fromto_T entries from REP lines + int16_t sl_rep_first[256]; // indexes where byte first appears, -1 if + // there is none + garray_T sl_sal; // list of salitem_T entries from SAL lines + salfirst_T sl_sal_first[256]; // indexes where byte first appears, -1 if + // there is none + bool sl_followup; // SAL followup + bool sl_collapse; // SAL collapse_result + bool sl_rem_accents; // SAL remove_accents + bool sl_sofo; // SOFOFROM and SOFOTO instead of SAL items: + // "sl_sal_first" maps chars, when has_mbyte + // "sl_sal" is a list of wide char lists. + garray_T sl_repsal; // list of fromto_T entries from REPSAL lines + int16_t sl_repsal_first[256]; // sl_rep_first for REPSAL lines + bool sl_nosplitsugs; // don't suggest splitting a word + bool sl_nocompoundsugs; // don't suggest compounding + + // Info from the .sug file. Loaded on demand. + time_t sl_sugtime; // timestamp for .sug file + char_u *sl_sbyts; // soundfolded word bytes + idx_T *sl_sidxs; // soundfolded word indexes + buf_T *sl_sugbuf; // buffer with word number table + bool sl_sugloaded; // true when .sug file was loaded or failed to + // load + + bool sl_has_map; // true, if there is a MAP line + hashtab_T sl_map_hash; // MAP for multi-byte chars + int sl_map_array[256]; // MAP for first 256 chars + hashtab_T sl_sounddone; // table with soundfolded words that have been + // handled, see add_sound_suggest() +}; + +// Structure used in "b_langp", filled from 'spelllang'. 
+typedef struct langp_S { + slang_T *lp_slang; // info for this language + slang_T *lp_sallang; // language used for sound folding or NULL + slang_T *lp_replang; // language used for REP items or NULL + int lp_region; // bitmask for region or REGION_ALL +} langp_T; + +#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i)) + +#define VIMSUGMAGIC "VIMsug" // string at start of Vim .sug file +#define VIMSUGMAGICL 6 +#define VIMSUGVERSION 1 + +#define REGION_ALL 0xff // word valid in all regions + +// The tables used for recognizing word characters according to spelling. +// These are only used for the first 256 characters of 'encoding'. +typedef struct { + bool st_isw[256]; // flags: is word char + bool st_isu[256]; // flags: is uppercase char + char_u st_fold[256]; // chars: folded case + char_u st_upper[256]; // chars: upper case +} spelltab_T; + +// For finding suggestions: At each node in the tree these states are tried: +typedef enum { + STATE_START = 0, // At start of node check for NUL bytes (goodword + // ends); if badword ends there is a match, otherwise + // try splitting word. + STATE_NOPREFIX, // try without prefix + STATE_SPLITUNDO, // Undo splitting. + STATE_ENDNUL, // Past NUL bytes at start of the node. + STATE_PLAIN, // Use each byte of the node. + STATE_DEL, // Delete a byte from the bad word. + STATE_INS_PREP, // Prepare for inserting bytes. + STATE_INS, // Insert a byte in the bad word. + STATE_SWAP, // Swap two bytes. + STATE_UNSWAP, // Undo swap two characters. + STATE_SWAP3, // Swap two characters over three. + STATE_UNSWAP3, // Undo Swap two characters over three. + STATE_UNROT3L, // Undo rotate three characters left + STATE_UNROT3R, // Undo rotate three characters right + STATE_REP_INI, // Prepare for using REP items. + STATE_REP, // Use matching REP items from the .aff file. + STATE_REP_UNDO, // Undo a REP item replacement. + STATE_FINAL // End of this node. 
+} state_T; + +// Struct to keep the state at each level in suggest_try_change(). +typedef struct trystate_S { + state_T ts_state; // state at this level, STATE_ + int ts_score; // score + idx_T ts_arridx; // index in tree array, start of node + short ts_curi; // index in list of child nodes + char_u ts_fidx; // index in fword[], case-folded bad word + char_u ts_fidxtry; // ts_fidx at which bytes may be changed + char_u ts_twordlen; // valid length of tword[] + char_u ts_prefixdepth; // stack depth for end of prefix or + // PFD_PREFIXTREE or PFD_NOPREFIX + char_u ts_flags; // TSF_ flags + char_u ts_tcharlen; // number of bytes in tword character + char_u ts_tcharidx; // current byte index in tword character + char_u ts_isdiff; // DIFF_ values + char_u ts_fcharstart; // index in fword where badword char started + char_u ts_prewordlen; // length of word in "preword[]" + char_u ts_splitoff; // index in "tword" after last split + char_u ts_splitfidx; // "ts_fidx" at word split + char_u ts_complen; // nr of compound words used + char_u ts_compsplit; // index for "compflags" where word was split + char_u ts_save_badflags; // su_badflags saved here + char_u ts_delidx; // index in fword for char that was deleted, + // valid when "ts_flags" has TSF_DIDDEL +} trystate_T; + +// Use our own character-case definitions, because the current locale may +// differ from what the .spl file uses. +// These must not be called with negative number! +#include <wchar.h> // for towupper() and towlower() +// Multi-byte implementation. For Unicode we can call utf_*(), but don't do +// that for ASCII, because we don't want to use 'casemap' here. Otherwise use +// the "w" library function for characters above 255. +#define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ + : (c) < \ + 256 ? (int)spelltab.st_fold[c] : (int)towlower(c)) + +#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ + : (c) < \ + 256 ? 
(int)spelltab.st_upper[c] : (int)towupper(c)) + +#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ + : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) + +// First language that is loaded, start of the linked list of loaded +// languages. +extern slang_T *first_lang; + +// file used for "zG" and "zW" +extern char_u *int_wordlist; + +extern spelltab_T spelltab; +extern int did_set_spelltab; + +extern char *e_format; + +#endif // NVIM_SPELL_DEFS_H diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c new file mode 100644 index 0000000000..c108ae4a2c --- /dev/null +++ b/src/nvim/spellfile.c @@ -0,0 +1,5659 @@ +// spellfile.c: code for reading and writing spell files. +// +// See spell.c for information about spell checking. + +// Vim spell file format: <HEADER> +// <SECTIONS> +// <LWORDTREE> +// <KWORDTREE> +// <PREFIXTREE> +// +// <HEADER>: <fileID> <versionnr> +// +// <fileID> 8 bytes "VIMspell" +// <versionnr> 1 byte VIMSPELLVERSION +// +// +// Sections make it possible to add information to the .spl file without +// making it incompatible with previous versions. There are two kinds of +// sections: +// 1. Not essential for correct spell checking. E.g. for making suggestions. +// These are skipped when not supported. +// 2. Optional information, but essential for spell checking when present. +// E.g. conditions for affixes. When this section is present but not +// supported an error message is given. +// +// <SECTIONS>: <section> ... 
<sectionend> +// +// <section>: <sectionID> <sectionflags> <sectionlen> (section contents) +// +// <sectionID> 1 byte number from 0 to 254 identifying the section +// +// <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct +// spell checking +// +// <sectionlen> 4 bytes length of section contents, MSB first +// +// <sectionend> 1 byte SN_END +// +// +// sectionID == SN_INFO: <infotext> +// <infotext> N bytes free format text with spell file info (version, +// website, etc) +// +// sectionID == SN_REGION: <regionname> ... +// <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. +// First <regionname> is region 1. +// +// sectionID == SN_CHARFLAGS: <charflagslen> <charflags> +// <folcharslen> <folchars> +// <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). +// <charflags> N bytes List of flags (first one is for character 128): +// 0x01 word character CF_WORD +// 0x02 upper-case character CF_UPPER +// <folcharslen> 2 bytes Number of bytes in <folchars>. +// <folchars> N bytes Folded characters, first one is for character 128. +// +// sectionID == SN_MIDWORD: <midword> +// <midword> N bytes Characters that are word characters only when used +// in the middle of a word. +// +// sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... +// <prefcondcnt> 2 bytes Number of <prefcond> items following. +// <prefcond> : <condlen> <condstr> +// <condlen> 1 byte Length of <condstr>. +// <condstr> N bytes Condition for the prefix. +// +// sectionID == SN_REP: <repcount> <rep> ... +// <repcount> 2 bytes number of <rep> items, MSB first. +// <rep> : <repfromlen> <repfrom> <reptolen> <repto> +// <repfromlen> 1 byte length of <repfrom> +// <repfrom> N bytes "from" part of replacement +// <reptolen> 1 byte length of <repto> +// <repto> N bytes "to" part of replacement +// +// sectionID == SN_REPSAL: <repcount> <rep> ... +// just like SN_REP but for soundfolded words +// +// sectionID == SN_SAL: <salflags> <salcount> <sal> ... 
+// <salflags> 1 byte flags for soundsalike conversion: +// SAL_F0LLOWUP +// SAL_COLLAPSE +// SAL_REM_ACCENTS +// <salcount> 2 bytes number of <sal> items following +// <sal> : <salfromlen> <salfrom> <saltolen> <salto> +// <salfromlen> 1 byte length of <salfrom> +// <salfrom> N bytes "from" part of soundsalike +// <saltolen> 1 byte length of <salto> +// <salto> N bytes "to" part of soundsalike +// +// sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> +// <sofofromlen> 2 bytes length of <sofofrom> +// <sofofrom> N bytes "from" part of soundfold +// <sofotolen> 2 bytes length of <sofoto> +// <sofoto> N bytes "to" part of soundfold +// +// sectionID == SN_SUGFILE: <timestamp> +// <timestamp> 8 bytes time in seconds that must match with .sug file +// +// sectionID == SN_NOSPLITSUGS: nothing +// +// sectionID == SN_NOCOMPOUNDSUGS: nothing +// +// sectionID == SN_WORDS: <word> ... +// <word> N bytes NUL terminated common word +// +// sectionID == SN_MAP: <mapstr> +// <mapstr> N bytes String with sequences of similar characters, +// separated by slashes. +// +// sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> +// <comppatcount> <comppattern> ... <compflags> +// <compmax> 1 byte Maximum nr of words in compound word. +// <compminlen> 1 byte Minimal word length for compounding. +// <compsylmax> 1 byte Maximum nr of syllables in compound word. +// <compoptions> 2 bytes COMP_ flags. +// <comppatcount> 2 bytes number of <comppattern> following +// <compflags> N bytes Flags from COMPOUNDRULE items, separated by +// slashes. +// +// <comppattern>: <comppatlen> <comppattext> +// <comppatlen> 1 byte length of <comppattext> +// <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN +// +// sectionID == SN_NOBREAK: (empty, its presence is what matters) +// +// sectionID == SN_SYLLABLE: <syllable> +// <syllable> N bytes String from SYLLABLE item. 
+// +// <LWORDTREE>: <wordtree> +// +// <KWORDTREE>: <wordtree> +// +// <PREFIXTREE>: <wordtree> +// +// +// <wordtree>: <nodecount> <nodedata> ... +// +// <nodecount> 4 bytes Number of nodes following. MSB first. +// +// <nodedata>: <siblingcount> <sibling> ... +// +// <siblingcount> 1 byte Number of siblings in this node. The siblings +// follow in sorted order. +// +// <sibling>: <byte> [ <nodeidx> <xbyte> +// | <flags> [<flags2>] [<region>] [<affixID>] +// | [<pflags>] <affixID> <prefcondnr> ] +// +// <byte> 1 byte Byte value of the sibling. Special cases: +// BY_NOFLAGS: End of word without flags and for all +// regions. +// For PREFIXTREE <affixID> and +// <prefcondnr> follow. +// BY_FLAGS: End of word, <flags> follow. +// For PREFIXTREE <pflags>, <affixID> +// and <prefcondnr> follow. +// BY_FLAGS2: End of word, <flags> and <flags2> +// follow. Not used in PREFIXTREE. +// BY_INDEX: Child of sibling is shared, <nodeidx> +// and <xbyte> follow. +// +// <nodeidx> 3 bytes Index of child for this sibling, MSB first. +// +// <xbyte> 1 byte Byte value of the sibling. +// +// <flags> 1 byte Bitmask of: +// WF_ALLCAP word must have only capitals +// WF_ONECAP first char of word must be capital +// WF_KEEPCAP keep-case word +// WF_FIXCAP keep-case word, all caps not allowed +// WF_RARE rare word +// WF_BANNED bad word +// WF_REGION <region> follows +// WF_AFX <affixID> follows +// +// <flags2> 1 byte Bitmask of: +// WF_HAS_AFF >> 8 word includes affix +// WF_NEEDCOMP >> 8 word only valid in compound +// WF_NOSUGGEST >> 8 word not used for suggestions +// WF_COMPROOT >> 8 word already a compound +// WF_NOCOMPBEF >> 8 no compounding before this word +// WF_NOCOMPAFT >> 8 no compounding after this word +// +// <pflags> 1 byte Bitmask of: +// WFP_RARE rare prefix +// WFP_NC non-combining prefix +// WFP_UP letter after prefix made upper case +// +// <region> 1 byte Bitmask for regions in which word is valid. When +// omitted it's valid in all regions. 
+// Lowest bit is for region 1. +// +// <affixID> 1 byte ID of affix that can be used with this word. In +// PREFIXTREE used for the required prefix ID. +// +// <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list +// from HEADER. +// +// All text characters are in 'encoding', but stored as single bytes. + +// Vim .sug file format: <SUGHEADER> +// <SUGWORDTREE> +// <SUGTABLE> +// +// <SUGHEADER>: <fileID> <versionnr> <timestamp> +// +// <fileID> 6 bytes "VIMsug" +// <versionnr> 1 byte VIMSUGVERSION +// <timestamp> 8 bytes timestamp that must match with .spl file +// +// +// <SUGWORDTREE>: <wordtree> (see above, no flags or region used) +// +// +// <SUGTABLE>: <sugwcount> <sugline> ... +// +// <sugwcount> 4 bytes number of <sugline> following +// +// <sugline>: <sugnr> ... NUL +// +// <sugnr>: X bytes word number that results in this soundfolded word, +// stored as an offset to the previous number in as +// few bytes as possible, see offset2bytes()) + +#include <stdio.h> +#include <wctype.h> + +#include "nvim/vim.h" +#include "nvim/spell_defs.h" +#include "nvim/ascii.h" +#include "nvim/buffer.h" +#include "nvim/charset.h" +#include "nvim/ex_cmds2.h" +#include "nvim/fileio.h" +#include "nvim/memory.h" +#include "nvim/memline.h" +#include "nvim/misc1.h" +#include "nvim/option.h" +#include "nvim/os/os.h" +#include "nvim/path.h" +#include "nvim/regexp.h" +#include "nvim/screen.h" +#include "nvim/spell.h" +#include "nvim/spellfile.h" +#include "nvim/ui.h" +#include "nvim/undo.h" + +#ifndef UNIX // it's in os/unix_defs.h for Unix +# include <time.h> // for time_t +#endif + +// Special byte values for <byte>. Some are only used in the tree for +// postponed prefixes, some only in the other trees. This is a bit messy... 
+#define BY_NOFLAGS 0 // end of word without flags or region; for + // postponed prefix: no <pflags> +#define BY_INDEX 1 // child is shared, index follows +#define BY_FLAGS 2 // end of word, <flags> byte follows; for + // postponed prefix: <pflags> follows +#define BY_FLAGS2 3 // end of word, <flags> and <flags2> bytes + // follow; never used in prefix tree +#define BY_SPECIAL BY_FLAGS2 // highest special byte value + +// Flags used in .spl file for soundsalike flags. +#define SAL_F0LLOWUP 1 +#define SAL_COLLAPSE 2 +#define SAL_REM_ACCENTS 4 + +#define VIMSPELLMAGIC "VIMspell" // string at start of Vim spell file +#define VIMSPELLMAGICL 8 +#define VIMSPELLVERSION 50 + +// Section IDs. Only renumber them when VIMSPELLVERSION changes! +#define SN_REGION 0 // <regionname> section +#define SN_CHARFLAGS 1 // charflags section +#define SN_MIDWORD 2 // <midword> section +#define SN_PREFCOND 3 // <prefcond> section +#define SN_REP 4 // REP items section +#define SN_SAL 5 // SAL items section +#define SN_SOFO 6 // soundfolding section +#define SN_MAP 7 // MAP items section +#define SN_COMPOUND 8 // compound words section +#define SN_SYLLABLE 9 // syllable section +#define SN_NOBREAK 10 // NOBREAK section +#define SN_SUGFILE 11 // timestamp for .sug file +#define SN_REPSAL 12 // REPSAL items section +#define SN_WORDS 13 // common words +#define SN_NOSPLITSUGS 14 // don't split word for suggestions +#define SN_INFO 15 // info section +#define SN_NOCOMPOUNDSUGS 16 // don't compound for suggestions +#define SN_END 255 // end of sections + +#define SNF_REQUIRED 1 // <sectionflags>: required section + +#define CF_WORD 0x01 +#define CF_UPPER 0x02 + +static char *e_spell_trunc = N_("E758: Truncated spell file"); +static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); +static char *e_affname = N_("Affix name too long in %s line %d: %s"); +static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); +static char *e_affrange = N_( + "E762: Character in 
FOL, LOW or UPP is out of range"); +static char *msg_compressing = N_("Compressing word tree..."); + +#define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff + // and .dic file. +// Main structure to store the contents of a ".aff" file. +typedef struct afffile_S { + char_u *af_enc; // "SET", normalized, alloc'ed string or NULL + int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG + unsigned af_rare; // RARE ID for rare word + unsigned af_keepcase; // KEEPCASE ID for keep-case word + unsigned af_bad; // BAD ID for banned word + unsigned af_needaffix; // NEEDAFFIX ID + unsigned af_circumfix; // CIRCUMFIX ID + unsigned af_needcomp; // NEEDCOMPOUND ID + unsigned af_comproot; // COMPOUNDROOT ID + unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID + unsigned af_comppermit; // COMPOUNDPERMITFLAG ID + unsigned af_nosuggest; // NOSUGGEST ID + int af_pfxpostpone; // postpone prefixes without chop string and + // without flags + bool af_ignoreextra; // IGNOREEXTRA present + hashtab_T af_pref; // hashtable for prefixes, affheader_T + hashtab_T af_suff; // hashtable for suffixes, affheader_T + hashtab_T af_comp; // hashtable for compound flags, compitem_T +} afffile_T; + +#define AFT_CHAR 0 // flags are one character +#define AFT_LONG 1 // flags are two characters +#define AFT_CAPLONG 2 // flags are one or two characters +#define AFT_NUM 3 // flags are numbers, comma separated + +typedef struct affentry_S affentry_T; +// Affix entry from ".aff" file. Used for prefixes and suffixes. 
+struct affentry_S { + affentry_T *ae_next; // next affix with same name/number + char_u *ae_chop; // text to chop off basic word (can be NULL) + char_u *ae_add; // text to add to basic word (can be NULL) + char_u *ae_flags; // flags on the affix (can be NULL) + char_u *ae_cond; // condition (NULL for ".") + regprog_T *ae_prog; // regexp program for ae_cond or NULL + char ae_compforbid; // COMPOUNDFORBIDFLAG found + char ae_comppermit; // COMPOUNDPERMITFLAG found +}; + +# define AH_KEY_LEN 17 // 2 x 8 bytes + NUL + +// Affix header from ".aff" file. Used for af_pref and af_suff. +typedef struct affheader_S { + char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix + unsigned ah_flag; // affix name as number, uses "af_flagtype" + int ah_newID; // prefix ID after renumbering; 0 if not used + int ah_combine; // suffix may combine with prefix + int ah_follows; // another affix block should be following + affentry_T *ah_first; // first affix entry +} affheader_T; + +#define HI2AH(hi) ((affheader_T *)(hi)->hi_key) + +// Flag used in compound items. +typedef struct compitem_S { + char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound + unsigned ci_flag; // affix name as number, uses "af_flagtype" + int ci_newID; // affix ID after renumbering. +} compitem_T; + +#define HI2CI(hi) ((compitem_T *)(hi)->hi_key) + +// Structure that is used to store the items in the word tree. This avoids +// the need to keep track of each allocated thing, everything is freed all at +// once after ":mkspell" is done. +// Note: "sb_next" must be just before "sb_data" to make sure the alignment of +// "sb_data" is correct for systems where pointers must be aligned on +// pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). 
#define SBLOCKSIZE 16000        // size of sb_data
typedef struct sblock_S sblock_T;
struct sblock_S {
  int sb_used;                  // nr of bytes already in use
  sblock_T *sb_next;            // next block in list
  char_u sb_data[1];            // data, actually longer
};

// A node in the tree.
typedef struct wordnode_S wordnode_T;
struct wordnode_S {
  union {   // shared to save space
    char_u hashkey[6];          // the hash key, only used while compressing
    int index;                  // index in written nodes (valid after first
                                // round)
  } wn_u1;
  union {   // shared to save space
    wordnode_T *next;           // next node with same hash key
    wordnode_T *wnode;          // parent node that will write this node
  } wn_u2;
  wordnode_T *wn_child;         // child (next byte in word)
  wordnode_T *wn_sibling;       // next sibling (alternate byte in word,
                                // always sorted)
  int wn_refs;                  // Nr. of references to this node.  Only
                                // relevant for first node in a list of
                                // siblings, in following siblings it is
                                // always one.
  char_u wn_byte;               // Byte for this node. NUL for word end

  // Info for when "wn_byte" is NUL.
  // In PREFIXTREE "wn_region" is used for the prefcondnr.
  // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
  // "wn_region" the LSW of the wordnr.
  char_u wn_affixID;            // supported/required prefix ID or 0
  uint16_t wn_flags;            // WF_ flags
  short wn_region;              // region mask

#ifdef SPELL_PRINTTREE
  int wn_nr;                    // sequence nr for printing
#endif
};

#define WN_MASK  0xffff         // mask relevant bits of "wn_flags"

// Convert a hashtab item into a wordnode_T pointer by casting its key
// pointer.  NOTE(review): presumably the key is "wn_u1.hashkey", the first
// member of wordnode_T -- confirm against the code that fills the hashtab.
#define HI2WN(hi)    (wordnode_T *)((hi)->hi_key)

// Info used while reading the spell files.
typedef struct spellinfo_S {
  // Word trees being built.
  wordnode_T *si_foldroot;      // tree with case-folded words
  long si_foldwcount;           // nr of words in si_foldroot

  wordnode_T *si_keeproot;      // tree with keep-case words
  long si_keepwcount;           // nr of words in si_keeproot

  wordnode_T *si_prefroot;      // tree with postponed prefixes

  long si_sugtree;              // creating the soundfolding trie

  // Memory bookkeeping for the trees.
  sblock_T *si_blocks;          // memory blocks used
  long si_blocks_cnt;           // memory blocks allocated
  int si_did_emsg;              // TRUE when ran out of memory

  long si_compress_cnt;         // words to add before lowering
                                // compression limit
  wordnode_T *si_first_free;    // List of nodes that have been freed during
                                // compression, linked by "wn_child" field.
  long si_free_count;           // number of nodes in si_first_free
#ifdef SPELL_PRINTTREE
  int si_wordnode_nr;           // sequence nr for nodes
#endif
  buf_T *si_spellbuf;           // buffer used to store soundfold word table

  // General settings and progress state.
  int si_ascii;                 // handling only ASCII words
  int si_add;                   // addition file
  int si_clear_chartab;         // when TRUE clear char tables
  int si_region;                // region mask
  vimconv_T si_conv;            // for conversion to 'encoding'
  int si_memtot;                // runtime memory used
  int si_verbose;               // verbose messages
  int si_msg_count;             // number of words added since last message
  char_u *si_info;              // info text chars or NULL
  int si_region_count;          // number of regions supported (1 when there
                                // are no regions)
  char_u si_region_name[17];    // region names; used only if
                                // si_region_count > 1

  // Items collected from the .aff file.
  garray_T si_rep;              // list of fromto_T entries from REP lines
  garray_T si_repsal;           // list of fromto_T entries from REPSAL lines
  garray_T si_sal;              // list of fromto_T entries from SAL lines
  char_u *si_sofofr;            // SOFOFROM text
  char_u *si_sofoto;            // SOFOTO text
  int si_nosugfile;             // NOSUGFILE item found
  int si_nosplitsugs;           // NOSPLITSUGS item found
  int si_nocompoundsugs;        // NOCOMPOUNDSUGS item found
  int si_followup;              // soundsalike: ?
  int si_collapse;              // soundsalike: ?
  hashtab_T si_commonwords;     // hashtable for common words
  time_t si_sugtime;            // timestamp for .sug file
  int si_rem_accents;           // soundsalike: remove accents
  garray_T si_map;              // MAP info concatenated
  char_u *si_midword;           // MIDWORD chars or NULL
  int si_compmax;               // max nr of words for compounding
  int si_compminlen;            // minimal length for compounding
  int si_compsylmax;            // max nr of syllables for compounding
  int si_compoptions;           // COMP_ flags
  garray_T si_comppat;          // CHECKCOMPOUNDPATTERN items, each stored as
                                // a string
  char_u *si_compflags;         // flags used for compounding
  char_u si_nobreak;            // NOBREAK
  char_u *si_syllable;          // syllable string
  garray_T si_prefcond;         // table with conditions for postponed
                                // prefixes, each stored as a string
  int si_newprefID;             // current value for ah_newID
  int si_newcompID;             // current value for compound ID
} spellinfo_T;

#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "spellfile.c.generated.h"
#endif

// Load one spell file and store the info into a slang_T.
//
// This is invoked in three ways:
// - From spell_load_cb() to load a spell file for the first time.  "lang" is
//   the language name, "old_lp" is NULL.  Will allocate an slang_T.
// - To reload a spell file that was changed.  "lang" is NULL and "old_lp"
//   points to the existing slang_T.
// - Just after writing a .spl file; it's read back to produce the .sug file.
//   "old_lp" is NULL and "lang" is NULL.  Will allocate an slang_T.
//
// Returns the slang_T the spell file was loaded into.  NULL for error.
+slang_T * +spell_load_file ( + char_u *fname, + char_u *lang, + slang_T *old_lp, + bool silent // no error if file doesn't exist +) +{ + FILE *fd; + char_u buf[VIMSPELLMAGICL]; + char_u *p; + int i; + int n; + int len; + char_u *save_sourcing_name = sourcing_name; + linenr_T save_sourcing_lnum = sourcing_lnum; + slang_T *lp = NULL; + int c = 0; + int res; + + fd = mch_fopen((char *)fname, "r"); + if (fd == NULL) { + if (!silent) + EMSG2(_(e_notopen), fname); + else if (p_verbose > 2) { + verbose_enter(); + smsg((char *)e_notopen, fname); + verbose_leave(); + } + goto endFAIL; + } + if (p_verbose > 2) { + verbose_enter(); + smsg(_("Reading spell file \"%s\""), fname); + verbose_leave(); + } + + if (old_lp == NULL) { + lp = slang_alloc(lang); + + // Remember the file name, used to reload the file when it's updated. + lp->sl_fname = vim_strsave(fname); + + // Check for .add.spl. + lp->sl_add = strstr((char *)path_tail(fname), SPL_FNAME_ADD) != NULL; + } else + lp = old_lp; + + // Set sourcing_name, so that error messages mention the file name. + sourcing_name = fname; + sourcing_lnum = 0; + + // <HEADER>: <fileID> + for (i = 0; i < VIMSPELLMAGICL; ++i) + buf[i] = getc(fd); // <fileID> + if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) { + EMSG(_("E757: This does not look like a spell file")); + goto endFAIL; + } + c = getc(fd); // <versionnr> + if (c < VIMSPELLVERSION) { + EMSG(_("E771: Old spell file, needs to be updated")); + goto endFAIL; + } else if (c > VIMSPELLVERSION) { + EMSG(_("E772: Spell file is for newer version of Vim")); + goto endFAIL; + } + + + // <SECTIONS>: <section> ... 
<sectionend> + // <section>: <sectionID> <sectionflags> <sectionlen> (section contents) + for (;; ) { + n = getc(fd); // <sectionID> or <sectionend> + if (n == SN_END) + break; + c = getc(fd); // <sectionflags> + len = get4c(fd); // <sectionlen> + if (len < 0) + goto truncerr; + + res = 0; + switch (n) { + case SN_INFO: + lp->sl_info = READ_STRING(fd, len); // <infotext> + if (lp->sl_info == NULL) + goto endFAIL; + break; + + case SN_REGION: + res = read_region_section(fd, lp, len); + break; + + case SN_CHARFLAGS: + res = read_charflags_section(fd); + break; + + case SN_MIDWORD: + lp->sl_midword = READ_STRING(fd, len); // <midword> + if (lp->sl_midword == NULL) + goto endFAIL; + break; + + case SN_PREFCOND: + res = read_prefcond_section(fd, lp); + break; + + case SN_REP: + res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); + break; + + case SN_REPSAL: + res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); + break; + + case SN_SAL: + res = read_sal_section(fd, lp); + break; + + case SN_SOFO: + res = read_sofo_section(fd, lp); + break; + + case SN_MAP: + p = READ_STRING(fd, len); // <mapstr> + if (p == NULL) + goto endFAIL; + set_map_str(lp, p); + xfree(p); + break; + + case SN_WORDS: + res = read_words_section(fd, lp, len); + break; + + case SN_SUGFILE: + lp->sl_sugtime = get8ctime(fd); // <timestamp> + break; + + case SN_NOSPLITSUGS: + lp->sl_nosplitsugs = true; + break; + + case SN_NOCOMPOUNDSUGS: + lp->sl_nocompoundsugs = true; + break; + + case SN_COMPOUND: + res = read_compound(fd, lp, len); + break; + + case SN_NOBREAK: + lp->sl_nobreak = true; + break; + + case SN_SYLLABLE: + lp->sl_syllable = READ_STRING(fd, len); // <syllable> + if (lp->sl_syllable == NULL) + goto endFAIL; + if (init_syl_tab(lp) == FAIL) + goto endFAIL; + break; + + default: + // Unsupported section. When it's required give an error + // message. When it's not required skip the contents. 
+ if (c & SNF_REQUIRED) { + EMSG(_("E770: Unsupported section in spell file")); + goto endFAIL; + } + while (--len >= 0) + if (getc(fd) < 0) + goto truncerr; + break; + } +someerror: + if (res == SP_FORMERROR) { + EMSG(_(e_format)); + goto endFAIL; + } + if (res == SP_TRUNCERROR) { +truncerr: + EMSG(_(e_spell_trunc)); + goto endFAIL; + } + if (res == SP_OTHERERROR) + goto endFAIL; + } + + // <LWORDTREE> + res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, false, 0); + if (res != 0) + goto someerror; + + // <KWORDTREE> + res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, false, 0); + if (res != 0) + goto someerror; + + // <PREFIXTREE> + res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, true, + lp->sl_prefixcnt); + if (res != 0) + goto someerror; + + // For a new file link it in the list of spell files. + if (old_lp == NULL && lang != NULL) { + lp->sl_next = first_lang; + first_lang = lp; + } + + goto endOK; + +endFAIL: + if (lang != NULL) + // truncating the name signals the error to spell_load_lang() + *lang = NUL; + if (lp != NULL && old_lp == NULL) + slang_free(lp); + lp = NULL; + +endOK: + if (fd != NULL) + fclose(fd); + sourcing_name = save_sourcing_name; + sourcing_lnum = save_sourcing_lnum; + + return lp; +} + +// Fill in the wordcount fields for a trie. +// Returns the total number of words. +static void tree_count_words(char_u *byts, idx_T *idxs) +{ + int depth; + idx_T arridx[MAXWLEN]; + int curi[MAXWLEN]; + int c; + idx_T n; + int wordcount[MAXWLEN]; + + arridx[0] = 0; + curi[0] = 1; + wordcount[0] = 0; + depth = 0; + while (depth >= 0 && !got_int) { + if (curi[depth] > byts[arridx[depth]]) { + // Done all bytes at this node, go up one level. + idxs[arridx[depth]] = wordcount[depth]; + if (depth > 0) + wordcount[depth - 1] += wordcount[depth]; + + --depth; + fast_breakcheck(); + } else { + // Do one more byte at this node. + n = arridx[depth] + curi[depth]; + ++curi[depth]; + + c = byts[n]; + if (c == 0) { + // End of word, count it. 
+ ++wordcount[depth]; + + // Skip over any other NUL bytes (same word with different + // flags). + while (byts[n + 1] == 0) { + ++n; + ++curi[depth]; + } + } else { + // Normal char, go one level deeper to count the words. + ++depth; + arridx[depth] = idxs[n]; + curi[depth] = 1; + wordcount[depth] = 0; + } + } + } +} + +// Load the .sug files for languages that have one and weren't loaded yet. +void suggest_load_files(void) +{ + langp_T *lp; + slang_T *slang; + char_u *dotp; + FILE *fd; + char_u buf[MAXWLEN]; + int i; + time_t timestamp; + int wcount; + int wordnr; + garray_T ga; + int c; + + // Do this for all languages that support sound folding. + for (int lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) { + lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); + slang = lp->lp_slang; + if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) { + // Change ".spl" to ".sug" and open the file. When the file isn't + // found silently skip it. Do set "sl_sugloaded" so that we + // don't try again and again. + slang->sl_sugloaded = true; + + dotp = vim_strrchr(slang->sl_fname, '.'); + if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) + continue; + STRCPY(dotp, ".sug"); + fd = mch_fopen((char *)slang->sl_fname, "r"); + if (fd == NULL) + goto nextone; + + // <SUGHEADER>: <fileID> <versionnr> <timestamp> + for (i = 0; i < VIMSUGMAGICL; ++i) + buf[i] = getc(fd); // <fileID> + if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) { + EMSG2(_("E778: This does not look like a .sug file: %s"), + slang->sl_fname); + goto nextone; + } + c = getc(fd); // <versionnr> + if (c < VIMSUGVERSION) { + EMSG2(_("E779: Old .sug file, needs to be updated: %s"), + slang->sl_fname); + goto nextone; + } else if (c > VIMSUGVERSION) { + EMSG2(_("E780: .sug file is for newer version of Vim: %s"), + slang->sl_fname); + goto nextone; + } + + // Check the timestamp, it must be exactly the same as the one in + // the .spl file. Otherwise the word numbers won't match. 
+ timestamp = get8ctime(fd); // <timestamp> + if (timestamp != slang->sl_sugtime) { + EMSG2(_("E781: .sug file doesn't match .spl file: %s"), + slang->sl_fname); + goto nextone; + } + + // <SUGWORDTREE>: <wordtree> + // Read the trie with the soundfolded words. + if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, + false, 0) != 0) { +someerror: + EMSG2(_("E782: error while reading .sug file: %s"), + slang->sl_fname); + slang_clear_sug(slang); + goto nextone; + } + + // <SUGTABLE>: <sugwcount> <sugline> ... + // + // Read the table with word numbers. We use a file buffer for + // this, because it's so much like a file with lines. Makes it + // possible to swap the info and save on memory use. + slang->sl_sugbuf = open_spellbuf(); + + // <sugwcount> + wcount = get4c(fd); + if (wcount < 0) + goto someerror; + + // Read all the wordnr lists into the buffer, one NUL terminated + // list per line. + ga_init(&ga, 1, 100); + for (wordnr = 0; wordnr < wcount; ++wordnr) { + ga.ga_len = 0; + for (;; ) { + c = getc(fd); // <sugline> + if (c < 0) { + goto someerror; + } + GA_APPEND(char_u, &ga, c); + if (c == NUL) + break; + } + if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, + ga.ga_data, ga.ga_len, TRUE) == FAIL) + goto someerror; + } + ga_clear(&ga); + + // Need to put word counts in the word tries, so that we can find + // a word by its number. + tree_count_words(slang->sl_fbyts, slang->sl_fidxs); + tree_count_words(slang->sl_sbyts, slang->sl_sidxs); + +nextone: + if (fd != NULL) + fclose(fd); + STRCPY(dotp, ".spl"); + } + } +} + + +// Read a length field from "fd" in "cnt_bytes" bytes. +// Allocate memory, read the string into it and add a NUL at the end. +// Returns NULL when the count is zero. +// Sets "*cntp" to SP_*ERROR when there is an error, length of the result +// otherwise. 
+static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) +{ + int cnt = 0; + int i; + char_u *str; + + // read the length bytes, MSB first + for (i = 0; i < cnt_bytes; ++i) + cnt = (cnt << 8) + getc(fd); + if (cnt < 0) { + *cntp = SP_TRUNCERROR; + return NULL; + } + *cntp = cnt; + if (cnt == 0) + return NULL; // nothing to read, return NULL + + str = READ_STRING(fd, cnt); + if (str == NULL) + *cntp = SP_OTHERERROR; + return str; +} + +// Read SN_REGION: <regionname> ... +// Return SP_*ERROR flags. +static int read_region_section(FILE *fd, slang_T *lp, int len) +{ + int i; + + if (len > 16) + return SP_FORMERROR; + for (i = 0; i < len; ++i) + lp->sl_regions[i] = getc(fd); // <regionname> + lp->sl_regions[len] = NUL; + return 0; +} + +// Read SN_CHARFLAGS section: <charflagslen> <charflags> +// <folcharslen> <folchars> +// Return SP_*ERROR flags. +static int read_charflags_section(FILE *fd) +{ + char_u *flags; + char_u *fol; + int flagslen, follen; + + // <charflagslen> <charflags> + flags = read_cnt_string(fd, 1, &flagslen); + if (flagslen < 0) + return flagslen; + + // <folcharslen> <folchars> + fol = read_cnt_string(fd, 2, &follen); + if (follen < 0) { + xfree(flags); + return follen; + } + + // Set the word-char flags and fill SPELL_ISUPPER() table. + if (flags != NULL && fol != NULL) + set_spell_charflags(flags, flagslen, fol); + + xfree(flags); + xfree(fol); + + // When <charflagslen> is zero then <fcharlen> must also be zero. + if ((flags == NULL) != (fol == NULL)) + return SP_FORMERROR; + return 0; +} + +// Read SN_PREFCOND section. +// Return SP_*ERROR flags. +static int read_prefcond_section(FILE *fd, slang_T *lp) +{ + int cnt; + int i; + int n; + char_u *p; + char_u buf[MAXWLEN + 1]; + + // <prefcondcnt> <prefcond> ... 
+ cnt = get2c(fd); // <prefcondcnt> + if (cnt <= 0) + return SP_FORMERROR; + + lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *)); + lp->sl_prefixcnt = cnt; + + for (i = 0; i < cnt; ++i) { + // <prefcond> : <condlen> <condstr> + n = getc(fd); // <condlen> + if (n < 0 || n >= MAXWLEN) + return SP_FORMERROR; + + // When <condlen> is zero we have an empty condition. Otherwise + // compile the regexp program used to check for the condition. + if (n > 0) { + buf[0] = '^'; // always match at one position only + p = buf + 1; + while (n-- > 0) + *p++ = getc(fd); // <condstr> + *p = NUL; + lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); + } + } + return 0; +} + +// Read REP or REPSAL items section from "fd": <repcount> <rep> ... +// Return SP_*ERROR flags. +static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first) +{ + int cnt; + fromto_T *ftp; + + cnt = get2c(fd); // <repcount> + if (cnt < 0) + return SP_TRUNCERROR; + + ga_grow(gap, cnt); + + // <rep> : <repfromlen> <repfrom> <reptolen> <repto> + for (; gap->ga_len < cnt; ++gap->ga_len) { + int c; + ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; + ftp->ft_from = read_cnt_string(fd, 1, &c); + if (c < 0) + return c; + if (c == 0) + return SP_FORMERROR; + ftp->ft_to = read_cnt_string(fd, 1, &c); + if (c <= 0) { + xfree(ftp->ft_from); + if (c < 0) + return c; + return SP_FORMERROR; + } + } + + // Fill the first-index table. + for (int i = 0; i < 256; ++i) { + first[i] = -1; + } + for (int i = 0; i < gap->ga_len; ++i) { + ftp = &((fromto_T *)gap->ga_data)[i]; + if (first[*ftp->ft_from] == -1) + first[*ftp->ft_from] = i; + } + return 0; +} + +// Read SN_SAL section: <salflags> <salcount> <sal> ... +// Return SP_*ERROR flags. 
static int read_sal_section(FILE *fd, slang_T *slang)
{
  int i;
  int cnt;
  garray_T *gap;
  salitem_T *smp;
  int ccnt;
  char_u *p;
  int c = NUL;

  slang->sl_sofo = false;

  i = getc(fd);                                 // <salflags>
  if (i & SAL_F0LLOWUP)
    slang->sl_followup = true;
  if (i & SAL_COLLAPSE)
    slang->sl_collapse = true;
  if (i & SAL_REM_ACCENTS)
    slang->sl_rem_accents = true;

  cnt = get2c(fd);                              // <salcount>
  if (cnt < 0)
    return SP_TRUNCERROR;

  gap = &slang->sl_sal;
  ga_init(gap, sizeof(salitem_T), 10);
  // One extra slot for the terminating entry that is added after the loop.
  ga_grow(gap, cnt + 1);

  // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
  for (; gap->ga_len < cnt; ++gap->ga_len) {
    smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
    ccnt = getc(fd);                            // <salfromlen>
    if (ccnt < 0)
      return SP_TRUNCERROR;
    // sm_lead, sm_oneof and sm_rules all point into this one allocation;
    // "p" walks through it while <salfrom> is split into those parts.
    p = xmalloc(ccnt + 2);
    smp->sm_lead = p;

    // Read up to the first special char into sm_lead.
    for (i = 0; i < ccnt; ++i) {
      c = getc(fd);                             // <salfrom>
      if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
        break;
      *p++ = c;
    }
    smp->sm_leadlen = (int)(p - smp->sm_lead);
    *p++ = NUL;

    // Put (abc) chars in sm_oneof, if any.
    if (c == '(') {
      smp->sm_oneof = p;
      for (++i; i < ccnt; ++i) {
        c = getc(fd);                           // <salfrom>
        if (c == ')')
          break;
        *p++ = c;
      }
      *p++ = NUL;
      // Fetch the char after the ')', it is stored as the first char of
      // sm_rules below.
      if (++i < ccnt)
        c = getc(fd);
    } else
      smp->sm_oneof = NULL;

    // Any following chars go in sm_rules.
    smp->sm_rules = p;
    if (i < ccnt)
      // store the char we got while checking for end of sm_lead
      *p++ = c;
    for (++i; i < ccnt; ++i)
      *p++ = getc(fd);                          // <salfrom>
    *p++ = NUL;

    // <saltolen> <salto>
    smp->sm_to = read_cnt_string(fd, 1, &ccnt);
    if (ccnt < 0) {
      // The item is not counted in "ga_len" yet, free its buffer here.
      xfree(smp->sm_lead);
      return ccnt;
    }

    if (has_mbyte) {
      // convert the multi-byte strings to wide char strings
      smp->sm_lead_w = mb_str2wide(smp->sm_lead);
      // For multi-byte the lead length is counted in characters, this
      // replaces the byte count stored above.
      smp->sm_leadlen = mb_charlen(smp->sm_lead);
      if (smp->sm_oneof == NULL)
        smp->sm_oneof_w = NULL;
      else
        smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
      if (smp->sm_to == NULL)
        smp->sm_to_w = NULL;
      else
        smp->sm_to_w = mb_str2wide(smp->sm_to);
    }
  }

  if (!GA_EMPTY(gap)) {
    // Add one extra entry to mark the end with an empty sm_lead.  Avoids
    // that we need to check the index every time.
    smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
    p = xmalloc(1);
    p[0] = NUL;
    smp->sm_lead = p;
    smp->sm_leadlen = 0;
    smp->sm_oneof = NULL;
    smp->sm_rules = p;
    smp->sm_to = NULL;
    if (has_mbyte) {
      smp->sm_lead_w = mb_str2wide(smp->sm_lead);
      smp->sm_leadlen = 0;
      smp->sm_oneof_w = NULL;
      smp->sm_to_w = NULL;
    }
    ++gap->ga_len;
  }

  // Fill the first-index table.
  set_sal_first(slang);

  return 0;
}

// Read SN_WORDS: <word> ...
// "len" is the number of bytes in the section; each word is NUL terminated
// and is passed to count_common_word().
// Return SP_*ERROR flags.
static int read_words_section(FILE *fd, slang_T *lp, int len)
{
  int done = 0;
  int i;
  int c;
  char_u word[MAXWLEN];

  while (done < len) {
    // Read one word at a time.
    for (i = 0;; ++i) {
      c = getc(fd);
      if (c == EOF)
        return SP_TRUNCERROR;
      word[i] = c;
      if (word[i] == NUL)
        break;
      // The buffer is full and no NUL was seen: the word is too long.
      if (i == MAXWLEN - 1)
        return SP_FORMERROR;
    }

    // Init the count to 10.
    count_common_word(lp, word, -1, 10);
    // "i" bytes of text plus the terminating NUL were consumed.
    done += i + 1;
  }
  return 0;
}

// SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
// Return SP_*ERROR flags.
+static int read_sofo_section(FILE *fd, slang_T *slang) +{ + int cnt; + char_u *from, *to; + int res; + + slang->sl_sofo = true; + + // <sofofromlen> <sofofrom> + from = read_cnt_string(fd, 2, &cnt); + if (cnt < 0) + return cnt; + + // <sofotolen> <sofoto> + to = read_cnt_string(fd, 2, &cnt); + if (cnt < 0) { + xfree(from); + return cnt; + } + + // Store the info in slang->sl_sal and/or slang->sl_sal_first. + if (from != NULL && to != NULL) + res = set_sofo(slang, from, to); + else if (from != NULL || to != NULL) + res = SP_FORMERROR; // only one of two strings is an error + else + res = 0; + + xfree(from); + xfree(to); + return res; +} + +// Read the compound section from the .spl file: +// <compmax> <compminlen> <compsylmax> <compoptions> <compflags> +// Returns SP_*ERROR flags. +static int read_compound(FILE *fd, slang_T *slang, int len) +{ + int todo = len; + int c; + int atstart; + char_u *pat; + char_u *pp; + char_u *cp; + char_u *ap; + char_u *crp; + int cnt; + garray_T *gap; + + if (todo < 2) + return SP_FORMERROR; // need at least two bytes + + --todo; + c = getc(fd); // <compmax> + if (c < 2) + c = MAXWLEN; + slang->sl_compmax = c; + + --todo; + c = getc(fd); // <compminlen> + if (c < 1) + c = 0; + slang->sl_compminlen = c; + + --todo; + c = getc(fd); // <compsylmax> + if (c < 1) + c = MAXWLEN; + slang->sl_compsylmax = c; + + c = getc(fd); // <compoptions> + if (c != 0) + ungetc(c, fd); // be backwards compatible with Vim 7.0b + else { + --todo; + c = getc(fd); // only use the lower byte for now + --todo; + slang->sl_compoptions = c; + + gap = &slang->sl_comppat; + c = get2c(fd); // <comppatcount> + todo -= 2; + ga_init(gap, sizeof(char_u *), c); + ga_grow(gap, c); + while (--c >= 0) { + ((char_u **)(gap->ga_data))[gap->ga_len++] = + read_cnt_string(fd, 1, &cnt); + // <comppatlen> <comppattext> + if (cnt < 0) + return cnt; + todo -= cnt + 1; + } + } + if (todo < 0) + return SP_FORMERROR; + + // Turn the COMPOUNDRULE items into a regexp pattern: + // 
"a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". + // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. + // Conversion to utf-8 may double the size. + c = todo * 2 + 7; + if (enc_utf8) + c += todo * 2; + pat = xmalloc(c); + + // We also need a list of all flags that can appear at the start and one + // for all flags. + cp = xmalloc(todo + 1); + slang->sl_compstartflags = cp; + *cp = NUL; + + ap = xmalloc(todo + 1); + slang->sl_compallflags = ap; + *ap = NUL; + + // And a list of all patterns in their original form, for checking whether + // compounding may work in match_compoundrule(). This is freed when we + // encounter a wildcard, the check doesn't work then. + crp = xmalloc(todo + 1); + slang->sl_comprules = crp; + + pp = pat; + *pp++ = '^'; + *pp++ = '\\'; + *pp++ = '('; + + atstart = 1; + while (todo-- > 0) { + c = getc(fd); // <compflags> + if (c == EOF) { + xfree(pat); + return SP_TRUNCERROR; + } + + // Add all flags to "sl_compallflags". + if (vim_strchr((char_u *)"?*+[]/", c) == NULL + && !byte_in_str(slang->sl_compallflags, c)) { + *ap++ = c; + *ap = NUL; + } + + if (atstart != 0) { + // At start of item: copy flags to "sl_compstartflags". For a + // [abc] item set "atstart" to 2 and copy up to the ']'. + if (c == '[') + atstart = 2; + else if (c == ']') + atstart = 0; + else { + if (!byte_in_str(slang->sl_compstartflags, c)) { + *cp++ = c; + *cp = NUL; + } + if (atstart == 1) + atstart = 0; + } + } + + // Copy flag to "sl_comprules", unless we run into a wildcard. + if (crp != NULL) { + if (c == '?' || c == '+' || c == '*') { + xfree(slang->sl_comprules); + slang->sl_comprules = NULL; + crp = NULL; + } else + *crp++ = c; + } + + if (c == '/') { // slash separates two items + *pp++ = '\\'; + *pp++ = '|'; + atstart = 1; + } else { // normal char, "[abc]" and '*' are copied as-is + if (c == '?' || c == '+' || c == '~') + *pp++ = '\\'; // "a?" 
becomes "a\?", "a+" becomes "a\+" + if (enc_utf8) + pp += mb_char2bytes(c, pp); + else + *pp++ = c; + } + } + + *pp++ = '\\'; + *pp++ = ')'; + *pp++ = '$'; + *pp = NUL; + + if (crp != NULL) + *crp = NUL; + + slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); + xfree(pat); + if (slang->sl_compprog == NULL) + return SP_FORMERROR; + + return 0; +} + +// Set the SOFOFROM and SOFOTO items in language "lp". +// Returns SP_*ERROR flags when there is something wrong. +static int set_sofo(slang_T *lp, char_u *from, char_u *to) +{ + int i; + + garray_T *gap; + char_u *s; + char_u *p; + int c; + int *inp; + + if (has_mbyte) { + // Use "sl_sal" as an array with 256 pointers to a list of wide + // characters. The index is the low byte of the character. + // The list contains from-to pairs with a terminating NUL. + // sl_sal_first[] is used for latin1 "from" characters. + gap = &lp->sl_sal; + ga_init(gap, sizeof(int *), 1); + ga_grow(gap, 256); + memset(gap->ga_data, 0, sizeof(int *) * 256); + gap->ga_len = 256; + + // First count the number of items for each list. Temporarily use + // sl_sal_first[] for this. + for (p = from, s = to; *p != NUL && *s != NUL; ) { + c = mb_cptr2char_adv(&p); + mb_cptr_adv(s); + if (c >= 256) + ++lp->sl_sal_first[c & 0xff]; + } + if (*p != NUL || *s != NUL) // lengths differ + return SP_FORMERROR; + + // Allocate the lists. + for (i = 0; i < 256; ++i) + if (lp->sl_sal_first[i] > 0) { + p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); + ((int **)gap->ga_data)[i] = (int *)p; + *(int *)p = 0; + } + + // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal + // list. + memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); + for (p = from, s = to; *p != NUL && *s != NUL; ) { + c = mb_cptr2char_adv(&p); + i = mb_cptr2char_adv(&s); + if (c >= 256) { + // Append the from-to chars at the end of the list with + // the low byte. 
+ inp = ((int **)gap->ga_data)[c & 0xff]; + while (*inp != 0) + ++inp; + *inp++ = c; // from char + *inp++ = i; // to char + *inp++ = NUL; // NUL at the end + } else + // mapping byte to char is done in sl_sal_first[] + lp->sl_sal_first[c] = i; + } + } else { + // mapping bytes to bytes is done in sl_sal_first[] + if (STRLEN(from) != STRLEN(to)) + return SP_FORMERROR; + + for (i = 0; to[i] != NUL; ++i) + lp->sl_sal_first[from[i]] = to[i]; + lp->sl_sal.ga_len = 1; // indicates we have soundfolding + } + + return 0; +} + +// Fill the first-index table for "lp". +static void set_sal_first(slang_T *lp) +{ + salfirst_T *sfirst; + salitem_T *smp; + int c; + garray_T *gap = &lp->sl_sal; + + sfirst = lp->sl_sal_first; + for (int i = 0; i < 256; ++i) { + sfirst[i] = -1; + } + smp = (salitem_T *)gap->ga_data; + for (int i = 0; i < gap->ga_len; ++i) { + if (has_mbyte) + // Use the lowest byte of the first character. For latin1 it's + // the character, for other encodings it should differ for most + // characters. + c = *smp[i].sm_lead_w & 0xff; + else + c = *smp[i].sm_lead; + if (sfirst[c] == -1) { + sfirst[c] = i; + if (has_mbyte) { + int n; + + // Make sure all entries with this byte are following each + // other. Move the ones that are in the wrong position. Do + // keep the same ordering! + while (i + 1 < gap->ga_len + && (*smp[i + 1].sm_lead_w & 0xff) == c) + // Skip over entry with same index byte. + ++i; + + for (n = 1; i + n < gap->ga_len; ++n) + if ((*smp[i + n].sm_lead_w & 0xff) == c) { + salitem_T tsal; + + // Move entry with same index byte after the entries + // we already found. + ++i; + --n; + tsal = smp[i + n]; + memmove(smp + i + 1, smp + i, + sizeof(salitem_T) * n); + smp[i] = tsal; + } + } + } + } +} + +// Turn a multi-byte string into a wide character string. +// Return it in allocated memory. 
+static int *mb_str2wide(char_u *s) +{ + int i = 0; + + int *res = xmalloc((mb_charlen(s) + 1) * sizeof(int)); + for (char_u *p = s; *p != NUL; ) + res[i++] = mb_ptr2char_adv(&p); + res[i] = NUL; + + return res; +} + +// Reads a tree from the .spl or .sug file. +// Allocates the memory and stores pointers in "bytsp" and "idxsp". +// This is skipped when the tree has zero length. +// Returns zero when OK, SP_ value for an error. +static int +spell_read_tree ( + FILE *fd, + char_u **bytsp, + idx_T **idxsp, + bool prefixtree, // true for the prefix tree + int prefixcnt // when "prefixtree" is true: prefix count +) +{ + int idx; + char_u *bp; + idx_T *ip; + + // The tree size was computed when writing the file, so that we can + // allocate it as one long block. <nodecount> + int len = get4c(fd); + if (len < 0) + return SP_TRUNCERROR; + if (len > 0) { + // Allocate the byte array. + bp = xmalloc(len); + *bytsp = bp; + + // Allocate the index array. + ip = xcalloc(len, sizeof(*ip)); + *idxsp = ip; + + // Recursively read the tree and store it in the array. + idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); + if (idx < 0) + return idx; + } + return 0; +} + +// Read one row of siblings from the spell file and store it in the byte array +// "byts" and index array "idxs". Recursively read the children. +// +// NOTE: The code here must match put_node()! +// +// Returns the index (>= 0) following the siblings. +// Returns SP_TRUNCERROR if the file is shorter than expected. +// Returns SP_FORMERROR if there is a format error. 
static idx_T
read_tree_node (
    FILE *fd,
    char_u *byts,
    idx_T *idxs,
    int maxidx,                         // size of arrays
    idx_T startidx,                     // current index in "byts" and "idxs"
    bool prefixtree,                    // true for reading PREFIXTREE
    int maxprefcondnr                   // maximum for <prefcondnr>
)
{
  int len;
  int i;
  int n;
  idx_T idx = startidx;
  int c;
  int c2;
// Bit that temporarily marks an idxs[] entry as a reference to a shared
// node; it is removed again in the second loop below.
#define SHARED_MASK     0x8000000

  len = getc(fd);                                       // <siblingcount>
  if (len <= 0)
    return SP_TRUNCERROR;

  // The count byte plus "len" sibling bytes must fit in the arrays.
  if (startidx + len >= maxidx)
    return SP_FORMERROR;
  byts[idx++] = len;

  // Read the byte values, flag/region bytes and shared indexes.
  for (i = 1; i <= len; ++i) {
    c = getc(fd);                                       // <byte>
    if (c < 0)
      return SP_TRUNCERROR;
    if (c <= BY_SPECIAL) {
      if (c == BY_NOFLAGS && !prefixtree) {
        // No flags, all regions.
        idxs[idx] = 0;
        c = 0;
      } else if (c != BY_INDEX) {
        if (prefixtree) {
          // Read the optional pflags byte, the prefix ID and the
          // condition nr.  In idxs[] store the prefix ID in the low
          // byte, the condition index shifted up 8 bits, the flags
          // shifted up 24 bits.
          if (c == BY_FLAGS)
            c = getc(fd) << 24;                         // <pflags>
          else
            c = 0;

          c |= getc(fd);                                // <affixID>

          n = get2c(fd);                                // <prefcondnr>
          if (n >= maxprefcondnr)
            return SP_FORMERROR;
          c |= (n << 8);
        } else {       // c must be BY_FLAGS or BY_FLAGS2
          // Read flags and optional region and prefix ID.  In
          // idxs[] the flags go in the low two bytes, region above
          // that and prefix ID above the region.
          c2 = c;
          c = getc(fd);                                 // <flags>
          if (c2 == BY_FLAGS2)
            c = (getc(fd) << 8) + c;                    // <flags2>
          if (c & WF_REGION)
            c = (getc(fd) << 16) + c;                   // <region>
          if (c & WF_AFX)
            c = (getc(fd) << 24) + c;                   // <affixID>
        }

        idxs[idx] = c;
        // A zero in byts[] marks an end-of-word entry.
        c = 0;
      } else {       // c == BY_INDEX
        // <nodeidx>
        n = get3c(fd);
        if (n < 0 || n >= maxidx)
          return SP_FORMERROR;
        idxs[idx] = n + SHARED_MASK;
        c = getc(fd);                                   // <xbyte>
      }
    }
    byts[idx++] = c;
  }

  // Recursively read the children for non-shared siblings.
  // Skip the end-of-word ones (zero byte value) and the shared ones (and
  // remove SHARED_MASK)
  for (i = 1; i <= len; ++i)
    if (byts[startidx + i] != 0) {
      if (idxs[startidx + i] & SHARED_MASK)
        idxs[startidx + i] &= ~SHARED_MASK;
      else {
        // The children are stored right after what was read so far.
        idxs[startidx + i] = idx;
        idx = read_tree_node(fd, byts, idxs, maxidx, idx,
            prefixtree, maxprefcondnr);
        // A negative value is an SP_ error code, pass it on to the caller.
        if (idx < 0)
          break;
      }
    }

  return idx;
}

// Reload the spell file "fname" if it's loaded.
// Every loaded language whose file name refers to the same file as "fname"
// is cleared and read again.
static void
spell_reload_one (
    char_u *fname,
    bool added_word                // invoked through "zg"
)
{
  slang_T *slang;
  bool didit = false;

  for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
    if (path_full_compare(fname, slang->sl_fname, FALSE) == kEqualFiles) {
      slang_clear(slang);
      if (spell_load_file(fname, NULL, slang, false) == NULL)
        // reloading failed, clear the language
        slang_clear(slang);
      redraw_all_later(SOME_VALID);
      didit = true;
    }
  }

  // When "zg" was used and the file wasn't loaded yet, should redo
  // 'spelllang' to load it now.
  if (added_word && !didit)
    did_set_spelllang(curwin);
}

// Functions for ":mkspell".

// In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
// but it must be negative to indicate the prefix tree to tree_add_word().
// Use a negative number with the lower 8 bits zero.
#define PFX_FLAGS       -256

// flags for "condit" argument of store_aff_word()
#define CONDIT_COMB     1       // affix must combine
#define CONDIT_CFIX     2       // affix must have CIRCUMFIX flag
#define CONDIT_SUF      4       // add a suffix for matching flags
#define CONDIT_AFF      8       // word already has an affix

// Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
static long compress_start = 30000;     // memory / SBLOCKSIZE
static long compress_inc = 100;         // memory / SBLOCKSIZE
static long compress_added = 500000;    // word count

// Check the 'mkspellmem' option.  Return FAIL if it's wrong.
// Sets "compress_start", "compress_inc" and "compress_added".
+int spell_check_msm(void) +{ + char_u *p = p_msm; + long start = 0; + long incr = 0; + long added = 0; + + if (!ascii_isdigit(*p)) + return FAIL; + // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow) + start = (getdigits_long(&p) * 10) / (SBLOCKSIZE / 102); + if (*p != ',') + return FAIL; + ++p; + if (!ascii_isdigit(*p)) + return FAIL; + incr = (getdigits_long(&p) * 102) / (SBLOCKSIZE / 10); + if (*p != ',') + return FAIL; + ++p; + if (!ascii_isdigit(*p)) + return FAIL; + added = getdigits_long(&p) * 1024; + if (*p != NUL) + return FAIL; + + if (start == 0 || incr == 0 || added == 0 || incr > start) + return FAIL; + + compress_start = start; + compress_inc = incr; + compress_added = added; + return OK; +} + +#ifdef SPELL_PRINTTREE +// For debugging the tree code: print the current tree in a (more or less) +// readable format, so that we can see what happens when adding a word and/or +// compressing the tree. +// Based on code from Olaf Seibert. +#define PRINTLINESIZE 1000 +#define PRINTWIDTH 6 + +#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ + PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) + +static char line1[PRINTLINESIZE]; +static char line2[PRINTLINESIZE]; +static char line3[PRINTLINESIZE]; + +static void spell_clear_flags(wordnode_T *node) +{ + wordnode_T *np; + + for (np = node; np != NULL; np = np->wn_sibling) { + np->wn_u1.index = FALSE; + spell_clear_flags(np->wn_child); + } +} + +static void spell_print_node(wordnode_T *node, int depth) +{ + if (node->wn_u1.index) { + // Done this node before, print the reference. + PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); + PRINTSOME(line2, depth, " ", 0, 0); + PRINTSOME(line3, depth, " ", 0, 0); + msg((char_u *)line1); + msg((char_u *)line2); + msg((char_u *)line3); + } else { + node->wn_u1.index = TRUE; + + if (node->wn_byte != NUL) { + if (node->wn_child != NULL) + PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); + else + // Cannot happen? 
+ PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); + } else + PRINTSOME(line1, depth, " $ ", 0, 0); + + PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); + + if (node->wn_sibling != NULL) + PRINTSOME(line3, depth, " | ", 0, 0); + else + PRINTSOME(line3, depth, " ", 0, 0); + + if (node->wn_byte == NUL) { + msg((char_u *)line1); + msg((char_u *)line2); + msg((char_u *)line3); + } + + // do the children + if (node->wn_byte != NUL && node->wn_child != NULL) + spell_print_node(node->wn_child, depth + 1); + + // do the siblings + if (node->wn_sibling != NULL) { + // get rid of all parent details except | + STRCPY(line1, line3); + STRCPY(line2, line3); + spell_print_node(node->wn_sibling, depth); + } + } +} + +static void spell_print_tree(wordnode_T *root) +{ + if (root != NULL) { + // Clear the "wn_u1.index" fields, used to remember what has been + // done. + spell_clear_flags(root); + + // Recursively print the tree. + spell_print_node(root, 0); + } +} + +#endif // SPELL_PRINTTREE + +// Reads the affix file "fname". +// Returns an afffile_T, NULL for complete failure. 
+static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) +{ + FILE *fd; + afffile_T *aff; + char_u rline[MAXLINELEN]; + char_u *line; + char_u *pc = NULL; +#define MAXITEMCNT 30 + char_u *(items[MAXITEMCNT]); + int itemcnt; + char_u *p; + int lnum = 0; + affheader_T *cur_aff = NULL; + bool did_postpone_prefix = false; + int aff_todo = 0; + hashtab_T *tp; + char_u *low = NULL; + char_u *fol = NULL; + char_u *upp = NULL; + int do_rep; + int do_repsal; + int do_sal; + int do_mapline; + bool found_map = false; + hashitem_T *hi; + int l; + int compminlen = 0; // COMPOUNDMIN value + int compsylmax = 0; // COMPOUNDSYLMAX value + int compoptions = 0; // COMP_ flags + int compmax = 0; // COMPOUNDWORDMAX value + char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE + // concatenated + char_u *midword = NULL; // MIDWORD value + char_u *syllable = NULL; // SYLLABLE value + char_u *sofofrom = NULL; // SOFOFROM value + char_u *sofoto = NULL; // SOFOTO value + + // Open the file. + fd = mch_fopen((char *)fname, "r"); + if (fd == NULL) { + EMSG2(_(e_notopen), fname); + return NULL; + } + + vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); + spell_message(spin, IObuff); + + // Only do REP lines when not done in another .aff file already. + do_rep = GA_EMPTY(&spin->si_rep); + + // Only do REPSAL lines when not done in another .aff file already. + do_repsal = GA_EMPTY(&spin->si_repsal); + + // Only do SAL lines when not done in another .aff file already. + do_sal = GA_EMPTY(&spin->si_sal); + + // Only do MAP lines when not done in another .aff file already. + do_mapline = GA_EMPTY(&spin->si_map); + + // Allocate and init the afffile_T structure. + aff = (afffile_T *)getroom(spin, sizeof(afffile_T), true); + if (aff == NULL) { + fclose(fd); + return NULL; + } + hash_init(&aff->af_pref); + hash_init(&aff->af_suff); + hash_init(&aff->af_comp); + + // Read all the lines in the file one by one. 
+ while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { + line_breakcheck(); + ++lnum; + + // Skip comment lines. + if (*rline == '#') + continue; + + // Convert from "SET" to 'encoding' when needed. + xfree(pc); + if (spin->si_conv.vc_type != CONV_NONE) { + pc = string_convert(&spin->si_conv, rline, NULL); + if (pc == NULL) { + smsg(_("Conversion failure for word in %s line %d: %s"), + fname, lnum, rline); + continue; + } + line = pc; + } else { + pc = NULL; + line = rline; + } + + // Split the line up in white separated items. Put a NUL after each + // item. + itemcnt = 0; + for (p = line;; ) { + while (*p != NUL && *p <= ' ') // skip white space and CR/NL + ++p; + if (*p == NUL) + break; + if (itemcnt == MAXITEMCNT) // too many items + break; + items[itemcnt++] = p; + // A few items have arbitrary text argument, don't split them. + if (itemcnt == 2 && spell_info_item(items[0])) + while (*p >= ' ' || *p == TAB) // skip until CR/NL + ++p; + else + while (*p > ' ') // skip until white space or CR/NL + ++p; + if (*p == NUL) + break; + *p++ = NUL; + } + + // Handle non-empty lines. + if (itemcnt > 0) { + if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) { + // Setup for conversion from "ENC" to 'encoding'. 
+ aff->af_enc = enc_canonize(items[1]); + if (!spin->si_ascii + && convert_setup(&spin->si_conv, aff->af_enc, + p_enc) == FAIL) + smsg(_("Conversion in %s not supported: from %s to %s"), + fname, aff->af_enc, p_enc); + spin->si_conv.vc_fail = true; + } else if (is_aff_rule(items, itemcnt, "FLAG", 2) + && aff->af_flagtype == AFT_CHAR) { + if (STRCMP(items[1], "long") == 0) + aff->af_flagtype = AFT_LONG; + else if (STRCMP(items[1], "num") == 0) + aff->af_flagtype = AFT_NUM; + else if (STRCMP(items[1], "caplong") == 0) + aff->af_flagtype = AFT_CAPLONG; + else + smsg(_("Invalid value for FLAG in %s line %d: %s"), + fname, lnum, items[1]); + if (aff->af_rare != 0 + || aff->af_keepcase != 0 + || aff->af_bad != 0 + || aff->af_needaffix != 0 + || aff->af_circumfix != 0 + || aff->af_needcomp != 0 + || aff->af_comproot != 0 + || aff->af_nosuggest != 0 + || compflags != NULL + || aff->af_suff.ht_used > 0 + || aff->af_pref.ht_used > 0) + smsg(_("FLAG after using flags in %s line %d: %s"), + fname, lnum, items[1]); + } else if (spell_info_item(items[0]) && itemcnt > 1) { + p = (char_u *)getroom(spin, + (spin->si_info == NULL ? 
0 : STRLEN(spin->si_info)) + + STRLEN(items[0]) + + STRLEN(items[1]) + 3, false); + if (p != NULL) { + if (spin->si_info != NULL) { + STRCPY(p, spin->si_info); + STRCAT(p, "\n"); + } + STRCAT(p, items[0]); + STRCAT(p, " "); + STRCAT(p, items[1]); + spin->si_info = p; + } + } else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) + && midword == NULL) { + midword = getroom_save(spin, items[1]); + } else if (is_aff_rule(items, itemcnt, "TRY", 2)) { + // ignored, we look in the tree for what chars may appear + } + // TODO: remove "RAR" later + else if ((is_aff_rule(items, itemcnt, "RAR", 2) + || is_aff_rule(items, itemcnt, "RARE", 2)) + && aff->af_rare == 0) { + aff->af_rare = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } + // TODO: remove "KEP" later + else if ((is_aff_rule(items, itemcnt, "KEP", 2) + || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) + && aff->af_keepcase == 0) { + aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } else if ((is_aff_rule(items, itemcnt, "BAD", 2) + || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) + && aff->af_bad == 0) { + aff->af_bad = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) + && aff->af_needaffix == 0) { + aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) + && aff->af_circumfix == 0) { + aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) + && aff->af_nosuggest == 0) { + aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) + || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) + && aff->af_needcomp == 0) { + aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) + && aff->af_comproot == 0) { + 
aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + } else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) + && aff->af_compforbid == 0) { + aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + if (aff->af_pref.ht_used > 0) + smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), + fname, lnum); + } else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) + && aff->af_comppermit == 0) { + aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + if (aff->af_pref.ht_used > 0) + smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), + fname, lnum); + } else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) + && compflags == NULL) { + // Turn flag "c" into COMPOUNDRULE compatible string "c+", + // "Na" into "Na+", "1234" into "1234+". + p = getroom(spin, STRLEN(items[1]) + 2, false); + STRCPY(p, items[1]); + STRCAT(p, "+"); + compflags = p; + } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) { + // We don't use the count, but do check that it's a number and + // not COMPOUNDRULE mistyped. + if (atoi((char *)items[1]) == 0) + smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"), + fname, lnum, items[1]); + } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) { + // Don't use the first rule if it is a number. + if (compflags != NULL || *skipdigits(items[1]) != NUL) { + // Concatenate this string to previously defined ones, + // using a slash to separate them. 
+ l = (int)STRLEN(items[1]) + 1; + if (compflags != NULL) + l += (int)STRLEN(compflags) + 1; + p = getroom(spin, l, false); + if (compflags != NULL) { + STRCPY(p, compflags); + STRCAT(p, "/"); + } + STRCAT(p, items[1]); + compflags = p; + } + } else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) + && compmax == 0) { + compmax = atoi((char *)items[1]); + if (compmax == 0) + smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), + fname, lnum, items[1]); + } else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) + && compminlen == 0) { + compminlen = atoi((char *)items[1]); + if (compminlen == 0) + smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"), + fname, lnum, items[1]); + } else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) + && compsylmax == 0) { + compsylmax = atoi((char *)items[1]); + if (compsylmax == 0) + smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), + fname, lnum, items[1]); + } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) { + compoptions |= COMP_CHECKDUP; + } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) { + compoptions |= COMP_CHECKREP; + } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) { + compoptions |= COMP_CHECKCASE; + } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) { + compoptions |= COMP_CHECKTRIPLE; + } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) { + if (atoi((char *)items[1]) == 0) + smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), + fname, lnum, items[1]); + } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) { + garray_T *gap = &spin->si_comppat; + int i; + + // Only add the couple if it isn't already there. 
+ for (i = 0; i < gap->ga_len - 1; i += 2) + if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 + && STRCMP(((char_u **)(gap->ga_data))[i + 1], + items[2]) == 0) + break; + if (i >= gap->ga_len) { + ga_grow(gap, 2); + ((char_u **)(gap->ga_data))[gap->ga_len++] + = getroom_save(spin, items[1]); + ((char_u **)(gap->ga_data))[gap->ga_len++] + = getroom_save(spin, items[2]); + } + } else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) + && syllable == NULL) { + syllable = getroom_save(spin, items[1]); + } else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) { + spin->si_nobreak = true; + } else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) { + spin->si_nosplitsugs = true; + } else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) { + spin->si_nocompoundsugs = true; + } else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) { + spin->si_nosugfile = true; + } else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) { + aff->af_pfxpostpone = true; + } else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) { + aff->af_ignoreextra = true; + } else if ((STRCMP(items[0], "PFX") == 0 + || STRCMP(items[0], "SFX") == 0) + && aff_todo == 0 + && itemcnt >= 4) { + int lasti = 4; + char_u key[AH_KEY_LEN]; + + if (*items[0] == 'P') + tp = &aff->af_pref; + else + tp = &aff->af_suff; + + // Myspell allows the same affix name to be used multiple + // times. The affix files that do this have an undocumented + // "S" flag on all but the last block, thus we check for that + // and store it in ah_follows. + STRLCPY(key, items[1], AH_KEY_LEN); + hi = hash_find(tp, key); + if (!HASHITEM_EMPTY(hi)) { + cur_aff = HI2AH(hi); + if (cur_aff->ah_combine != (*items[2] == 'Y')) + smsg(_("Different combining flag in continued affix block in %s line %d: %s"), + fname, lnum, items[1]); + if (!cur_aff->ah_follows) + smsg(_("Duplicate affix in %s line %d: %s"), + fname, lnum, items[1]); + } else { + // New affix letter. 
+ cur_aff = (affheader_T *)getroom(spin, + sizeof(affheader_T), true); + if (cur_aff == NULL) + break; + cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], + fname, lnum); + if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) + break; + if (cur_aff->ah_flag == aff->af_bad + || cur_aff->ah_flag == aff->af_rare + || cur_aff->ah_flag == aff->af_keepcase + || cur_aff->ah_flag == aff->af_needaffix + || cur_aff->ah_flag == aff->af_circumfix + || cur_aff->ah_flag == aff->af_nosuggest + || cur_aff->ah_flag == aff->af_needcomp + || cur_aff->ah_flag == aff->af_comproot) + smsg(_("Affix also used for " + "BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST" + "in %s line %d: %s"), + fname, lnum, items[1]); + STRCPY(cur_aff->ah_key, items[1]); + hash_add(tp, cur_aff->ah_key); + + cur_aff->ah_combine = (*items[2] == 'Y'); + } + + // Check for the "S" flag, which apparently means that another + // block with the same affix name is following. + if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) { + ++lasti; + cur_aff->ah_follows = true; + } else + cur_aff->ah_follows = false; + + // Myspell allows extra text after the item, but that might + // mean mistakes go unnoticed. Require a comment-starter, + // unless IGNOREEXTRA is used. Hunspell uses a "-" item. + if (itemcnt > lasti + && !aff->af_ignoreextra + && *items[lasti] != '#') + smsg(_(e_afftrailing), fname, lnum, items[lasti]); + + if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) + smsg(_("Expected Y or N in %s line %d: %s"), + fname, lnum, items[2]); + + if (*items[0] == 'P' && aff->af_pfxpostpone) { + if (cur_aff->ah_newID == 0) { + // Use a new number in the .spl file later, to be able + // to handle multiple .aff files. + check_renumber(spin); + cur_aff->ah_newID = ++spin->si_newprefID; + + // We only really use ah_newID if the prefix is + // postponed. We know that only after handling all + // the items. + did_postpone_prefix = false; + } else + // Did use the ID in a previous block. 
+ did_postpone_prefix = true; + } + + aff_todo = atoi((char *)items[3]); + } else if ((STRCMP(items[0], "PFX") == 0 + || STRCMP(items[0], "SFX") == 0) + && aff_todo > 0 + && STRCMP(cur_aff->ah_key, items[1]) == 0 + && itemcnt >= 5) { + affentry_T *aff_entry; + bool upper = false; + int lasti = 5; + + // Myspell allows extra text after the item, but that might + // mean mistakes go unnoticed. Require a comment-starter. + // Hunspell uses a "-" item. + if (itemcnt > lasti && *items[lasti] != '#' + && (STRCMP(items[lasti], "-") != 0 + || itemcnt != lasti + 1)) + smsg(_(e_afftrailing), fname, lnum, items[lasti]); + + // New item for an affix letter. + --aff_todo; + aff_entry = (affentry_T *)getroom(spin, + sizeof(affentry_T), true); + if (aff_entry == NULL) + break; + + if (STRCMP(items[2], "0") != 0) + aff_entry->ae_chop = getroom_save(spin, items[2]); + if (STRCMP(items[3], "0") != 0) { + aff_entry->ae_add = getroom_save(spin, items[3]); + + // Recognize flags on the affix: abcd/XYZ + aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); + if (aff_entry->ae_flags != NULL) { + *aff_entry->ae_flags++ = NUL; + aff_process_flags(aff, aff_entry); + } + } + + // Don't use an affix entry with non-ASCII characters when + // "spin->si_ascii" is true. + if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) + || has_non_ascii(aff_entry->ae_add))) { + aff_entry->ae_next = cur_aff->ah_first; + cur_aff->ah_first = aff_entry; + + if (STRCMP(items[4], ".") != 0) { + char_u buf[MAXLINELEN]; + + aff_entry->ae_cond = getroom_save(spin, items[4]); + if (*items[0] == 'P') + sprintf((char *)buf, "^%s", items[4]); + else + sprintf((char *)buf, "%s$", items[4]); + aff_entry->ae_prog = vim_regcomp(buf, + RE_MAGIC + RE_STRING + RE_STRICT); + if (aff_entry->ae_prog == NULL) + smsg(_("Broken condition in %s line %d: %s"), + fname, lnum, items[4]); + } + + // For postponed prefixes we need an entry in si_prefcond + // for the condition. Use an existing one if possible. 
+ // Can't be done for an affix with flags, ignoring + // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. + if (*items[0] == 'P' && aff->af_pfxpostpone + && aff_entry->ae_flags == NULL) { + // When the chop string is one lower-case letter and + // the add string ends in the upper-case letter we set + // the "upper" flag, clear "ae_chop" and remove the + // letters from "ae_add". The condition must either + // be empty or start with the same letter. + if (aff_entry->ae_chop != NULL + && aff_entry->ae_add != NULL + && aff_entry->ae_chop[(*mb_ptr2len)( + aff_entry->ae_chop)] == NUL + ) { + int c, c_up; + + c = PTR2CHAR(aff_entry->ae_chop); + c_up = SPELL_TOUPPER(c); + if (c_up != c + && (aff_entry->ae_cond == NULL + || PTR2CHAR(aff_entry->ae_cond) == c)) { + p = aff_entry->ae_add + + STRLEN(aff_entry->ae_add); + mb_ptr_back(aff_entry->ae_add, p); + if (PTR2CHAR(p) == c_up) { + upper = true; + aff_entry->ae_chop = NULL; + *p = NUL; + + // The condition is matched with the + // actual word, thus must check for the + // upper-case letter. + if (aff_entry->ae_cond != NULL) { + char_u buf[MAXLINELEN]; + if (has_mbyte) { + onecap_copy(items[4], buf, true); + aff_entry->ae_cond = getroom_save( + spin, buf); + } else + *aff_entry->ae_cond = c_up; + if (aff_entry->ae_cond != NULL) { + sprintf((char *)buf, "^%s", + aff_entry->ae_cond); + vim_regfree(aff_entry->ae_prog); + aff_entry->ae_prog = vim_regcomp( + buf, RE_MAGIC + RE_STRING); + } + } + } + } + } + + if (aff_entry->ae_chop == NULL + && aff_entry->ae_flags == NULL) { + int idx; + char_u **pp; + int n; + + // Find a previously used condition. + for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; + --idx) { + p = ((char_u **)spin->si_prefcond.ga_data)[idx]; + if (str_equal(p, aff_entry->ae_cond)) + break; + } + if (idx < 0) { + // Not found, add a new condition. + idx = spin->si_prefcond.ga_len; + pp = GA_APPEND_VIA_PTR(char_u *, &spin->si_prefcond); + *pp = (aff_entry->ae_cond == NULL) ? 
+ NULL : getroom_save(spin, aff_entry->ae_cond); + } + + // Add the prefix to the prefix tree. + if (aff_entry->ae_add == NULL) + p = (char_u *)""; + else + p = aff_entry->ae_add; + + // PFX_FLAGS is a negative number, so that + // tree_add_word() knows this is the prefix tree. + n = PFX_FLAGS; + if (!cur_aff->ah_combine) + n |= WFP_NC; + if (upper) + n |= WFP_UP; + if (aff_entry->ae_comppermit) + n |= WFP_COMPPERMIT; + if (aff_entry->ae_compforbid) + n |= WFP_COMPFORBID; + tree_add_word(spin, p, spin->si_prefroot, n, + idx, cur_aff->ah_newID); + did_postpone_prefix = true; + } + + // Didn't actually use ah_newID, backup si_newprefID. + if (aff_todo == 0 && !did_postpone_prefix) { + --spin->si_newprefID; + cur_aff->ah_newID = 0; + } + } + } + } else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) { + fol = vim_strsave(items[1]); + } else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) { + low = vim_strsave(items[1]); + } else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) { + upp = vim_strsave(items[1]); + } else if (is_aff_rule(items, itemcnt, "REP", 2) + || is_aff_rule(items, itemcnt, "REPSAL", 2)) { + /* Ignore REP/REPSAL count */; + if (!isdigit(*items[1])) + smsg(_("Expected REP(SAL) count in %s line %d"), + fname, lnum); + } else if ((STRCMP(items[0], "REP") == 0 + || STRCMP(items[0], "REPSAL") == 0) + && itemcnt >= 3) { + // REP/REPSAL item + // Myspell ignores extra arguments, we require it starts with + // # to detect mistakes. + if (itemcnt > 3 && items[3][0] != '#') + smsg(_(e_afftrailing), fname, lnum, items[3]); + if (items[0][3] == 'S' ? do_repsal : do_rep) { + // Replace underscore with space (can't include a space + // directly). + for (p = items[1]; *p != NUL; mb_ptr_adv(p)) + if (*p == '_') + *p = ' '; + for (p = items[2]; *p != NUL; mb_ptr_adv(p)) + if (*p == '_') + *p = ' '; + add_fromto(spin, items[0][3] == 'S' + ? 
&spin->si_repsal + : &spin->si_rep, items[1], items[2]); + } + } else if (is_aff_rule(items, itemcnt, "MAP", 2)) { + // MAP item or count + if (!found_map) { + // First line contains the count. + found_map = true; + if (!isdigit(*items[1])) + smsg(_("Expected MAP count in %s line %d"), + fname, lnum); + } else if (do_mapline) { + int c; + + // Check that every character appears only once. + for (p = items[1]; *p != NUL; ) { + c = mb_ptr2char_adv(&p); + if ((!GA_EMPTY(&spin->si_map) + && vim_strchr(spin->si_map.ga_data, c) + != NULL) + || vim_strchr(p, c) != NULL) + smsg(_("Duplicate character in MAP in %s line %d"), + fname, lnum); + } + + // We simply concatenate all the MAP strings, separated by + // slashes. + ga_concat(&spin->si_map, items[1]); + ga_append(&spin->si_map, '/'); + } + } + // Accept "SAL from to" and "SAL from to #comment". + else if (is_aff_rule(items, itemcnt, "SAL", 3)) { + if (do_sal) { + // SAL item (sounds-a-like) + // Either one of the known keys or a from-to pair. + if (STRCMP(items[1], "followup") == 0) + spin->si_followup = sal_to_bool(items[2]); + else if (STRCMP(items[1], "collapse_result") == 0) + spin->si_collapse = sal_to_bool(items[2]); + else if (STRCMP(items[1], "remove_accents") == 0) + spin->si_rem_accents = sal_to_bool(items[2]); + else + // when "to" is "_" it means empty + add_fromto(spin, &spin->si_sal, items[1], + STRCMP(items[2], "_") == 0 ? 
(char_u *)"" + : items[2]); + } + } else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) + && sofofrom == NULL) { + sofofrom = getroom_save(spin, items[1]); + } else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) + && sofoto == NULL) { + sofoto = getroom_save(spin, items[1]); + } else if (STRCMP(items[0], "COMMON") == 0) { + int i; + + for (i = 1; i < itemcnt; ++i) { + if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, + items[i]))) { + p = vim_strsave(items[i]); + hash_add(&spin->si_commonwords, p); + } + } + } else + smsg(_("Unrecognized or duplicate item in %s line %d: %s"), + fname, lnum, items[0]); + } + } + + if (fol != NULL || low != NULL || upp != NULL) { + if (spin->si_clear_chartab) { + // Clear the char type tables, don't want to use any of the + // currently used spell properties. + init_spell_chartab(); + spin->si_clear_chartab = false; + } + + // Don't write a word table for an ASCII file, so that we don't check + // for conflicts with a word table that matches 'encoding'. + // Don't write one for utf-8 either, we use utf_*() and + // mb_get_class(), the list of chars in the file will be incomplete. + if (!spin->si_ascii + && !enc_utf8 + ) { + if (fol == NULL || low == NULL || upp == NULL) + smsg(_("Missing FOL/LOW/UPP line in %s"), fname); + else + (void)set_spell_chartab(fol, low, upp); + } + + xfree(fol); + xfree(low); + xfree(upp); + } + + // Use compound specifications of the .aff file for the spell info. 
+ if (compmax != 0) { + aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); + spin->si_compmax = compmax; + } + + if (compminlen != 0) { + aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); + spin->si_compminlen = compminlen; + } + + if (compsylmax != 0) { + if (syllable == NULL) + smsg(_("COMPOUNDSYLMAX used without SYLLABLE")); + aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); + spin->si_compsylmax = compsylmax; + } + + if (compoptions != 0) { + aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); + spin->si_compoptions |= compoptions; + } + + if (compflags != NULL) + process_compflags(spin, aff, compflags); + + // Check that we didn't use too many renumbered flags. + if (spin->si_newcompID < spin->si_newprefID) { + if (spin->si_newcompID == 127 || spin->si_newcompID == 255) + MSG(_("Too many postponed prefixes")); + else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) + MSG(_("Too many compound flags")); + else + MSG(_("Too many postponed prefixes and/or compound flags")); + } + + if (syllable != NULL) { + aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); + spin->si_syllable = syllable; + } + + if (sofofrom != NULL || sofoto != NULL) { + if (sofofrom == NULL || sofoto == NULL) + smsg(_("Missing SOFO%s line in %s"), + sofofrom == NULL ? "FROM" : "TO", fname); + else if (!GA_EMPTY(&spin->si_sal)) + smsg(_("Both SAL and SOFO lines in %s"), fname); + else { + aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); + aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); + spin->si_sofofr = sofofrom; + spin->si_sofoto = sofoto; + } + } + + if (midword != NULL) { + aff_check_string(spin->si_midword, midword, "MIDWORD"); + spin->si_midword = midword; + } + + xfree(pc); + fclose(fd); + return aff; +} + +// Returns true when items[0] equals "rulename", there are "mincount" items or +// a comment is following after item "mincount". 
+static bool is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount) +{ + return STRCMP(items[0], rulename) == 0 + && (itemcnt == mincount + || (itemcnt > mincount && items[mincount][0] == '#')); +} + +// For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from +// ae_flags to ae_comppermit and ae_compforbid. +static void aff_process_flags(afffile_T *affile, affentry_T *entry) +{ + char_u *p; + char_u *prevp; + unsigned flag; + + if (entry->ae_flags != NULL + && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) { + for (p = entry->ae_flags; *p != NUL; ) { + prevp = p; + flag = get_affitem(affile->af_flagtype, &p); + if (flag == affile->af_comppermit || flag == affile->af_compforbid) { + STRMOVE(prevp, p); + p = prevp; + if (flag == affile->af_comppermit) + entry->ae_comppermit = true; + else + entry->ae_compforbid = true; + } + if (affile->af_flagtype == AFT_NUM && *p == ',') + ++p; + } + if (*entry->ae_flags == NUL) + entry->ae_flags = NULL; // nothing left + } +} + +// Returns true if "s" is the name of an info item in the affix file. +static bool spell_info_item(char_u *s) +{ + return STRCMP(s, "NAME") == 0 + || STRCMP(s, "HOME") == 0 + || STRCMP(s, "VERSION") == 0 + || STRCMP(s, "AUTHOR") == 0 + || STRCMP(s, "EMAIL") == 0 + || STRCMP(s, "COPYRIGHT") == 0; +} + +// Turn an affix flag name into a number, according to the FLAG type. +// returns zero for failure. +static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum) +{ + unsigned res; + char_u *p = item; + + res = get_affitem(flagtype, &p); + if (res == 0) { + if (flagtype == AFT_NUM) + smsg(_("Flag is not a number in %s line %d: %s"), + fname, lnum, item); + else + smsg(_("Illegal flag in %s line %d: %s"), + fname, lnum, item); + } + if (*p != NUL) { + smsg(_(e_affname), fname, lnum, item); + return 0; + } + + return res; +} + +// Get one affix name from "*pp" and advance the pointer. +// Returns zero for an error, still advances the pointer then. 
+static unsigned get_affitem(int flagtype, char_u **pp) +{ + int res; + + if (flagtype == AFT_NUM) { + if (!ascii_isdigit(**pp)) { + ++*pp; // always advance, avoid getting stuck + return 0; + } + res = getdigits_int(pp); + } else { + res = mb_ptr2char_adv(pp); + if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG + && res >= 'A' && res <= 'Z')) { + if (**pp == NUL) + return 0; + res = mb_ptr2char_adv(pp) + (res << 16); + } + } + return res; +} + +// Process the "compflags" string used in an affix file and append it to +// spin->si_compflags. +// The processing involves changing the affix names to ID numbers, so that +// they fit in one byte. +static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags) +{ + char_u *p; + char_u *prevp; + unsigned flag; + compitem_T *ci; + int id; + int len; + char_u *tp; + char_u key[AH_KEY_LEN]; + hashitem_T *hi; + + // Make room for the old and the new compflags, concatenated with a / in + // between. Processing it makes it shorter, but we don't know by how + // much, thus allocate the maximum. + len = (int)STRLEN(compflags) + 1; + if (spin->si_compflags != NULL) + len += (int)STRLEN(spin->si_compflags) + 1; + p = getroom(spin, len, false); + if (spin->si_compflags != NULL) { + STRCPY(p, spin->si_compflags); + STRCAT(p, "/"); + } + spin->si_compflags = p; + tp = p + STRLEN(p); + + for (p = compflags; *p != NUL; ) { + if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) + // Copy non-flag characters directly. + *tp++ = *p++; + else { + // First get the flag number, also checks validity. + prevp = p; + flag = get_affitem(aff->af_flagtype, &p); + if (flag != 0) { + // Find the flag in the hashtable. If it was used before, use + // the existing ID. Otherwise add a new entry. 
+ STRLCPY(key, prevp, p - prevp + 1); + hi = hash_find(&aff->af_comp, key); + if (!HASHITEM_EMPTY(hi)) + id = HI2CI(hi)->ci_newID; + else { + ci = (compitem_T *)getroom(spin, sizeof(compitem_T), true); + if (ci == NULL) + break; + STRCPY(ci->ci_key, key); + ci->ci_flag = flag; + // Avoid using a flag ID that has a special meaning in a + // regexp (also inside []). + do { + check_renumber(spin); + id = spin->si_newcompID--; + } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); + ci->ci_newID = id; + hash_add(&aff->af_comp, ci->ci_key); + } + *tp++ = id; + } + if (aff->af_flagtype == AFT_NUM && *p == ',') + ++p; + } + } + + *tp = NUL; +} + +// Check that the new IDs for postponed affixes and compounding don't overrun +// each other. We have almost 255 available, but start at 0-127 to avoid +// using two bytes for utf-8. When the 0-127 range is used up go to 128-255. +// When that is used up an error message is given. +static void check_renumber(spellinfo_T *spin) +{ + if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) { + spin->si_newprefID = 127; + spin->si_newcompID = 255; + } +} + +// Returns true if flag "flag" appears in affix list "afflist". +static bool flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) +{ + char_u *p; + unsigned n; + + switch (flagtype) { + case AFT_CHAR: + return vim_strchr(afflist, flag) != NULL; + + case AFT_CAPLONG: + case AFT_LONG: + for (p = afflist; *p != NUL; ) { + n = mb_ptr2char_adv(&p); + if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) + && *p != NUL) + n = mb_ptr2char_adv(&p) + (n << 16); + if (n == flag) + return true; + } + break; + + case AFT_NUM: + for (p = afflist; *p != NUL; ) { + int digits = getdigits_int(&p); + assert(digits >= 0); + n = (unsigned int)digits; + if (n == flag) + return true; + if (*p != NUL) // skip over comma + ++p; + } + break; + } + return false; +} + +// Give a warning when "spinval" and "affval" numbers are set and not the same. 
+static void aff_check_number(int spinval, int affval, char *name) +{ + if (spinval != 0 && spinval != affval) + smsg(_("%s value differs from what is used in another .aff file"), + name); +} + +// Give a warning when "spinval" and "affval" strings are set and not the same. +static void aff_check_string(char_u *spinval, char_u *affval, char *name) +{ + if (spinval != NULL && STRCMP(spinval, affval) != 0) + smsg(_("%s value differs from what is used in another .aff file"), + name); +} + +// Returns true if strings "s1" and "s2" are equal. Also consider both being +// NULL as equal. +static bool str_equal(char_u *s1, char_u *s2) +{ + if (s1 == NULL || s2 == NULL) + return s1 == s2; + return STRCMP(s1, s2) == 0; +} + +// Add a from-to item to "gap". Used for REP and SAL items. +// They are stored case-folded. +static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to) +{ + char_u word[MAXWLEN]; + + fromto_T *ftp = GA_APPEND_VIA_PTR(fromto_T, gap); + (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); + ftp->ft_from = getroom_save(spin, word); + (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); + ftp->ft_to = getroom_save(spin, word); +} + +// Converts a boolean argument in a SAL line to true or false; +static bool sal_to_bool(char_u *s) +{ + return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; +} + +// Free the structure filled by spell_read_aff(). +static void spell_free_aff(afffile_T *aff) +{ + hashtab_T *ht; + hashitem_T *hi; + int todo; + affheader_T *ah; + affentry_T *ae; + + xfree(aff->af_enc); + + // All this trouble to free the "ae_prog" items... 
+ for (ht = &aff->af_pref;; ht = &aff->af_suff) { + todo = (int)ht->ht_used; + for (hi = ht->ht_array; todo > 0; ++hi) { + if (!HASHITEM_EMPTY(hi)) { + --todo; + ah = HI2AH(hi); + for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) + vim_regfree(ae->ae_prog); + } + } + if (ht == &aff->af_suff) + break; + } + + hash_clear(&aff->af_pref); + hash_clear(&aff->af_suff); + hash_clear(&aff->af_comp); +} + +// Read dictionary file "fname". +// Returns OK or FAIL; +static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) +{ + hashtab_T ht; + char_u line[MAXLINELEN]; + char_u *p; + char_u *afflist; + char_u store_afflist[MAXWLEN]; + int pfxlen; + bool need_affix; + char_u *dw; + char_u *pc; + char_u *w; + int l; + hash_T hash; + hashitem_T *hi; + FILE *fd; + int lnum = 1; + int non_ascii = 0; + int retval = OK; + char_u message[MAXLINELEN + MAXWLEN]; + int flags; + int duplicate = 0; + + // Open the file. + fd = mch_fopen((char *)fname, "r"); + if (fd == NULL) { + EMSG2(_(e_notopen), fname); + return FAIL; + } + + // The hashtable is only used to detect duplicated words. + hash_init(&ht); + + vim_snprintf((char *)IObuff, IOSIZE, + _("Reading dictionary file %s ..."), fname); + spell_message(spin, IObuff); + + // start with a message for the first line + spin->si_msg_count = 999999; + + // Read and ignore the first line: word count. + (void)vim_fgets(line, MAXLINELEN, fd); + if (!ascii_isdigit(*skipwhite(line))) + EMSG2(_("E760: No word count in %s"), fname); + + // Read all the lines in the file one by one. + // The words are converted to 'encoding' here, before being added to + // the hashtable. + while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) { + line_breakcheck(); + ++lnum; + if (line[0] == '#' || line[0] == '/') + continue; // comment line + + // Remove CR, LF and white space from the end. White space halfway through + // the word is kept to allow multi-word terms like "et al.". 
+ l = (int)STRLEN(line); + while (l > 0 && line[l - 1] <= ' ') + --l; + if (l == 0) + continue; // empty line + line[l] = NUL; + + // Convert from "SET" to 'encoding' when needed. + if (spin->si_conv.vc_type != CONV_NONE) { + pc = string_convert(&spin->si_conv, line, NULL); + if (pc == NULL) { + smsg(_("Conversion failure for word in %s line %d: %s"), + fname, lnum, line); + continue; + } + w = pc; + } else { + pc = NULL; + w = line; + } + + // Truncate the word at the "/", set "afflist" to what follows. + // Replace "\/" by "/" and "\\" by "\". + afflist = NULL; + for (p = w; *p != NUL; mb_ptr_adv(p)) { + if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) + STRMOVE(p, p + 1); + else if (*p == '/') { + *p = NUL; + afflist = p + 1; + break; + } + } + + // Skip non-ASCII words when "spin->si_ascii" is true. + if (spin->si_ascii && has_non_ascii(w)) { + ++non_ascii; + xfree(pc); + continue; + } + + // This takes time, print a message every 10000 words. + if (spin->si_verbose && spin->si_msg_count > 10000) { + spin->si_msg_count = 0; + vim_snprintf((char *)message, sizeof(message), + _("line %6d, word %6d - %s"), + lnum, spin->si_foldwcount + spin->si_keepwcount, w); + msg_start(); + msg_puts_long_attr(message, 0); + msg_clr_eos(); + msg_didout = FALSE; + msg_col = 0; + ui_flush(); + } + + // Store the word in the hashtable to be able to find duplicates. + dw = getroom_save(spin, w); + if (dw == NULL) { + retval = FAIL; + xfree(pc); + break; + } + + hash = hash_hash(dw); + hi = hash_lookup(&ht, (const char *)dw, STRLEN(dw), hash); + if (!HASHITEM_EMPTY(hi)) { + if (p_verbose > 0) + smsg(_("Duplicate word in %s line %d: %s"), + fname, lnum, dw); + else if (duplicate == 0) + smsg(_("First duplicate word in %s line %d: %s"), + fname, lnum, dw); + ++duplicate; + } else + hash_add_item(&ht, hi, dw, hash); + + flags = 0; + store_afflist[0] = NUL; + pfxlen = 0; + need_affix = false; + if (afflist != NULL) { + // Extract flags from the affix list. 
+ flags |= get_affix_flags(affile, afflist); + + if (affile->af_needaffix != 0 && flag_in_afflist( + affile->af_flagtype, afflist, affile->af_needaffix)) + need_affix = true; + + if (affile->af_pfxpostpone) + // Need to store the list of prefix IDs with the word. + pfxlen = get_pfxlist(affile, afflist, store_afflist); + + if (spin->si_compflags != NULL) + // Need to store the list of compound flags with the word. + // Concatenate them to the list of prefix IDs. + get_compflags(affile, afflist, store_afflist + pfxlen); + } + + // Add the word to the word tree(s). + if (store_word(spin, dw, flags, spin->si_region, + store_afflist, need_affix) == FAIL) + retval = FAIL; + + if (afflist != NULL) { + // Find all matching suffixes and add the resulting words. + // Additionally do matching prefixes that combine. + if (store_aff_word(spin, dw, afflist, affile, + &affile->af_suff, &affile->af_pref, + CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) + retval = FAIL; + + // Find all matching prefixes and add the resulting words. + if (store_aff_word(spin, dw, afflist, affile, + &affile->af_pref, NULL, + CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) + retval = FAIL; + } + + xfree(pc); + } + + if (duplicate > 0) + smsg(_("%d duplicate word(s) in %s"), duplicate, fname); + if (spin->si_ascii && non_ascii > 0) + smsg(_("Ignored %d word(s) with non-ASCII characters in %s"), + non_ascii, fname); + hash_clear(&ht); + + fclose(fd); + return retval; +} + +// Check for affix flags in "afflist" that are turned into word flags. +// Return WF_ flags. 
+static int get_affix_flags(afffile_T *affile, char_u *afflist) +{ + int flags = 0; + + if (affile->af_keepcase != 0 && flag_in_afflist( + affile->af_flagtype, afflist, affile->af_keepcase)) + flags |= WF_KEEPCAP | WF_FIXCAP; + if (affile->af_rare != 0 && flag_in_afflist( + affile->af_flagtype, afflist, affile->af_rare)) + flags |= WF_RARE; + if (affile->af_bad != 0 && flag_in_afflist( + affile->af_flagtype, afflist, affile->af_bad)) + flags |= WF_BANNED; + if (affile->af_needcomp != 0 && flag_in_afflist( + affile->af_flagtype, afflist, affile->af_needcomp)) + flags |= WF_NEEDCOMP; + if (affile->af_comproot != 0 && flag_in_afflist( + affile->af_flagtype, afflist, affile->af_comproot)) + flags |= WF_COMPROOT; + if (affile->af_nosuggest != 0 && flag_in_afflist( + affile->af_flagtype, afflist, affile->af_nosuggest)) + flags |= WF_NOSUGGEST; + return flags; +} + +// Get the list of prefix IDs from the affix list "afflist". +// Used for PFXPOSTPONE. +// Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL +// and return the number of affixes. +static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist) +{ + char_u *p; + char_u *prevp; + int cnt = 0; + int id; + char_u key[AH_KEY_LEN]; + hashitem_T *hi; + + for (p = afflist; *p != NUL; ) { + prevp = p; + if (get_affitem(affile->af_flagtype, &p) != 0) { + // A flag is a postponed prefix flag if it appears in "af_pref" + // and it's ID is not zero. + STRLCPY(key, prevp, p - prevp + 1); + hi = hash_find(&affile->af_pref, key); + if (!HASHITEM_EMPTY(hi)) { + id = HI2AH(hi)->ah_newID; + if (id != 0) + store_afflist[cnt++] = id; + } + } + if (affile->af_flagtype == AFT_NUM && *p == ',') + ++p; + } + + store_afflist[cnt] = NUL; + return cnt; +} + +// Get the list of compound IDs from the affix list "afflist" that are used +// for compound words. +// Puts the flags in "store_afflist[]". 
+static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist) +{ + char_u *p; + char_u *prevp; + int cnt = 0; + char_u key[AH_KEY_LEN]; + hashitem_T *hi; + + for (p = afflist; *p != NUL; ) { + prevp = p; + if (get_affitem(affile->af_flagtype, &p) != 0) { + // A flag is a compound flag if it appears in "af_comp". + STRLCPY(key, prevp, p - prevp + 1); + hi = hash_find(&affile->af_comp, key); + if (!HASHITEM_EMPTY(hi)) + store_afflist[cnt++] = HI2CI(hi)->ci_newID; + } + if (affile->af_flagtype == AFT_NUM && *p == ',') + ++p; + } + + store_afflist[cnt] = NUL; +} + +// Apply affixes to a word and store the resulting words. +// "ht" is the hashtable with affentry_T that need to be applied, either +// prefixes or suffixes. +// "xht", when not NULL, is the prefix hashtable, to be used additionally on +// the resulting words for combining affixes. +// +// Returns FAIL when out of memory. +static int +store_aff_word ( + spellinfo_T *spin, // spell info + char_u *word, // basic word start + char_u *afflist, // list of names of supported affixes + afffile_T *affile, + hashtab_T *ht, + hashtab_T *xht, + int condit, // CONDIT_SUF et al. + int flags, // flags for the word + char_u *pfxlist, // list of prefix IDs + int pfxlen // nr of flags in "pfxlist" for prefixes, rest + // is compound flags +) +{ + int todo; + hashitem_T *hi; + affheader_T *ah; + affentry_T *ae; + char_u newword[MAXWLEN]; + int retval = OK; + int i, j; + char_u *p; + int use_flags; + char_u *use_pfxlist; + int use_pfxlen; + bool need_affix; + char_u store_afflist[MAXWLEN]; + char_u pfx_pfxlist[MAXWLEN]; + size_t wordlen = STRLEN(word); + int use_condit; + + todo = (int)ht->ht_used; + for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) { + if (!HASHITEM_EMPTY(hi)) { + --todo; + ah = HI2AH(hi); + + // Check that the affix combines, if required, and that the word + // supports this affix. 
+ if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) + && flag_in_afflist(affile->af_flagtype, afflist, + ah->ah_flag)) { + // Loop over all affix entries with this name. + for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) { + // Check the condition. It's not logical to match case + // here, but it is required for compatibility with + // Myspell. + // Another requirement from Myspell is that the chop + // string is shorter than the word itself. + // For prefixes, when "PFXPOSTPONE" was used, only do + // prefixes with a chop string and/or flags. + // When a previously added affix had CIRCUMFIX this one + // must have it too, if it had not then this one must not + // have one either. + if ((xht != NULL || !affile->af_pfxpostpone + || ae->ae_chop != NULL + || ae->ae_flags != NULL) + && (ae->ae_chop == NULL + || STRLEN(ae->ae_chop) < wordlen) + && (ae->ae_prog == NULL + || vim_regexec_prog(&ae->ae_prog, false, word, (colnr_T)0)) + && (((condit & CONDIT_CFIX) == 0) + == ((condit & CONDIT_AFF) == 0 + || ae->ae_flags == NULL + || !flag_in_afflist(affile->af_flagtype, + ae->ae_flags, affile->af_circumfix)))) { + // Match. Remove the chop and add the affix. + if (xht == NULL) { + // prefix: chop/add at the start of the word + if (ae->ae_add == NULL) + *newword = NUL; + else + STRLCPY(newword, ae->ae_add, MAXWLEN); + p = word; + if (ae->ae_chop != NULL) { + // Skip chop string. + if (has_mbyte) { + i = mb_charlen(ae->ae_chop); + for (; i > 0; --i) + mb_ptr_adv(p); + } else + p += STRLEN(ae->ae_chop); + } + STRCAT(newword, p); + } else { + // suffix: chop/add at the end of the word + STRLCPY(newword, word, MAXWLEN); + if (ae->ae_chop != NULL) { + // Remove chop string. 
+ p = newword + STRLEN(newword); + i = (int)MB_CHARLEN(ae->ae_chop); + for (; i > 0; --i) + mb_ptr_back(newword, p); + *p = NUL; + } + if (ae->ae_add != NULL) + STRCAT(newword, ae->ae_add); + } + + use_flags = flags; + use_pfxlist = pfxlist; + use_pfxlen = pfxlen; + need_affix = false; + use_condit = condit | CONDIT_COMB | CONDIT_AFF; + if (ae->ae_flags != NULL) { + // Extract flags from the affix list. + use_flags |= get_affix_flags(affile, ae->ae_flags); + + if (affile->af_needaffix != 0 && flag_in_afflist( + affile->af_flagtype, ae->ae_flags, + affile->af_needaffix)) + need_affix = true; + + // When there is a CIRCUMFIX flag the other affix + // must also have it and we don't add the word + // with one affix. + if (affile->af_circumfix != 0 && flag_in_afflist( + affile->af_flagtype, ae->ae_flags, + affile->af_circumfix)) { + use_condit |= CONDIT_CFIX; + if ((condit & CONDIT_CFIX) == 0) + need_affix = true; + } + + if (affile->af_pfxpostpone + || spin->si_compflags != NULL) { + if (affile->af_pfxpostpone) + // Get prefix IDS from the affix list. + use_pfxlen = get_pfxlist(affile, + ae->ae_flags, store_afflist); + else + use_pfxlen = 0; + use_pfxlist = store_afflist; + + // Combine the prefix IDs. Avoid adding the + // same ID twice. + for (i = 0; i < pfxlen; ++i) { + for (j = 0; j < use_pfxlen; ++j) + if (pfxlist[i] == use_pfxlist[j]) + break; + if (j == use_pfxlen) + use_pfxlist[use_pfxlen++] = pfxlist[i]; + } + + if (spin->si_compflags != NULL) + // Get compound IDS from the affix list. + get_compflags(affile, ae->ae_flags, + use_pfxlist + use_pfxlen); + else + use_pfxlist[use_pfxlen] = NUL; + + // Combine the list of compound flags. + // Concatenate them to the prefix IDs list. + // Avoid adding the same ID twice. 
+ for (i = pfxlen; pfxlist[i] != NUL; ++i) { + for (j = use_pfxlen; + use_pfxlist[j] != NUL; ++j) + if (pfxlist[i] == use_pfxlist[j]) + break; + if (use_pfxlist[j] == NUL) { + use_pfxlist[j++] = pfxlist[i]; + use_pfxlist[j] = NUL; + } + } + } + } + + // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't + // use the compound flags. + if (use_pfxlist != NULL && ae->ae_compforbid) { + STRLCPY(pfx_pfxlist, use_pfxlist, use_pfxlen + 1); + use_pfxlist = pfx_pfxlist; + } + + // When there are postponed prefixes... + if (spin->si_prefroot != NULL + && spin->si_prefroot->wn_sibling != NULL) { + // ... add a flag to indicate an affix was used. + use_flags |= WF_HAS_AFF; + + // ... don't use a prefix list if combining + // affixes is not allowed. But do use the + // compound flags after them. + if (!ah->ah_combine && use_pfxlist != NULL) + use_pfxlist += use_pfxlen; + } + + // When compounding is supported and there is no + // "COMPOUNDPERMITFLAG" then forbid compounding on the + // side where the affix is applied. + if (spin->si_compflags != NULL && !ae->ae_comppermit) { + if (xht != NULL) + use_flags |= WF_NOCOMPAFT; + else + use_flags |= WF_NOCOMPBEF; + } + + // Store the modified word. + if (store_word(spin, newword, use_flags, + spin->si_region, use_pfxlist, + need_affix) == FAIL) + retval = FAIL; + + // When added a prefix or a first suffix and the affix + // has flags may add a(nother) suffix. RECURSIVE! + if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) + if (store_aff_word(spin, newword, ae->ae_flags, + affile, &affile->af_suff, xht, + use_condit & (xht == NULL + ? ~0 : ~CONDIT_SUF), + use_flags, use_pfxlist, pfxlen) == FAIL) + retval = FAIL; + + // When added a suffix and combining is allowed also + // try adding a prefix additionally. Both for the + // word flags and for the affix flags. RECURSIVE! 
+ if (xht != NULL && ah->ah_combine) { + if (store_aff_word(spin, newword, + afflist, affile, + xht, NULL, use_condit, + use_flags, use_pfxlist, + pfxlen) == FAIL + || (ae->ae_flags != NULL + && store_aff_word(spin, newword, + ae->ae_flags, affile, + xht, NULL, use_condit, + use_flags, use_pfxlist, + pfxlen) == FAIL)) + retval = FAIL; + } + } + } + } + } + } + + return retval; +} + +// Read a file with a list of words. +static int spell_read_wordfile(spellinfo_T *spin, char_u *fname) +{ + FILE *fd; + long lnum = 0; + char_u rline[MAXLINELEN]; + char_u *line; + char_u *pc = NULL; + char_u *p; + int l; + int retval = OK; + bool did_word = false; + int non_ascii = 0; + int flags; + int regionmask; + + // Open the file. + fd = mch_fopen((char *)fname, "r"); + if (fd == NULL) { + EMSG2(_(e_notopen), fname); + return FAIL; + } + + vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); + spell_message(spin, IObuff); + + // Read all the lines in the file one by one. + while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { + line_breakcheck(); + ++lnum; + + // Skip comment lines. + if (*rline == '#') + continue; + + // Remove CR, LF and white space from the end. + l = (int)STRLEN(rline); + while (l > 0 && rline[l - 1] <= ' ') + --l; + if (l == 0) + continue; // empty or blank line + rline[l] = NUL; + + // Convert from "/encoding={encoding}" to 'encoding' when needed. 
+ xfree(pc); + if (spin->si_conv.vc_type != CONV_NONE) { + pc = string_convert(&spin->si_conv, rline, NULL); + if (pc == NULL) { + smsg(_("Conversion failure for word in %s line %d: %s"), + fname, lnum, rline); + continue; + } + line = pc; + } else { + pc = NULL; + line = rline; + } + + if (*line == '/') { + ++line; + if (STRNCMP(line, "encoding=", 9) == 0) { + if (spin->si_conv.vc_type != CONV_NONE) + smsg(_("Duplicate /encoding= line ignored in %s line %d: %s"), + fname, lnum, line - 1); + else if (did_word) + smsg(_("/encoding= line after word ignored in %s line %d: %s"), + fname, lnum, line - 1); + else { + char_u *enc; + + // Setup for conversion to 'encoding'. + line += 9; + enc = enc_canonize(line); + if (!spin->si_ascii + && convert_setup(&spin->si_conv, enc, + p_enc) == FAIL) + smsg(_("Conversion in %s not supported: from %s to %s"), + fname, line, p_enc); + xfree(enc); + spin->si_conv.vc_fail = true; + } + continue; + } + + if (STRNCMP(line, "regions=", 8) == 0) { + if (spin->si_region_count > 1) + smsg(_("Duplicate /regions= line ignored in %s line %d: %s"), + fname, lnum, line); + else { + line += 8; + if (STRLEN(line) > 16) + smsg(_("Too many regions in %s line %d: %s"), + fname, lnum, line); + else { + spin->si_region_count = (int)STRLEN(line) / 2; + STRCPY(spin->si_region_name, line); + + // Adjust the mask for a word valid in all regions. + spin->si_region = (1 << spin->si_region_count) - 1; + } + } + continue; + } + + smsg(_("/ line ignored in %s line %d: %s"), + fname, lnum, line - 1); + continue; + } + + flags = 0; + regionmask = spin->si_region; + + // Check for flags and region after a slash. + p = vim_strchr(line, '/'); + if (p != NULL) { + *p++ = NUL; + while (*p != NUL) { + if (*p == '=') // keep-case word + flags |= WF_KEEPCAP | WF_FIXCAP; + else if (*p == '!') // Bad, bad, wicked word. + flags |= WF_BANNED; + else if (*p == '?') // Rare word. 
+ flags |= WF_RARE; + else if (ascii_isdigit(*p)) { // region number(s) + if ((flags & WF_REGION) == 0) // first one + regionmask = 0; + flags |= WF_REGION; + + l = *p - '0'; + if (l > spin->si_region_count) { + smsg(_("Invalid region nr in %s line %d: %s"), + fname, lnum, p); + break; + } + regionmask |= 1 << (l - 1); + } else { + smsg(_("Unrecognized flags in %s line %d: %s"), + fname, lnum, p); + break; + } + ++p; + } + } + + // Skip non-ASCII words when "spin->si_ascii" is true. + if (spin->si_ascii && has_non_ascii(line)) { + ++non_ascii; + continue; + } + + // Normal word: store it. + if (store_word(spin, line, flags, regionmask, NULL, false) == FAIL) { + retval = FAIL; + break; + } + did_word = true; + } + + xfree(pc); + fclose(fd); + + if (spin->si_ascii && non_ascii > 0) { + vim_snprintf((char *)IObuff, IOSIZE, + _("Ignored %d words with non-ASCII characters"), non_ascii); + spell_message(spin, IObuff); + } + + return retval; +} + +/// Get part of an sblock_T, "len" bytes long. +/// This avoids calling free() for every little struct we use (and keeping +/// track of them). +/// The memory is cleared to all zeros. +/// +/// @param len Length needed (<= SBLOCKSIZE). +/// @param align Align for pointer. +/// @return Pointer into block data. +static void *getroom(spellinfo_T *spin, size_t len, bool align) + FUNC_ATTR_NONNULL_RET +{ + char_u *p; + sblock_T *bl = spin->si_blocks; + + assert(len <= SBLOCKSIZE); + + if (align && bl != NULL) + // Round size up for alignment. On some systems structures need to be + // aligned to the size of a pointer (e.g., SPARC). + bl->sb_used = (bl->sb_used + sizeof(char *) - 1) + & ~(sizeof(char *) - 1); + + if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) { + // Allocate a block of memory. It is not freed until much later. 
+ bl = xcalloc(1, (sizeof(sblock_T) + SBLOCKSIZE)); + bl->sb_next = spin->si_blocks; + spin->si_blocks = bl; + bl->sb_used = 0; + ++spin->si_blocks_cnt; + } + + p = bl->sb_data + bl->sb_used; + bl->sb_used += (int)len; + + return p; +} + +// Make a copy of a string into memory allocated with getroom(). +// Returns NULL when out of memory. +static char_u *getroom_save(spellinfo_T *spin, char_u *s) +{ + char_u *sc; + + sc = (char_u *)getroom(spin, STRLEN(s) + 1, false); + if (sc != NULL) + STRCPY(sc, s); + return sc; +} + + +// Free the list of allocated sblock_T. +static void free_blocks(sblock_T *bl) +{ + sblock_T *next; + + while (bl != NULL) { + next = bl->sb_next; + xfree(bl); + bl = next; + } +} + +// Allocate the root of a word tree. +// Returns NULL when out of memory. +static wordnode_T *wordtree_alloc(spellinfo_T *spin) +{ + return (wordnode_T *)getroom(spin, sizeof(wordnode_T), true); +} + +// Store a word in the tree(s). +// Always store it in the case-folded tree. For a keep-case word this is +// useful when the word can also be used with all caps (no WF_FIXCAP flag) and +// used to find suggestions. +// For a keep-case word also store it in the keep-case tree. +// When "pfxlist" is not NULL store the word for each postponed prefix ID and +// compound flag. +static int +store_word ( + spellinfo_T *spin, + char_u *word, + int flags, // extra flags, WF_BANNED + int region, // supported region(s) + char_u *pfxlist, // list of prefix IDs or NULL + bool need_affix // only store word with affix ID +) +{ + int len = (int)STRLEN(word); + int ct = captype(word, word + len); + char_u foldword[MAXWLEN]; + int res = OK; + char_u *p; + + (void)spell_casefold(word, len, foldword, MAXWLEN); + for (p = pfxlist; res == OK; ++p) { + if (!need_affix || (p != NULL && *p != NUL)) + res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, + region, p == NULL ? 
0 : *p); + if (p == NULL || *p == NUL) + break; + } + ++spin->si_foldwcount; + + if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) { + for (p = pfxlist; res == OK; ++p) { + if (!need_affix || (p != NULL && *p != NUL)) + res = tree_add_word(spin, word, spin->si_keeproot, flags, + region, p == NULL ? 0 : *p); + if (p == NULL || *p == NUL) + break; + } + ++spin->si_keepwcount; + } + return res; +} + +// Add word "word" to a word tree at "root". +// When "flags" < 0 we are adding to the prefix tree where "flags" is used for +// "rare" and "region" is the condition nr. +// Returns FAIL when out of memory. +static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int flags, int region, int affixID) +{ + wordnode_T *node = root; + wordnode_T *np; + wordnode_T *copyp, **copyprev; + wordnode_T **prev = NULL; + int i; + + // Add each byte of the word to the tree, including the NUL at the end. + for (i = 0;; ++i) { + // When there is more than one reference to this node we need to make + // a copy, so that we can modify it. Copy the whole list of siblings + // (we don't optimize for a partly shared list of siblings). + if (node != NULL && node->wn_refs > 1) { + --node->wn_refs; + copyprev = prev; + for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) { + // Allocate a new node and copy the info. + np = get_wordnode(spin); + if (np == NULL) + return FAIL; + np->wn_child = copyp->wn_child; + if (np->wn_child != NULL) + ++np->wn_child->wn_refs; // child gets extra ref + np->wn_byte = copyp->wn_byte; + if (np->wn_byte == NUL) { + np->wn_flags = copyp->wn_flags; + np->wn_region = copyp->wn_region; + np->wn_affixID = copyp->wn_affixID; + } + + // Link the new node in the list, there will be one ref. + np->wn_refs = 1; + if (copyprev != NULL) + *copyprev = np; + copyprev = &np->wn_sibling; + + // Let "node" point to the head of the copied list. + if (copyp == node) + node = np; + } + } + + // Look for the sibling that has the same character. 
They are sorted + // on byte value, thus stop searching when a sibling is found with a + // higher byte value. For zero bytes (end of word) the sorting is + // done on flags and then on affixID. + while (node != NULL + && (node->wn_byte < word[i] + || (node->wn_byte == NUL + && (flags < 0 + ? node->wn_affixID < (unsigned)affixID + : (node->wn_flags < (unsigned)(flags & WN_MASK) + || (node->wn_flags == (flags & WN_MASK) + && (spin->si_sugtree + ? (node->wn_region & 0xffff) < region + : node->wn_affixID + < (unsigned)affixID))))))) { + prev = &node->wn_sibling; + node = *prev; + } + if (node == NULL + || node->wn_byte != word[i] + || (word[i] == NUL + && (flags < 0 + || spin->si_sugtree + || node->wn_flags != (flags & WN_MASK) + || node->wn_affixID != affixID))) { + // Allocate a new node. + np = get_wordnode(spin); + if (np == NULL) + return FAIL; + np->wn_byte = word[i]; + + // If "node" is NULL this is a new child or the end of the sibling + // list: ref count is one. Otherwise use ref count of sibling and + // make ref count of sibling one (matters when inserting in front + // of the list of siblings). + if (node == NULL) + np->wn_refs = 1; + else { + np->wn_refs = node->wn_refs; + node->wn_refs = 1; + } + if (prev != NULL) + *prev = np; + np->wn_sibling = node; + node = np; + } + + if (word[i] == NUL) { + node->wn_flags = flags; + node->wn_region |= region; + node->wn_affixID = affixID; + break; + } + prev = &node->wn_child; + node = *prev; + } +#ifdef SPELL_PRINTTREE + smsg((char_u *)"Added \"%s\"", word); + spell_print_tree(root->wn_sibling); +#endif + + // count nr of words added since last message + ++spin->si_msg_count; + + if (spin->si_compress_cnt > 1) { + if (--spin->si_compress_cnt == 1) + // Did enough words to lower the block count limit. + spin->si_blocks_cnt += compress_inc; + } + + // When we have allocated lots of memory we need to compress the word tree + // to free up some room. 
But compression is slow, and we might actually + // need that room, thus only compress in the following situations: + // 1. When not compressed before (si_compress_cnt == 0): when using + // "compress_start" blocks. + // 2. When compressed before and used "compress_inc" blocks before + // adding "compress_added" words (si_compress_cnt > 1). + // 3. When compressed before, added "compress_added" words + // (si_compress_cnt == 1) and the number of free nodes drops below the + // maximum word length. +#ifndef SPELL_COMPRESS_ALLWAYS + if (spin->si_compress_cnt == 1 // NOLINT(readability/braces) + ? spin->si_free_count < MAXWLEN + : spin->si_blocks_cnt >= compress_start) +#endif + { + // Decrement the block counter. The effect is that we compress again + // when the freed up room has been used and another "compress_inc" + // blocks have been allocated. Unless "compress_added" words have + // been added, then the limit is put back again. + spin->si_blocks_cnt -= compress_inc; + spin->si_compress_cnt = compress_added; + + if (spin->si_verbose) { + msg_start(); + msg_puts(_(msg_compressing)); + msg_clr_eos(); + msg_didout = FALSE; + msg_col = 0; + ui_flush(); + } + + // Compress both trees. Either they both have many nodes, which makes + // compression useful, or one of them is small, which means + // compression goes fast. But when filling the soundfold word tree + // there is no keep-case tree. + wordtree_compress(spin, spin->si_foldroot); + if (affixID >= 0) + wordtree_compress(spin, spin->si_keeproot); + } + + return OK; +} + +// Get a wordnode_T, either from the list of previously freed nodes or +// allocate a new one. +// Returns NULL when out of memory. 
static wordnode_T *get_wordnode(spellinfo_T *spin)
{
  wordnode_T *n;

  if (spin->si_first_free == NULL)
    // The free list is empty: take room for a fresh node from getroom().
    n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), true);
  else {
    // Pop the head of the free list.  Free nodes are chained through
    // "wn_child" (see free_wordnode()), so the node is zeroed before re-use.
    n = spin->si_first_free;
    spin->si_first_free = n->wn_child;
    memset(n, 0, sizeof(wordnode_T));
    --spin->si_free_count;
  }
#ifdef SPELL_PRINTTREE
  if (n != NULL)
    n->wn_nr = ++spin->si_wordnode_nr;
#endif
  return n;
}

// Decrement the reference count on a node (which is the head of a list of
// siblings).  If the reference count becomes zero free the node and its
// siblings.
// Children are dereferenced recursively before their parent is freed.
// Returns the number of nodes actually freed.
static int deref_wordnode(spellinfo_T *spin, wordnode_T *node)
{
  wordnode_T *np;
  int cnt = 0;

  if (--node->wn_refs == 0) {
    // free_wordnode() only overwrites "wn_child", so "wn_sibling" can still
    // be followed after the node was put on the free list.
    for (np = node; np != NULL; np = np->wn_sibling) {
      if (np->wn_child != NULL)
        cnt += deref_wordnode(spin, np->wn_child);
      free_wordnode(spin, np);
      ++cnt;
    }
    ++cnt;              // length field
  }
  return cnt;
}

// Free a wordnode_T for re-use later.
// The node is pushed on the front of the "si_first_free" list, linked
// through "wn_child".
// Only the "wn_child" field becomes invalid.
static void free_wordnode(spellinfo_T *spin, wordnode_T *n)
{
  n->wn_child = spin->si_first_free;
  spin->si_first_free = n;
  ++spin->si_free_count;
}

// Compress a tree: find tails that are identical and can be shared.
// "root" is the unused root node; the tree proper starts at its first
// sibling.  Reports the result with spell_message() when verbose.
static void wordtree_compress(spellinfo_T *spin, wordnode_T *root)
{
  hashtab_T ht;
  int n;
  int tot = 0;
  int perc;

  // Skip the root itself, it's not actually used.  The first sibling is the
  // start of the tree.
  if (root->wn_sibling != NULL) {
    hash_init(&ht);
    n = node_compress(spin, root->wn_sibling, &ht, &tot);

#ifndef SPELL_PRINTTREE
    if (spin->si_verbose || p_verbose > 2)
#endif
    {
      // Percentage of nodes remaining.  For large totals the division is
      // done first -- presumably to keep "(tot - n) * 100" from overflowing
      // an int.
      if (tot > 1000000)
        perc = (tot - n) / (tot / 100);
      else if (tot == 0)
        perc = 0;
      else
        perc = (tot - n) * 100 / tot;
      vim_snprintf((char *)IObuff, IOSIZE,
                   _("Compressed %d of %d nodes; %d (%d%%) remaining"),
                   n, tot, tot - n, perc);
      spell_message(spin, IObuff);
    }
#ifdef SPELL_PRINTTREE
    spell_print_tree(root->wn_sibling);
#endif
    hash_clear(&ht);
  }
}

// Compress a node, its siblings and its children, depth first.
// "ht" maps the hash key of a sibling list to the lists seen so far with
// that key; lists with an equal key are chained through "wn_u2.next".
// Returns the number of compressed nodes.
static int
node_compress (
    spellinfo_T *spin,
    wordnode_T *node,
    hashtab_T *ht,
    int *tot                   // total count of nodes before compressing,
                               // incremented while going through the tree
)
{
  wordnode_T *np;
  wordnode_T *tp;
  wordnode_T *child;
  hash_T hash;
  hashitem_T *hi;
  int len = 0;
  unsigned nr, n;
  int compressed = 0;

  // Go through the list of siblings.  Compress each child and then try
  // finding an identical child to replace it.
  // Note that with "child" we mean not just the node that is pointed to,
  // but the whole list of siblings of which the child node is the first.
  // The loop stops early when "got_int" is set (interrupted).
  for (np = node; np != NULL && !got_int; np = np->wn_sibling) {
    ++len;
    if ((child = np->wn_child) != NULL) {
      // Compress the child first.  This fills hashkey.
      compressed += node_compress(spin, child, ht, tot);

      // Try to find an identical child.
      hash = hash_hash(child->wn_u1.hashkey);
      hi = hash_lookup(ht, (const char *)child->wn_u1.hashkey,
                       STRLEN(child->wn_u1.hashkey), hash);
      if (!HASHITEM_EMPTY(hi)) {
        // There are children we encountered before with a hash value
        // identical to the current child.  Now check if there is one
        // that is really identical.
        for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
          if (node_equal(child, tp)) {
            // Found one!  Now use that child in place of the
            // current one.  This means the current child and all
            // its siblings is unlinked from the tree.
            ++tp->wn_refs;
            compressed += deref_wordnode(spin, child);
            np->wn_child = tp;
            break;
          }
        if (tp == NULL) {
          // No other child with this hash value equals the child of
          // the node, add it to the linked list after the first
          // item.
          tp = HI2WN(hi);
          child->wn_u2.next = tp->wn_u2.next;
          tp->wn_u2.next = child;
        }
      } else
        // No other child has this hash value, add it to the
        // hashtable.
        hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
    }
  }
  *tot += len + 1;      // add one for the node that stores the length

  // Make a hash key for the node and its siblings, so that we can quickly
  // find a lookalike node.  This must be done after compressing the sibling
  // list, otherwise the hash key would become invalid by the compression.
  // Key layout: [0] sibling count, [1..4] the four bytes of "nr" (low byte
  // first), [5] NUL terminator.
  node->wn_u1.hashkey[0] = len;
  nr = 0;
  for (np = node; np != NULL; np = np->wn_sibling) {
    if (np->wn_byte == NUL)
      // end node: use wn_flags, wn_region and wn_affixID
      n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
    else
      // byte node: use the byte value and the child pointer
      n = (unsigned)(np->wn_byte + ((uintptr_t)np->wn_child << 8));
    nr = nr * 101 + n;
  }

  // Avoid NUL bytes, it terminates the hash key.
  n = nr & 0xff;
  node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
  n = (nr >> 8) & 0xff;
  node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
  n = (nr >> 16) & 0xff;
  node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
  n = (nr >> 24) & 0xff;
  node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
  node->wn_u1.hashkey[5] = NUL;

  // Check for CTRL-C pressed now and then.
  fast_breakcheck();

  return compressed;
}

// Returns true when two nodes have identical siblings and children.
static bool node_equal(wordnode_T *n1, wordnode_T *n2)
{
  wordnode_T *p1;
  wordnode_T *p2;

  // Walk both sibling lists in step and stop at the first difference.
  // End-of-word nodes (NUL byte) are compared on flags, region and affixID;
  // byte nodes are compared on the child pointer itself.
  for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
       p1 = p1->wn_sibling, p2 = p2->wn_sibling)
    if (p1->wn_byte != p2->wn_byte
        || (p1->wn_byte == NUL
            ? (p1->wn_flags != p2->wn_flags
               || p1->wn_region != p2->wn_region
               || p1->wn_affixID != p2->wn_affixID)
            : (p1->wn_child != p2->wn_child)))
      break;

  // Equal only when both lists ended at the same time without a mismatch.
  return p1 == NULL && p2 == NULL;
}


// Function given to qsort() to sort the REP items on "from" string.
static int rep_compare(const void *s1, const void *s2)
{
  fromto_T *p1 = (fromto_T *)s1;
  fromto_T *p2 = (fromto_T *)s2;

  return STRCMP(p1->ft_from, p2->ft_from);
}

// Write the Vim .spl file "fname".
// Writes the header, each section for which "spin" holds data, the section
// end marker and then the three word trees.  Also sets "spin->si_memtot"
// and, when the SN_SUGFILE section is written, "spin->si_sugtime".
// Return OK/FAIL.
static int write_vim_spell(spellinfo_T *spin, char_u *fname)
{
  int retval = OK;
  int regionmask;

  FILE *fd = mch_fopen((char *)fname, "w");
  if (fd == NULL) {
    EMSG2(_(e_notopen), fname);
    return FAIL;
  }

  // "fwv" starts as the result of the first fwrite() and is AND-ed with the
  // result of every later fwrite() (1 on success), so it ends up other than
  // 1 once any of them failed.  It is checked after closing the file.
  // NOTE(review): fwrite() returns 0 when the item size is zero, which would
  // also clear "fwv".  Only the REP/SAL loop guards with "l > 0"; the other
  // calls appear to rely on the data being non-empty -- confirm.

  // <HEADER>: <fileID> <versionnr>
  // <fileID>
  size_t fwv = fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, 1, fd);
  if (fwv != (size_t)1)
    // Catch first write error, don't try writing more.
    goto theend;

  putc(VIMSPELLVERSION, fd);                                // <versionnr>

  // <SECTIONS>: <section> ... <sectionend>

  // SN_INFO: <infotext>
  if (spin->si_info != NULL) {
    putc(SN_INFO, fd);                                      // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    size_t i = STRLEN(spin->si_info);
    put_bytes(fd, i, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_info, i, 1, fd);                 // <infotext>
  }

  // SN_REGION: <regionname> ...
  // Write the region names only if there is more than one.
  if (spin->si_region_count > 1) {
    putc(SN_REGION, fd);                                    // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>
    size_t l = (size_t)spin->si_region_count * 2;
    put_bytes(fd, l, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_region_name, l, 1, fd);
    // <regionname> ...
    regionmask = (1 << spin->si_region_count) - 1;
  } else
    regionmask = 0;

  // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
  //
  // The table with character flags and the table for case folding.
  // This makes sure the same characters are recognized as word characters
  // when generating and when using a spell file.
  // Skip this for ASCII, the table may conflict with the one used for
  // 'encoding'.
  // Also skip this for an .add.spl file, the main spell file must contain
  // the table (avoids that it conflicts).  File is shorter too.
  if (!spin->si_ascii && !spin->si_add) {
    char_u folchars[128 * 8];
    int flags;

    putc(SN_CHARFLAGS, fd);                                 // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>

    // Form the <folchars> string first, we need to know its length.
    size_t l = 0;
    for (size_t i = 128; i < 256; ++i) {
      if (has_mbyte)
        l += (size_t)mb_char2bytes(spelltab.st_fold[i], folchars + l);
      else
        folchars[l++] = spelltab.st_fold[i];
    }
    // Section length: <charflagslen> (1) + <charflags> (128)
    // + <folcharslen> (2) + <folchars> (l).
    put_bytes(fd, 1 + 128 + 2 + l, 4);                      // <sectionlen>

    fputc(128, fd);                                         // <charflagslen>
    for (size_t i = 128; i < 256; ++i) {
      flags = 0;
      if (spelltab.st_isw[i])
        flags |= CF_WORD;
      if (spelltab.st_isu[i])
        flags |= CF_UPPER;
      fputc(flags, fd);                                     // <charflags>
    }

    put_bytes(fd, l, 2);                                    // <folcharslen>
    fwv &= fwrite(folchars, l, 1, fd);                      // <folchars>
  }

  // SN_MIDWORD: <midword>
  if (spin->si_midword != NULL) {
    putc(SN_MIDWORD, fd);                                   // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>

    size_t i = STRLEN(spin->si_midword);
    put_bytes(fd, i, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_midword, i, 1, fd);
    // <midword>
  }

  // SN_PREFCOND: <prefcondcnt> <prefcond> ...
  if (!GA_EMPTY(&spin->si_prefcond)) {
    putc(SN_PREFCOND, fd);                                  // <sectionID>
    putc(SNF_REQUIRED, fd);                                 // <sectionflags>

    // First call with a NULL file only obtains the length, the second call
    // writes the items.
    size_t l = (size_t)write_spell_prefcond(NULL, &spin->si_prefcond);
    put_bytes(fd, l, 4);                                    // <sectionlen>

    write_spell_prefcond(fd, &spin->si_prefcond);
  }

  // SN_REP: <repcount> <rep> ...
  // SN_SAL: <salflags> <salcount> <sal> ...
  // SN_REPSAL: <repcount> <rep> ...

  // round 1: SN_REP section
  // round 2: SN_SAL section (unless SN_SOFO is used)
  // round 3: SN_REPSAL section
  for (unsigned int round = 1; round <= 3; ++round) {
    garray_T *gap;
    if (round == 1)
      gap = &spin->si_rep;
    else if (round == 2) {
      // Don't write SN_SAL when using a SN_SOFO section
      if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
        continue;
      gap = &spin->si_sal;
    } else
      gap = &spin->si_repsal;

    // Don't write the section if there are no items.
    if (GA_EMPTY(gap))
      continue;

    // Sort the REP/REPSAL items.
    if (round != 2)
      qsort(gap->ga_data, (size_t)gap->ga_len,
            sizeof(fromto_T), rep_compare);

    int i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
    putc(i, fd);                                            // <sectionID>

    // This is for making suggestions, section is not required.
    putc(0, fd);                                            // <sectionflags>

    // Compute the length of what follows.
    size_t l = 2;           // count <repcount> or <salcount>
    assert(gap->ga_len >= 0);
    for (size_t i = 0; i < (size_t)gap->ga_len; ++i) {
      fromto_T *ftp = &((fromto_T *)gap->ga_data)[i];
      l += 1 + STRLEN(ftp->ft_from);        // count <*fromlen> and <*from>
      l += 1 + STRLEN(ftp->ft_to);          // count <*tolen> and <*to>
    }
    if (round == 2)
      ++l;              // count <salflags>
    put_bytes(fd, l, 4);                                    // <sectionlen>

    if (round == 2) {
      int i = 0;
      if (spin->si_followup)
        i |= SAL_F0LLOWUP;
      if (spin->si_collapse)
        i |= SAL_COLLAPSE;
      if (spin->si_rem_accents)
        i |= SAL_REM_ACCENTS;
      putc(i, fd);                              // <salflags>
    }

    put_bytes(fd, (uintmax_t)gap->ga_len, 2);  // <repcount> or <salcount>
    for (size_t i = 0; i < (size_t)gap->ga_len; ++i) {
      // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
      // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
      fromto_T *ftp = &((fromto_T *)gap->ga_data)[i];
      // rr 1 writes the "from" string, rr 2 the "to" string.
      for (unsigned int rr = 1; rr <= 2; ++rr) {
        char_u *p = rr == 1 ? ftp->ft_from : ftp->ft_to;
        l = STRLEN(p);
        assert(l < INT_MAX);
        putc((int)l, fd);
        if (l > 0)
          fwv &= fwrite(p, l, 1, fd);
      }
    }

  }

  // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
  // This is for making suggestions, section is not required.
  if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) {
    putc(SN_SOFO, fd);                                      // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    size_t l = STRLEN(spin->si_sofofr);
    // The extra 4 bytes are the two 2-byte length fields.
    put_bytes(fd, l + STRLEN(spin->si_sofoto) + 4, 4);      // <sectionlen>

    put_bytes(fd, l, 2);                                    // <sofofromlen>
    fwv &= fwrite(spin->si_sofofr, l, 1, fd);               // <sofofrom>

    l = STRLEN(spin->si_sofoto);
    put_bytes(fd, l, 2);                                    // <sofotolen>
    fwv &= fwrite(spin->si_sofoto, l, 1, fd);               // <sofoto>
  }

  // SN_WORDS: <word> ...
  // This is for making suggestions, section is not required.
  if (spin->si_commonwords.ht_used > 0) {
    putc(SN_WORDS, fd);                                     // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    // round 1: count the bytes
    // round 2: write the bytes
    for (unsigned int round = 1; round <= 2; ++round) {
      size_t todo;
      size_t len = 0;
      hashitem_T *hi;

      todo = spin->si_commonwords.ht_used;
      for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
        if (!HASHITEM_EMPTY(hi)) {
          // Each word is written including its terminating NUL.
          size_t l = STRLEN(hi->hi_key) + 1;
          len += l;
          if (round == 2)                                   // <word>
            fwv &= fwrite(hi->hi_key, l, 1, fd);
          --todo;
        }
      if (round == 1)
        put_bytes(fd, len, 4);                              // <sectionlen>
    }
  }

  // SN_MAP: <mapstr>
  // This is for making suggestions, section is not required.
  if (!GA_EMPTY(&spin->si_map)) {
    putc(SN_MAP, fd);                                       // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    size_t l = (size_t)spin->si_map.ga_len;
    put_bytes(fd, l, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_map.ga_data, l, 1, fd);          // <mapstr>
  }

  // SN_SUGFILE: <timestamp>
  // This is used to notify that a .sug file may be available and at the
  // same time allows for checking that a .sug file that is found matches
  // with this .spl file.  That's because the word numbers must be exactly
  // right.
  if (!spin->si_nosugfile
      && (!GA_EMPTY(&spin->si_sal)
          || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) {
    putc(SN_SUGFILE, fd);                                   // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    put_bytes(fd, 8, 4);                                    // <sectionlen>

    // Set si_sugtime and write it to the file.
    spin->si_sugtime = time(NULL);
    put_time(fd, spin->si_sugtime);                         // <timestamp>
  }

  // SN_NOSPLITSUGS: nothing
  // This is used to notify that no suggestions with word splits are to be
  // made.
  if (spin->si_nosplitsugs) {
    putc(SN_NOSPLITSUGS, fd);                               // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    put_bytes(fd, 0, 4);                                    // <sectionlen>
  }

  // SN_NOCOMPOUNDSUGS: nothing
  // This is used to notify that no suggestions with compounds are to be
  // made.
  if (spin->si_nocompoundsugs) {
    putc(SN_NOCOMPOUNDSUGS, fd);                            // <sectionID>
    putc(0, fd);                                            // <sectionflags>
    put_bytes(fd, 0, 4);                                    // <sectionlen>
  }

  // SN_COMPOUND: compound info.
  // We don't mark it required, when not supported all compound words will
  // be bad words.
  if (spin->si_compflags != NULL) {
    putc(SN_COMPOUND, fd);                                  // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    size_t l = STRLEN(spin->si_compflags);
    assert(spin->si_comppat.ga_len >= 0);
    for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; ++i) {
      l += STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
    }
    // The extra 7 bytes are the five single-byte fields plus the 2-byte
    // <comppatcount> written below.
    put_bytes(fd, l + 7, 4);                                // <sectionlen>

    putc(spin->si_compmax, fd);                             // <compmax>
    putc(spin->si_compminlen, fd);                          // <compminlen>
    putc(spin->si_compsylmax, fd);                          // <compsylmax>
    putc(0, fd);                // for Vim 7.0b compatibility
    putc(spin->si_compoptions, fd);                         // <compoptions>
    put_bytes(fd, (uintmax_t)spin->si_comppat.ga_len, 2);   // <comppatcount>
    for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; ++i) {
      char_u *p = ((char_u **)(spin->si_comppat.ga_data))[i];
      assert(STRLEN(p) < INT_MAX);
      putc((int)STRLEN(p), fd);                             // <comppatlen>
      fwv &= fwrite(p, STRLEN(p), 1, fd);                   // <comppattext>
    }
    // <compflags>
    fwv &= fwrite(spin->si_compflags, STRLEN(spin->si_compflags), 1, fd);
  }

  // SN_NOBREAK: NOBREAK flag
  if (spin->si_nobreak) {
    putc(SN_NOBREAK, fd);                                   // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    // It's empty, the presence of the section flags the feature.
    put_bytes(fd, 0, 4);                                    // <sectionlen>
  }

  // SN_SYLLABLE: syllable info.
  // We don't mark it required, when not supported syllables will not be
  // counted.
  if (spin->si_syllable != NULL) {
    putc(SN_SYLLABLE, fd);                                  // <sectionID>
    putc(0, fd);                                            // <sectionflags>

    size_t l = STRLEN(spin->si_syllable);
    put_bytes(fd, l, 4);                                    // <sectionlen>
    fwv &= fwrite(spin->si_syllable, l, 1, fd);             // <syllable>
  }

  // end of <SECTIONS>
  putc(SN_END, fd);                                         // <sectionend>


  // <LWORDTREE>  <KWORDTREE>  <PREFIXTREE>
  // round 1 writes the case-folded tree, round 2 the keep-case tree and
  // round 3 the prefix tree.
  spin->si_memtot = 0;
  for (unsigned int round = 1; round <= 3; ++round) {
    wordnode_T *tree;
    if (round == 1)
      tree = spin->si_foldroot->wn_sibling;
    else if (round == 2)
      tree = spin->si_keeproot->wn_sibling;
    else
      tree = spin->si_prefroot->wn_sibling;

    // Clear the index and wnode fields in the tree.
    clear_node(tree);

    // Count the number of nodes.  Needed to be able to allocate the
    // memory when reading the nodes.  Also fills in index for shared
    // nodes.
    size_t nodecount = (size_t)put_node(NULL, tree, 0, regionmask, round == 3);

    // number of nodes in 4 bytes
    put_bytes(fd, nodecount, 4);                            // <nodecount>
    assert(nodecount + nodecount * sizeof(int) < INT_MAX);
    spin->si_memtot += (int)(nodecount + nodecount * sizeof(int));

    // Write the nodes.
    (void)put_node(fd, tree, 0, regionmask, round == 3);
  }

  // Write another byte to check for errors (file system full).
  if (putc(0, fd) == EOF)
    retval = FAIL;
theend:
  if (fclose(fd) == EOF)
    retval = FAIL;

  if (fwv != (size_t)1)
    retval = FAIL;
  if (retval == FAIL)
    EMSG(_(e_write));

  return retval;
}

// Clear the index and wnode fields of "node", its siblings and its
// children.  This is needed because they are a union with other items to save
// space.
static void clear_node(wordnode_T *node)
{
  wordnode_T *np;

  if (node != NULL)
    for (np = node; np != NULL; np = np->wn_sibling) {
      np->wn_u1.index = 0;
      np->wn_u2.wnode = NULL;

      // Only byte nodes are descended into; end-of-word (NUL) nodes are not.
      if (np->wn_byte != NUL)
        clear_node(np->wn_child);
    }
}


// Dump a word tree at node "node".
//
// This first writes the list of possible bytes (siblings).
// Then for each byte recursively write the children.
//
// NOTE: The code here must match the code in read_tree_node(), since
// assumptions are made about the indexes (so that we don't have to write them
// in the file).
//
// When "fd" is NULL nothing is written; the tree is only walked to assign
// indexes and count nodes.
//
// Returns the number of nodes used.
static int
put_node (
    FILE *fd,                   // NULL when only counting
    wordnode_T *node,
    int idx,
    int regionmask,
    bool prefixtree             // true for PREFIXTREE
)
{
  // If "node" is zero the tree is empty.
  if (node == NULL)
    return 0;

  // Store the index where this node is written.
  node->wn_u1.index = idx;

  // Count the number of siblings.
  int siblingcount = 0;
  for (wordnode_T *np = node; np != NULL; np = np->wn_sibling)
    ++siblingcount;

  // Write the sibling count.
  if (fd != NULL)
    putc(siblingcount, fd);                                 // <siblingcount>

  // Write each sibling byte and optionally extra info.
  for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) {
    if (np->wn_byte == 0) {
      if (fd != NULL) {
        // For a NUL byte (end of word) write the flags etc.
        if (prefixtree) {
          // In PREFIXTREE write the required affixID and the
          // associated condition nr (stored in wn_region).  The
          // byte value is misused to store the "rare" and "not
          // combining" flags
          if (np->wn_flags == (uint16_t)PFX_FLAGS)
            putc(BY_NOFLAGS, fd);                           // <byte>
          else {
            putc(BY_FLAGS, fd);                             // <byte>
            putc(np->wn_flags, fd);                         // <pflags>
          }
          putc(np->wn_affixID, fd);                         // <affixID>
          put_bytes(fd, (uintmax_t)np->wn_region, 2);       // <prefcondnr>
        } else {
          // For word trees we write the flag/region items.
          int flags = np->wn_flags;
          if (regionmask != 0 && np->wn_region != regionmask)
            flags |= WF_REGION;
          if (np->wn_affixID != 0)
            flags |= WF_AFX;
          if (flags == 0) {
            // word without flags or region
            putc(BY_NOFLAGS, fd);                           // <byte>
          } else {
            // Flags that do not fit in one byte are written as two bytes.
            if (np->wn_flags >= 0x100) {
              putc(BY_FLAGS2, fd);                          // <byte>
              putc(flags, fd);                              // <flags>
              putc((int)((unsigned)flags >> 8), fd);        // <flags2>
            } else {
              putc(BY_FLAGS, fd);                           // <byte>
              putc(flags, fd);                              // <flags>
            }
            if (flags & WF_REGION)
              putc(np->wn_region, fd);                      // <region>
            if (flags & WF_AFX)
              putc(np->wn_affixID, fd);                     // <affixID>
          }
        }
      }
    } else {
      // "wn_u2.wnode" of a child records which sibling list is going to
      // write it; a child already claimed by another list is referenced by
      // its index instead.
      if (np->wn_child->wn_u1.index != 0
          && np->wn_child->wn_u2.wnode != node) {
        // The child is written elsewhere, write the reference.
        if (fd != NULL) {
          putc(BY_INDEX, fd);                               // <byte>
          put_bytes(fd, (uintmax_t)np->wn_child->wn_u1.index, 3);  // <nodeidx>
        }
      } else if (np->wn_child->wn_u2.wnode == NULL)
        // We will write the child below and give it an index.
        np->wn_child->wn_u2.wnode = node;

      if (fd != NULL)
        if (putc(np->wn_byte, fd) == EOF) {       // <byte> or <xbyte>
          EMSG(_(e_write));
          return 0;
        }
    }
  }

  // Space used in the array when reading: one for each sibling and one for
  // the count.
  int newindex = idx + siblingcount + 1;

  // Recursively dump the children of each sibling.
  // Only children claimed by this sibling list are written here.
  for (wordnode_T *np = node; np != NULL; np = np->wn_sibling)
    if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
      newindex = put_node(fd, np->wn_child, newindex, regionmask,
                          prefixtree);

  return newindex;
}


// ":mkspell [-ascii] outfile  infile ..."
// ":mkspell [-ascii] addfile"
void ex_mkspell(exarg_T *eap)
{
  int fcount;
  char_u **fnames;
  char_u *arg = eap->arg;
  bool ascii = false;

  // Strip a leading "-ascii" argument.
  if (STRNCMP(arg, "-ascii", 6) == 0) {
    ascii = true;
    arg = skipwhite(arg + 6);
  }

  // Expand all the remaining arguments (e.g., $VIMRUNTIME).
  if (get_arglist_exp(arg, &fcount, &fnames, false) == OK) {
    mkspell(fcount, fnames, ascii, eap->forceit, false);
    FreeWild(fcount, fnames);
  }
}

// Create the .sug file.
// Uses the soundfold info in "spin".
// Writes the file with the name "wfname", with ".spl" changed to ".sug".
static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
{
  char_u *fname = NULL;
  int len;
  slang_T *slang;
  bool free_slang = false;

  // Read back the .spl file that was written.  This fills the required
  // info for soundfolding.  This also uses less memory than the
  // pointer-linked version of the trie.  And it avoids having two versions
  // of the code for the soundfolding stuff.
  // It might have been done already by spell_reload_one().
  for (slang = first_lang; slang != NULL; slang = slang->sl_next)
    if (path_full_compare(wfname, slang->sl_fname, FALSE) == kEqualFiles)
      break;
  if (slang == NULL) {
    spell_message(spin, (char_u *)_("Reading back spell file..."));
    slang = spell_load_file(wfname, NULL, NULL, false);
    if (slang == NULL)
      return;
    // Loaded here only for this function, so it is freed again at the end.
    free_slang = true;
  }

  // Clear the info in "spin" that is used.
  spin->si_blocks = NULL;
  spin->si_blocks_cnt = 0;
  spin->si_compress_cnt = 0;    // will stay at 0 all the time
  spin->si_free_count = 0;
  spin->si_first_free = NULL;
  spin->si_foldwcount = 0;

  // Go through the trie of good words, soundfold each word and add it to
  // the soundfold trie.
  spell_message(spin, (char_u *)_("Performing soundfolding..."));
  if (sug_filltree(spin, slang) == FAIL)
    goto theend;

  // Create the table which links each soundfold word with a list of the
  // good words it may come from.  Creates buffer "spin->si_spellbuf".
  // This also removes the wordnr from the NUL byte entries to make
  // compression possible.
  if (sug_maketable(spin) == FAIL)
    goto theend;

  smsg(_("Number of words after soundfolding: %" PRId64),
       (int64_t)spin->si_spellbuf->b_ml.ml_line_count);

  // Compress the soundfold trie.
  spell_message(spin, (char_u *)_(msg_compressing));
  wordtree_compress(spin, spin->si_foldroot);

  // Write the .sug file.
  // Make the file name by changing ".spl" to ".sug".
  // Only the last two characters are replaced, so "wfname" is expected to
  // end in ".spl".
  fname = xmalloc(MAXPATHL);
  STRLCPY(fname, wfname, MAXPATHL);
  len = (int)STRLEN(fname);
  fname[len - 2] = 'u';
  fname[len - 1] = 'g';
  sug_write(spin, fname);

theend:
  xfree(fname);
  if (free_slang)
    slang_free(slang);
  free_blocks(spin->si_blocks);
  close_spellbuf(spin->si_spellbuf);
}

// Build the soundfold trie for language "slang".
// Walks the case-folded tree stored in the "sl_fbyts"/"sl_fidxs" arrays of
// "slang" without recursion, using one stack entry per depth.
// Returns FAIL when the root node cannot be allocated or adding a word fails.
static int sug_filltree(spellinfo_T *spin, slang_T *slang)
{
  char_u *byts;
  idx_T *idxs;
  int depth;
  idx_T arridx[MAXWLEN];        // index of the node at each depth
  int curi[MAXWLEN];            // next sibling to handle at each depth
  char_u tword[MAXWLEN];        // word built up so far
  char_u tsalword[MAXWLEN];     // soundfolded version of "tword"
  int c;
  idx_T n;
  unsigned words_done = 0;
  int wordcount[MAXWLEN];       // number of words found below each depth

  // We use si_foldroot for the soundfolded trie.
  spin->si_foldroot = wordtree_alloc(spin);
  if (spin->si_foldroot == NULL)
    return FAIL;

  // Let tree_add_word() know we're adding to the soundfolded tree
  spin->si_sugtree = true;

  // Go through the whole case-folded tree, soundfold each word and put it
  // in the trie.
  byts = slang->sl_fbyts;
  idxs = slang->sl_fidxs;

  arridx[0] = 0;
  curi[0] = 1;
  wordcount[0] = 0;

  depth = 0;
  while (depth >= 0 && !got_int) {
    if (curi[depth] > byts[arridx[depth]]) {
      // Done all bytes at this node, go up one level.
      // The word count for this node is stored in "idxs" and added to the
      // count of the level above.
      idxs[arridx[depth]] = wordcount[depth];
      if (depth > 0)
        wordcount[depth - 1] += wordcount[depth];

      --depth;
      line_breakcheck();
    } else {

      // Do one more byte at this node.
      n = arridx[depth] + curi[depth];
      ++curi[depth];

      c = byts[n];
      if (c == 0) {
        // Sound-fold the word.
        tword[depth] = NUL;
        spell_soundfold(slang, tword, true, tsalword);

        // We use the "flags" field for the MSB of the wordnr,
        // "region" for the LSB of the wordnr.
        if (tree_add_word(spin, tsalword, spin->si_foldroot,
                          words_done >> 16, words_done & 0xffff,
                          0) == FAIL)
          return FAIL;

        ++words_done;
        ++wordcount[depth];

        // Reset the block count each time to avoid compression
        // kicking in.
        spin->si_blocks_cnt = 0;

        // Skip over any other NUL bytes (same word with different
        // flags).
        while (byts[n + 1] == 0) {
          ++n;
          ++curi[depth];
        }
      } else {
        // Normal char, go one level deeper.
        tword[depth++] = c;
        arridx[depth] = idxs[n];
        curi[depth] = 1;
        wordcount[depth] = 0;
      }
    }
  }

  // NOTE(review): "words_done" is unsigned but is printed with "%d".
  smsg(_("Total number of words: %d"), words_done);

  return OK;
}

// Make the table that links each word in the soundfold trie to the words it
// can be produced from.
// This is not unlike lines in a file, thus use a memfile to be able to access
// the table efficiently.
// Returns FAIL when out of memory.
static int sug_maketable(spellinfo_T *spin)
{
  garray_T ga;
  int res = OK;

  // Allocate a buffer, open a memline for it and create the swap file
  // (uses a temp file, not a .swp file).
  spin->si_spellbuf = open_spellbuf();

  // Use a buffer to store the line info, avoids allocating many small
  // pieces of memory.
  ga_init(&ga, 1, 100);

  // recursively go through the tree
  if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
    res = FAIL;

  ga_clear(&ga);
  return res;
}

// Fill the table for one node and its children.
// Returns the wordnr at the start of the node.
// Returns -1 when out of memory.
+static int +sug_filltable ( + spellinfo_T *spin, + wordnode_T *node, + int startwordnr, + garray_T *gap // place to store line of numbers +) +{ + wordnode_T *p, *np; + int wordnr = startwordnr; + int nr; + int prev_nr; + + for (p = node; p != NULL; p = p->wn_sibling) { + if (p->wn_byte == NUL) { + gap->ga_len = 0; + prev_nr = 0; + for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) { + ga_grow(gap, 10); + + nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); + // Compute the offset from the previous nr and store the + // offset in a way that it takes a minimum number of bytes. + // It's a bit like utf-8, but without the need to mark + // following bytes. + nr -= prev_nr; + prev_nr += nr; + gap->ga_len += offset2bytes(nr, + (char_u *)gap->ga_data + gap->ga_len); + } + + // add the NUL byte + ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; + + if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, + gap->ga_data, gap->ga_len, TRUE) == FAIL) + return -1; + ++wordnr; + + // Remove extra NUL entries, we no longer need them. We don't + // bother freeing the nodes, the won't be reused anyway. + while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) + p->wn_sibling = p->wn_sibling->wn_sibling; + + // Clear the flags on the remaining NUL node, so that compression + // works a lot better. + p->wn_flags = 0; + p->wn_region = 0; + } else { + wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); + if (wordnr == -1) + return -1; + } + } + return wordnr; +} + +// Convert an offset into a minimal number of bytes. +// Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL +// bytes. +static int offset2bytes(int nr, char_u *buf) +{ + int rem; + int b1, b2, b3, b4; + + // Split the number in parts of base 255. We need to avoid NUL bytes. 
+ b1 = nr % 255 + 1; + rem = nr / 255; + b2 = rem % 255 + 1; + rem = rem / 255; + b3 = rem % 255 + 1; + b4 = rem / 255 + 1; + + if (b4 > 1 || b3 > 0x1f) { // 4 bytes + buf[0] = 0xe0 + b4; + buf[1] = b3; + buf[2] = b2; + buf[3] = b1; + return 4; + } + if (b3 > 1 || b2 > 0x3f ) { // 3 bytes + buf[0] = 0xc0 + b3; + buf[1] = b2; + buf[2] = b1; + return 3; + } + if (b2 > 1 || b1 > 0x7f ) { // 2 bytes + buf[0] = 0x80 + b2; + buf[1] = b1; + return 2; + } + // 1 byte + buf[0] = b1; + return 1; +} + +// Write the .sug file in "fname". +static void sug_write(spellinfo_T *spin, char_u *fname) +{ + // Create the file. Note that an existing file is silently overwritten! + FILE *fd = mch_fopen((char *)fname, "w"); + if (fd == NULL) { + EMSG2(_(e_notopen), fname); + return; + } + + vim_snprintf((char *)IObuff, IOSIZE, + _("Writing suggestion file %s ..."), fname); + spell_message(spin, IObuff); + + // <SUGHEADER>: <fileID> <versionnr> <timestamp> + if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) { // <fileID> + EMSG(_(e_write)); + goto theend; + } + putc(VIMSUGVERSION, fd); // <versionnr> + + // Write si_sugtime to the file. + put_time(fd, spin->si_sugtime); // <timestamp> + + // <SUGWORDTREE> + spin->si_memtot = 0; + wordnode_T *tree = spin->si_foldroot->wn_sibling; + + // Clear the index and wnode fields in the tree. + clear_node(tree); + + // Count the number of nodes. Needed to be able to allocate the + // memory when reading the nodes. Also fills in index for shared + // nodes. + size_t nodecount = (size_t)put_node(NULL, tree, 0, 0, false); + + // number of nodes in 4 bytes + put_bytes(fd, nodecount, 4); // <nodecount> + assert(nodecount + nodecount * sizeof(int) < INT_MAX); + spin->si_memtot += (int)(nodecount + nodecount * sizeof(int)); + + // Write the nodes. + (void)put_node(fd, tree, 0, 0, false); + + // <SUGTABLE>: <sugwcount> <sugline> ... 
+ linenr_T wcount = spin->si_spellbuf->b_ml.ml_line_count; + assert(wcount >= 0); + put_bytes(fd, (uintmax_t)wcount, 4); // <sugwcount> + + for (linenr_T lnum = 1; lnum <= wcount; ++lnum) { + // <sugline>: <sugnr> ... NUL + char_u *line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); + size_t len = STRLEN(line) + 1; + if (fwrite(line, len, 1, fd) == 0) { + EMSG(_(e_write)); + goto theend; + } + assert((size_t)spin->si_memtot + len <= INT_MAX); + spin->si_memtot += (int)len; + } + + // Write another byte to check for errors. + if (putc(0, fd) == EOF) + EMSG(_(e_write)); + + vim_snprintf((char *)IObuff, IOSIZE, + _("Estimated runtime memory use: %d bytes"), spin->si_memtot); + spell_message(spin, IObuff); + +theend: + // close the file + fclose(fd); +} + + +// Create a Vim spell file from one or more word lists. +// "fnames[0]" is the output file name. +// "fnames[fcount - 1]" is the last input file name. +// Exception: when "fnames[0]" ends in ".add" it's used as the input file name +// and ".spl" is appended to make the output file name. 
static void
mkspell (
    int fcount,
    char_u **fnames,
    bool ascii,                 // -ascii argument given
    bool over_write,            // overwrite existing output file
    bool added_word             // invoked through "zg"
)
{
  char_u *fname = NULL;
  char_u *wfname;
  char_u **innames;
  int incount;
  afffile_T *(afile[8]);        // one per input file, at most 8 regions
  int i;
  int len;
  bool error = false;
  spellinfo_T spin;

  memset(&spin, 0, sizeof(spin));
  // Messages are given unless a single word is being added.
  spin.si_verbose = !added_word;
  spin.si_ascii = ascii;
  spin.si_followup = true;
  spin.si_rem_accents = true;
  ga_init(&spin.si_rep, (int)sizeof(fromto_T), 20);
  ga_init(&spin.si_repsal, (int)sizeof(fromto_T), 20);
  ga_init(&spin.si_sal, (int)sizeof(fromto_T), 20);
  ga_init(&spin.si_map, (int)sizeof(char_u), 100);
  ga_init(&spin.si_comppat, (int)sizeof(char_u *), 20);
  ga_init(&spin.si_prefcond, (int)sizeof(char_u *), 50);
  hash_init(&spin.si_commonwords);
  spin.si_newcompID = 127;      // start compound ID at first maximum

  // default: fnames[0] is output file, following are input files
  innames = &fnames[1];
  incount = fcount - 1;

  wfname = xmalloc(MAXPATHL);

  // Work out the output file name "wfname" and which names are input files.
  if (fcount >= 1) {
    len = (int)STRLEN(fnames[0]);
    if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) {
      // For ":mkspell path/en.latin1.add" output file is
      // "path/en.latin1.add.spl".
      innames = &fnames[0];
      incount = 1;
      vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
    } else if (fcount == 1) {
      // For ":mkspell path/vim" output file is "path/vim.latin1.spl".
      innames = &fnames[0];
      incount = 1;
      vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
                   fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
    } else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) {
      // Name ends in ".spl", use as the file name.
      STRLCPY(wfname, fnames[0], MAXPATHL);
    } else
      // Name should be language, make the file name from it.
      vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
                   fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());

    // Check for .ascii.spl.
    if (strstr((char *)path_tail(wfname), SPL_FNAME_ASCII) != NULL)
      spin.si_ascii = true;

    // Check for .add.spl.
    if (strstr((char *)path_tail(wfname), SPL_FNAME_ADD) != NULL)
      spin.si_add = true;
  }

  // "wfname" is only filled in when fcount >= 1; the first check below
  // catches the other case before "wfname" is looked at.
  if (incount <= 0)
    EMSG(_(e_invarg));          // need at least output and input names
  else if (vim_strchr(path_tail(wfname), '_') != NULL)
    EMSG(_("E751: Output file name must not have region name"));
  else if (incount > 8)
    EMSG(_("E754: Only up to 8 regions supported"));
  else {
    // Check for overwriting before doing things that may take a lot of
    // time.
    if (!over_write && os_path_exists(wfname)) {
      EMSG(_(e_exists));
      goto theend;
    }
    if (os_isdir(wfname)) {
      EMSG2(_(e_isadir2), wfname);
      goto theend;
    }

    fname = xmalloc(MAXPATHL);

    // Init the aff and dic pointers.
    // Get the region names if there are more than 2 arguments.
    // The region is taken from the last two characters of a name ending in
    // "_xx".
    for (i = 0; i < incount; ++i) {
      afile[i] = NULL;

      if (incount > 1) {
        len = (int)STRLEN(innames[i]);
        if (STRLEN(path_tail(innames[i])) < 5
            || innames[i][len - 3] != '_') {
          EMSG2(_("E755: Invalid region in %s"), innames[i]);
          goto theend;
        }
        spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
        spin.si_region_name[i * 2 + 1] =
          TOLOWER_ASC(innames[i][len - 1]);
      }
    }
    spin.si_region_count = incount;

    spin.si_foldroot = wordtree_alloc(&spin);
    spin.si_keeproot = wordtree_alloc(&spin);
    spin.si_prefroot = wordtree_alloc(&spin);
    if (spin.si_foldroot == NULL
        || spin.si_keeproot == NULL
        || spin.si_prefroot == NULL) {
      free_blocks(spin.si_blocks);
      goto theend;
    }

    // When not producing a .add.spl file clear the character table when
    // we encounter one in the .aff file.  This means we dump the current
    // one in the .spl file if the .aff file doesn't define one.  That's
    // better than guessing the contents, the table will match a
    // previously loaded spell file.
    if (!spin.si_add)
      spin.si_clear_chartab = true;

    // Read all the .aff and .dic files.
    // Text is converted to 'encoding'.
    // Words are stored in the case-folded and keep-case trees.
    for (i = 0; i < incount && !error; ++i) {
      spin.si_conv.vc_type = CONV_NONE;
      // Each input file gets its own region bit.
      spin.si_region = 1 << i;

      vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
      if (os_path_exists(fname)) {
        // Read the .aff file.  Will init "spin->si_conv" based on the
        // "SET" line.
        afile[i] = spell_read_aff(&spin, fname);
        if (afile[i] == NULL)
          error = true;
        else {
          // Read the .dic file and store the words in the trees.
          vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
                       innames[i]);
          if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
            error = true;
        }
      } else {
        // No .aff file, try reading the file as a word list.  Store
        // the words in the trees.
        if (spell_read_wordfile(&spin, innames[i]) == FAIL)
          error = true;
      }

      // Free any conversion stuff.
      convert_setup(&spin.si_conv, NULL, NULL);
    }

    if (spin.si_compflags != NULL && spin.si_nobreak)
      MSG(_("Warning: both compounding and NOBREAK specified"));

    if (!error && !got_int) {
      // Combine tails in the tree.
      spell_message(&spin, (char_u *)_(msg_compressing));
      wordtree_compress(&spin, spin.si_foldroot);
      wordtree_compress(&spin, spin.si_keeproot);
      wordtree_compress(&spin, spin.si_prefroot);
    }

    if (!error && !got_int) {
      // Write the info in the spell file.
      vim_snprintf((char *)IObuff, IOSIZE,
                   _("Writing spell file %s ..."), wfname);
      spell_message(&spin, IObuff);

      error = write_vim_spell(&spin, wfname) == FAIL;

      spell_message(&spin, (char_u *)_("Done!"));
      vim_snprintf((char *)IObuff, IOSIZE,
                   _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
      spell_message(&spin, IObuff);

      // If the file is loaded need to reload it.
      if (!error)
        spell_reload_one(wfname, added_word);
    }

    // Free the allocated memory.
    ga_clear(&spin.si_rep);
    ga_clear(&spin.si_repsal);
    ga_clear(&spin.si_sal);
    ga_clear(&spin.si_map);
    ga_clear(&spin.si_comppat);
    ga_clear(&spin.si_prefcond);
    hash_clear_all(&spin.si_commonwords, 0);

    // Free the .aff file structures.
    for (i = 0; i < incount; ++i)
      if (afile[i] != NULL)
        spell_free_aff(afile[i]);

    // Free all the bits and pieces at once.
    free_blocks(spin.si_blocks);

    // If there is soundfolding info and no NOSUGFILE item create the
    // .sug file with the soundfolded word trie.
    // "si_sugtime" is only set by write_vim_spell() when it wrote the
    // SN_SUGFILE section.
    if (spin.si_sugtime != 0 && !error && !got_int)
      spell_make_sugfile(&spin, wfname);

  }

theend:
  xfree(fname);
  xfree(wfname);
}

// Display a message for spell file processing when 'verbose' is set or using
// ":mkspell".  "str" can be IObuff.
static void spell_message(spellinfo_T *spin, char_u *str)
{
  if (spin->si_verbose || p_verbose > 2) {
    // When the message is only given because of 'verbose', wrap it in
    // verbose_enter()/verbose_leave().
    if (!spin->si_verbose)
      verbose_enter();
    MSG(str);
    ui_flush();
    if (!spin->si_verbose)
      verbose_leave();
  }
}

// ":[count]spellgood  {word}"
// ":[count]spellwrong  {word}"
// ":[count]spellundo  {word}"
void ex_spell(exarg_T *eap)
{
  // With "!" (forceit) index 0 is passed, which selects the internal
  // wordlist; otherwise the count is used as the index.
  spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
                 eap->forceit ? 0 : (int)eap->line2,
                 eap->cmdidx == CMD_spellundo);
}

// Add "word[len]" to 'spellfile' as a good or bad word.
void
spell_add_word (
    char_u *word,
    int len,
    int bad,
    int idx,                   // "zG" and "zW": zero, otherwise index in
                               // 'spellfile'
    bool undo                  // true for "zug", "zuG", "zuw" and "zuW"
)
{
  FILE *fd = NULL;
  buf_T *buf = NULL;
  bool new_spf = false;
  char_u *fname;
  char_u *fnamebuf = NULL;
  char_u line[MAXWLEN * 2];
  long fpos, fpos_next = 0;
  int i;
  char_u *spf;

  if (idx == 0) {           // use internal wordlist
    if (int_wordlist == NULL) {
      int_wordlist = vim_tempname();
      if (int_wordlist == NULL)
        return;
    }
    fname = int_wordlist;
  } else {
    // If 'spellfile' isn't set figure out a good default value.
+ if (*curwin->w_s->b_p_spf == NUL) { + init_spellfile(); + new_spf = true; + } + + if (*curwin->w_s->b_p_spf == NUL) { + EMSG2(_(e_notset), "spellfile"); + return; + } + fnamebuf = xmalloc(MAXPATHL); + + for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) { + copy_option_part(&spf, fnamebuf, MAXPATHL, ","); + if (i == idx) + break; + if (*spf == NUL) { + EMSGN(_("E765: 'spellfile' does not have %" PRId64 " entries"), idx); + xfree(fnamebuf); + return; + } + } + + // Check that the user isn't editing the .add file somewhere. + buf = buflist_findname_exp(fnamebuf); + if (buf != NULL && buf->b_ml.ml_mfp == NULL) + buf = NULL; + if (buf != NULL && bufIsChanged(buf)) { + EMSG(_(e_bufloaded)); + xfree(fnamebuf); + return; + } + + fname = fnamebuf; + } + + if (bad || undo) { + // When the word appears as good word we need to remove that one, + // since its flags sort before the one with WF_BANNED. + fd = mch_fopen((char *)fname, "r"); + if (fd != NULL) { + while (!vim_fgets(line, MAXWLEN * 2, fd)) { + fpos = fpos_next; + fpos_next = ftell(fd); + if (STRNCMP(word, line, len) == 0 + && (line[len] == '/' || line[len] < ' ')) { + // Found duplicate word. Remove it by writing a '#' at + // the start of the line. Mixing reading and writing + // doesn't work for all systems, close the file first. + fclose(fd); + fd = mch_fopen((char *)fname, "r+"); + if (fd == NULL) + break; + if (fseek(fd, fpos, SEEK_SET) == 0) { + fputc('#', fd); + if (undo) { + home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); + smsg(_("Word '%.*s' removed from %s"), + len, word, NameBuff); + } + } + fseek(fd, fpos_next, SEEK_SET); + } + } + if (fd != NULL) + fclose(fd); + } + } + + if (!undo) { + fd = mch_fopen((char *)fname, "a"); + if (fd == NULL && new_spf) { + char_u *p; + + // We just initialized the 'spellfile' option and can't open the + // file. We may need to create the "spell" directory first. We + // already checked the runtime directory is writable in + // init_spellfile(). 
+ if (!dir_of_file_exists(fname) && (p = path_tail_with_sep(fname)) != fname) { + int c = *p; + + // The directory doesn't exist. Try creating it and opening + // the file again. + *p = NUL; + os_mkdir((char *)fname, 0755); + *p = c; + fd = mch_fopen((char *)fname, "a"); + } + } + + if (fd == NULL) + EMSG2(_(e_notopen), fname); + else { + if (bad) + fprintf(fd, "%.*s/!\n", len, word); + else + fprintf(fd, "%.*s\n", len, word); + fclose(fd); + + home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); + smsg(_("Word '%.*s' added to %s"), len, word, NameBuff); + } + } + + if (fd != NULL) { + // Update the .add.spl file. + mkspell(1, &fname, false, true, true); + + // If the .add file is edited somewhere, reload it. + if (buf != NULL) + buf_reload(buf, buf->b_orig_mode); + + redraw_all_later(SOME_VALID); + } + xfree(fnamebuf); +} + +// Initialize 'spellfile' for the current buffer. +static void init_spellfile(void) +{ + char_u *buf; + int l; + char_u *fname; + char_u *rtp; + char_u *lend; + bool aspath = false; + char_u *lstart = curbuf->b_s.b_p_spl; + + if (*curwin->w_s->b_p_spl != NUL && !GA_EMPTY(&curwin->w_s->b_langp)) { + buf = xmalloc(MAXPATHL); + + // Find the end of the language name. Exclude the region. If there + // is a path separator remember the start of the tail. + for (lend = curwin->w_s->b_p_spl; *lend != NUL + && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) + if (vim_ispathsep(*lend)) { + aspath = true; + lstart = lend + 1; + } + + // Loop over all entries in 'runtimepath'. Use the first one where we + // are allowed to write. + rtp = p_rtp; + while (*rtp != NUL) { + if (aspath) + // Use directory of an entry with path, e.g., for + // "/dir/lg.utf-8.spl" use "/dir". + STRLCPY(buf, curbuf->b_s.b_p_spl, + lstart - curbuf->b_s.b_p_spl); + else + // Copy the path from 'runtimepath' to buf[]. 
+ copy_option_part(&rtp, buf, MAXPATHL, ","); + if (os_file_is_writable((char *)buf) == 2) { + // Use the first language name from 'spelllang' and the + // encoding used in the first loaded .spl file. + if (aspath) + STRLCPY(buf, curbuf->b_s.b_p_spl, + lend - curbuf->b_s.b_p_spl + 1); + else { + // Create the "spell" directory if it doesn't exist yet. + l = (int)STRLEN(buf); + vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); + if (os_file_is_writable((char *)buf) != 2) { + os_mkdir((char *)buf, 0755); + } + + l = (int)STRLEN(buf); + vim_snprintf((char *)buf + l, MAXPATHL - l, + "/%.*s", (int)(lend - lstart), lstart); + } + l = (int)STRLEN(buf); + fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) + ->lp_slang->sl_fname; + vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", + fname != NULL + && strstr((char *)path_tail(fname), ".ascii.") != NULL + ? (char_u *)"ascii" : spell_enc()); + set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); + break; + } + aspath = false; + } + + xfree(buf); + } +} + +// Set the spell character tables from strings in the affix file. +static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp) +{ + // We build the new tables here first, so that we can compare with the + // previous one. + spelltab_T new_st; + char_u *pf = fol, *pl = low, *pu = upp; + int f, l, u; + + clear_spell_chartab(&new_st); + + while (*pf != NUL) { + if (*pl == NUL || *pu == NUL) { + EMSG(_(e_affform)); + return FAIL; + } + f = mb_ptr2char_adv(&pf); + l = mb_ptr2char_adv(&pl); + u = mb_ptr2char_adv(&pu); + // Every character that appears is a word character. 
+ if (f < 256) + new_st.st_isw[f] = true; + if (l < 256) + new_st.st_isw[l] = true; + if (u < 256) + new_st.st_isw[u] = true; + + // if "LOW" and "FOL" are not the same the "LOW" char needs + // case-folding + if (l < 256 && l != f) { + if (f >= 256) { + EMSG(_(e_affrange)); + return FAIL; + } + new_st.st_fold[l] = f; + } + + // if "UPP" and "FOL" are not the same the "UPP" char needs + // case-folding, it's upper case and the "UPP" is the upper case of + // "FOL" . + if (u < 256 && u != f) { + if (f >= 256) { + EMSG(_(e_affrange)); + return FAIL; + } + new_st.st_fold[u] = f; + new_st.st_isu[u] = true; + new_st.st_upper[f] = u; + } + } + + if (*pl != NUL || *pu != NUL) { + EMSG(_(e_affform)); + return FAIL; + } + + return set_spell_finish(&new_st); +} + +// Set the spell character tables from strings in the .spl file. +static void +set_spell_charflags ( + char_u *flags, + int cnt, // length of "flags" + char_u *fol +) +{ + // We build the new tables here first, so that we can compare with the + // previous one. 
+ spelltab_T new_st; + int i; + char_u *p = fol; + int c; + + clear_spell_chartab(&new_st); + + for (i = 0; i < 128; ++i) { + if (i < cnt) { + new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; + new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; + } + + if (*p != NUL) { + c = mb_ptr2char_adv(&p); + new_st.st_fold[i + 128] = c; + if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) + new_st.st_upper[c] = i + 128; + } + } + + (void)set_spell_finish(&new_st); +} + +static int set_spell_finish(spelltab_T *new_st) +{ + int i; + + if (did_set_spelltab) { + // check that it's the same table + for (i = 0; i < 256; ++i) { + if (spelltab.st_isw[i] != new_st->st_isw[i] + || spelltab.st_isu[i] != new_st->st_isu[i] + || spelltab.st_fold[i] != new_st->st_fold[i] + || spelltab.st_upper[i] != new_st->st_upper[i]) { + EMSG(_("E763: Word characters differ between spell files")); + return FAIL; + } + } + } else { + // copy the new spelltab into the one being used + spelltab = *new_st; + did_set_spelltab = true; + } + + return OK; +} + +// Write the table with prefix conditions to the .spl file. +// When "fd" is NULL only count the length of what is written. +static int write_spell_prefcond(FILE *fd, garray_T *gap) +{ + assert(gap->ga_len >= 0); + + if (fd != NULL) + put_bytes(fd, (uintmax_t)gap->ga_len, 2); // <prefcondcnt> + + size_t totlen = 2 + (size_t)gap->ga_len; // <prefcondcnt> and <condlen> bytes + size_t x = 1; // collect return value of fwrite() + for (int i = 0; i < gap->ga_len; ++i) { + // <prefcond> : <condlen> <condstr> + char_u *p = ((char_u **)gap->ga_data)[i]; + if (p != NULL) { + size_t len = STRLEN(p); + if (fd != NULL) { + assert(len <= INT_MAX); + fputc((int)len, fd); + x &= fwrite(p, len, 1, fd); + } + totlen += len; + } else if (fd != NULL) + fputc(0, fd); + } + + assert(totlen <= INT_MAX); + return (int)totlen; +} + +// Use map string "map" for languages "lp". 
+static void set_map_str(slang_T *lp, char_u *map) +{ + char_u *p; + int headc = 0; + int c; + int i; + + if (*map == NUL) { + lp->sl_has_map = false; + return; + } + lp->sl_has_map = true; + + // Init the array and hash tables empty. + for (i = 0; i < 256; ++i) + lp->sl_map_array[i] = 0; + hash_init(&lp->sl_map_hash); + + // The similar characters are stored separated with slashes: + // "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and + // before the same slash. For characters above 255 sl_map_hash is used. + for (p = map; *p != NUL; ) { + c = mb_cptr2char_adv(&p); + if (c == '/') + headc = 0; + else { + if (headc == 0) + headc = c; + + // Characters above 255 don't fit in sl_map_array[], put them in + // the hash table. Each entry is the char, a NUL the headchar and + // a NUL. + if (c >= 256) { + int cl = mb_char2len(c); + int headcl = mb_char2len(headc); + char_u *b; + hash_T hash; + hashitem_T *hi; + + b = xmalloc(cl + headcl + 2); + mb_char2bytes(c, b); + b[cl] = NUL; + mb_char2bytes(headc, b + cl + 1); + b[cl + 1 + headcl] = NUL; + hash = hash_hash(b); + hi = hash_lookup(&lp->sl_map_hash, (const char *)b, STRLEN(b), hash); + if (HASHITEM_EMPTY(hi)) { + hash_add_item(&lp->sl_map_hash, hi, b, hash); + } else { + // This should have been checked when generating the .spl + // file. 
+ EMSG(_("E783: duplicate char in MAP entry")); + xfree(b); + } + } else + lp->sl_map_array[c] = headc; + } + } +} + diff --git a/src/nvim/spellfile.h b/src/nvim/spellfile.h new file mode 100644 index 0000000000..89acddda0d --- /dev/null +++ b/src/nvim/spellfile.h @@ -0,0 +1,12 @@ +#ifndef NVIM_SPELLFILE_H +#define NVIM_SPELLFILE_H + +#include <stdbool.h> + +#include "nvim/spell_defs.h" +#include "nvim/types.h" + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "spellfile.h.generated.h" +#endif +#endif // NVIM_SPELLFILE_H diff --git a/src/nvim/terminal.c b/src/nvim/terminal.c index cec7fc84a5..bd925a8106 100644 --- a/src/nvim/terminal.c +++ b/src/nvim/terminal.c @@ -238,6 +238,7 @@ Terminal *terminal_open(TerminalOptions opts) set_option_value((uint8_t *)"wrap", false, NULL, OPT_LOCAL); set_option_value((uint8_t *)"number", false, NULL, OPT_LOCAL); set_option_value((uint8_t *)"relativenumber", false, NULL, OPT_LOCAL); + set_option_value((uint8_t *)"list", false, NULL, OPT_LOCAL); buf_set_term_title(curbuf, (char *)curbuf->b_ffname); RESET_BINDING(curwin); // Reset cursor in current window. diff --git a/src/nvim/testdir/Makefile b/src/nvim/testdir/Makefile index 740a10f153..9f9ecbc6c9 100644 --- a/src/nvim/testdir/Makefile +++ b/src/nvim/testdir/Makefile @@ -24,7 +24,6 @@ SCRIPTS ?= \ test64.out \ test73.out \ test79.out \ - test_marks.out \ # Tests using runtest.vim. # Keep test_alot*.res as the last one, sort the others. 
@@ -44,8 +43,10 @@ NEW_TESTS ?= \ test_increment_dbcs.res \ test_lambda.res \ test_langmap.res \ + test_marks.res \ test_match.res \ test_matchadd_conceal.res \ + test_nested_function.res \ test_quickfix.res \ test_signs.res \ test_syntax.res \ @@ -55,6 +56,7 @@ NEW_TESTS ?= \ test_viml.res \ test_visual.res \ test_window_id.res \ + test_writefile.res \ test_alot.res SCRIPTS_GUI := test16.out diff --git a/src/nvim/testdir/test_alot.vim b/src/nvim/testdir/test_alot.vim index 3da9b82a9f..8aa0f417d1 100644 --- a/src/nvim/testdir/test_alot.vim +++ b/src/nvim/testdir/test_alot.vim @@ -16,11 +16,13 @@ source test_lambda.vim source test_match.vim source test_matchadd_conceal_utf8.vim source test_menu.vim +source test_mapping.vim source test_messages.vim source test_options.vim source test_partial.vim source test_popup.vim source test_regexp_utf8.vim +source test_source_utf8.vim source test_statusline.vim source test_syn_attr.vim source test_tabline.vim diff --git a/src/nvim/testdir/test_expr_utf8.vim b/src/nvim/testdir/test_expr_utf8.vim index 097d708329..9ea6d8872b 100644 --- a/src/nvim/testdir/test_expr_utf8.vim +++ b/src/nvim/testdir/test_expr_utf8.vim @@ -35,60 +35,3 @@ func Test_strcharpart_utf8() call assert_equal('̀', strcharpart('àxb', 1, 1)) call assert_equal('x', strcharpart('àxb', 2, 1)) endfunc - -func s:classes_test() - set isprint=@,161-255 - call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+')) - - let alphachars = '' - let lowerchars = '' - let upperchars = '' - let alnumchars = '' - let printchars = '' - let punctchars = '' - let xdigitchars = '' - let i = 1 - while i <= 255 - let c = nr2char(i) - if c =~ '[[:alpha:]]' - let alphachars .= c - endif - if c =~ '[[:lower:]]' - let lowerchars .= c - endif - if c =~ '[[:upper:]]' - let upperchars .= c - endif - if c =~ '[[:alnum:]]' - let alnumchars .= c - endif - if c =~ '[[:print:]]' - let printchars .= c - endif - if c =~ '[[:punct:]]' - let punctchars .= c - endif - if c =~ '[[:xdigit:]]' 
- let xdigitchars .= c - endif - let i += 1 - endwhile - - call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars) - call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars) - call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars) - call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars) - call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars) - call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars) - call assert_equal('0123456789ABCDEFabcdef', xdigitchars) -endfunc - -func Test_classes_re1() - set re=1 - call s:classes_test() -endfunc - -func Test_classes_re2() - set re=2 - call s:classes_test() -endfunc diff --git a/src/nvim/testdir/test_fold.vim b/src/nvim/testdir/test_fold.vim index 1f835b876f..7cb9faa75f 100644 --- a/src/nvim/testdir/test_fold.vim +++ b/src/nvim/testdir/test_fold.vim @@ -96,6 +96,22 @@ func! Test_indent_fold2() bw! endfunc +func Test_folds_marker_in_comment() + new + call setline(1, ['" foo', 'bar', 'baz']) + setl fen fdm=marker + setl com=sO:\"\ -,mO:\"\ \ ,eO:\"\",:\" cms=\"%s + norm! zf2j + setl nofen + :1y + call assert_equal(['" foo{{{'], getreg(0,1,1)) + :+2y + call assert_equal(['baz"}}}'], getreg(0,1,1)) + + set foldmethod& + bwipe! 
+endfunc + func Test_manual_fold_with_filter() if !executable('cat') return diff --git a/src/nvim/testdir/test_glob2regpat.vim b/src/nvim/testdir/test_glob2regpat.vim new file mode 100644 index 0000000000..fdf17946b6 --- /dev/null +++ b/src/nvim/testdir/test_glob2regpat.vim @@ -0,0 +1,30 @@ +" Test glob2regpat() + +func Test_invalid() + call assert_fails('call glob2regpat(1.33)', 'E806:') + call assert_fails('call glob2regpat("}")', 'E219:') + call assert_fails('call glob2regpat("{")', 'E220:') +endfunc + +func Test_valid() + call assert_equal('^foo\.', glob2regpat('foo.*')) + call assert_equal('^foo.$', glob2regpat('foo?')) + call assert_equal('\.vim$', glob2regpat('*.vim')) + call assert_equal('^[abc]$', glob2regpat('[abc]')) + call assert_equal('^foo bar$', glob2regpat('foo\ bar')) + call assert_equal('^foo,bar$', glob2regpat('foo,bar')) + call assert_equal('^\(foo\|bar\)$', glob2regpat('{foo,bar}')) + call assert_equal('.*', glob2regpat('**')) + + if exists('+shellslash') + call assert_equal('^foo[\/].$', glob2regpat('foo\?')) + call assert_equal('^\(foo[\/]\|bar\|foobar\)$', glob2regpat('{foo\,bar,foobar}')) + call assert_equal('^[\/]\(foo\|bar[\/]\)$', glob2regpat('\{foo,bar\}')) + call assert_equal('^[\/][\/]\(foo\|bar[\/][\/]\)$', glob2regpat('\\{foo,bar\\}')) + else + call assert_equal('^foo?$', glob2regpat('foo\?')) + call assert_equal('^\(foo,bar\|foobar\)$', glob2regpat('{foo\,bar,foobar}')) + call assert_equal('^{foo,bar}$', glob2regpat('\{foo,bar\}')) + call assert_equal('^\\\(foo\|bar\\\)$', glob2regpat('\\{foo,bar\\}')) + endif +endfunc diff --git a/src/nvim/testdir/test_mapping.vim b/src/nvim/testdir/test_mapping.vim new file mode 100644 index 0000000000..d937565ce5 --- /dev/null +++ b/src/nvim/testdir/test_mapping.vim @@ -0,0 +1,100 @@ +" Tests for mappings and abbreviations + +if !has('multi_byte') + finish +endif + +func Test_abbreviation() + " abbreviation with 0x80 should work + inoreab чкпр vim + call feedkeys("Goчкпр \<Esc>", "xt") + call 
assert_equal('vim ', getline('$')) + iunab чкпр + set nomodified +endfunc + +func Test_map_ctrl_c_insert() + " mapping of ctrl-c in Insert mode + set cpo-=< cpo-=k + inoremap <c-c> <ctrl-c> + cnoremap <c-c> dummy + cunmap <c-c> + call feedkeys("GoTEST2: CTRL-C |\<C-C>A|\<Esc>", "xt") + call assert_equal('TEST2: CTRL-C |<ctrl-c>A|', getline('$')) + unmap! <c-c> + set nomodified +endfunc + +func Test_map_ctrl_c_visual() + " mapping of ctrl-c in Visual mode + vnoremap <c-c> :<C-u>$put ='vmap works' + call feedkeys("GV\<C-C>\<CR>", "xt") + call assert_equal('vmap works', getline('$')) + vunmap <c-c> + set nomodified +endfunc + +func Test_map_langmap() + " langmap should not get remapped in insert mode + inoremap { FAIL_ilangmap + set langmap=+{ langnoremap + call feedkeys("Go+\<Esc>", "xt") + call assert_equal('+', getline('$')) + + " Insert-mode expr mapping with langmap + inoremap <expr> { "FAIL_iexplangmap" + call feedkeys("Go+\<Esc>", "xt") + call assert_equal('+', getline('$')) + iunmap <expr> { + + " langmap should not get remapped in Command-line mode + cnoremap { FAIL_clangmap + call feedkeys(":call append(line('$'), '+')\<CR>", "xt") + call assert_equal('+', getline('$')) + cunmap { + + " Command-line mode expr mapping with langmap + cnoremap <expr> { "FAIL_cexplangmap" + call feedkeys(":call append(line('$'), '+')\<CR>", "xt") + call assert_equal('+', getline('$')) + cunmap { + set nomodified +endfunc + +func Test_map_feedkeys() + " issue #212 (feedkeys insert mapping at current position) + nnoremap . :call feedkeys(".", "in")<cr> + call setline('$', ['a b c d', 'a b c d']) + $-1 + call feedkeys("0qqdw.ifoo\<Esc>qj0@q\<Esc>", "xt") + call assert_equal(['fooc d', 'fooc d'], getline(line('$') - 1, line('$'))) + unmap . 
+ set nomodified +endfunc + +func Test_map_cursor() + " <c-g>U<cursor> works only within a single line + imapclear + imap ( ()<c-g>U<left> + call feedkeys("G2o\<Esc>ki\<CR>Test1: text with a (here some more text\<Esc>k.", "xt") + call assert_equal('Test1: text with a (here some more text)', getline(line('$') - 2)) + call assert_equal('Test1: text with a (here some more text)', getline(line('$') - 1)) + + " test undo + call feedkeys("G2o\<Esc>ki\<CR>Test2: text wit a (here some more text [und undo]\<C-G>u\<Esc>k.u", "xt") + call assert_equal('', getline(line('$') - 2)) + call assert_equal('Test2: text wit a (here some more text [und undo])', getline(line('$') - 1)) + set nomodified + imapclear +endfunc + +" This isn't actually testing a mapping, but similar use of CTRL-G U as above. +func Test_break_undo() + :set whichwrap=<,>,[,] + call feedkeys("G4o2k", "xt") + exe ":norm! iTest3: text with a (parenthesis here\<C-G>U\<Right>new line here\<esc>\<up>\<up>." + call assert_equal('new line here', getline(line('$') - 3)) + call assert_equal('Test3: text with a (parenthesis here', getline(line('$') - 2)) + call assert_equal('new line here', getline(line('$') - 1)) + set nomodified +endfunc diff --git a/src/nvim/testdir/test_marks.vim b/src/nvim/testdir/test_marks.vim new file mode 100644 index 0000000000..d00b1ddc88 --- /dev/null +++ b/src/nvim/testdir/test_marks.vim @@ -0,0 +1,26 @@ + +" Test that a deleted mark is restored after delete-undo-redo-undo. +function! Test_Restore_DelMark() + enew! + call append(0, [" textline A", " textline B", " textline C"]) + normal! 2gg + set nocp viminfo+=nviminfo + exe "normal! i\<C-G>u\<Esc>" + exe "normal! maddu\<C-R>u" + let pos = getpos("'a") + call assert_equal(2, pos[1]) + call assert_equal(1, pos[2]) + enew! +endfunction + +" Test that CTRL-A and CTRL-X updates last changed mark '[, ']. +function! Test_Incr_Marks() + enew! + call append(0, ["123 123 123", "123 123 123", "123 123 123"]) + normal! gg + execute "normal! 
\<C-A>`[v`]rAjwvjw\<C-X>`[v`]rX" + call assert_equal("AAA 123 123", getline(1)) + call assert_equal("123 XXXXXXX", getline(2)) + call assert_equal("XXX 123 123", getline(3)) + enew! +endfunction diff --git a/src/nvim/testdir/test_nested_function.vim b/src/nvim/testdir/test_nested_function.vim new file mode 100644 index 0000000000..f881730529 --- /dev/null +++ b/src/nvim/testdir/test_nested_function.vim @@ -0,0 +1,32 @@ +"Tests for nested functions +" +function! NestedFunc() + fu! Func1() + let g:text .= 'Func1 ' + endfunction + call Func1() + fu! s:func2() + let g:text .= 's:func2 ' + endfunction + call s:func2() + fu! s:_func3() + let g:text .= 's:_func3 ' + endfunction + call s:_func3() + let fn = 'Func4' + fu! {fn}() + let g:text .= 'Func4 ' + endfunction + call {fn}() + let fn = 'func5' + fu! s:{fn}() + let g:text .= 's:func5' + endfunction + call s:{fn}() +endfunction + +function! Test_nested_functions() + let g:text = '' + call NestedFunc() + call assert_equal('Func1 s:func2 s:_func3 Func4 s:func5', g:text) +endfunction diff --git a/src/nvim/testdir/test_quickfix.vim b/src/nvim/testdir/test_quickfix.vim index 39d0c5407a..640918b343 100644 --- a/src/nvim/testdir/test_quickfix.vim +++ b/src/nvim/testdir/test_quickfix.vim @@ -1509,6 +1509,16 @@ function Xproperty_tests(cchar) call assert_equal('Sample', w:quickfix_title) Xclose + " Tests for action argument + silent! 
Xolder 999 + let qfnr = g:Xgetlist({'all':1}).nr + call g:Xsetlist([], 'r', {'title' : 'N1'}) + call assert_equal('N1', g:Xgetlist({'all':1}).title) + call g:Xsetlist([], ' ', {'title' : 'N2'}) + call assert_equal(qfnr + 1, g:Xgetlist({'all':1}).nr) + call g:Xsetlist([], ' ', {'title' : 'N3'}) + call assert_equal('N2', g:Xgetlist({'nr':2, 'title':1}).title) + " Invalid arguments call assert_fails('call g:Xgetlist([])', 'E715') call assert_fails('call g:Xsetlist([], "a", [])', 'E715') @@ -1561,3 +1571,20 @@ function Test_Autocmd() \ 'postcaddbuffer'] call assert_equal(l, g:acmds) endfunction + +function! Test_Autocmd_Exception() + set efm=%m + lgetexpr '?' + + try + call DoesNotExit() + catch + lgetexpr '1' + finally + lgetexpr '1' + endtry + + call assert_equal('1', getloclist(0)[0].text) + + set efm&vim +endfunction diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim index ecb03a0f8c..9e9a3de500 100644 --- a/src/nvim/testdir/test_regexp_utf8.vim +++ b/src/nvim/testdir/test_regexp_utf8.vim @@ -31,6 +31,65 @@ func Test_equivalence_re2() set re=0 endfunc +func s:classes_test() + set isprint=@,161-255 + call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+')) + + let alphachars = '' + let lowerchars = '' + let upperchars = '' + let alnumchars = '' + let printchars = '' + let punctchars = '' + let xdigitchars = '' + let i = 1 + while i <= 255 + let c = nr2char(i) + if c =~ '[[:alpha:]]' + let alphachars .= c + endif + if c =~ '[[:lower:]]' + let lowerchars .= c + endif + if c =~ '[[:upper:]]' + let upperchars .= c + endif + if c =~ '[[:alnum:]]' + let alnumchars .= c + endif + if c =~ '[[:print:]]' + let printchars .= c + endif + if c =~ '[[:punct:]]' + let punctchars .= c + endif + if c =~ '[[:xdigit:]]' + let xdigitchars .= c + endif + let i += 1 + endwhile + + call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars) + call 
assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars) + call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars) + call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars) + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars) + call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars) + call assert_equal('0123456789ABCDEFabcdef', xdigitchars) +endfunc + +func Test_classes_re1() + set re=1 + call s:classes_test() + set re=0 +endfunc + +func Test_classes_re2() + set re=2 + call s:classes_test() + set re=0 +endfunc + func Test_recursive_substitute() new s/^/\=execute("s#^##gn") diff --git a/src/nvim/testdir/test_source_utf8.vim b/src/nvim/testdir/test_source_utf8.vim new file mode 100644 index 0000000000..edb76fc43d --- /dev/null +++ b/src/nvim/testdir/test_source_utf8.vim @@ -0,0 +1,33 @@ +" Test the :source! command +if !has('multi_byte') + finish +endif + +func Test_source_utf8() + " check that sourcing a script with 0x80 as second byte works + new + call setline(1, [':%s/àx/--à1234--/g', ':%s/Àx/--À1234--/g']) + write! Xscript + bwipe! + new + call setline(1, [' àx ', ' Àx ']) + source! Xscript | echo + call assert_equal(' --à1234-- ', getline(1)) + call assert_equal(' --À1234-- ', getline(2)) + bwipe! + call delete('Xscript') +endfunc + +func Test_source_latin() + " check that sourcing a latin1 script with a 0xc0 byte works + new + call setline(1, ["call feedkeys('r')", "call feedkeys('\xc0', 'xt')"]) + write! Xscript + bwipe! + new + call setline(1, ['xxx']) + source Xscript + call assert_equal("\u00c0xx", getline(1)) + bwipe! 
+ call delete('Xscript') +endfunc diff --git a/src/nvim/testdir/test_writefile.vim b/src/nvim/testdir/test_writefile.vim new file mode 100644 index 0000000000..d820c580ac --- /dev/null +++ b/src/nvim/testdir/test_writefile.vim @@ -0,0 +1,16 @@ + +function! Test_WriteFile() + let f = tempname() + call writefile(["over","written"], f, "b") + call writefile(["hello","world"], f, "b") + call writefile(["!", "good"], f, "a") + call writefile(["morning"], f, "ab") + call writefile(["", "vimmers"], f, "ab") + let l = readfile(f) + call assert_equal("hello", l[0]) + call assert_equal("world!", l[1]) + call assert_equal("good", l[2]) + call assert_equal("morning", l[3]) + call assert_equal("vimmers", l[4]) + call delete(f) +endfunction diff --git a/src/nvim/version.c b/src/nvim/version.c index a845600810..614de67e04 100644 --- a/src/nvim/version.c +++ b/src/nvim/version.c @@ -135,7 +135,7 @@ static int included_patches[] = { // 2308 NA // 2307, // 2306, - // 2305, + 2305, // 2304 NA // 2303, // 2302 NA @@ -176,7 +176,7 @@ static int included_patches[] = { // 2267 NA // 2266, 2265, - // 2264, + 2264, // 2263, // 2262 NA // 2261 NA @@ -217,8 +217,8 @@ static int included_patches[] = { 2226, 2225, // 2224, - // 2223, - // 2222, + 2223, + 2222, // 2221, 2220, 2219, @@ -232,7 +232,7 @@ static int included_patches[] = { // 2211 NA // 2210 NA // 2209, - // 2208, + 2208, // 2207 NA // 2206 NA 2205, @@ -244,7 +244,7 @@ static int included_patches[] = { // 2199 NA // 2198 NA 2197, - // 2196, + 2196, // 2195 NA 2194, // 2193 NA @@ -253,9 +253,9 @@ static int included_patches[] = { // 2190, // 2189, 2188, - // 2187, + 2187, // 2186 NA - // 2185, + 2185, // 2184, 2183, // 2182 NA @@ -339,7 +339,7 @@ static int included_patches[] = { // 2104, 2103, // 2102 NA - // 2101, + 2101, 2100, 2099, 2098, @@ -371,7 +371,7 @@ static int included_patches[] = { 2072, 2071, // 2070 NA - // 2069, + 2069, 2068, 2067, 2066, diff --git a/src/nvim/window.c b/src/nvim/window.c index 73a60b2e04..6c9d3554f1 
100644 --- a/src/nvim/window.c +++ b/src/nvim/window.c @@ -161,13 +161,18 @@ newwindow: /* cursor to preview window */ case 'P': - for (wp = firstwin; wp != NULL; wp = wp->w_next) - if (wp->w_p_pvw) + wp = NULL; + FOR_ALL_WINDOWS_IN_TAB(wp2, curtab) { + if (wp2->w_p_pvw) { + wp = wp2; break; - if (wp == NULL) + } + } + if (wp == NULL) { EMSG(_("E441: There is no preview window")); - else + } else { win_goto(wp); + } break; /* close all but current window */ @@ -2998,8 +3003,7 @@ void free_tabpage(tabpage_T *tp) hash_init(&tp->tp_vars->dv_hashtab); unref_var_dict(tp->tp_vars); - - xfree(tp->localdir); // Free tab-local working directory + xfree(tp->tp_localdir); xfree(tp); } @@ -3025,7 +3029,7 @@ int win_new_tabpage(int after, char_u *filename) return FAIL; } - newtp->localdir = tp->localdir ? vim_strsave(tp->localdir) : NULL; + newtp->tp_localdir = tp->tp_localdir ? vim_strsave(tp->tp_localdir) : NULL; curtab = newtp; @@ -3362,15 +3366,20 @@ void tabpage_move(int nr) tp_dst = tp; - /* Remove the current tab page from the list of tab pages. */ - if (curtab == first_tabpage) + // Remove the current tab page from the list of tab pages. + if (curtab == first_tabpage) { first_tabpage = curtab->tp_next; - else { - for (tp = first_tabpage; tp != NULL; tp = tp->tp_next) - if (tp->tp_next == curtab) + } else { + tp = NULL; + FOR_ALL_TABS(tp2) { + if (tp2->tp_next == curtab) { + tp = tp2; break; - if (tp == NULL) /* "cannot happen" */ + } + } + if (tp == NULL) { // "cannot happen" return; + } tp->tp_next = curtab->tp_next; } @@ -3617,28 +3626,38 @@ static void win_enter_ext(win_T *wp, bool undo_sync, int curwin_invalid, curwin->w_cursor.coladd = 0; changed_line_abv_curs(); /* assume cursor position needs updating */ - // The new directory is either the local directory of the window, of the tab - // or NULL. - char_u *new_dir = curwin->w_localdir ? curwin->w_localdir : curtab->localdir; + // New directory is either the local directory of the window, tab or NULL. 
+ char *new_dir = (char *)(curwin->w_localdir + ? curwin->w_localdir : curtab->tp_localdir); + + char cwd[MAXPATHL]; + if (os_dirname((char_u *)cwd, MAXPATHL) != OK) { + cwd[0] = NUL; + } if (new_dir) { // Window/tab has a local directory: Save current directory as global - // directory (unless that was done already) and change to the local - // directory. + // (unless that was done already) and change to the local directory. if (globaldir == NULL) { - char_u cwd[MAXPATHL]; - - if (os_dirname(cwd, MAXPATHL) == OK) { - globaldir = vim_strsave(cwd); + if (cwd[0] != NUL) { + globaldir = (char_u *)xstrdup(cwd); } } - if (os_chdir((char *)new_dir) == 0) { + if (os_chdir(new_dir) == 0) { + if (!p_acd && !strequal(new_dir, cwd)) { + do_autocmd_dirchanged(new_dir, curwin->w_localdir + ? kCdScopeWindow : kCdScopeTab); + } shorten_fnames(true); } } else if (globaldir != NULL) { - /* Window doesn't have a local directory and we are not in the global - * directory: Change to the global directory. */ - ignored = os_chdir((char *)globaldir); + // Window doesn't have a local directory and we are not in the global + // directory: Change to the global directory. 
+ if (os_chdir((char *)globaldir) == 0) { + if (!p_acd && !strequal((char *)globaldir, cwd)) { + do_autocmd_dirchanged((char *)globaldir, kCdScopeGlobal); + } + } xfree(globaldir); globaldir = NULL; shorten_fnames(TRUE); @@ -5753,10 +5772,11 @@ int win_getid(typval_T *argvars) if (argvars[1].v_type == VAR_UNKNOWN) { wp = firstwin; } else { - tabpage_T *tp; + tabpage_T *tp = NULL; int tabnr = get_tv_number(&argvars[1]); - for (tp = first_tabpage; tp != NULL; tp = tp->tp_next) { + FOR_ALL_TABS(tp2) { if (--tabnr == 0) { + tp = tp2; break; } } @@ -5833,11 +5853,10 @@ win_T * win_id2wp(typval_T *argvars) int win_id2win(typval_T *argvars) { - win_T *wp; int nr = 1; int id = get_tv_number(&argvars[0]); - for (wp = firstwin; wp != NULL; wp = wp->w_next) { + FOR_ALL_WINDOWS_IN_TAB(wp, curtab) { if (wp->handle == id) { return nr; } @@ -5850,12 +5869,9 @@ void win_findbuf(typval_T *argvars, list_T *list) { int bufnr = get_tv_number(&argvars[0]); - for (tabpage_T *tp = first_tabpage; tp != NULL; tp = tp->tp_next) { - for (win_T *wp = tp == curtab ? firstwin : tp->tp_firstwin; - wp != NULL; wp = wp->w_next) { - if (wp->w_buffer->b_fnum == bufnr) { - list_append_number(list, wp->handle); - } + FOR_ALL_TAB_WINDOWS(tp, wp) { + if (wp->w_buffer->b_fnum == bufnr) { + list_append_number(list, wp->handle); } } } diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000000..df66f24626 --- /dev/null +++ b/test/README.md @@ -0,0 +1,101 @@ +# Tests + +Tests are run by `/cmake/RunTests.cmake` file, using busted. + +## Directory structure + +Directories with tests: `/test/benchmark` for benchmarks, `/test/functional` for +functional tests, `/test/unit` for unit tests. `/test/config` contains `*.in` +files (currently a single one) which are transformed into `*.lua` files using +`configure_file` CMake command: this is for accessing CMake variables in lua +tests.
`/test/includes` contains include files for use by luajit `ffi.cdef` +C definitions parser: normally used to make macros not accessible via this +mechanism accessible the other way. + +Files `/test/*/preload.lua` contain modules which will be preloaded by busted, +via `--helper` option. `/test/**/helpers.lua` contain various “library” +functions, (intended to be) used by a number of tests and not just a single one. + +`/test/*/**/*_spec.lua` are files containing actual tests. Files that do not end +with a `_spec.lua` are libraries like `/test/**/helpers.lua`, except that they +have some common topic. + +Tests inside `/test/unit` and `/test/functional` are normally divided into +groups by the semantic component they are testing. + +## Environment variables + +Test behaviour is affected by environment variables. Currently supported +(Functional, Unit, Benchmarks) (when Defined; when set to _1_; when defined, +treated as Integer; when defined, treated as String; !must be defined to +function properly): + +`GDB` (F) (D): makes nvim instances to be run under `gdbserver`. It will be +accessible on `localhost:7777`: use `gdb build/bin/nvim`, type `target remote +:7777` inside. + +`GDBSERVER_PORT` (F) (I): overrides port used for `GDB`. + +`VALGRIND` (F) (D): makes nvim instances to be run under `valgrind`. Log files +are named `valgrind-%p.log` in this case. Note that non-empty valgrind log may +fail tests. Valgrind arguments may be seen in `/test/functional/helpers.lua`. +May be used in conjunction with `GDB`. + +`VALGRIND_LOG` (F) (S): overrides valgrind log file name used for `VALGRIND`. + +`TEST_SKIP_FRAGILE` (F) (D): makes test suite skip some fragile tests. + +`NVIM_PROG`, `NVIM_PRG` (F) (S): override path to Neovim executable (default to +`build/bin/nvim`). + +`CC` (U) (S): specifies which C compiler to use to preprocess files. Currently +only compilers with gcc-compatible arguments are supported. 
+ +`NVIM_TEST_MAIN_CDEFS` (U) (1): makes `ffi.cdef` run in main process. This +raises a possibility of bugs due to conflicts in header definitions, despite the +counters, but greatly speeds up unit tests by not requiring `ffi.cdef` to do +parsing of big strings with C definitions. + +`NVIM_TEST_PRINT_I` (U) (1): makes `cimport` print preprocessed, but not yet +filtered through `formatc` headers. Used to debug `formatc`. Printing is done +with the line numbers. + +`NVIM_TEST_PRINT_CDEF` (U) (1): makes `cimport` print final lines which will be +then passed to `ffi.cdef`. Used to debug errors `ffi.cdef` happens to throw +sometimes. + +`NVIM_TEST_PRINT_SYSCALLS` (U) (1): makes it print to stderr when syscall +wrappers are called and what they returned. Used to debug code which makes unit +tests be executed in separate processes. + +`NVIM_TEST_RUN_FAILING_TESTS` (U) (1): makes `itp` run tests which are known to +fail (marked by setting third argument to `true`). + +`LOG_DIR` (FU) (S!): specifies where to seek for valgrind and ASAN log files. + +`NVIM_TEST_CORE_*` (FU) (S): a set of environment variables which specify where +to search for core files. Are supposed to be defined all at once. + +`NVIM_TEST_CORE_GLOB_DIRECTORY` (FU) (S): directory where core files are +located. May be `.`. This directory is then recursively searched for core files. +Note: this variable must be defined for any of the following to have any effect. + +`NVIM_TEST_CORE_GLOB_RE` (FU) (S): regular expression which must be matched by +core files. E.g. `/core[^/]*$`. May be absent, in which case any file is +considered to be matched. + +`NVIM_TEST_CORE_EXC_RE` (FU) (S): regular expression which excludes certain +directories from searching for core files inside. E.g. use `^/%.deps$` to not +search inside `/.deps`. If absent, nothing is excluded. + +`NVIM_TEST_CORE_DB_CMD` (FU) (S): command to get backtrace out of the debugger. +E.g. 
`gdb -n -batch -ex "thread apply all bt full" "$_NVIM_TEST_APP" -c +"$_NVIM_TEST_CORE"`. Defaults to the example command. This debug command may use +environment variables `_NVIM_TEST_APP` (path to application which is being +debugged: normally either nvim or luajit) and `_NVIM_TEST_CORE` (core file to +get backtrace from). + +`NVIM_TEST_CORE_RANDOM_SKIP` (FU) (D): makes `check_cores` not check cores after +approximately 90% of the tests. Should be used when finding cores is too hard +for some reason. Normally (on OS X or when `NVIM_TEST_CORE_GLOB_DIRECTORY` is +defined and this variable is not) cores are checked for after each test. diff --git a/test/config/paths.lua.in b/test/config/paths.lua.in index 80cc5629d1..8dd4de75db 100644 --- a/test/config/paths.lua.in +++ b/test/config/paths.lua.in @@ -8,6 +8,15 @@ end module.test_include_path = "${CMAKE_BINARY_DIR}/test/includes/post" module.test_libnvim_path = "${TEST_LIBNVIM_PATH}" module.test_source_path = "${CMAKE_SOURCE_DIR}" +module.test_lua_prg = "${LUA_PRG}" +module.test_luajit_prg = "" +if module.test_luajit_prg == '' then + if module.test_lua_prg:sub(-6) == 'luajit' then + module.test_luajit_prg = module.test_lua_prg + else + module.test_luajit_prg = nil + end +end table.insert(module.include_paths, "${CMAKE_BINARY_DIR}/include") return module diff --git a/test/functional/autocmd/dirchanged_spec.lua b/test/functional/autocmd/dirchanged_spec.lua index 15196dbd44..63cf0bc410 100644 --- a/test/functional/autocmd/dirchanged_spec.lua +++ b/test/functional/autocmd/dirchanged_spec.lua @@ -20,29 +20,44 @@ describe('autocmd DirChanged', function() before_each(function() clear() - command('autocmd DirChanged * let [g:event, g:scope, g:cdcount] = [copy(v:event), expand("<amatch>"), 1 + get(g:, "cdcount", 0)]') + command('autocmd DirChanged * let [g:getcwd, g:ev, g:amatch, g:cdcount] ' + ..' = [getcwd(), copy(v:event), expand("<amatch>"), 1 + get(g:, "cdcount", 0)]') + -- Normalize path separators. 
+ command([[autocmd DirChanged * let g:ev['cwd'] = substitute(g:ev['cwd'], '\\', '/', 'g')]]) + command([[autocmd DirChanged * let g:getcwd = substitute(g:getcwd, '\\', '/', 'g')]]) end) it('sets v:event', function() command('lcd '..dirs[1]) - eq({cwd=dirs[1], scope='window'}, eval('g:event')) + eq({cwd=dirs[1], scope='window'}, eval('g:ev')) eq(1, eval('g:cdcount')) command('tcd '..dirs[2]) - eq({cwd=dirs[2], scope='tab'}, eval('g:event')) + eq({cwd=dirs[2], scope='tab'}, eval('g:ev')) eq(2, eval('g:cdcount')) command('cd '..dirs[3]) - eq({cwd=dirs[3], scope='global'}, eval('g:event')) + eq({cwd=dirs[3], scope='global'}, eval('g:ev')) eq(3, eval('g:cdcount')) end) + it('sets getcwd() during event #6260', function() + command('lcd '..dirs[1]) + eq(dirs[1], eval('g:getcwd')) + + command('tcd '..dirs[2]) + eq(dirs[2], eval('g:getcwd')) + + command('cd '..dirs[3]) + eq(dirs[3], eval('g:getcwd')) + end) + it('disallows recursion', function() command('set shellslash') -- Set up a _nested_ handler. command('autocmd DirChanged * nested lcd '..dirs[3]) command('lcd '..dirs[1]) - eq({cwd=dirs[1], scope='window'}, eval('g:event')) + eq({cwd=dirs[1], scope='window'}, eval('g:ev')) eq(1, eval('g:cdcount')) -- autocmd changed to dirs[3], but did NOT trigger another DirChanged. eq(dirs[3], eval('getcwd()')) @@ -50,32 +65,32 @@ describe('autocmd DirChanged', function() it('sets <amatch> to CWD "scope"', function() command('lcd '..dirs[1]) - eq('window', eval('g:scope')) + eq('window', eval('g:amatch')) command('tcd '..dirs[2]) - eq('tab', eval('g:scope')) + eq('tab', eval('g:amatch')) command('cd '..dirs[3]) - eq('global', eval('g:scope')) + eq('global', eval('g:amatch')) end) it('does not trigger if :cd fails', function() - command('let g:event = {}') + command('let g:ev = {}') local status1, err1 = pcall(function() command('lcd '..dirs[1] .. '/doesnotexist') end) - eq({}, eval('g:event')) + eq({}, eval('g:ev')) local status2, err2 = pcall(function() command('lcd '..dirs[2] .. 
'/doesnotexist') end) - eq({}, eval('g:event')) + eq({}, eval('g:ev')) local status3, err3 = pcall(function() command('lcd '..dirs[3] .. '/doesnotexist') end) - eq({}, eval('g:event')) + eq({}, eval('g:ev')) eq(false, status1) eq(false, status2) @@ -90,24 +105,53 @@ describe('autocmd DirChanged', function() command('set autochdir') command('split '..dirs[1]..'/foo') - eq({cwd=dirs[1], scope='window'}, eval('g:event')) + eq({cwd=dirs[1], scope='window'}, eval('g:ev')) command('split '..dirs[2]..'/bar') - eq({cwd=dirs[2], scope='window'}, eval('g:event')) + eq({cwd=dirs[2], scope='window'}, eval('g:ev')) + + eq(2, eval('g:cdcount')) + end) + + it("is triggered by switching to win/tab with different CWD #6054", function() + command('lcd '..dirs[3]) -- window 3 + command('split '..dirs[2]..'/foo') -- window 2 + command('lcd '..dirs[2]) + command('split '..dirs[1]..'/bar') -- window 1 + command('lcd '..dirs[1]) + + command('2wincmd w') -- window 2 + eq({cwd=dirs[2], scope='window'}, eval('g:ev')) + + eq(4, eval('g:cdcount')) + command('tabnew') -- tab 2 (tab-local CWD) + eq(4, eval('g:cdcount')) -- same CWD, no DirChanged event + command('tcd '..dirs[3]) + command('tabnext') -- tab 1 (no tab-local CWD) + eq({cwd=dirs[2], scope='window'}, eval('g:ev')) + command('tabnext') -- tab 2 + eq({cwd=dirs[3], scope='tab'}, eval('g:ev')) + eq(7, eval('g:cdcount')) + + command('tabnext') -- tab 1 + command('3wincmd w') -- window 3 + eq(9, eval('g:cdcount')) + command('tabnext') -- tab 2 (has the *same* CWD) + eq(9, eval('g:cdcount')) -- same CWD, no DirChanged event end) it('is triggered by nvim_set_current_dir()', function() request('nvim_set_current_dir', dirs[1]) - eq({cwd=dirs[1], scope='global'}, eval('g:event')) + eq({cwd=dirs[1], scope='global'}, eval('g:ev')) request('nvim_set_current_dir', dirs[2]) - eq({cwd=dirs[2], scope='global'}, eval('g:event')) + eq({cwd=dirs[2], scope='global'}, eval('g:ev')) local status, err = pcall(function() request('nvim_set_current_dir', 
'/doesnotexist') end) eq(false, status) eq('Failed to change directory', string.match(err, ': (.*)')) - eq({cwd=dirs[2], scope='global'}, eval('g:event')) + eq({cwd=dirs[2], scope='global'}, eval('g:ev')) end) end) diff --git a/test/functional/provider/define_spec.lua b/test/functional/provider/define_spec.lua index b0363eb435..51a8831274 100644 --- a/test/functional/provider/define_spec.lua +++ b/test/functional/provider/define_spec.lua @@ -3,7 +3,6 @@ local eval, command, nvim = helpers.eval, helpers.command, helpers.nvim local eq, run, stop = helpers.eq, helpers.run, helpers.stop local clear = helpers.clear - local function get_prefix(sync) if sync then return 'sync' @@ -11,12 +10,10 @@ local function get_prefix(sync) return 'async' end - local function call(fn, arguments) command('call '..fn..'('..arguments..')') end - local function clear_and_init(init) return function() clear() @@ -26,7 +23,6 @@ local function clear_and_init(init) end end - local function runx(sync, handler, on_setup) local function setup_cb(...) on_setup(...) 
diff --git a/test/functional/terminal/scrollback_spec.lua b/test/functional/terminal/scrollback_spec.lua index 930d0cf58b..81649f2bde 100644 --- a/test/functional/terminal/scrollback_spec.lua +++ b/test/functional/terminal/scrollback_spec.lua @@ -368,10 +368,11 @@ describe("'scrollback' option", function() clear() end) - local function expect_lines(expected) + local function expect_lines(expected, epsilon) + local ep = epsilon and epsilon or 0 local actual = eval("line('$')") - if expected ~= actual then - error('expected: '..expected..', actual: '..tostring(actual)) + if expected > actual + ep and expected < actual - ep then + error('expected (+/- '..ep..'): '..expected..', actual: '..tostring(actual)) end end @@ -399,12 +400,12 @@ describe("'scrollback' option", function() screen:expect('line30 ', nil, nil, nil, true) - retry(nil, nil, function() expect_lines(33) end) + retry(nil, nil, function() expect_lines(33, 2) end) curbufmeths.set_option('scrollback', 10) wait() retry(nil, nil, function() expect_lines(16) end) curbufmeths.set_option('scrollback', 10000) - eq(16, eval("line('$')")) + retry(nil, nil, function() expect_lines(16) end) -- Terminal job data is received asynchronously, may happen before the -- 'scrollback' option is synchronized with the internal sb_buffer. command('sleep 100m') diff --git a/test/helpers.lua b/test/helpers.lua index 25ab80bb50..e5224349c2 100644 --- a/test/helpers.lua +++ b/test/helpers.lua @@ -30,13 +30,15 @@ local function glob(initial_path, re, exc_re) if ((not exc_re or not checked_path:match(exc_re)) and e:sub(1, 1) ~= '.') then local attrs = lfs.attributes(full_path) - local check_key = attrs.dev .. ':' .. tostring(attrs.ino) - if not checked_files[check_key] then - checked_files[check_key] = true - if attrs.mode == 'directory' then - paths_to_check[#paths_to_check + 1] = full_path - elseif not re or checked_path:match(re) then - ret[#ret + 1] = full_path + if attrs then + local check_key = attrs.dev .. ':' .. 
tostring(attrs.ino) + if not checked_files[check_key] then + checked_files[check_key] = true + if attrs.mode == 'directory' then + paths_to_check[#paths_to_check + 1] = full_path + elseif not re or checked_path:match(re) then + ret[#ret + 1] = full_path + end end end end @@ -212,6 +214,17 @@ local function check_cores(app) end end +local function which(exe) + local pipe = io.popen('which ' .. exe, 'r') + local ret = pipe:read('*a') + pipe:close() + if ret == '' then + return nil + else + return ret:sub(1, -2) + end +end + return { eq = eq, neq = neq, @@ -224,4 +237,5 @@ return { glob = glob, check_cores = check_cores, hasenv = hasenv, + which = which, } diff --git a/test/unit/api/helpers.lua b/test/unit/api/helpers.lua index 166456d2a1..4fb1cee4b3 100644 --- a/test/unit/api/helpers.lua +++ b/test/unit/api/helpers.lua @@ -1,4 +1,4 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(nil) local eval_helpers = require('test.unit.eval.helpers') local cimport = helpers.cimport @@ -19,47 +19,55 @@ local api = cimport('./src/nvim/api/private/defs.h', local obj2lua -local obj2lua_tab = { - [tonumber(api.kObjectTypeArray)] = function(obj) - local ret = {[type_key]=list_type} - for i = 1,tonumber(obj.data.array.size) do - ret[i] = obj2lua(obj.data.array.items[i - 1]) - end - if ret[1] then - ret[type_key] = nil - end - return ret - end, - [tonumber(api.kObjectTypeDictionary)] = function(obj) - local ret = {} - for i = 1,tonumber(obj.data.dictionary.size) do - local kv_pair = obj.data.dictionary.items[i - 1] - ret[ffi.string(kv_pair.key.data, kv_pair.key.size)] = obj2lua(kv_pair.value) - end - return ret - end, - [tonumber(api.kObjectTypeBoolean)] = function(obj) - if obj.data.boolean == false then - return false - else - return true - end - end, - [tonumber(api.kObjectTypeNil)] = function(_) - return nil_value - end, - [tonumber(api.kObjectTypeFloat)] = function(obj) - return tonumber(obj.data.floating) - end, - 
[tonumber(api.kObjectTypeInteger)] = function(obj) - return {[type_key]=int_type, value=tonumber(obj.data.integer)} - end, - [tonumber(api.kObjectTypeString)] = function(obj) - return ffi.string(obj.data.string.data, obj.data.string.size) - end, -} +local obj2lua_tab = nil + +local function init_obj2lua_tab() + if obj2lua_tab then + return + end + obj2lua_tab = { + [tonumber(api.kObjectTypeArray)] = function(obj) + local ret = {[type_key]=list_type} + for i = 1,tonumber(obj.data.array.size) do + ret[i] = obj2lua(obj.data.array.items[i - 1]) + end + if ret[1] then + ret[type_key] = nil + end + return ret + end, + [tonumber(api.kObjectTypeDictionary)] = function(obj) + local ret = {} + for i = 1,tonumber(obj.data.dictionary.size) do + local kv_pair = obj.data.dictionary.items[i - 1] + ret[ffi.string(kv_pair.key.data, kv_pair.key.size)] = obj2lua(kv_pair.value) + end + return ret + end, + [tonumber(api.kObjectTypeBoolean)] = function(obj) + if obj.data.boolean == false then + return false + else + return true + end + end, + [tonumber(api.kObjectTypeNil)] = function(_) + return nil_value + end, + [tonumber(api.kObjectTypeFloat)] = function(obj) + return tonumber(obj.data.floating) + end, + [tonumber(api.kObjectTypeInteger)] = function(obj) + return {[type_key]=int_type, value=tonumber(obj.data.integer)} + end, + [tonumber(api.kObjectTypeString)] = function(obj) + return ffi.string(obj.data.string.data, obj.data.string.size) + end, + } +end obj2lua = function(obj) + init_obj2lua_tab() return ((obj2lua_tab[tonumber(obj['type'])] or function(obj_inner) assert(false, 'Converting ' .. tostring(tonumber(obj_inner['type'])) .. 
' is not implementing yet') end)(obj)) diff --git a/test/unit/api/private_helpers_spec.lua b/test/unit/api/private_helpers_spec.lua index 8c54ea6a2a..a534d83165 100644 --- a/test/unit/api/private_helpers_spec.lua +++ b/test/unit/api/private_helpers_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local eval_helpers = require('test.unit.eval.helpers') local api_helpers = require('test.unit.api.helpers') @@ -25,7 +26,7 @@ describe('vim_to_object', function() end local different_output_test = function(name, input, output) - it(name, function() + itp(name, function() eq(output, vim_to_object(input)) end) end @@ -76,19 +77,19 @@ describe('vim_to_object', function() different_output_test('outputs nil for nested lists (2 level, in dict)', lst3, {{lst=nil_value}, true, false, 'ttest'}) - it('outputs empty list for NULL list', function() + itp('outputs empty list for NULL list', function() local tt = typvalt('VAR_LIST', {v_list=NULL}) eq(nil, tt.vval.v_list) eq({[type_key]=list_type}, obj2lua(api.vim_to_object(tt))) end) - it('outputs empty dict for NULL dict', function() + itp('outputs empty dict for NULL dict', function() local tt = typvalt('VAR_DICT', {v_dict=NULL}) eq(nil, tt.vval.v_dict) eq({}, obj2lua(api.vim_to_object(tt))) end) - it('regression: partials in a list', function() + itp('regression: partials in a list', function() local llist = { { [type_key]=func_type, diff --git a/test/unit/buffer_spec.lua b/test/unit/buffer_spec.lua index 49a4d84279..f7124b2782 100644 --- a/test/unit/buffer_spec.lua +++ b/test/unit/buffer_spec.lua @@ -1,5 +1,6 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) local to_cstr = helpers.to_cstr local get_str = helpers.ffi.string @@ -39,17 +40,17 @@ describe('buffer functions', function() describe('buf_valid', function() - it('should view 
NULL as an invalid buffer', function() + itp('should view NULL as an invalid buffer', function() eq(false, buffer.buf_valid(NULL)) end) - it('should view an open buffer as valid', function() + itp('should view an open buffer as valid', function() local buf = buflist_new(path1, buffer.BLN_LISTED) eq(true, buffer.buf_valid(buf)) end) - it('should view a closed and hidden buffer as valid', function() + itp('should view a closed and hidden buffer as valid', function() local buf = buflist_new(path1, buffer.BLN_LISTED) close_buffer(NULL, buf, 0, 0) @@ -57,7 +58,7 @@ describe('buffer functions', function() eq(true, buffer.buf_valid(buf)) end) - it('should view a closed and unloaded buffer as valid', function() + itp('should view a closed and unloaded buffer as valid', function() local buf = buflist_new(path1, buffer.BLN_LISTED) close_buffer(NULL, buf, buffer.DOBUF_UNLOAD, 0) @@ -65,7 +66,7 @@ describe('buffer functions', function() eq(true, buffer.buf_valid(buf)) end) - it('should view a closed and wiped buffer as invalid', function() + itp('should view a closed and wiped buffer as invalid', function() local buf = buflist_new(path1, buffer.BLN_LISTED) close_buffer(NULL, buf, buffer.DOBUF_WIPE, 0) @@ -84,7 +85,7 @@ describe('buffer functions', function() return buffer.buflist_findpat(to_cstr(pat), NULL, allow_unlisted, 0, 0) end - it('should find exact matches', function() + itp('should find exact matches', function() local buf = buflist_new(path1, buffer.BLN_LISTED) eq(buf.handle, buflist_findpat(path1, ONLY_LISTED)) @@ -92,7 +93,7 @@ describe('buffer functions', function() close_buffer(NULL, buf, buffer.DOBUF_WIPE, 0) end) - it('should prefer to match the start of a file path', function() + itp('should prefer to match the start of a file path', function() local buf1 = buflist_new(path1, buffer.BLN_LISTED) local buf2 = buflist_new(path2, buffer.BLN_LISTED) local buf3 = buflist_new(path3, buffer.BLN_LISTED) @@ -106,7 +107,7 @@ describe('buffer functions', function() 
close_buffer(NULL, buf3, buffer.DOBUF_WIPE, 0) end) - it('should prefer to match the end of a file over the middle', function() + itp('should prefer to match the end of a file over the middle', function() --{ Given: Two buffers, where 'test' appears in both -- And: 'test' appears at the end of buf3 but in the middle of buf2 local buf2 = buflist_new(path2, buffer.BLN_LISTED) @@ -130,7 +131,7 @@ describe('buffer functions', function() close_buffer(NULL, buf3, buffer.DOBUF_WIPE, 0) end) - it('should match a unique fragment of a file path', function() + itp('should match a unique fragment of a file path', function() local buf1 = buflist_new(path1, buffer.BLN_LISTED) local buf2 = buflist_new(path2, buffer.BLN_LISTED) local buf3 = buflist_new(path3, buffer.BLN_LISTED) @@ -142,7 +143,7 @@ describe('buffer functions', function() close_buffer(NULL, buf3, buffer.DOBUF_WIPE, 0) end) - it('should include / ignore unlisted buffers based on the flag.', function() + itp('should include / ignore unlisted buffers based on the flag.', function() --{ Given: A buffer local buf3 = buflist_new(path3, buffer.BLN_LISTED) @@ -169,7 +170,7 @@ describe('buffer functions', function() --} end) - it('should prefer listed buffers to unlisted buffers.', function() + itp('should prefer listed buffers to unlisted buffers.', function() --{ Given: Two buffers that match a pattern local buf1 = buflist_new(path1, buffer.BLN_LISTED) local buf2 = buflist_new(path2, buffer.BLN_LISTED) @@ -265,7 +266,7 @@ describe('buffer functions', function() local expected_cell_count = option.expected_cell_count or statusline_cell_count local expected_byte_length = option.expected_byte_length or expected_cell_count - it(description, function() + itp(description, function() if option.file_name then buffer.setfname(globals.curbuf, to_cstr(option.file_name), NULL, 1) else diff --git a/test/unit/eval/decode_spec.lua b/test/unit/eval/decode_spec.lua index 742b754d8a..2d7597c0f4 100644 --- a/test/unit/eval/decode_spec.lua +++ 
b/test/unit/eval/decode_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local to_cstr = helpers.to_cstr @@ -11,25 +12,11 @@ local decode = cimport('./src/nvim/eval/decode.h', './src/nvim/eval_defs.h', './src/nvim/message.h') describe('json_decode_string()', function() - local saved_p_enc = nil - - before_each(function() - saved_p_enc = decode.p_enc - end) - - after_each(function() - decode.emsg_silent = 0 - decode.p_enc = saved_p_enc - while decode.delete_first_msg() == 1 do - -- Delete all messages - end - end) - local char = function(c) return ffi.gc(decode.xmemdup(c, 1), decode.xfree) end - it('does not overflow when running with `n…`, `t…`, `f…`', function() + itp('does not overflow when running with `n…`, `t…`, `f…`', function() local rettv = ffi.new('typval_T', {v_type=decode.VAR_UNKNOWN}) decode.emsg_silent = 1 -- This will not crash, but if `len` argument will be ignored it will parse @@ -56,7 +43,7 @@ describe('json_decode_string()', function() eq(decode.VAR_UNKNOWN, rettv.v_type) end) - it('does not overflow and crash when running with `n`, `t`, `f`', function() + itp('does not overflow and crash when running with `n`, `t`, `f`', function() local rettv = ffi.new('typval_T', {v_type=decode.VAR_UNKNOWN}) decode.emsg_silent = 1 eq(0, decode.json_decode_string(char('n'), 1, rettv)) @@ -67,7 +54,7 @@ describe('json_decode_string()', function() eq(decode.VAR_UNKNOWN, rettv.v_type) end) - it('does not overflow when running with `"…`', function() + itp('does not overflow when running with `"…`', function() local rettv = ffi.new('typval_T', {v_type=decode.VAR_UNKNOWN}) decode.emsg_silent = 1 eq(0, decode.json_decode_string('"t"', 2, rettv)) @@ -84,7 +71,8 @@ describe('json_decode_string()', function() eq(msg, ffi.string(decode.last_msg_hist.msg)) end - it('does not overflow in error messages', function() + itp('does not 
overflow in error messages', function() + local saved_p_enc = decode.p_enc check_failure(']test', 1, 'E474: No container to close: ]') check_failure('[}test', 2, 'E474: Closing list with curly bracket: }') check_failure('{]test', 2, @@ -129,11 +117,11 @@ describe('json_decode_string()', function() check_failure('[1test', 2, 'E474: Unexpected end of input: [1') end) - it('does not overflow with `-`', function() + itp('does not overflow with `-`', function() check_failure('-0', 1, 'E474: Missing number after minus sign: -') end) - it('does not overflow and crash when running with `"`', function() + itp('does not overflow and crash when running with `"`', function() local rettv = ffi.new('typval_T', {v_type=decode.VAR_UNKNOWN}) decode.emsg_silent = 1 eq(0, decode.json_decode_string(char('"'), 1, rettv)) diff --git a/test/unit/eval/encode_spec.lua b/test/unit/eval/encode_spec.lua index 98fc8305e0..058c55093e 100644 --- a/test/unit/eval/encode_spec.lua +++ b/test/unit/eval/encode_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local eval_helpers = require('test.unit.eval.helpers') local cimport = helpers.cimport @@ -18,25 +19,25 @@ describe('encode_list_write()', function() return encode.encode_list_write(l, to_cstr(s), #s) end - it('writes empty string', function() + itp('writes empty string', function() local l = list() eq(0, encode_list_write(l, '')) eq({[type_key]=list_type}, lst2tbl(l)) end) - it('writes ASCII string literal with printable characters', function() + itp('writes ASCII string literal with printable characters', function() local l = list() eq(0, encode_list_write(l, 'abc')) eq({'abc'}, lst2tbl(l)) end) - it('writes string starting with NL', function() + itp('writes string starting with NL', function() local l = list() eq(0, encode_list_write(l, '\nabc')) eq({null_string, 'abc'}, lst2tbl(l)) end) - it('writes string starting with NL twice', 
function() + itp('writes string starting with NL twice', function() local l = list() eq(0, encode_list_write(l, '\nabc')) eq({null_string, 'abc'}, lst2tbl(l)) @@ -44,13 +45,13 @@ describe('encode_list_write()', function() eq({null_string, 'abc', 'abc'}, lst2tbl(l)) end) - it('writes string ending with NL', function() + itp('writes string ending with NL', function() local l = list() eq(0, encode_list_write(l, 'abc\n')) eq({'abc', null_string}, lst2tbl(l)) end) - it('writes string ending with NL twice', function() + itp('writes string ending with NL twice', function() local l = list() eq(0, encode_list_write(l, 'abc\n')) eq({'abc', null_string}, lst2tbl(l)) @@ -58,7 +59,7 @@ describe('encode_list_write()', function() eq({'abc', 'abc', null_string}, lst2tbl(l)) end) - it('writes string starting, ending and containing NL twice', function() + itp('writes string starting, ending and containing NL twice', function() local l = list() eq(0, encode_list_write(l, '\na\nb\n')) eq({null_string, 'a', 'b', null_string}, lst2tbl(l)) @@ -66,7 +67,7 @@ describe('encode_list_write()', function() eq({null_string, 'a', 'b', null_string, 'a', 'b', null_string}, lst2tbl(l)) end) - it('writes string starting, ending and containing NUL with NL between twice', function() + itp('writes string starting, ending and containing NUL with NL between twice', function() local l = list() eq(0, encode_list_write(l, '\0\n\0\n\0')) eq({'\n', '\n', '\n'}, lst2tbl(l)) @@ -74,7 +75,7 @@ describe('encode_list_write()', function() eq({'\n', '\n', '\n\n', '\n', '\n'}, lst2tbl(l)) end) - it('writes string starting, ending and containing NL with NUL between twice', function() + itp('writes string starting, ending and containing NL with NUL between twice', function() local l = list() eq(0, encode_list_write(l, '\n\0\n\0\n')) eq({null_string, '\n', '\n', null_string}, lst2tbl(l)) @@ -82,7 +83,7 @@ describe('encode_list_write()', function() eq({null_string, '\n', '\n', null_string, '\n', '\n', null_string}, 
lst2tbl(l)) end) - it('writes string containing a single NL twice', function() + itp('writes string containing a single NL twice', function() local l = list() eq(0, encode_list_write(l, '\n')) eq({null_string, null_string}, lst2tbl(l)) @@ -90,7 +91,7 @@ describe('encode_list_write()', function() eq({null_string, null_string, null_string}, lst2tbl(l)) end) - it('writes string containing a few NLs twice', function() + itp('writes string containing a few NLs twice', function() local l = list() eq(0, encode_list_write(l, '\n\n\n')) eq({null_string, null_string, null_string, null_string}, lst2tbl(l)) diff --git a/test/unit/eval/helpers.lua b/test/unit/eval/helpers.lua index c3c27e4fed..1377d5b501 100644 --- a/test/unit/eval/helpers.lua +++ b/test/unit/eval/helpers.lua @@ -1,4 +1,4 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(nil) local cimport = helpers.cimport local to_cstr = helpers.to_cstr @@ -46,12 +46,6 @@ local function list(...) return ret end -local special_tab = { - [eval.kSpecialVarFalse] = false, - [eval.kSpecialVarNull] = nil_value, - [eval.kSpecialVarTrue] = true, -} - local ptr2key = function(ptr) return tostring(ptr) end @@ -60,64 +54,74 @@ local lst2tbl local dct2tbl local typvalt2lua -local typvalt2lua_tab +local typvalt2lua_tab = nil -typvalt2lua_tab = { - [tonumber(eval.VAR_SPECIAL)] = function(t) - return special_tab[t.vval.v_special] - end, - [tonumber(eval.VAR_NUMBER)] = function(t) - return {[type_key]=int_type, value=tonumber(t.vval.v_number)} - end, - [tonumber(eval.VAR_FLOAT)] = function(t) - return tonumber(t.vval.v_float) - end, - [tonumber(eval.VAR_STRING)] = function(t) - local str = t.vval.v_string - if str == nil then - return null_string - else - return ffi.string(str) - end - end, - [tonumber(eval.VAR_LIST)] = function(t, processed) - return lst2tbl(t.vval.v_list, processed) - end, - [tonumber(eval.VAR_DICT)] = function(t, processed) - return dct2tbl(t.vval.v_dict, processed) - end, - 
[tonumber(eval.VAR_FUNC)] = function(t, processed) - return {[type_key]=func_type, value=typvalt2lua_tab[eval.VAR_STRING](t, processed or {})} - end, - [tonumber(eval.VAR_PARTIAL)] = function(t, processed) - local p_key = ptr2key(t) - if processed[p_key] then - return processed[p_key] - end - local pt = t.vval.v_partial - local value, auto, dict, argv = nil, nil, nil, nil - if pt ~= nil then - value = ffi.string(pt.pt_name) - auto = pt.pt_auto and true or nil - argv = {} - for i = 1, pt.pt_argc do - argv[i] = typvalt2lua(pt.pt_argv[i - 1], processed) +local function typvalt2lua_tab_init() + if typvalt2lua_tab then + return + end + typvalt2lua_tab = { + [tonumber(eval.VAR_SPECIAL)] = function(t) + return ({ + [eval.kSpecialVarFalse] = false, + [eval.kSpecialVarNull] = nil_value, + [eval.kSpecialVarTrue] = true, + })[t.vval.v_special] + end, + [tonumber(eval.VAR_NUMBER)] = function(t) + return {[type_key]=int_type, value=tonumber(t.vval.v_number)} + end, + [tonumber(eval.VAR_FLOAT)] = function(t) + return tonumber(t.vval.v_float) + end, + [tonumber(eval.VAR_STRING)] = function(t) + local str = t.vval.v_string + if str == nil then + return null_string + else + return ffi.string(str) end - if pt.pt_dict ~= nil then - dict = dct2tbl(pt.pt_dict) + end, + [tonumber(eval.VAR_LIST)] = function(t, processed) + return lst2tbl(t.vval.v_list, processed) + end, + [tonumber(eval.VAR_DICT)] = function(t, processed) + return dct2tbl(t.vval.v_dict, processed) + end, + [tonumber(eval.VAR_FUNC)] = function(t, processed) + return {[type_key]=func_type, value=typvalt2lua_tab[eval.VAR_STRING](t, processed or {})} + end, + [tonumber(eval.VAR_PARTIAL)] = function(t, processed) + local p_key = ptr2key(t) + if processed[p_key] then + return processed[p_key] end - end - return { - [type_key]=func_type, - value=value, - auto=auto, - args=argv, - dict=dict, - } - end, -} + local pt = t.vval.v_partial + local value, auto, dict, argv = nil, nil, nil, nil + if pt ~= nil then + value = 
ffi.string(pt.pt_name) + auto = pt.pt_auto and true or nil + argv = {} + for i = 1, pt.pt_argc do + argv[i] = typvalt2lua(pt.pt_argv[i - 1], processed) + end + if pt.pt_dict ~= nil then + dict = dct2tbl(pt.pt_dict) + end + end + return { + [type_key]=func_type, + value=value, + auto=auto, + args=argv, + dict=dict, + } + end, + } +end typvalt2lua = function(t, processed) + typvalt2lua_tab_init() return ((typvalt2lua_tab[tonumber(t.v_type)] or function(t_inner) assert(false, 'Converting ' .. tonumber(t_inner.v_type) .. ' was not implemented yet') end)(t, processed or {})) @@ -169,9 +173,10 @@ lst2tbl = function(l, processed) return ret end -local hi_key_removed = eval._hash_key_removed() +local hi_key_removed = nil local function dict_iter(d, return_hi) + hi_key_removed = hi_key_removed or eval._hash_key_removed() local init_s = { todo=d.dv_hashtab.ht_used, hi=d.dv_hashtab.ht_array, @@ -320,25 +325,28 @@ local lua2typvalt_type_tab = { end, } -local special_vals = { - [null_string] = {eval.VAR_STRING, {v_string=ffi.cast('char_u*', nil)}}, - [null_list] = {eval.VAR_LIST, {v_list=ffi.cast('list_T*', nil)}}, - [null_dict] = {eval.VAR_DICT, {v_dict=ffi.cast('dict_T*', nil)}}, - [nil_value] = {eval.VAR_SPECIAL, {v_special=eval.kSpecialVarNull}}, - [true] = {eval.VAR_SPECIAL, {v_special=eval.kSpecialVarTrue}}, - [false] = {eval.VAR_SPECIAL, {v_special=eval.kSpecialVarFalse}}, -} +local special_vals = nil -for k, v in pairs(special_vals) do - local tmp = function(typ, vval) - special_vals[k] = function() - return typvalt(typ, vval) +lua2typvalt = function(l, processed) + if not special_vals then + special_vals = { + [null_string] = {'VAR_STRING', {v_string=ffi.cast('char_u*', nil)}}, + [null_list] = {'VAR_LIST', {v_list=ffi.cast('list_T*', nil)}}, + [null_dict] = {'VAR_DICT', {v_dict=ffi.cast('dict_T*', nil)}}, + [nil_value] = {'VAR_SPECIAL', {v_special=eval.kSpecialVarNull}}, + [true] = {'VAR_SPECIAL', {v_special=eval.kSpecialVarTrue}}, + [false] = {'VAR_SPECIAL', 
{v_special=eval.kSpecialVarFalse}}, + } + + for k, v in pairs(special_vals) do + local tmp = function(typ, vval) + special_vals[k] = function() + return typvalt(eval[typ], vval) + end + end + tmp(v[1], v[2]) end end - tmp(v[1], v[2]) -end - -lua2typvalt = function(l, processed) processed = processed or {} if l == nil or l == nil_value then return special_vals[nil_value]() @@ -360,7 +368,7 @@ lua2typvalt = function(l, processed) return typvalt(eval.VAR_STRING, {v_string=eval.xmemdupz(to_cstr(l), #l)}) elseif type(l) == 'cdata' then local tv = typvalt(eval.VAR_UNKNOWN) - eval.tv_copy(l, tv) + eval.copy_tv(l, tv) return tv end end diff --git a/test/unit/eval/tricks_spec.lua b/test/unit/eval/tricks_spec.lua index 4c5184995c..ec79a9cad5 100644 --- a/test/unit/eval/tricks_spec.lua +++ b/test/unit/eval/tricks_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local to_cstr = helpers.to_cstr @@ -15,7 +16,7 @@ local eval_expr = function(expr) end describe('NULL typval_T', function() - it('is produced by $XXX_UNEXISTENT_VAR_XXX', function() + itp('is produced by $XXX_UNEXISTENT_VAR_XXX', function() -- Required for various tests which need to check whether typval_T with NULL -- string works correctly. 
This test checks that unexistent environment -- variable produces NULL string, not that some specific environment @@ -29,13 +30,13 @@ describe('NULL typval_T', function() eq(nil, rettv.vval.v_string) end) - it('is produced by v:_null_list', function() + itp('is produced by v:_null_list', function() local rettv = eval_expr('v:_null_list') eq(eval.VAR_LIST, rettv.v_type) eq(nil, rettv.vval.v_list) end) - it('is produced by v:_null_dict', function() + itp('is produced by v:_null_dict', function() local rettv = eval_expr('v:_null_dict') eq(eval.VAR_DICT, rettv.v_type) eq(nil, rettv.vval.v_dict) diff --git a/test/unit/eval/tv_clear_spec.lua b/test/unit/eval/tv_clear_spec.lua index 96eccdbd71..47d4661ad8 100644 --- a/test/unit/eval/tv_clear_spec.lua +++ b/test/unit/eval/tv_clear_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local eval_helpers = require('test.unit.eval.helpers') local alloc_log_new = helpers.alloc_log_new @@ -26,7 +27,7 @@ after_each(function() end) describe('clear_tv()', function() - it('successfully frees all lists in [&l [1], *l, *l]', function() + itp('successfully frees all lists in [&l [1], *l, *l]', function() local l_inner = {1} local list = {l_inner, l_inner, l_inner} local list_tv = ffi.gc(lua2typvalt(list), nil) @@ -53,7 +54,7 @@ describe('clear_tv()', function() a.freed(list_p), }) end) - it('successfully frees all lists in [&l [], *l, *l]', function() + itp('successfully frees all lists in [&l [], *l, *l]', function() local l_inner = {[type_key]=list_type} local list = {l_inner, l_inner, l_inner} local list_tv = ffi.gc(lua2typvalt(list), nil) @@ -77,7 +78,7 @@ describe('clear_tv()', function() a.freed(list_p), }) end) - it('successfully frees all dictionaries in [&d {}, *d]', function() + itp('successfully frees all dictionaries in [&d {}, *d]', function() local d_inner = {} local list = {d_inner, d_inner} local list_tv = 
ffi.gc(lua2typvalt(list), nil) @@ -99,7 +100,7 @@ describe('clear_tv()', function() a.freed(list_p), }) end) - it('successfully frees all dictionaries in [&d {a: 1}, *d]', function() + itp('successfully frees all dictionaries in [&d {a: 1}, *d]', function() local d_inner = {a=1} local list = {d_inner, d_inner} local list_tv = ffi.gc(lua2typvalt(list), nil) diff --git a/test/unit/fileio_spec.lua b/test/unit/fileio_spec.lua index 3e3c36617d..066d013b19 100644 --- a/test/unit/fileio_spec.lua +++ b/test/unit/fileio_spec.lua @@ -1,4 +1,5 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) --{:cimport, :internalize, :eq, :neq, :ffi, :lib, :cstr, :to_cstr} = require 'test.unit.helpers' local eq = helpers.eq @@ -16,67 +17,67 @@ describe('file_pat functions', function() return ffi.string(res) end - it('returns ^path$ regex for literal path input', function() + itp('returns ^path$ regex for literal path input', function() eq( '^path$', file_pat_to_reg_pat('path')) end) - it('does not prepend ^ when there is a starting glob (*)', function() + itp('does not prepend ^ when there is a starting glob (*)', function() eq('path$', file_pat_to_reg_pat('*path')) end) - it('does not append $ when there is an ending glob (*)', function() + itp('does not append $ when there is an ending glob (*)', function() eq('^path', file_pat_to_reg_pat('path*')) end) - it('does not include ^ or $ when surrounded by globs (*)', function() + itp('does not include ^ or $ when surrounded by globs (*)', function() eq('path', file_pat_to_reg_pat('*path*')) end) - it('replaces the bash any character (?) with the regex any character (.)', function() + itp('replaces the bash any character (?) 
with the regex any character (.)', function() eq('^foo.bar$', file_pat_to_reg_pat('foo?bar')) end) - it('replaces a glob (*) in the middle of a path with regex multiple any character (.*)', + itp('replaces a glob (*) in the middle of a path with regex multiple any character (.*)', function() eq('^foo.*bar$', file_pat_to_reg_pat('foo*bar')) end) - it([[unescapes \? to ?]], function() + itp([[unescapes \? to ?]], function() eq('^foo?bar$', file_pat_to_reg_pat([[foo\?bar]])) end) - it([[unescapes \% to %]], function() + itp([[unescapes \% to %]], function() eq('^foo%bar$', file_pat_to_reg_pat([[foo\%bar]])) end) - it([[unescapes \, to ,]], function() + itp([[unescapes \, to ,]], function() eq('^foo,bar$', file_pat_to_reg_pat([[foo\,bar]])) end) - it([[unescapes '\ ' to ' ']], function() + itp([[unescapes '\ ' to ' ']], function() eq('^foo bar$', file_pat_to_reg_pat([[foo\ bar]])) end) - it([[escapes . to \.]], function() + itp([[escapes . to \.]], function() eq([[^foo\.bar$]], file_pat_to_reg_pat('foo.bar')) end) - it('Converts bash brace expansion {a,b} to regex options (a|b)', function() + itp('Converts bash brace expansion {a,b} to regex options (a|b)', function() eq([[^foo\(bar\|baz\)$]], file_pat_to_reg_pat('foo{bar,baz}')) end) - it('Collapses multiple consecutive * into a single character', function() + itp('Collapses multiple consecutive * into a single character', function() eq([[^foo.*bar$]], file_pat_to_reg_pat('foo*******bar')) eq([[foobar$]], file_pat_to_reg_pat('********foobar')) eq([[^foobar]], file_pat_to_reg_pat('foobar********')) end) - it('Does not escape ^', function() + itp('Does not escape ^', function() eq([[^^blah$]], file_pat_to_reg_pat('^blah')) eq([[^foo^bar$]], file_pat_to_reg_pat('foo^bar')) end) - it('Does not escape $', function() + itp('Does not escape $', function() eq([[^blah$$]], file_pat_to_reg_pat('blah$')) eq([[^foo$bar$]], file_pat_to_reg_pat('foo$bar')) end) diff --git a/test/unit/fixtures/posix.h b/test/unit/fixtures/posix.h 
new file mode 100644 index 0000000000..f6f24cd9dc --- /dev/null +++ b/test/unit/fixtures/posix.h @@ -0,0 +1,11 @@ +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/wait.h> +#include <stdlib.h> + +enum { + kPOSIXErrnoEINTR = EINTR, + kPOSIXErrnoECHILD = ECHILD, + kPOSIXWaitWUNTRACED = WUNTRACED, +}; diff --git a/test/unit/garray_spec.lua b/test/unit/garray_spec.lua index 422ef7b36a..28df8a6e3f 100644 --- a/test/unit/garray_spec.lua +++ b/test/unit/garray_spec.lua @@ -1,4 +1,5 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local internalize = helpers.internalize @@ -8,7 +9,7 @@ local ffi = helpers.ffi local to_cstr = helpers.to_cstr local NULL = helpers.NULL -local garray = cimport('stdlib.h', './src/nvim/garray.h') +local garray = cimport('./src/nvim/garray.h') local itemsize = 14 local growsize = 95 @@ -156,7 +157,7 @@ local ga_append_ints = function(garr, ...) end -- enhanced constructors -local garray_ctype = ffi.typeof('garray_T[1]') +local garray_ctype = function(...) return ffi.typeof('garray_T[1]')(...) end local new_garray = function() local garr = garray_ctype() return ffi.gc(garr, ga_clear) @@ -183,7 +184,7 @@ end describe('garray', function() describe('ga_init', function() - it('initializes the values of the garray', function() + itp('initializes the values of the garray', function() local garr = new_garray() ga_init(garr, itemsize, growsize) eq(0, ga_len(garr)) @@ -204,7 +205,7 @@ describe('garray', function() return garr end - it('grows by growsize items if num < growsize', function() + itp('grows by growsize items if num < growsize', function() itemsize = 16 growsize = 4 local grow_by = growsize - 1 @@ -213,7 +214,7 @@ describe('garray', function() eq(growsize, ga_maxlen(garr)) -- we requested LESS than growsize, so... 
end) - it('grows by num items if num > growsize', function() + itp('grows by num items if num > growsize', function() itemsize = 16 growsize = 4 local grow_by = growsize + 1 @@ -222,7 +223,7 @@ describe('garray', function() eq(grow_by, ga_maxlen(garr)) -- we requested MORE than growsize, so... end) - it('does not grow when nothing is requested', function() + itp('does not grow when nothing is requested', function() local garr = new_and_grow(16, 4, 0) eq(NULL, ga_data(garr)) eq(0, ga_maxlen(garr)) @@ -230,7 +231,7 @@ describe('garray', function() end) describe('ga_clear', function() - it('clears an already allocated array', function() + itp('clears an already allocated array', function() -- allocate and scramble an array local garr = garray_ctype() ga_init(garr, itemsize, growsize) @@ -247,7 +248,7 @@ describe('garray', function() end) describe('ga_append', function() - it('can append bytes', function() + itp('can append bytes', function() -- this is the actual ga_append, the others are just emulated lua -- versions local garr = new_garray() @@ -262,7 +263,7 @@ describe('garray', function() eq('hello', ffi.string(bytes)) end) - it('can append integers', function() + itp('can append integers', function() local garr = new_garray() ga_init(garr, ffi.sizeof("int"), 1) local input = { @@ -279,7 +280,7 @@ describe('garray', function() end end) - it('can append strings to a growing array of strings', function() + itp('can append strings to a growing array of strings', function() local garr = new_string_garray() local input = { "some", @@ -298,7 +299,7 @@ describe('garray', function() end) describe('ga_concat', function() - it('concatenates the parameter to the growing byte array', function() + itp('concatenates the parameter to the growing byte array', function() local garr = new_garray() ga_init(garr, ffi.sizeof("char"), 1) local str = "ohwell●●" @@ -329,11 +330,11 @@ describe('garray', function() end describe('ga_concat_strings', function() - it('returns an empty string 
when concatenating an empty array', function() + itp('returns an empty string when concatenating an empty array', function() test_concat_fn({ }, ga_concat_strings) end) - it('can concatenate a non-empty array', function() + itp('can concatenate a non-empty array', function() test_concat_fn({ 'oh', 'my', @@ -343,11 +344,11 @@ describe('garray', function() end) describe('ga_concat_strings_sep', function() - it('returns an empty string when concatenating an empty array', function() + itp('returns an empty string when concatenating an empty array', function() test_concat_fn({ }, ga_concat_strings_sep, '---') end) - it('can concatenate a non-empty array', function() + itp('can concatenate a non-empty array', function() local sep = '-●●-' test_concat_fn({ 'oh', @@ -358,7 +359,7 @@ describe('garray', function() end) describe('ga_remove_duplicate_strings', function() - it('sorts and removes duplicate strings', function() + itp('sorts and removes duplicate strings', function() local garr = new_string_garray() local input = { 'ccc', diff --git a/test/unit/helpers.lua b/test/unit/helpers.lua index 4af078b486..612b337ee7 100644 --- a/test/unit/helpers.lua +++ b/test/unit/helpers.lua @@ -4,8 +4,15 @@ local Set = require('test.unit.set') local Preprocess = require('test.unit.preprocess') local Paths = require('test.config.paths') local global_helpers = require('test.helpers') +local assert = require('luassert') +local say = require('say') +local posix = nil +local syscall = nil + +local check_cores = global_helpers.check_cores local neq = global_helpers.neq +local map = global_helpers.map local eq = global_helpers.eq local ok = global_helpers.ok @@ -15,20 +22,110 @@ local NULL = ffi.cast('void*', 0) local OK = 1 local FAIL = 0 +local cimport + -- add some standard header locations for _, p in ipairs(Paths.include_paths) do Preprocess.add_to_include_path(p) end --- load neovim shared library -local libnvim = ffi.load(Paths.test_libnvim_path) +local child_pid = nil +local function 
only_separate(func) + return function(...) + if child_pid ~= 0 then + error('This function must be run in a separate process only') + end + return func(...) + end +end +local child_calls_init = {} +local child_calls_mod = nil +local child_calls_mod_once = nil +local function child_call(func, ret) + return function(...) + local child_calls = child_calls_mod or child_calls_init + if child_pid ~= 0 then + child_calls[#child_calls + 1] = {func=func, args={...}} + return ret + else + return func(...) + end + end +end + +-- Run some code at the start of the child process, before running the test +-- itself. Is supposed to be run in `before_each`. +local function child_call_once(func, ...) + if child_pid ~= 0 then + child_calls_mod_once[#child_calls_mod_once + 1] = { + func=func, args={...}} + else + func(...) + end +end + +local child_cleanups_mod_once = nil + +-- Run some code at the end of the child process, before exiting. Is supposed to +-- be run in `before_each` because `after_each` is run after child has exited. +local function child_cleanup_once(func, ...) + local child_cleanups = child_cleanups_mod_once + if child_pid ~= 0 then + child_cleanups[#child_cleanups + 1] = {func=func, args={...}} + else + func(...) 
+ end +end + +local libnvim = nil + +local lib = setmetatable({}, { + __index = only_separate(function(_, idx) + return libnvim[idx] + end), + __newindex = child_call(function(_, idx, val) + libnvim[idx] = val + end), +}) + +local init = only_separate(function() + -- load neovim shared library + libnvim = ffi.load(Paths.test_libnvim_path) + for _, c in ipairs(child_calls_init) do + c.func(unpack(c.args)) + end + libnvim.time_init() + libnvim.early_init() + libnvim.event_init() + if child_calls_mod then + for _, c in ipairs(child_calls_mod) do + c.func(unpack(c.args)) + end + end + if child_calls_mod_once then + for _, c in ipairs(child_calls_mod_once) do + c.func(unpack(c.args)) + end + child_calls_mod_once = nil + end +end) + +local deinit = only_separate(function() + if child_cleanups_mod_once then + for _, c in ipairs(child_cleanups_mod_once) do + c.func(unpack(c.args)) + end + child_cleanups_mod_once = nil + end +end) local function trim(s) return s:match('^%s*(.*%S)') or '' end -- a Set that keeps around the lines we've already seen -local cdefs = Set:new() +local cdefs_init = Set:new() +local cdefs_mod = nil local imported = Set:new() local pragma_pack_id = 1 @@ -51,84 +148,120 @@ local function filter_complex_blocks(body) return table.concat(result, "\n") end -local previous_defines = '' --- use this helper to import C files, you can pass multiple paths at once, --- this helper will return the C namespace of the nvim library. -local function cimport(...) 
- local paths = {} - local args = {...} - - -- filter out paths we've already imported - for _,path in pairs(args) do - if path ~= nil and not imported:contains(path) then - paths[#paths + 1] = path - end - end +local cdef = ffi.cdef - for _,path in pairs(paths) do - imported:add(path) - end +local cimportstr - if #paths == 0 then - return libnvim - end +local previous_defines_init = '' +local preprocess_cache_init = {} +local previous_defines_mod = '' +local preprocess_cache_mod = nil - local body - body, previous_defines = Preprocess.preprocess(previous_defines, unpack(paths)) +local function is_child_cdefs() + return (os.getenv('NVIM_TEST_MAIN_CDEFS') ~= '1') +end - -- format it (so that the lines are "unique" statements), also filter out - -- Objective-C blocks - if os.getenv('NVIM_TEST_PRINT_I') == '1' then - local lnum = 0 - for line in body:gmatch('[^\n]+') do - lnum = lnum + 1 - print(lnum, line) - end +-- use this helper to import C files, you can pass multiple paths at once, +-- this helper will return the C namespace of the nvim library. +cimport = function(...) + local previous_defines, preprocess_cache, cdefs + if is_child_cdefs() and preprocess_cache_mod then + preprocess_cache = preprocess_cache_mod + previous_defines = previous_defines_mod + cdefs = cdefs_mod + else + preprocess_cache = preprocess_cache_init + previous_defines = previous_defines_init + cdefs = cdefs_init end - body = formatc(body) - body = filter_complex_blocks(body) + for _, path in ipairs({...}) do + if not (path:sub(1, 1) == '/' or path:sub(1, 1) == '.' + or path:sub(2, 2) == ':') then + path = './' .. 
path + end + if not preprocess_cache[path] then + local body + body, previous_defines = Preprocess.preprocess(previous_defines, path) + -- format it (so that the lines are "unique" statements), also filter out + -- Objective-C blocks + if os.getenv('NVIM_TEST_PRINT_I') == '1' then + local lnum = 0 + for line in body:gmatch('[^\n]+') do + lnum = lnum + 1 + print(lnum, line) + end + end + body = formatc(body) + body = filter_complex_blocks(body) + -- add the formatted lines to a set + local new_cdefs = Set:new() + for line in body:gmatch("[^\r\n]+") do + line = trim(line) + -- give each #pragma pack an unique id, so that they don't get removed + -- if they are inserted into the set + -- (they are needed in the right order with the struct definitions, + -- otherwise luajit has wrong memory layouts for the sturcts) + if line:match("#pragma%s+pack") then + line = line .. " // " .. pragma_pack_id + pragma_pack_id = pragma_pack_id + 1 + end + new_cdefs:add(line) + end - -- add the formatted lines to a set - local new_cdefs = Set:new() - for line in body:gmatch("[^\r\n]+") do - line = trim(line) - -- give each #pragma pack an unique id, so that they don't get removed - -- if they are inserted into the set - -- (they are needed in the right order with the struct definitions, - -- otherwise luajit has wrong memory layouts for the sturcts) - if line:match("#pragma%s+pack") then - line = line .. " // " .. 
pragma_pack_id - pragma_pack_id = pragma_pack_id + 1 + -- subtract the lines we've already imported from the new lines, then add + -- the new unique lines to the old lines (so they won't be imported again) + new_cdefs:diff(cdefs) + cdefs:union(new_cdefs) + -- request a sorted version of the new lines (same relative order as the + -- original preprocessed file) and feed that to the LuaJIT ffi + local new_lines = new_cdefs:to_table() + if os.getenv('NVIM_TEST_PRINT_CDEF') == '1' then + for lnum, line in ipairs(new_lines) do + print(lnum, line) + end + end + body = table.concat(new_lines, '\n') + + preprocess_cache[path] = body end - new_cdefs:add(line) + cimportstr(preprocess_cache, path) end + return lib +end - -- subtract the lines we've already imported from the new lines, then add - -- the new unique lines to the old lines (so they won't be imported again) - new_cdefs:diff(cdefs) - cdefs:union(new_cdefs) - - if new_cdefs:size() == 0 then - -- if there's no new lines, just return - return libnvim +local cimport_immediate = function(...) + local saved_pid = child_pid + child_pid = 0 + local err, emsg = pcall(cimport, ...) + child_pid = saved_pid + if not err then + emsg = tostring(emsg) + io.stderr:write(emsg .. '\n') + assert(false) + else + return lib end +end - -- request a sorted version of the new lines (same relative order as the - -- original preprocessed file) and feed that to the LuaJIT ffi - local new_lines = new_cdefs:to_table() - if os.getenv('NVIM_TEST_PRINT_CDEF') == '1' then - for lnum, line in ipairs(new_lines) do - print(lnum, line) - end +local function _cimportstr(preprocess_cache, path) + if imported:contains(path) then + return lib end - ffi.cdef(table.concat(new_lines, "\n")) + local body = preprocess_cache[path] + if body == '' then + return lib + end + cdef(body) + imported:add(path) - return libnvim + return lib end -local function cppimport(path) - return cimport(Paths.test_include_path .. '/' .. 
path) +if is_child_cdefs() then + cimportstr = child_call(_cimportstr, lib) +else + cimportstr = _cimportstr end local function alloc_log_new() @@ -141,9 +274,12 @@ local function alloc_log_new() local allocator_functions = {'malloc', 'free', 'calloc', 'realloc'} function log:save_original_functions() for _, funcname in ipairs(allocator_functions) do - self.original_functions[funcname] = self.lib['mem_' .. funcname] + if not self.original_functions[funcname] then + self.original_functions[funcname] = self.lib['mem_' .. funcname] + end end end + log.save_original_functions = child_call(log.save_original_functions) function log:set_mocks() for _, k in ipairs(allocator_functions) do do @@ -170,6 +306,7 @@ local function alloc_log_new() end end end + log.set_mocks = child_call(log.set_mocks) function log:clear() self.log = {} end @@ -178,22 +315,28 @@ local function alloc_log_new() self:clear() end function log:restore_original_functions() - for k, v in pairs(self.original_functions) do - self.lib['mem_' .. k] = v - end + -- Do nothing: set mocks live in a separate process + return + --[[ + [ for k, v in pairs(self.original_functions) do + [ self.lib['mem_' .. k] = v + [ end + ]] end - function log:before_each() + function log:setup() log:save_original_functions() log:set_mocks() end + function log:before_each() + return + end function log:after_each() log:restore_original_functions() end + log:setup() return log end -cimport('./src/nvim/types.h') - -- take a pointer to a C-allocated string and return an interned -- version while also freeing the memory local function internalize(cdata, len) @@ -206,17 +349,226 @@ local function to_cstr(string) return cstr(#string + 1, string) end --- initialize some global variables, this is still necessary to unit test --- functions that rely on global state. 
-do - local main = cimport('./src/nvim/main.h') - local time = cimport('./src/nvim/os/time.h') - time.time_init() - main.early_init() - main.event_init() +local sc + +if posix ~= nil then + sc = { + fork = posix.fork, + pipe = posix.pipe, + read = posix.read, + write = posix.write, + close = posix.close, + wait = posix.wait, + exit = posix._exit, + } +elseif syscall ~= nil then + sc = { + fork = syscall.fork, + pipe = function() + local ret = {syscall.pipe()} + return ret[3], ret[4] + end, + read = function(rd, len) + return rd:read(nil, len) + end, + write = function(wr, s) + return wr:write(s) + end, + close = function(p) + return p:close() + end, + wait = syscall.wait, + exit = syscall.exit, + } +else + cimport_immediate('./test/unit/fixtures/posix.h') + sc = { + fork = function() + return tonumber(ffi.C.fork()) + end, + pipe = function() + local ret = ffi.new('int[2]', {-1, -1}) + ffi.errno(0) + local res = ffi.C.pipe(ret) + if (res ~= 0) then + local err = ffi.errno(0) + assert(res == 0, ("pipe() error: %u: %s"):format( + err, ffi.string(ffi.C.strerror(err)))) + end + assert(ret[0] ~= -1 and ret[1] ~= -1) + return ret[0], ret[1] + end, + read = function(rd, len) + local ret = ffi.new('char[?]', len, {0}) + local total_bytes_read = 0 + ffi.errno(0) + while total_bytes_read < len do + local bytes_read = tonumber(ffi.C.read( + rd, + ffi.cast('void*', ret + total_bytes_read), + len - total_bytes_read)) + if bytes_read == -1 then + local err = ffi.errno(0) + if err ~= ffi.C.kPOSIXErrnoEINTR then + assert(false, ("read() error: %u: %s"):format( + err, ffi.string(ffi.C.strerror(err)))) + end + elseif bytes_read == 0 then + break + else + total_bytes_read = total_bytes_read + bytes_read + end + end + return ffi.string(ret, total_bytes_read) + end, + write = function(wr, s) + local wbuf = to_cstr(s) + local total_bytes_written = 0 + ffi.errno(0) + while total_bytes_written < #s do + local bytes_written = tonumber(ffi.C.write( + wr, + ffi.cast('void*', wbuf + 
total_bytes_written), + #s - total_bytes_written)) + if bytes_written == -1 then + local err = ffi.errno(0) + if err ~= ffi.C.kPOSIXErrnoEINTR then + assert(false, ("write() error: %u: %s"):format( + err, ffi.string(ffi.C.strerror(err)))) + end + elseif bytes_written == 0 then + break + else + total_bytes_written = total_bytes_written + bytes_written + end + end + return total_bytes_written + end, + close = ffi.C.close, + wait = function(pid) + ffi.errno(0) + while true do + local r = ffi.C.waitpid(pid, nil, ffi.C.kPOSIXWaitWUNTRACED) + if r == -1 then + local err = ffi.errno(0) + if err == ffi.C.kPOSIXErrnoECHILD then + break + elseif err ~= ffi.C.kPOSIXErrnoEINTR then + assert(false, ("waitpid() error: %u: %s"):format( + err, ffi.string(ffi.C.strerror(err)))) + end + else + assert(r == pid) + end + end + end, + exit = ffi.C._exit, + } end -return { +local function format_list(lst) + local ret = '' + for _, v in ipairs(lst) do + if ret ~= '' then ret = ret .. ', ' end + ret = ret .. assert:format({v, n=1})[1] + end + return ret +end + +if os.getenv('NVIM_TEST_PRINT_SYSCALLS') == '1' then + for k_, v_ in pairs(sc) do + (function(k, v) + sc[k] = function(...) 
+ local rets = {v(...)} + io.stderr:write(('%s(%s) = %s\n'):format(k, format_list({...}), + format_list(rets))) + return unpack(rets) + end + end)(k_, v_) + end +end + +local function gen_itp(it) + child_calls_mod = {} + child_calls_mod_once = {} + child_cleanups_mod_once = {} + preprocess_cache_mod = map(function(v) return v end, preprocess_cache_init) + previous_defines_mod = previous_defines_init + cdefs_mod = cdefs_init:copy() + local function just_fail(_) + return false + end + say:set('assertion.just_fail.positive', '%s') + say:set('assertion.just_fail.negative', '%s') + assert:register('assertion', 'just_fail', just_fail, + 'assertion.just_fail.positive', + 'assertion.just_fail.negative') + local function itp(name, func, allow_failure) + if allow_failure and os.getenv('NVIM_TEST_RUN_FAILING_TESTS') ~= '1' then + -- FIXME Fix tests with this true + return + end + it(name, function() + local rd, wr = sc.pipe() + child_pid = sc.fork() + if child_pid == 0 then + init() + sc.close(rd) + collectgarbage('stop') + local err, emsg = pcall(func) + collectgarbage('restart') + emsg = tostring(emsg) + if not err then + sc.write(wr, ('-\n%05u\n%s'):format(#emsg, emsg)) + deinit() + sc.close(wr) + sc.exit(1) + else + sc.write(wr, '+\n') + deinit() + sc.close(wr) + sc.exit(0) + end + else + sc.close(wr) + sc.wait(child_pid) + child_pid = nil + local function check() + local res = sc.read(rd, 2) + eq(2, #res) + if res == '+\n' then + return + end + eq('-\n', res) + local len_s = sc.read(rd, 5) + local len = tonumber(len_s) + neq(0, len) + local err = sc.read(rd, len + 1) + assert.just_fail(err) + end + local err, emsg = pcall(check) + sc.close(rd) + if not err then + if allow_failure then + io.stderr:write('Errorred out:\n' .. tostring(emsg) .. '\n') + os.execute([[sh -c "source .ci/common/test.sh ; check_core_dumps --delete \"]] .. Paths.test_luajit_prg .. 
[[\""]]) + else + error(emsg) + end + end + end + end) + end + return itp +end + +local function cppimport(path) + return cimport(Paths.test_include_path .. '/' .. path) +end + +cimport('./src/nvim/types.h', './src/nvim/main.h', './src/nvim/os/time.h') + +local module = { cimport = cimport, cppimport = cppimport, internalize = internalize, @@ -224,11 +576,23 @@ return { eq = eq, neq = neq, ffi = ffi, - lib = libnvim, + lib = lib, cstr = cstr, to_cstr = to_cstr, NULL = NULL, OK = OK, FAIL = FAIL, alloc_log_new = alloc_log_new, + gen_itp = gen_itp, + only_separate = only_separate, + child_call_once = child_call_once, + child_cleanup_once = child_cleanup_once, } +return function(after_each) + if after_each then + after_each(function() + check_cores(Paths.test_luajit_prg) + end) + end + return module +end diff --git a/test/unit/mbyte_spec.lua b/test/unit/mbyte_spec.lua index 9b2415a93f..6feef4e601 100644 --- a/test/unit/mbyte_spec.lua +++ b/test/unit/mbyte_spec.lua @@ -1,4 +1,5 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) local ffi = helpers.ffi local eq = helpers.eq @@ -26,7 +27,7 @@ describe('mbyte', function() before_each(function() end) - it('utf_ptr2char', function() + itp('utf_ptr2char', function() -- For strings with length 1 the first byte is returned. 
for c = 0, 255 do eq(c, mbyte.utf_ptr2char(to_string({c, 0}))) @@ -44,7 +45,7 @@ describe('mbyte', function() describe('utfc_ptr2char_len', function() - it('1-byte sequences', function() + itp('1-byte sequences', function() local pcc = to_intp() for c = 0, 255 do eq(c, mbyte.utfc_ptr2char_len(to_string({c}), pcc, 1)) @@ -52,7 +53,7 @@ describe('mbyte', function() end end) - it('2-byte sequences', function() + itp('2-byte sequences', function() local pcc = to_intp() -- No combining characters eq(0x007f, mbyte.utfc_ptr2char_len(to_string({0x7f, 0x7f}), pcc, 2)) @@ -76,7 +77,7 @@ describe('mbyte', function() eq(0, pcc[0]) end) - it('3-byte sequences', function() + itp('3-byte sequences', function() local pcc = to_intp() -- No second UTF-8 character @@ -108,7 +109,7 @@ describe('mbyte', function() eq(0, pcc[0]) end) - it('4-byte sequences', function() + itp('4-byte sequences', function() local pcc = to_intp() -- No following combining character @@ -145,7 +146,7 @@ describe('mbyte', function() eq(0, pcc[0]) end) - it('5+-byte sequences', function() + itp('5+-byte sequences', function() local pcc = to_intp() -- No following combining character diff --git a/test/unit/memory_spec.lua b/test/unit/memory_spec.lua index 73a32724ef..bd72c8bf47 100644 --- a/test/unit/memory_spec.lua +++ b/test/unit/memory_spec.lua @@ -1,4 +1,5 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local cstr = helpers.cstr @@ -26,7 +27,7 @@ describe('xstrlcat()', function() return ffi.string(dst_cstr) end - it('concatenates strings', function() + itp('concatenates strings', function() eq('ab', test_xstrlcat('a', 'b', 3)) eq('ab', test_xstrlcat('a', 'b', 4096)) eq('ABCיהZdefgiיהZ', test_xstrlcat('ABCיהZ', 'defgiיהZ', 4096)) @@ -34,7 +35,7 @@ describe('xstrlcat()', function() eq('a', test_xstrlcat('a', '', 4096)) end) - it('concatenates overlapping strings', function() + 
itp('concatenates overlapping strings', function() eq('abcabc', test_xstrlcat_overlap('abc', 0, 7)) eq('abca', test_xstrlcat_overlap('abc', 0, 5)) eq('abcb', test_xstrlcat_overlap('abc', 1, 5)) @@ -42,7 +43,7 @@ describe('xstrlcat()', function() eq('abcabc', test_xstrlcat_overlap('abc', 0, 2343)) end) - it('truncates if `dsize` is too small', function() + itp('truncates if `dsize` is too small', function() eq('a', test_xstrlcat('a', 'b', 2)) eq('', test_xstrlcat('', 'b', 1)) eq('ABCיהZd', test_xstrlcat('ABCיהZ', 'defgiיהZ', 10)) diff --git a/test/unit/message_spec.lua b/test/unit/message_spec.lua index afb572347f..7e92b5c857 100644 --- a/test/unit/message_spec.lua +++ b/test/unit/message_spec.lua @@ -1,4 +1,5 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) local ffi = helpers.ffi local eq = helpers.eq @@ -35,23 +36,23 @@ describe('trunc_string', function() for _,t in ipairs(permutations) do describe('populates buf '..t.desc, function() - it('with a small string', function() + itp('with a small string', function() t.func('text', 'text') end) - it('with a medium string', function() + itp('with a medium string', function() t.func('a short text', 'a short text') end) - it('with a string of length == 1/2 room', function() + itp('with a string of length == 1/2 room', function() t.func('a text that fits', 'a text that fits', 34) end) - it('with a string exactly the truncate size', function() + itp('with a string exactly the truncate size', function() t.func('a text tha just fits', 'a text tha just fits') end) - it('with a string that must be truncated', function() + itp('with a string that must be truncated', function() t.func('a text that nott fits', 'a text t...nott fits') end) end) diff --git a/test/unit/multiqueue_spec.lua b/test/unit/multiqueue_spec.lua index c7f8dd8328..bb08a8386f 100644 --- a/test/unit/multiqueue_spec.lua +++ b/test/unit/multiqueue_spec.lua @@ -1,9 +1,12 @@ 
-local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) -local ffi = helpers.ffi -local eq = helpers.eq +local child_call_once = helpers.child_call_once +local cimport = helpers.cimport +local ffi = helpers.ffi +local eq = helpers.eq -local multiqueue = helpers.cimport("./test/unit/fixtures/multiqueue.h") +local multiqueue = cimport("./test/unit/fixtures/multiqueue.h") describe("multiqueue (multi-level event-queue)", function() local parent, child1, child2, child3 @@ -21,28 +24,30 @@ describe("multiqueue (multi-level event-queue)", function() end before_each(function() - parent = multiqueue.multiqueue_new_parent(ffi.NULL, ffi.NULL) - child1 = multiqueue.multiqueue_new_child(parent) - child2 = multiqueue.multiqueue_new_child(parent) - child3 = multiqueue.multiqueue_new_child(parent) - put(child1, 'c1i1') - put(child1, 'c1i2') - put(child2, 'c2i1') - put(child1, 'c1i3') - put(child2, 'c2i2') - put(child2, 'c2i3') - put(child2, 'c2i4') - put(child3, 'c3i1') - put(child3, 'c3i2') + child_call_once(function() + parent = multiqueue.multiqueue_new_parent(ffi.NULL, ffi.NULL) + child1 = multiqueue.multiqueue_new_child(parent) + child2 = multiqueue.multiqueue_new_child(parent) + child3 = multiqueue.multiqueue_new_child(parent) + put(child1, 'c1i1') + put(child1, 'c1i2') + put(child2, 'c2i1') + put(child1, 'c1i3') + put(child2, 'c2i2') + put(child2, 'c2i3') + put(child2, 'c2i4') + put(child3, 'c3i1') + put(child3, 'c3i2') + end) end) - it('keeps count of added events', function() + itp('keeps count of added events', function() eq(3, multiqueue.multiqueue_size(child1)) eq(4, multiqueue.multiqueue_size(child2)) eq(2, multiqueue.multiqueue_size(child3)) end) - it('keeps count of removed events', function() + itp('keeps count of removed events', function() multiqueue.multiqueue_get(child1) eq(2, multiqueue.multiqueue_size(child1)) multiqueue.multiqueue_get(child1) @@ -57,7 +62,7 @@ 
describe("multiqueue (multi-level event-queue)", function() eq(0, multiqueue.multiqueue_size(child1)) end) - it('removing from parent removes from child', function() + itp('removing from parent removes from child', function() eq('c1i1', get(parent)) eq('c1i2', get(parent)) eq('c2i1', get(parent)) @@ -67,7 +72,7 @@ describe("multiqueue (multi-level event-queue)", function() eq('c2i4', get(parent)) end) - it('removing from child removes from parent', function() + itp('removing from child removes from parent', function() eq('c2i1', get(child2)) eq('c2i2', get(child2)) eq('c1i1', get(child1)) @@ -77,13 +82,13 @@ describe("multiqueue (multi-level event-queue)", function() eq('c2i4', get(parent)) end) - it('removing from child at the beginning of parent', function() + itp('removing from child at the beginning of parent', function() eq('c1i1', get(child1)) eq('c1i2', get(child1)) eq('c2i1', get(parent)) end) - it('removing from parent after get from parent and put to child', function() + itp('removing from parent after get from parent and put to child', function() eq('c1i1', get(parent)) eq('c1i2', get(parent)) eq('c2i1', get(parent)) @@ -99,7 +104,7 @@ describe("multiqueue (multi-level event-queue)", function() eq('c1i22', get(parent)) end) - it('removing from parent after get and put to child', function() + itp('removing from parent after get and put to child', function() eq('c1i1', get(child1)) eq('c1i2', get(child1)) eq('c2i1', get(child2)) @@ -117,7 +122,7 @@ describe("multiqueue (multi-level event-queue)", function() eq('c1i12', get(parent)) end) - it('put after removing from child at the end of parent', function() + itp('put after removing from child at the end of parent', function() eq('c3i1', get(child3)) eq('c3i2', get(child3)) put(child1, 'c1i11') @@ -133,7 +138,7 @@ describe("multiqueue (multi-level event-queue)", function() eq('c2i11', get(parent)) end) - it('removes from parent queue when child is freed', function() + itp('removes from parent queue when 
child is freed', function() free(child2) eq('c1i1', get(parent)) eq('c1i2', get(parent)) diff --git a/test/unit/option_spec.lua b/test/unit/option_spec.lua index 8bab0194a2..b8b8a435bc 100644 --- a/test/unit/option_spec.lua +++ b/test/unit/option_spec.lua @@ -1,4 +1,5 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) local to_cstr = helpers.to_cstr local eq = helpers.eq @@ -12,23 +13,23 @@ end describe('check_ff_value', function() - it('views empty string as valid', function() + itp('views empty string as valid', function() eq(1, check_ff_value("")) end) - it('views "unix", "dos" and "mac" as valid', function() + itp('views "unix", "dos" and "mac" as valid', function() eq(1, check_ff_value("unix")) eq(1, check_ff_value("dos")) eq(1, check_ff_value("mac")) end) - it('views "foo" as invalid', function() + itp('views "foo" as invalid', function() eq(0, check_ff_value("foo")) end) end) describe('get_sts_value', function() - it([[returns 'softtabstop' when it is non-negative]], function() + itp([[returns 'softtabstop' when it is non-negative]], function() globals.curbuf.b_p_sts = 5 eq(5, option.get_sts_value()) @@ -36,7 +37,7 @@ describe('get_sts_value', function() eq(0, option.get_sts_value()) end) - it([[returns "effective shiftwidth" when 'softtabstop' is negative]], function() + itp([[returns "effective shiftwidth" when 'softtabstop' is negative]], function() local shiftwidth = 2 globals.curbuf.b_p_sw = shiftwidth local tabstop = 5 diff --git a/test/unit/os/env_spec.lua b/test/unit/os/env_spec.lua index 3c2cc164c9..1ffed784ff 100644 --- a/test/unit/os/env_spec.lua +++ b/test/unit/os/env_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local eq = helpers.eq @@ -33,7 +34,7 @@ describe('env function', function() describe('os_setenv', 
function() local OK = 0 - it('sets an env variable and returns OK', function() + itp('sets an env variable and returns OK', function() local name = 'NEOVIM_UNIT_TEST_SETENV_1N' local value = 'NEOVIM_UNIT_TEST_SETENV_1V' eq(nil, os.getenv(name)) @@ -41,7 +42,7 @@ describe('env function', function() eq(value, os.getenv(name)) end) - it("dosn't overwrite an env variable if overwrite is 0", function() + itp("dosn't overwrite an env variable if overwrite is 0", function() local name = 'NEOVIM_UNIT_TEST_SETENV_2N' local value = 'NEOVIM_UNIT_TEST_SETENV_2V' local value_updated = 'NEOVIM_UNIT_TEST_SETENV_2V_UPDATED' @@ -53,13 +54,13 @@ describe('env function', function() end) describe('os_setenv_append_path', function() - it('appends /foo/bar to $PATH', function() + itp('appends /foo/bar to $PATH', function() local original_path = os.getenv('PATH') eq(true, cimp.os_setenv_append_path(to_cstr('/foo/bar/baz'))) eq(original_path..':/foo/bar', os.getenv('PATH')) end) - it('returns false if `fname` is not absolute', function() + itp('returns false if `fname` is not absolute', function() local original_path = os.getenv('PATH') eq(false, cimp.os_setenv_append_path(to_cstr('foo/bar/baz'))) eq(original_path, os.getenv('PATH')) @@ -67,7 +68,7 @@ describe('env function', function() end) describe('os_getenv', function() - it('reads an env variable', function() + itp('reads an env variable', function() local name = 'NEOVIM_UNIT_TEST_GETENV_1N' local value = 'NEOVIM_UNIT_TEST_GETENV_1V' eq(NULL, os_getenv(name)) @@ -76,14 +77,14 @@ describe('env function', function() eq(value, os_getenv(name)) end) - it('returns NULL if the env variable is not found', function() + itp('returns NULL if the env variable is not found', function() local name = 'NEOVIM_UNIT_TEST_GETENV_NOTFOUND' return eq(NULL, os_getenv(name)) end) end) describe('os_unsetenv', function() - it('unsets environment variable', function() + itp('unsets environment variable', function() local name = 'TEST_UNSETENV' local value = 
'TESTVALUE' os_setenv(name, value, 1) @@ -95,7 +96,7 @@ describe('env function', function() end) describe('os_getenvname_at_index', function() - it('returns names of environment variables', function() + itp('returns names of environment variables', function() local test_name = 'NEOVIM_UNIT_TEST_GETENVNAME_AT_INDEX_1N' local test_value = 'NEOVIM_UNIT_TEST_GETENVNAME_AT_INDEX_1V' os_setenv(test_name, test_value, 1) @@ -115,7 +116,7 @@ describe('env function', function() eq(true, found_name) end) - it('returns NULL if the index is out of bounds', function() + itp('returns NULL if the index is out of bounds', function() local huge = ffi.new('size_t', 10000) local maxuint32 = ffi.new('size_t', 4294967295) eq(NULL, cimp.os_getenvname_at_index(huge)) @@ -132,7 +133,7 @@ describe('env function', function() end) describe('os_get_pid', function() - it('returns the process ID', function() + itp('returns the process ID', function() local stat_file = io.open('/proc/self/stat') if stat_file then local stat_str = stat_file:read('*l') @@ -147,7 +148,7 @@ describe('env function', function() end) describe('os_get_hostname', function() - it('returns the hostname', function() + itp('returns the hostname', function() local handle = io.popen('hostname') local hostname = handle:read('*l') handle:close() @@ -158,7 +159,7 @@ describe('env function', function() end) describe('expand_env_esc', function() - it('expands environment variables', function() + itp('expands environment variables', function() local name = 'NEOVIM_UNIT_TEST_EXPAND_ENV_ESCN' local value = 'NEOVIM_UNIT_TEST_EXPAND_ENV_ESCV' os_setenv(name, value, 1) @@ -175,7 +176,7 @@ describe('env function', function() eq(output_expected, ffi.string(output_buff2)) end) - it('expands ~ once when `one` is true', function() + itp('expands ~ once when `one` is true', function() local input = '~/foo ~ foo' local homedir = cstr(255, '') cimp.expand_env_esc(to_cstr('~'), homedir, 255, false, true, NULL) @@ -185,7 +186,7 @@ describe('env 
function', function() eq(ffi.string(output), ffi.string(output_expected)) end) - it('expands ~ every time when `one` is false', function() + itp('expands ~ every time when `one` is false', function() local input = to_cstr('~/foo ~ foo') local dst = cstr(255, '') cimp.expand_env_esc(to_cstr('~'), dst, 255, false, true, NULL) @@ -196,7 +197,7 @@ describe('env function', function() eq(output_expected, ffi.string(output)) end) - it('does not crash #3725', function() + itp('does not crash #3725', function() local name_out = ffi.new('char[100]') cimp.os_get_user_name(name_out, 100) local curuser = ffi.string(name_out) @@ -209,7 +210,7 @@ describe('env function', function() assert.True(len < 99) end) - it('respects `dstlen` without expansion', function() + itp('respects `dstlen` without expansion', function() local input = to_cstr('this is a very long thing that will not fit') -- The buffer is long enough to actually contain the full input in case the -- test fails, but we don't tell expand_env_esc that @@ -223,7 +224,7 @@ describe('env function', function() eq(0, output[4]) end) - it('respects `dstlen` with expansion', function() + itp('respects `dstlen` with expansion', function() local varname = to_cstr('NVIM_UNIT_TEST_EXPAND_ENV_ESC_DSTLENN') local varval = to_cstr('NVIM_UNIT_TEST_EXPAND_ENV_ESC_DSTLENV') cimp.os_setenv(varname, varval, 1) diff --git a/test/unit/os/fileio_spec.lua b/test/unit/os/fileio_spec.lua index 5358022422..7a738ce85c 100644 --- a/test/unit/os/fileio_spec.lua +++ b/test/unit/os/fileio_spec.lua @@ -1,6 +1,7 @@ local lfs = require('lfs') -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local eq = helpers.eq local ffi = helpers.ffi @@ -88,7 +89,7 @@ local function file_skip(fp, size) end describe('file_open', function() - it('can create a rwx------ file with kFileCreate', function() + itp('can create a rwx------ file with kFileCreate', function() local err, fp = 
file_open(filec, m.kFileCreate, 448) eq(0, err) local attrs = lfs.attributes(filec) @@ -96,7 +97,7 @@ describe('file_open', function() eq(0, m.file_close(fp)) end) - it('can create a rw------- file with kFileCreate', function() + itp('can create a rw------- file with kFileCreate', function() local err, fp = file_open(filec, m.kFileCreate, 384) eq(0, err) local attrs = lfs.attributes(filec) @@ -104,7 +105,7 @@ describe('file_open', function() eq(0, m.file_close(fp)) end) - it('can create a rwx------ file with kFileCreateOnly', function() + itp('can create a rwx------ file with kFileCreateOnly', function() local err, fp = file_open(filec, m.kFileCreateOnly, 448) eq(0, err) local attrs = lfs.attributes(filec) @@ -112,7 +113,7 @@ describe('file_open', function() eq(0, m.file_close(fp)) end) - it('can create a rw------- file with kFileCreateOnly', function() + itp('can create a rw------- file with kFileCreateOnly', function() local err, fp = file_open(filec, m.kFileCreateOnly, 384) eq(0, err) local attrs = lfs.attributes(filec) @@ -120,47 +121,47 @@ describe('file_open', function() eq(0, m.file_close(fp)) end) - it('fails to open an existing file with kFileCreateOnly', function() + itp('fails to open an existing file with kFileCreateOnly', function() local err, _ = file_open(file1, m.kFileCreateOnly, 384) eq(m.UV_EEXIST, err) end) - it('fails to open an symlink with kFileNoSymlink', function() + itp('fails to open an symlink with kFileNoSymlink', function() local err, _ = file_open(linkf, m.kFileNoSymlink, 384) -- err is UV_EMLINK in FreeBSD, but if I use `ok(err == m.UV_ELOOP or err == -- m.UV_EMLINK)`, then I loose the ability to see actual `err` value. 
if err ~= m.UV_ELOOP then eq(m.UV_EMLINK, err) end end) - it('can open an existing file write-only with kFileCreate', function() + itp('can open an existing file write-only with kFileCreate', function() local err, fp = file_open(file1, m.kFileCreate, 384) eq(0, err) eq(true, fp.wr) eq(0, m.file_close(fp)) end) - it('can open an existing file read-only with zero', function() + itp('can open an existing file read-only with zero', function() local err, fp = file_open(file1, 0, 384) eq(0, err) eq(false, fp.wr) eq(0, m.file_close(fp)) end) - it('can open an existing file read-only with kFileReadOnly', function() + itp('can open an existing file read-only with kFileReadOnly', function() local err, fp = file_open(file1, m.kFileReadOnly, 384) eq(0, err) eq(false, fp.wr) eq(0, m.file_close(fp)) end) - it('can open an existing file read-only with kFileNoSymlink', function() + itp('can open an existing file read-only with kFileNoSymlink', function() local err, fp = file_open(file1, m.kFileNoSymlink, 384) eq(0, err) eq(false, fp.wr) eq(0, m.file_close(fp)) end) - it('can truncate an existing file with kFileTruncate', function() + itp('can truncate an existing file with kFileTruncate', function() local err, fp = file_open(file1, m.kFileTruncate, 384) eq(0, err) eq(true, fp.wr) @@ -169,7 +170,7 @@ describe('file_open', function() eq(0, attrs.size) end) - it('can open an existing file write-only with kFileWriteOnly', function() + itp('can open an existing file write-only with kFileWriteOnly', function() local err, fp = file_open(file1, m.kFileWriteOnly, 384) eq(0, err) eq(true, fp.wr) @@ -178,14 +179,14 @@ describe('file_open', function() eq(4096, attrs.size) end) - it('fails to create a file with just kFileWriteOnly', function() + itp('fails to create a file with just kFileWriteOnly', function() local err, _ = file_open(filec, m.kFileWriteOnly, 384) eq(m.UV_ENOENT, err) local attrs = lfs.attributes(filec) eq(nil, attrs) end) - it('can truncate an existing file with kFileTruncate 
when opening a symlink', + itp('can truncate an existing file with kFileTruncate when opening a symlink', function() local err, fp = file_open(linkf, m.kFileTruncate, 384) eq(0, err) @@ -195,31 +196,31 @@ describe('file_open', function() eq(0, attrs.size) end) - it('fails to open a directory write-only', function() + itp('fails to open a directory write-only', function() local err, _ = file_open(dir, m.kFileWriteOnly, 384) eq(m.UV_EISDIR, err) end) - it('fails to open a broken symbolic link write-only', function() + itp('fails to open a broken symbolic link write-only', function() local err, _ = file_open(linkb, m.kFileWriteOnly, 384) eq(m.UV_ENOENT, err) end) - it('fails to open a broken symbolic link read-only', function() + itp('fails to open a broken symbolic link read-only', function() local err, _ = file_open(linkb, m.kFileReadOnly, 384) eq(m.UV_ENOENT, err) end) end) describe('file_open_new', function() - it('can open a file read-only', function() + itp('can open a file read-only', function() local err, fp = file_open_new(file1, 0, 384) eq(0, err) eq(false, fp.wr) eq(0, m.file_free(fp)) end) - it('fails to open an existing file with kFileCreateOnly', function() + itp('fails to open an existing file with kFileCreateOnly', function() local err, fp = file_open_new(file1, m.kFileCreateOnly, 384) eq(m.UV_EEXIST, err) eq(nil, fp) @@ -229,7 +230,7 @@ end) -- file_close is called above, so it is not tested directly describe('file_fsync', function() - it('can flush writes to disk', function() + itp('can flush writes to disk', function() local err, fp = file_open(filec, m.kFileCreateOnly, 384) eq(0, file_fsync(fp)) eq(0, err) @@ -244,7 +245,7 @@ describe('file_fsync', function() end) describe('file_read', function() - it('can read small chunks of input until eof', function() + itp('can read small chunks of input until eof', function() local err, fp = file_open(file1, 0, 384) eq(0, err) eq(false, fp.wr) @@ -264,7 +265,7 @@ describe('file_read', function() eq(0, 
m.file_close(fp)) end) - it('can read the whole file at once', function() + itp('can read the whole file at once', function() local err, fp = file_open(file1, 0, 384) eq(0, err) eq(false, fp.wr) @@ -273,7 +274,7 @@ describe('file_read', function() eq(0, m.file_close(fp)) end) - it('can read more then 1024 bytes after reading a small chunk', function() + itp('can read more then 1024 bytes after reading a small chunk', function() local err, fp = file_open(file1, 0, 384) eq(0, err) eq(false, fp.wr) @@ -283,7 +284,7 @@ describe('file_read', function() eq(0, m.file_close(fp)) end) - it('can read file by 768-byte-chunks', function() + itp('can read file by 768-byte-chunks', function() local err, fp = file_open(file1, 0, 384) eq(0, err) eq(false, fp.wr) @@ -305,7 +306,7 @@ describe('file_read', function() end) describe('file_write', function() - it('can write the whole file at once', function() + itp('can write the whole file at once', function() local err, fp = file_open(filec, m.kFileCreateOnly, 384) eq(0, err) eq(true, fp.wr) @@ -316,7 +317,7 @@ describe('file_write', function() eq(fcontents, io.open(filec):read('*a')) end) - it('can write the whole file by small chunks', function() + itp('can write the whole file by small chunks', function() local err, fp = file_open(filec, m.kFileCreateOnly, 384) eq(0, err) eq(true, fp.wr) @@ -333,7 +334,7 @@ describe('file_write', function() eq(fcontents, io.open(filec):read('*a')) end) - it('can write the whole file by 768-byte-chunks', function() + itp('can write the whole file by 768-byte-chunks', function() local err, fp = file_open(filec, m.kFileCreateOnly, 384) eq(0, err) eq(true, fp.wr) @@ -352,7 +353,7 @@ describe('file_write', function() end) describe('file_skip', function() - it('can skip 3 bytes', function() + itp('can skip 3 bytes', function() local err, fp = file_open(file1, 0, 384) eq(0, err) eq(false, fp.wr) diff --git a/test/unit/os/fs_spec.lua b/test/unit/os/fs_spec.lua index 516fb5a7d1..860ebfdbcb 100644 --- 
a/test/unit/os/fs_spec.lua +++ b/test/unit/os/fs_spec.lua @@ -1,7 +1,8 @@ local lfs = require('lfs') local bit = require('bit') -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local cppimport = helpers.cppimport @@ -15,10 +16,10 @@ local to_cstr = helpers.to_cstr local OK = helpers.OK local FAIL = helpers.FAIL local NULL = helpers.NULL + local NODE_NORMAL = 0 local NODE_WRITABLE = 1 -cimport('unistd.h') cimport('./src/nvim/os/shell.h') cimport('./src/nvim/option_defs.h') cimport('./src/nvim/main.h') @@ -65,13 +66,10 @@ local function os_getperm(filename) end describe('fs function', function() - local orig_test_file_perm - - setup(function() + before_each(function() lfs.mkdir('unit-test-directory'); io.open('unit-test-directory/test.file', 'w').close() - orig_test_file_perm = os_getperm('unit-test-directory/test.file') io.open('unit-test-directory/test_2.file', 'w').close() lfs.link('test.file', 'unit-test-directory/test_link.file', true) @@ -83,7 +81,7 @@ describe('fs function', function() directory, executable_name = string.match(absolute_executable, '^(.*)/(.*)$') end) - teardown(function() + after_each(function() os.remove('unit-test-directory/test.file') os.remove('unit-test-directory/test_2.file') os.remove('unit-test-directory/test_link.file') @@ -104,13 +102,13 @@ describe('fs function', function() buffer = cstr(length, '') end) - it('returns OK and writes current directory into the buffer if it is large\n enough', function() + itp('returns OK and writes current directory into the buffer if it is large\n enough', function() eq(OK, (os_dirname(buffer, length))) eq(lfs.currentdir(), (ffi.string(buffer))) end) -- What kind of other failing cases are possible? 
- it('returns FAIL if the buffer is too small', function() + itp('returns FAIL if the buffer is too small', function() local buf = cstr((length - 1), '') eq(FAIL, (os_dirname(buf, (length - 1)))) end) @@ -121,35 +119,35 @@ describe('fs function', function() end describe('os_isdir', function() - it('returns false if an empty string is given', function() + itp('returns false if an empty string is given', function() eq(false, (os_isdir(''))) end) - it('returns false if a nonexisting directory is given', function() + itp('returns false if a nonexisting directory is given', function() eq(false, (os_isdir('non-existing-directory'))) end) - it('returns false if a nonexisting absolute directory is given', function() + itp('returns false if a nonexisting absolute directory is given', function() eq(false, (os_isdir('/non-existing-directory'))) end) - it('returns false if an existing file is given', function() + itp('returns false if an existing file is given', function() eq(false, (os_isdir('unit-test-directory/test.file'))) end) - it('returns true if the current directory is given', function() + itp('returns true if the current directory is given', function() eq(true, (os_isdir('.'))) end) - it('returns true if the parent directory is given', function() + itp('returns true if the parent directory is given', function() eq(true, (os_isdir('..'))) end) - it('returns true if an arbitrary directory is given', function() + itp('returns true if an arbitrary directory is given', function() eq(true, (os_isdir('unit-test-directory'))) end) - it('returns true if an absolute directory is given', function() + itp('returns true if an absolute directory is given', function() eq(true, (os_isdir(directory))) end) end) @@ -179,24 +177,24 @@ describe('fs function', function() return os_can_exe(name) end - it('returns false when given a directory', function() + itp('returns false when given a directory', function() cant_exe('./unit-test-directory') end) - it('returns false when given a regular 
file without executable bit set', function() + itp('returns false when given a regular file without executable bit set', function() cant_exe('unit-test-directory/test.file') end) - it('returns false when the given file does not exists', function() + itp('returns false when the given file does not exists', function() cant_exe('does-not-exist.file') end) - it('returns the absolute path when given an executable inside $PATH', function() + itp('returns the absolute path when given an executable inside $PATH', function() local fullpath = exe('ls') eq(1, fs.path_is_absolute_path(to_cstr(fullpath))) end) - it('returns the absolute path when given an executable relative to the current dir', function() + itp('returns the absolute path when given an executable relative to the current dir', function() local old_dir = lfs.currentdir() lfs.chdir(directory) @@ -216,10 +214,6 @@ describe('fs function', function() end) describe('file permissions', function() - before_each(function() - os_setperm('unit-test-directory/test.file', orig_test_file_perm) - end) - local function os_fchown(filename, user_id, group_id) local fd = ffi.C.open(filename, 0) local res = fs.os_fchown(fd, user_id, group_id) @@ -240,22 +234,22 @@ describe('fs function', function() end describe('os_getperm', function() - it('returns UV_ENOENT when the given file does not exist', function() + itp('returns UV_ENOENT when the given file does not exist', function() eq(ffi.C.UV_ENOENT, (os_getperm('non-existing-file'))) end) - it('returns a perm > 0 when given an existing file', function() + itp('returns a perm > 0 when given an existing file', function() assert.is_true((os_getperm('unit-test-directory')) > 0) end) - it('returns S_IRUSR when the file is readable', function() + itp('returns S_IRUSR when the file is readable', function() local perm = os_getperm('unit-test-directory') assert.is_true((bit_set(perm, ffi.C.kS_IRUSR))) end) end) describe('os_setperm', function() - it('can set and unset the executable bit of a 
file', function() + itp('can set and unset the executable bit of a file', function() local perm = os_getperm('unit-test-directory/test.file') perm = unset_bit(perm, ffi.C.kS_IXUSR) eq(OK, (os_setperm('unit-test-directory/test.file', perm))) @@ -267,7 +261,7 @@ describe('fs function', function() assert.is_true((bit_set(perm, ffi.C.kS_IXUSR))) end) - it('fails if given file does not exist', function() + itp('fails if given file does not exist', function() local perm = ffi.C.kS_IXUSR eq(FAIL, (os_setperm('non-existing-file', perm))) end) @@ -275,7 +269,7 @@ describe('fs function', function() describe('os_fchown', function() local filename = 'unit-test-directory/test.file' - it('does not change owner and group if respective IDs are equal to -1', function() + itp('does not change owner and group if respective IDs are equal to -1', function() local uid = lfs.attributes(filename, 'uid') local gid = lfs.attributes(filename, 'gid') eq(0, os_fchown(filename, -1, -1)) @@ -287,7 +281,7 @@ describe('fs function', function() if (os.execute('id -G > /dev/null 2>&1') ~= 0) then pending('skipped (missing `id` utility)', function() end) else - it('owner of a file may change the group of the file to any group of which that owner is a member', function() + itp('owner of a file may change the group of the file to any group of which that owner is a member', function() local file_gid = lfs.attributes(filename, 'gid') -- Gets ID of any group of which current user is a member except the @@ -311,7 +305,7 @@ describe('fs function', function() if (ffi.os == 'Windows' or ffi.C.geteuid() == 0) then pending('skipped (uv_fs_chown is no-op on Windows)', function() end) else - it('returns nonzero if process has not enough permissions', function() + itp('returns nonzero if process has not enough permissions', function() -- chown to root neq(0, os_fchown(filename, 0, 0)) end) @@ -320,7 +314,7 @@ describe('fs function', function() describe('os_file_is_readable', function() - it('returns false if the 
file is not readable', function() + itp('returns false if the file is not readable', function() local perm = os_getperm('unit-test-directory/test.file') perm = unset_bit(perm, ffi.C.kS_IRUSR) perm = unset_bit(perm, ffi.C.kS_IRGRP) @@ -329,19 +323,19 @@ describe('fs function', function() eq(false, os_file_is_readable('unit-test-directory/test.file')) end) - it('returns false if the file does not exist', function() + itp('returns false if the file does not exist', function() eq(false, os_file_is_readable( 'unit-test-directory/what_are_you_smoking.gif')) end) - it('returns true if the file is readable', function() + itp('returns true if the file is readable', function() eq(true, os_file_is_readable( 'unit-test-directory/test.file')) end) end) describe('os_file_is_writable', function() - it('returns 0 if the file is readonly', function() + itp('returns 0 if the file is readonly', function() local perm = os_getperm('unit-test-directory/test.file') perm = unset_bit(perm, ffi.C.kS_IWUSR) perm = unset_bit(perm, ffi.C.kS_IWGRP) @@ -350,11 +344,11 @@ describe('fs function', function() eq(0, os_file_is_writable('unit-test-directory/test.file')) end) - it('returns 1 if the file is writable', function() + itp('returns 1 if the file is writable', function() eq(1, os_file_is_writable('unit-test-directory/test.file')) end) - it('returns 2 when given a folder with rights to write into', function() + itp('returns 2 when given a folder with rights to write into', function() eq(2, os_file_is_writable('unit-test-directory')) end) end) @@ -420,19 +414,19 @@ describe('fs function', function() end describe('os_path_exists', function() - it('returns false when given a non-existing file', function() + itp('returns false when given a non-existing file', function() eq(false, (os_path_exists('non-existing-file'))) end) - it('returns true when given an existing file', function() + itp('returns true when given an existing file', function() eq(true, 
(os_path_exists('unit-test-directory/test.file'))) end) - it('returns false when given a broken symlink', function() + itp('returns false when given a broken symlink', function() eq(false, (os_path_exists('unit-test-directory/test_broken_link.file'))) end) - it('returns true when given a directory', function() + itp('returns true when given a directory', function() eq(true, (os_path_exists('unit-test-directory'))) end) end) @@ -441,18 +435,18 @@ describe('fs function', function() local test = 'unit-test-directory/test.file' local not_exist = 'unit-test-directory/not_exist.file' - it('can rename file if destination file does not exist', function() + itp('can rename file if destination file does not exist', function() eq(OK, (os_rename(test, not_exist))) eq(false, (os_path_exists(test))) eq(true, (os_path_exists(not_exist))) eq(OK, (os_rename(not_exist, test))) -- restore test file end) - it('fail if source file does not exist', function() + itp('fail if source file does not exist', function() eq(FAIL, (os_rename(not_exist, test))) end) - it('can overwrite destination file if it exists', function() + itp('can overwrite destination file if it exists', function() local other = 'unit-test-directory/other.file' local file = io.open(other, 'w') file:write('other') @@ -477,11 +471,11 @@ describe('fs function', function() os.remove('unit-test-directory/test_remove.file') end) - it('returns non-zero when given a non-existing file', function() + itp('returns non-zero when given a non-existing file', function() neq(0, (os_remove('non-existing-file'))) end) - it('removes the given file and returns 0', function() + itp('removes the given file and returns 0', function() local f = 'unit-test-directory/test_remove.file' assert_file_exists(f) eq(0, (os_remove(f))) @@ -502,30 +496,30 @@ describe('fs function', function() os.remove(new_file) end) - it('returns UV_ENOENT for O_RDWR on a non-existing file', function() + itp('returns UV_ENOENT for O_RDWR on a non-existing file', 
function() eq(ffi.C.UV_ENOENT, (os_open('non-existing-file', ffi.C.kO_RDWR, 0))) end) - it('returns non-negative for O_CREAT on a non-existing file which then can be closed', function() + itp('returns non-negative for O_CREAT on a non-existing file which then can be closed', function() assert_file_does_not_exist(new_file) local fd = os_open(new_file, ffi.C.kO_CREAT, 0) assert.is_true(0 <= fd) eq(0, os_close(fd)) end) - it('returns non-negative for O_CREAT on a existing file which then can be closed', function() + itp('returns non-negative for O_CREAT on a existing file which then can be closed', function() assert_file_exists(existing_file) local fd = os_open(existing_file, ffi.C.kO_CREAT, 0) assert.is_true(0 <= fd) eq(0, os_close(fd)) end) - it('returns UV_EEXIST for O_CREAT|O_EXCL on a existing file', function() + itp('returns UV_EEXIST for O_CREAT|O_EXCL on a existing file', function() assert_file_exists(existing_file) eq(ffi.C.kUV_EEXIST, (os_open(existing_file, (bit.bor(ffi.C.kO_CREAT, ffi.C.kO_EXCL)), 0))) end) - it('sets `rwx` permissions for O_CREAT 700 which then can be closed', function() + itp('sets `rwx` permissions for O_CREAT 700 which then can be closed', function() assert_file_does_not_exist(new_file) --create the file local fd = os_open(new_file, ffi.C.kO_CREAT, tonumber("700", 8)) @@ -534,7 +528,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('sets `rw` permissions for O_CREAT 600 which then can be closed', function() + itp('sets `rw` permissions for O_CREAT 600 which then can be closed', function() assert_file_does_not_exist(new_file) --create the file local fd = os_open(new_file, ffi.C.kO_CREAT, tonumber("600", 8)) @@ -543,7 +537,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('returns a non-negative file descriptor for an existing file which then can be closed', function() + itp('returns a non-negative file descriptor for an existing file which then can be closed', function() local fd = 
os_open(existing_file, ffi.C.kO_RDWR, 0) assert.is_true(0 <= fd) eq(0, os_close(fd)) @@ -551,7 +545,7 @@ describe('fs function', function() end) describe('os_close', function() - it('returns EBADF for negative file descriptors', function() + itp('returns EBADF for negative file descriptors', function() eq(ffi.C.UV_EBADF, os_close(-1)) eq(ffi.C.UV_EBADF, os_close(-1000)) end) @@ -570,7 +564,7 @@ describe('fs function', function() os.remove(file) end) - it('can read zero bytes from a file', function() + itp('can read zero bytes from a file', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, 0, ''}, {os_read(fd, nil)}) @@ -578,7 +572,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('can read from a file multiple times', function() + itp('can read from a file multiple times', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, 2, '\000\001'}, {os_read(fd, 2)}) @@ -586,7 +580,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('can read the whole file at once and then report eof', function() + itp('can read the whole file at once and then report eof', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, #fcontents, fcontents}, {os_read(fd, #fcontents)}) @@ -594,7 +588,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('can read the whole file in two calls, one partially', function() + itp('can read the whole file in two calls, one partially', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, #fcontents * 3/4, fcontents:sub(1, #fcontents * 3/4)}, @@ -624,7 +618,7 @@ describe('fs function', function() os.remove(file) end) - it('can read zero bytes from a file', function() + itp('can read zero bytes from a file', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, 0, {}}, {os_readv(fd, {})}) @@ -632,7 +626,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('can 
read from a file multiple times to a differently-sized buffers', function() + itp('can read from a file multiple times to a differently-sized buffers', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, 2, {'\000\001'}}, {os_readv(fd, {2})}) @@ -640,7 +634,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('can read the whole file at once and then report eof', function() + itp('can read the whole file at once and then report eof', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, @@ -657,7 +651,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('can read the whole file in two calls, one partially', function() + itp('can read the whole file in two calls, one partially', function() local fd = os_open(file, ffi.C.kO_RDONLY, 0) ok(fd >= 0) eq({false, #fcontents * 3/4, {fcontents:sub(1, #fcontents * 3/4)}}, @@ -684,7 +678,7 @@ describe('fs function', function() os.remove(file) end) - it('can write zero bytes to a file', function() + itp('can write zero bytes to a file', function() local fd = os_open(file, ffi.C.kO_WRONLY, 0) ok(fd >= 0) eq(0, os_write(fd, '')) @@ -693,7 +687,7 @@ describe('fs function', function() eq(0, os_close(fd)) end) - it('can write some data to a file', function() + itp('can write some data to a file', function() local fd = os_open(file, ffi.C.kO_WRONLY, 0) ok(fd >= 0) eq(3, os_write(fd, 'abc')) @@ -708,11 +702,11 @@ describe('fs function', function() os.remove('non-existing-file') end) - it('returns NODE_NORMAL for non-existing file', function() + itp('returns NODE_NORMAL for non-existing file', function() eq(NODE_NORMAL, fs.os_nodetype(to_cstr('non-existing-file'))) end) - it('returns NODE_WRITABLE for /dev/stderr', function() + itp('returns NODE_WRITABLE for /dev/stderr', function() eq(NODE_WRITABLE, fs.os_nodetype(to_cstr('/dev/stderr'))) end) end) @@ -738,12 +732,12 @@ describe('fs function', function() end describe('os_mkdir', function() - 
it('returns non-zero when given an already existing directory', function() + itp('returns non-zero when given an already existing directory', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR neq(0, (os_mkdir('unit-test-directory', mode))) end) - it('creates a directory and returns 0', function() + itp('creates a directory and returns 0', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR eq(false, (os_isdir('unit-test-directory/new-dir'))) eq(0, (os_mkdir('unit-test-directory/new-dir', mode))) @@ -753,14 +747,14 @@ describe('fs function', function() end) describe('os_mkdir_recurse', function() - it('returns zero when given an already existing directory', function() + itp('returns zero when given an already existing directory', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR local ret, failed_str = os_mkdir_recurse('unit-test-directory', mode) eq(0, ret) eq(nil, failed_str) end) - it('fails to create a directory where there is a file', function() + itp('fails to create a directory where there is a file', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR local ret, failed_str = os_mkdir_recurse( 'unit-test-directory/test.file', mode) @@ -768,7 +762,7 @@ describe('fs function', function() eq('unit-test-directory/test.file', failed_str) end) - it('fails to create a directory where there is a file in path', function() + itp('fails to create a directory where there is a file in path', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR local ret, failed_str = os_mkdir_recurse( 'unit-test-directory/test.file/test', mode) @@ -776,7 +770,7 @@ describe('fs function', function() eq('unit-test-directory/test.file', failed_str) end) - it('succeeds to create a directory', function() + itp('succeeds to create a directory', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR local ret, failed_str = os_mkdir_recurse( 
'unit-test-directory/new-dir-recurse', mode) @@ -787,7 +781,7 @@ describe('fs function', function() eq(false, os_isdir('unit-test-directory/new-dir-recurse')) end) - it('succeeds to create a directory ending with ///', function() + itp('succeeds to create a directory ending with ///', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR local ret, failed_str = os_mkdir_recurse( 'unit-test-directory/new-dir-recurse///', mode) @@ -798,7 +792,7 @@ describe('fs function', function() eq(false, os_isdir('unit-test-directory/new-dir-recurse')) end) - it('succeeds to create a directory ending with /', function() + itp('succeeds to create a directory ending with /', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR local ret, failed_str = os_mkdir_recurse( 'unit-test-directory/new-dir-recurse/', mode) @@ -809,7 +803,7 @@ describe('fs function', function() eq(false, os_isdir('unit-test-directory/new-dir-recurse')) end) - it('succeeds to create a directory tree', function() + itp('succeeds to create a directory tree', function() local mode = ffi.C.kS_IRUSR + ffi.C.kS_IWUSR + ffi.C.kS_IXUSR local ret, failed_str = os_mkdir_recurse( 'unit-test-directory/new-dir-recurse/1/2/3', mode) @@ -828,11 +822,11 @@ describe('fs function', function() end) describe('os_rmdir', function() - it('returns non_zero when given a non-existing directory', function() + itp('returns non_zero when given a non-existing directory', function() neq(0, (os_rmdir('non-existing-directory'))) end) - it('removes the given directory and returns 0', function() + itp('removes the given directory and returns 0', function() lfs.mkdir('unit-test-directory/new-dir') eq(0, os_rmdir('unit-test-directory/new-dir')) eq(false, (os_isdir('unit-test-directory/new-dir'))) @@ -860,19 +854,19 @@ describe('fs function', function() end describe('os_fileinfo', function() - it('returns false if given a non-existing file', function() + itp('returns false if given a non-existing file', 
function() local file_info = file_info_new() assert.is_false((fs.os_fileinfo('/non-existent', file_info))) end) - it('returns true if given an existing file and fills file_info', function() + itp('returns true if given an existing file and fills file_info', function() local file_info = file_info_new() local path = 'unit-test-directory/test.file' assert.is_true((fs.os_fileinfo(path, file_info))) assert.is_true((is_file_info_filled(file_info))) end) - it('returns the file info of the linked file, not the link', function() + itp('returns the file info of the linked file, not the link', function() local file_info = file_info_new() local path = 'unit-test-directory/test_link.file' assert.is_true((fs.os_fileinfo(path, file_info))) @@ -883,19 +877,19 @@ describe('fs function', function() end) describe('os_fileinfo_link', function() - it('returns false if given a non-existing file', function() + itp('returns false if given a non-existing file', function() local file_info = file_info_new() assert.is_false((fs.os_fileinfo_link('/non-existent', file_info))) end) - it('returns true if given an existing file and fills file_info', function() + itp('returns true if given an existing file and fills file_info', function() local file_info = file_info_new() local path = 'unit-test-directory/test.file' assert.is_true((fs.os_fileinfo_link(path, file_info))) assert.is_true((is_file_info_filled(file_info))) end) - it('returns the file info of the link, not the linked file', function() + itp('returns the file info of the link, not the linked file', function() local file_info = file_info_new() local path = 'unit-test-directory/test_link.file' assert.is_true((fs.os_fileinfo_link(path, file_info))) @@ -906,12 +900,12 @@ describe('fs function', function() end) describe('os_fileinfo_fd', function() - it('returns false if given an invalid file descriptor', function() + itp('returns false if given an invalid file descriptor', function() local file_info = file_info_new() 
assert.is_false((fs.os_fileinfo_fd(-1, file_info))) end) - it('returns true if given a file descriptor and fills file_info', function() + itp('returns true if given a file descriptor and fills file_info', function() local file_info = file_info_new() local path = 'unit-test-directory/test.file' local fd = ffi.C.open(path, 0) @@ -922,7 +916,7 @@ describe('fs function', function() end) describe('os_fileinfo_id_equal', function() - it('returns false if file infos represent different files', function() + itp('returns false if file infos represent different files', function() local file_info_1 = file_info_new() local file_info_2 = file_info_new() local path_1 = 'unit-test-directory/test.file' @@ -932,7 +926,7 @@ describe('fs function', function() assert.is_false((fs.os_fileinfo_id_equal(file_info_1, file_info_2))) end) - it('returns true if file infos represent the same file', function() + itp('returns true if file infos represent the same file', function() local file_info_1 = file_info_new() local file_info_2 = file_info_new() local path = 'unit-test-directory/test.file' @@ -941,7 +935,7 @@ describe('fs function', function() assert.is_true((fs.os_fileinfo_id_equal(file_info_1, file_info_2))) end) - it('returns true if file infos represent the same file (symlink)', function() + itp('returns true if file infos represent the same file (symlink)', function() local file_info_1 = file_info_new() local file_info_2 = file_info_new() local path_1 = 'unit-test-directory/test.file' @@ -953,7 +947,7 @@ describe('fs function', function() end) describe('os_fileinfo_id', function() - it('extracts ino/dev from file_info into file_id', function() + itp('extracts ino/dev from file_info into file_id', function() local file_info = file_info_new() local file_id = file_id_new() local path = 'unit-test-directory/test.file' @@ -965,7 +959,7 @@ describe('fs function', function() end) describe('os_fileinfo_inode', function() - it('returns the inode from file_info', function() + itp('returns the 
inode from file_info', function() local file_info = file_info_new() local path = 'unit-test-directory/test.file' assert.is_true((fs.os_fileinfo(path, file_info))) @@ -975,7 +969,7 @@ describe('fs function', function() end) describe('os_fileinfo_size', function() - it('returns the correct size of a file', function() + itp('returns the correct size of a file', function() local path = 'unit-test-directory/test.file' local file = io.open(path, 'w') file:write('some bytes to get filesize != 0') @@ -989,7 +983,7 @@ describe('fs function', function() end) describe('os_fileinfo_hardlinks', function() - it('returns the correct number of hardlinks', function() + itp('returns the correct number of hardlinks', function() local path = 'unit-test-directory/test.file' local path_link = 'unit-test-directory/test_hlink.file' local file_info = file_info_new() @@ -1002,7 +996,7 @@ describe('fs function', function() end) describe('os_fileinfo_blocksize', function() - it('returns the correct blocksize of a file', function() + itp('returns the correct blocksize of a file', function() local path = 'unit-test-directory/test.file' -- there is a bug in luafilesystem where -- `lfs.attributes path, 'blksize'` returns the worng value: @@ -1023,12 +1017,12 @@ describe('fs function', function() end) describe('os_fileid', function() - it('returns false if given an non-existing file', function() + itp('returns false if given an non-existing file', function() local file_id = file_id_new() assert.is_false((fs.os_fileid('/non-existent', file_id))) end) - it('returns true if given an existing file and fills file_id', function() + itp('returns true if given an existing file and fills file_id', function() local file_id = file_id_new() local path = 'unit-test-directory/test.file' assert.is_true((fs.os_fileid(path, file_id))) @@ -1038,14 +1032,14 @@ describe('fs function', function() end) describe('os_fileid_equal', function() - it('returns true if two FileIDs are equal', function() + itp('returns true if 
two FileIDs are equal', function() local file_id = file_id_new() local path = 'unit-test-directory/test.file' assert.is_true((fs.os_fileid(path, file_id))) assert.is_true((fs.os_fileid_equal(file_id, file_id))) end) - it('returns false if two FileIDs are not equal', function() + itp('returns false if two FileIDs are not equal', function() local file_id_1 = file_id_new() local file_id_2 = file_id_new() local path_1 = 'unit-test-directory/test.file' @@ -1057,7 +1051,7 @@ describe('fs function', function() end) describe('os_fileid_equal_fileinfo', function() - it('returns true if file_id and file_info represent the same file', function() + itp('returns true if file_id and file_info represent the same file', function() local file_id = file_id_new() local file_info = file_info_new() local path = 'unit-test-directory/test.file' @@ -1066,7 +1060,7 @@ describe('fs function', function() assert.is_true((fs.os_fileid_equal_fileinfo(file_id, file_info))) end) - it('returns false if file_id and file_info represent different files', function() + itp('returns false if file_id and file_info represent different files', function() local file_id = file_id_new() local file_info = file_info_new() local path_1 = 'unit-test-directory/test.file' diff --git a/test/unit/os/shell_spec.lua b/test/unit/os/shell_spec.lua index 3603403daf..e883301cfb 100644 --- a/test/unit/os/shell_spec.lua +++ b/test/unit/os/shell_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local cimported = helpers.cimport( './src/nvim/os/shell.h', './src/nvim/option_defs.h', @@ -51,63 +52,51 @@ describe('shell functions', function() end describe('os_system', function() - it('can echo some output (shell builtin)', function() + itp('can echo some output (shell builtin)', function() local cmd, text = 'echo -n', 'some text' local status, output = os_system(cmd .. ' ' .. 
text) eq(text, output) eq(0, status) end) - it('can deal with empty output', function() + itp('can deal with empty output', function() local cmd = 'echo -n' local status, output = os_system(cmd) eq('', output) eq(0, status) end) - it('can pass input on stdin', function() + itp('can pass input on stdin', function() local cmd, input = 'cat -', 'some text\nsome other text' local status, output = os_system(cmd, input) eq(input, output) eq(0, status) end) - it ('returns non-zero exit code', function() + itp('returns non-zero exit code', function() local status = os_system('exit 2') eq(2, status) end) end) describe('shell_build_argv', function() - local saved_opts = {} - - setup(function() - saved_opts.p_sh = cimported.p_sh - saved_opts.p_shcf = cimported.p_shcf - end) - - teardown(function() - cimported.p_sh = saved_opts.p_sh - cimported.p_shcf = saved_opts.p_shcf - end) - - it('works with NULL arguments', function() + itp('works with NULL arguments', function() eq({'/bin/bash'}, shell_build_argv(nil, nil)) end) - it('works with cmd', function() + itp('works with cmd', function() eq({'/bin/bash', '-c', 'abc def'}, shell_build_argv('abc def', nil)) end) - it('works with extra_args', function() + itp('works with extra_args', function() eq({'/bin/bash', 'ghi jkl'}, shell_build_argv(nil, 'ghi jkl')) end) - it('works with cmd and extra_args', function() + itp('works with cmd and extra_args', function() eq({'/bin/bash', 'ghi jkl', '-c', 'abc def'}, shell_build_argv('abc def', 'ghi jkl')) end) - it('splits and unquotes &shell and &shellcmdflag', function() + itp('splits and unquotes &shell and &shellcmdflag', function() cimported.p_sh = to_cstr('/Program" "Files/zsh -f') cimported.p_shcf = to_cstr('-x -o "sh word split" "-"c') eq({'/Program Files/zsh', '-f', @@ -117,7 +106,7 @@ describe('shell functions', function() shell_build_argv('abc def', 'ghi jkl')) end) - it('applies shellxescape (p_sxe) and shellxquote (p_sxq)', function() + itp('applies shellxescape (p_sxe) and 
shellxquote (p_sxq)', function() cimported.p_sxq = to_cstr('(') cimported.p_sxe = to_cstr('"&|<>()@^') @@ -129,7 +118,7 @@ describe('shell functions', function() eq(nil, argv[3]) end) - it('applies shellxquote="(', function() + itp('applies shellxquote="(', function() cimported.p_sxq = to_cstr('"(') cimported.p_sxe = to_cstr('"&|<>()@^') @@ -141,7 +130,7 @@ describe('shell functions', function() eq(nil, argv[3]) end) - it('applies shellxquote="', function() + itp('applies shellxquote="', function() cimported.p_sxq = to_cstr('"') cimported.p_sxe = to_cstr('') @@ -153,7 +142,7 @@ describe('shell functions', function() eq(nil, argv[3]) end) - it('with empty shellxquote/shellxescape', function() + itp('with empty shellxquote/shellxescape', function() local argv = ffi.cast('char**', cimported.shell_build_argv( to_cstr('echo -n some text'), nil)) eq(ffi.string(argv[0]), '/bin/bash') diff --git a/test/unit/os/users_spec.lua b/test/unit/os/users_spec.lua index 236481e9e7..f92413c7de 100644 --- a/test/unit/os/users_spec.lua +++ b/test/unit/os/users_spec.lua @@ -1,4 +1,5 @@ -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local eq = helpers.eq @@ -27,11 +28,11 @@ describe('users function', function() local current_username = os.getenv('USER') describe('os_get_usernames', function() - it('returns FAIL if called with NULL', function() + itp('returns FAIL if called with NULL', function() eq(FAIL, users.os_get_usernames(NULL)) end) - it('fills the names garray with os usernames and returns OK', function() + itp('fills the names garray with os usernames and returns OK', function() local ga_users = garray_new() eq(OK, users.os_get_usernames(ga_users)) local user_count = garray_get_len(ga_users) @@ -48,7 +49,7 @@ describe('users function', function() end) describe('os_get_user_name', function() - it('should write the username into the buffer and return OK', 
function() + itp('should write the username into the buffer and return OK', function() local name_out = ffi.new('char[100]') eq(OK, users.os_get_user_name(name_out, 100)) eq(current_username, ffi.string(name_out)) @@ -56,14 +57,14 @@ describe('users function', function() end) describe('os_get_uname', function() - it('should write the username into the buffer and return OK', function() + itp('should write the username into the buffer and return OK', function() local name_out = ffi.new('char[100]') local user_id = lib.getuid() eq(OK, users.os_get_uname(user_id, name_out, 100)) eq(current_username, ffi.string(name_out)) end) - it('should FAIL if the userid is not found', function() + itp('should FAIL if the userid is not found', function() local name_out = ffi.new('char[100]') -- hoping nobody has this uid local user_id = 2342 @@ -73,16 +74,16 @@ describe('users function', function() end) describe('os_get_user_directory', function() - it('should return NULL if called with NULL', function() + itp('should return NULL if called with NULL', function() eq(NULL, users.os_get_user_directory(NULL)) end) - it('should return $HOME for the current user', function() + itp('should return $HOME for the current user', function() local home = os.getenv('HOME') eq(home, ffi.string((users.os_get_user_directory(current_username)))) end) - it('should return NULL if the user is not found', function() + itp('should return NULL if the user is not found', function() eq(NULL, users.os_get_user_directory('neovim_user_not_found_test')) end) end) diff --git a/test/unit/path_spec.lua b/test/unit/path_spec.lua index ccaf0228ab..470f971e68 100644 --- a/test/unit/path_spec.lua +++ b/test/unit/path_spec.lua @@ -1,5 +1,6 @@ local lfs = require('lfs') -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local eq = helpers.eq @@ -14,13 +15,6 @@ local FAIL = helpers.FAIL cimport('string.h') 
local path = cimport('./src/nvim/path.h') --- import constants parsed by ffi -local kEqualFiles = path.kEqualFiles -local kDifferentFiles = path.kDifferentFiles -local kBothFilesMissing = path.kBothFilesMissing -local kOneFileMissing = path.kOneFileMissing -local kEqualFileNames = path.kEqualFileNames - local length = 0 local buffer = nil @@ -45,7 +39,7 @@ describe('path function', function() buffer = cstr(length, '') end) - it('returns the absolute directory name of a given relative one', function() + itp('returns the absolute directory name of a given relative one', function() local result = path_full_dir_name('..', buffer, length) eq(OK, result) local old_dir = lfs.currentdir() @@ -55,16 +49,16 @@ describe('path function', function() eq(expected, (ffi.string(buffer))) end) - it('returns the current directory name if the given string is empty', function() + itp('returns the current directory name if the given string is empty', function() eq(OK, (path_full_dir_name('', buffer, length))) eq(lfs.currentdir(), (ffi.string(buffer))) end) - it('fails if the given directory does not exist', function() + itp('fails if the given directory does not exist', function() eq(FAIL, path_full_dir_name('does_not_exist', buffer, length)) end) - it('works with a normal relative dir', function() + itp('works with a normal relative dir', function() local result = path_full_dir_name('unit-test-directory', buffer, length) eq(lfs.currentdir() .. 
'/unit-test-directory', (ffi.string(buffer))) eq(OK, result) @@ -91,26 +85,26 @@ describe('path function', function() os.remove(f2) end) - it('returns kEqualFiles when passed the same file', function() - eq(kEqualFiles, (path_full_compare(f1, f1))) + itp('returns kEqualFiles when passed the same file', function() + eq(path.kEqualFiles, (path_full_compare(f1, f1))) end) - it('returns kEqualFileNames when files that dont exist and have same name', function() - eq(kEqualFileNames, (path_full_compare('null.txt', 'null.txt', true))) + itp('returns kEqualFileNames when files that dont exist and have same name', function() + eq(path.kEqualFileNames, (path_full_compare('null.txt', 'null.txt', true))) end) - it('returns kBothFilesMissing when files that dont exist', function() - eq(kBothFilesMissing, (path_full_compare('null.txt', 'null.txt'))) + itp('returns kBothFilesMissing when files that dont exist', function() + eq(path.kBothFilesMissing, (path_full_compare('null.txt', 'null.txt'))) end) - it('returns kDifferentFiles when passed different files', function() - eq(kDifferentFiles, (path_full_compare(f1, f2))) - eq(kDifferentFiles, (path_full_compare(f2, f1))) + itp('returns kDifferentFiles when passed different files', function() + eq(path.kDifferentFiles, (path_full_compare(f1, f2))) + eq(path.kDifferentFiles, (path_full_compare(f2, f1))) end) - it('returns kOneFileMissing if only one does not exist', function() - eq(kOneFileMissing, (path_full_compare(f1, 'null.txt'))) - eq(kOneFileMissing, (path_full_compare('null.txt', f1))) + itp('returns kOneFileMissing if only one does not exist', function() + eq(path.kOneFileMissing, (path_full_compare(f1, 'null.txt'))) + eq(path.kOneFileMissing, (path_full_compare('null.txt', f1))) end) end) @@ -121,11 +115,11 @@ describe('path function', function() return ffi.string(res) end - it('returns the tail of a given file path', function() + itp('returns the tail of a given file path', function() eq('file.txt', 
path_tail('directory/file.txt')) end) - it('returns an empty string if file ends in a slash', function() + itp('returns an empty string if file ends in a slash', function() eq('', path_tail('directory/')) end) end) @@ -137,24 +131,24 @@ describe('path function', function() return ffi.string(res) end - it('returns the tail of a file together with its separator', function() + itp('returns the tail of a file together with its separator', function() eq('///file.txt', path_tail_with_sep('directory///file.txt')) end) - it('returns an empty string when given an empty file name', function() + itp('returns an empty string when given an empty file name', function() eq('', path_tail_with_sep('')) end) - it('returns only the separator if there is a trailing separator', function() + itp('returns only the separator if there is a trailing separator', function() eq('/', path_tail_with_sep('some/directory/')) end) - it('cuts a leading separator', function() + itp('cuts a leading separator', function() eq('file.txt', path_tail_with_sep('/file.txt')) eq('', path_tail_with_sep('/')) end) - it('returns the whole file name if there is no separator', function() + itp('returns the whole file name if there is no separator', function() eq('file.txt', path_tail_with_sep('file.txt')) end) end) @@ -180,13 +174,13 @@ describe('path function', function() return eq(0, (ffi.C.strncmp((to_cstr(base)), pinvk, len))) end - it('returns the executable name of an invocation given a relative invocation', function() + itp('returns the executable name of an invocation given a relative invocation', function() local invk, len = invocation_path_tail('directory/exe a b c') compare("exe a b c", invk, len) eq(3, len) end) - it('returns the executable name of an invocation given an absolute invocation', function() + itp('returns the executable name of an invocation given an absolute invocation', function() if ffi.os == 'Windows' then local invk, len = invocation_path_tail('C:\\Users\\anyone\\Program Files\\z a 
b') compare('z a b', invk, len) @@ -198,18 +192,18 @@ describe('path function', function() end end) - it('does not count arguments to the executable as part of its path', function() + itp('does not count arguments to the executable as part of its path', function() local invk, len = invocation_path_tail('exe a/b\\c') compare("exe a/b\\c", invk, len) eq(3, len) end) - it('only accepts whitespace as a terminator for the executable name', function() + itp('only accepts whitespace as a terminator for the executable name', function() local invk, _ = invocation_path_tail('exe-a+b_c[]()|#!@$%^&*') eq('exe-a+b_c[]()|#!@$%^&*', (ffi.string(invk))) end) - it('is equivalent to path_tail when args do not contain a path separator', function() + itp('is equivalent to path_tail when args do not contain a path separator', function() local ptail = path.path_tail(to_cstr("a/b/c x y z")) neq(NULL, ptail) local tail = ffi.string(ptail) @@ -217,7 +211,7 @@ describe('path function', function() eq(tail, ffi.string(invk)) end) - it('is not equivalent to path_tail when args contain a path separator', function() + itp('is not equivalent to path_tail when args contain a path separator', function() local ptail = path.path_tail(to_cstr("a/b/c x y/z")) neq(NULL, ptail) local invk, _ = invocation_path_tail("a/b/c x y/z") @@ -232,34 +226,34 @@ describe('path function', function() return ffi.string(res) end - it('returns', function() + itp('returns', function() eq('directory/file.txt', path_next_component('some/directory/file.txt')) end) - it('returns empty string if given file contains no separator', function() + itp('returns empty string if given file contains no separator', function() eq('', path_next_component('file.txt')) end) end) describe('path_shorten_fname', function() - it('returns NULL if `full_path` is NULL', function() + itp('returns NULL if `full_path` is NULL', function() local dir = to_cstr('some/directory/file.txt') eq(NULL, (path.path_shorten_fname(NULL, dir))) end) - it('returns 
NULL if the path and dir does not match', function() + itp('returns NULL if the path and dir does not match', function() local dir = to_cstr('not/the/same') local full = to_cstr('as/this.txt') eq(NULL, (path.path_shorten_fname(full, dir))) end) - it('returns NULL if the path is not separated properly', function() + itp('returns NULL if the path is not separated properly', function() local dir = to_cstr('some/very/long/') local full = to_cstr('some/very/long/directory/file.txt') eq(NULL, (path.path_shorten_fname(full, dir))) end) - it('shortens the filename if `dir_name` is the start of `full_path`', function() + itp('shortens the filename if `dir_name` is the start of `full_path`', function() local full = to_cstr('some/very/long/directory/file.txt') local dir = to_cstr('some/very/long') eq('directory/file.txt', (ffi.string(path.path_shorten_fname(full, dir)))) @@ -280,20 +274,20 @@ describe('path_shorten_fname_if_possible', function() end) describe('path_shorten_fname_if_possible', function() - it('returns shortened path if possible', function() + itp('returns shortened path if possible', function() lfs.chdir('ut_directory') local full = to_cstr(lfs.currentdir() .. '/subdir/file.txt') eq('subdir/file.txt', (ffi.string(path.path_shorten_fname_if_possible(full)))) end) - it('returns `full_path` if a shorter version is not possible', function() + itp('returns `full_path` if a shorter version is not possible', function() local old = lfs.currentdir() lfs.chdir('ut_directory') local full = old .. 
'/subdir/file.txt' eq(full, (ffi.string(path.path_shorten_fname_if_possible(to_cstr(full))))) end) - it('returns NULL if `full_path` is NULL', function() + itp('returns NULL if `full_path` is NULL', function() eq(NULL, (path.path_shorten_fname_if_possible(NULL))) end) end) @@ -330,13 +324,13 @@ describe('more path function', function() buffer = cstr(length, '') end) - it('fails if given filename is NULL', function() + itp('fails if given filename is NULL', function() local force_expansion = 1 local result = path.vim_FullName(NULL, buffer, length, force_expansion) eq(FAIL, result) end) - it('fails safely if given length is wrong #5737', function() + itp('fails safely if given length is wrong #5737', function() local force_expansion = 1 local filename = 'foo/bar/bazzzzzzz/buz/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/a' local too_short_len = 8 @@ -347,7 +341,7 @@ describe('more path function', function() eq(FAIL, result) end) - it('uses the filename if the filename is a URL', function() + itp('uses the filename if the filename is a URL', function() local force_expansion = 1 local filename = 'http://www.neovim.org' local result = vim_FullName(filename, buffer, length, force_expansion) @@ -355,7 +349,7 @@ describe('more path function', function() eq(OK, result) end) - it('fails and uses filename if given filename contains non-existing directory', function() + itp('fails and uses filename if given filename contains non-existing directory', function() local force_expansion = 1 local filename = 'non_existing_dir/test.file' local result = vim_FullName(filename, buffer, length, force_expansion) @@ -363,7 +357,7 @@ describe('more path function', function() eq(FAIL, result) end) - it('concatenates given filename if it does not contain a slash', function() + itp('concatenates given filename if it does not contain a slash', function() local force_expansion = 1 local result = vim_FullName('test.file', buffer, length, force_expansion) local expected = lfs.currentdir() 
.. '/test.file' @@ -371,7 +365,7 @@ describe('more path function', function() eq(OK, result) end) - it('concatenates given filename if it is a directory but does not contain a\n slash', function() + itp('concatenates given filename if it is a directory but does not contain a\n slash', function() local force_expansion = 1 local result = vim_FullName('..', buffer, length, force_expansion) local expected = lfs.currentdir() .. '/..' @@ -381,7 +375,7 @@ describe('more path function', function() -- Is it possible for every developer to enter '..' directory while running -- the unit tests? Which other directory would be better? - it('enters given directory (instead of just concatenating the strings) if possible and if path contains a slash', function() + itp('enters given directory (instead of just concatenating the strings) if possible and if path contains a slash', function() local force_expansion = 1 local result = vim_FullName('../test.file', buffer, length, force_expansion) local old_dir = lfs.currentdir() @@ -392,7 +386,7 @@ describe('more path function', function() eq(OK, result) end) - it('just copies the path if it is already absolute and force=0', function() + itp('just copies the path if it is already absolute and force=0', function() local force_expansion = 0 local absolute_path = '/absolute/path' local result = vim_FullName(absolute_path, buffer, length, force_expansion) @@ -400,7 +394,7 @@ describe('more path function', function() eq(OK, result) end) - it('fails and uses filename when the path is relative to HOME', function() + itp('fails and uses filename when the path is relative to HOME', function() local force_expansion = 1 local absolute_path = '~/home.file' local result = vim_FullName(absolute_path, buffer, length, force_expansion) @@ -408,14 +402,14 @@ describe('more path function', function() eq(FAIL, result) end) - it('works with some "normal" relative path with directories', function() + itp('works with some "normal" relative path with 
directories', function() local force_expansion = 1 local result = vim_FullName('unit-test-directory/test.file', buffer, length, force_expansion) eq(OK, result) eq(lfs.currentdir() .. '/unit-test-directory/test.file', (ffi.string(buffer))) end) - it('does not modify the given filename', function() + itp('does not modify the given filename', function() local force_expansion = 1 local filename = to_cstr('unit-test-directory/test.file') -- Don't use the wrapper here but pass a cstring directly to the c @@ -426,7 +420,7 @@ describe('more path function', function() eq(OK, result) end) - it('works with directories that have one path component', function() + itp('works with directories that have one path component', function() local force_expansion = 1 local filename = to_cstr('/tmp') local result = path.vim_FullName(filename, buffer, length, force_expansion) @@ -446,12 +440,12 @@ describe('more path function', function() after_each(function() lfs.rmdir('CamelCase') end) if ffi.os == 'Windows' or ffi.os == 'OSX' then - it('Corrects the case of file names in Mac and Windows', function() + itp('Corrects the case of file names in Mac and Windows', function() eq('CamelCase', fix_case('camelcase')) eq('CamelCase', fix_case('cAMELcASE')) end) else - it('does nothing on Linux', function() + itp('does nothing on Linux', function() eq('camelcase', fix_case('camelcase')) eq('cAMELcASE', fix_case('cAMELcASE')) end) @@ -459,41 +453,41 @@ describe('more path function', function() end) describe('append_path', function() - it('joins given paths with a slash', function() + itp('joins given paths with a slash', function() local path1 = cstr(100, 'path1') local to_append = to_cstr('path2') eq(OK, (path.append_path(path1, to_append, 100))) eq("path1/path2", (ffi.string(path1))) end) - it('joins given paths without adding an unnecessary slash', function() + itp('joins given paths without adding an unnecessary slash', function() local path1 = cstr(100, 'path1/') local to_append = 
to_cstr('path2') eq(OK, path.append_path(path1, to_append, 100)) eq("path1/path2", (ffi.string(path1))) end) - it('fails and uses filename if there is not enough space left for to_append', function() + itp('fails and uses filename if there is not enough space left for to_append', function() local path1 = cstr(11, 'path1/') local to_append = to_cstr('path2') eq(FAIL, (path.append_path(path1, to_append, 11))) end) - it('does not append a slash if to_append is empty', function() + itp('does not append a slash if to_append is empty', function() local path1 = cstr(6, 'path1') local to_append = to_cstr('') eq(OK, (path.append_path(path1, to_append, 6))) eq('path1', (ffi.string(path1))) end) - it('does not append unnecessary dots', function() + itp('does not append unnecessary dots', function() local path1 = cstr(6, 'path1') local to_append = to_cstr('.') eq(OK, (path.append_path(path1, to_append, 6))) eq('path1', (ffi.string(path1))) end) - it('copies to_append to path, if path is empty', function() + itp('copies to_append to path, if path is empty', function() local path1 = cstr(7, '') local to_append = to_cstr('/path2') eq(OK, (path.append_path(path1, to_append, 7))) @@ -507,15 +501,15 @@ describe('more path function', function() return path.path_is_absolute_path(filename) end - it('returns true if filename starts with a slash', function() + itp('returns true if filename starts with a slash', function() eq(OK, path_is_absolute_path('/some/directory/')) end) - it('returns true if filename starts with a tilde', function() + itp('returns true if filename starts with a tilde', function() eq(OK, path_is_absolute_path('~/in/my/home~/directory')) end) - it('returns false if filename starts not with slash nor tilde', function() + itp('returns false if filename starts not with slash nor tilde', function() eq(FAIL, path_is_absolute_path('not/in/my/home~/directory')) end) end) diff --git a/test/unit/preload.lua b/test/unit/preload.lua index d8ec2c3943..841e19b878 100644 --- 
a/test/unit/preload.lua +++ b/test/unit/preload.lua @@ -2,6 +2,6 @@ -- Busted started doing this to help provide more isolation. See issue #62 -- for more information about this. local ffi = require('ffi') -local helpers = require('test.unit.helpers') +local helpers = require('test.unit.helpers')(nil) local lfs = require('lfs') local preprocess = require('test.unit.preprocess') diff --git a/test/unit/profile_spec.lua b/test/unit/profile_spec.lua index 852475fe2c..08e5cedbab 100644 --- a/test/unit/profile_spec.lua +++ b/test/unit/profile_spec.lua @@ -1,10 +1,13 @@ -local helpers = require 'test.unit.helpers' +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) -local prof = helpers.cimport './src/nvim/profile.h' +local cimport = helpers.cimport local ffi = helpers.ffi local eq = helpers.eq local neq = helpers.neq +local prof = cimport('./src/nvim/profile.h') + local function split(inputstr, sep) if sep == nil then sep = "%s" @@ -78,7 +81,7 @@ describe('profiling related functions', function() end describe('profile_equal', function() - it('times are equal to themselves', function() + itp('times are equal to themselves', function() local start = profile_start() assert.is_true(profile_equal(start, start)) @@ -86,7 +89,7 @@ describe('profiling related functions', function() assert.is_true(profile_equal(e, e)) end) - it('times are unequal to others', function() + itp('times are unequal to others', function() assert.is_false(profile_equal(profile_start(), profile_start())) end) end) @@ -95,24 +98,24 @@ describe('profiling related functions', function() -- the profiling package. Those functions in turn will probably be tested -- using profile_cmp... circular reasoning. 
describe('profile_cmp', function() - it('can compare subsequent starts', function() + itp('can compare subsequent starts', function() local s1, s2 = profile_start(), profile_start() assert.is_true(profile_cmp(s1, s2) > 0) assert.is_true(profile_cmp(s2, s1) < 0) end) - it('can compare the zero element', function() + itp('can compare the zero element', function() assert.is_true(profile_cmp(profile_zero(), profile_zero()) == 0) end) - it('correctly orders divisions', function() + itp('correctly orders divisions', function() local start = profile_start() assert.is_true(profile_cmp(start, profile_divide(start, 10)) <= 0) end) end) describe('profile_divide', function() - it('actually performs division', function() + itp('actually performs division', function() -- note: the routine actually performs floating-point division to get -- better rounding behaviour, we have to take that into account when -- checking. (check range, not exact number). @@ -134,14 +137,14 @@ describe('profiling related functions', function() end) describe('profile_zero', function() - it('returns the same value on each call', function() + itp('returns the same value on each call', function() eq(0, profile_zero()) assert.is_true(profile_equal(profile_zero(), profile_zero())) end) end) describe('profile_start', function() - it('increases', function() + itp('increases', function() local last = profile_start() for _ = 1, 100 do local curr = profile_start() @@ -152,11 +155,11 @@ describe('profiling related functions', function() end) describe('profile_end', function() - it('the elapsed time cannot be zero', function() + itp('the elapsed time cannot be zero', function() neq(profile_zero(), profile_end(profile_start())) end) - it('outer elapsed >= inner elapsed', function() + itp('outer elapsed >= inner elapsed', function() for _ = 1, 100 do local start_outer = profile_start() local start_inner = profile_start() @@ -169,11 +172,11 @@ describe('profiling related functions', function() end) 
describe('profile_setlimit', function() - it('sets no limit when 0 is passed', function() + itp('sets no limit when 0 is passed', function() eq(true, profile_equal(profile_setlimit(0), profile_zero())) end) - it('sets a limit in the future otherwise', function() + itp('sets a limit in the future otherwise', function() local future = profile_setlimit(1000) local now = profile_start() assert.is_true(profile_cmp(future, now) < 0) @@ -181,12 +184,12 @@ describe('profiling related functions', function() end) describe('profile_passed_limit', function() - it('start is in the past', function() + itp('start is in the past', function() local start = profile_start() eq(true, profile_passed_limit(start)) end) - it('start + start is in the future', function() + itp('start + start is in the future', function() local start = profile_start() local future = profile_add(start, start) eq(false, profile_passed_limit(future)) @@ -194,12 +197,12 @@ describe('profiling related functions', function() end) describe('profile_msg', function() - it('prints the zero time as 0.00000', function() + itp('prints the zero time as 0.00000', function() local str = trim(profile_msg(profile_zero())) eq(str, "0.000000") end) - it('prints the time passed, in seconds.microsends', function() + itp('prints the time passed, in seconds.microsends', function() local start = profile_start() local endt = profile_end(start) local str = trim(profile_msg(endt)) @@ -221,14 +224,14 @@ describe('profiling related functions', function() end) describe('profile_add', function() - it('adds profiling times', function() + itp('adds profiling times', function() local start = profile_start() assert.equals(start, profile_add(profile_zero(), start)) end) end) describe('profile_sub', function() - it('subtracts profiling times', function() + itp('subtracts profiling times', function() -- subtracting zero does nothing local start = profile_start() assert.equals(start, profile_sub(start, profile_zero())) diff --git 
a/test/unit/rbuffer_spec.lua b/test/unit/rbuffer_spec.lua index 89136410d3..e9104dd5c4 100644 --- a/test/unit/rbuffer_spec.lua +++ b/test/unit/rbuffer_spec.lua @@ -1,9 +1,11 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) -local ffi = helpers.ffi -local eq = helpers.eq -local cstr = helpers.cstr +local eq = helpers.eq +local ffi = helpers.ffi +local cstr = helpers.cstr local to_cstr = helpers.to_cstr +local child_call_once = helpers.child_call_once local rbuffer = helpers.cimport("./test/unit/fixtures/rbuffer.h") @@ -31,9 +33,11 @@ describe('rbuffer functions', function() end before_each(function() - rbuf = ffi.gc(rbuffer.rbuffer_new(capacity), rbuffer.rbuffer_free) - -- fill the internal buffer with the character '0' to simplify inspecting - ffi.C.memset(rbuf.start_ptr, string.byte('0'), capacity) + child_call_once(function() + rbuf = ffi.gc(rbuffer.rbuffer_new(capacity), rbuffer.rbuffer_free) + -- fill the internal buffer with the character '0' to simplify inspecting + ffi.C.memset(rbuf.start_ptr, string.byte('0'), capacity) + end) end) describe('RBUFFER_UNTIL_FULL', function() @@ -50,66 +54,51 @@ describe('rbuffer functions', function() end) describe('with empty buffer in one contiguous chunk', function() - it('is called once with the empty chunk', function() + itp('is called once with the empty chunk', function() collect_write_chunks() eq({'0000000000000000'}, chunks) end) end) describe('with partially empty buffer in one contiguous chunk', function() - before_each(function() + itp('is called once with the empty chunk', function() write('string') - end) - - it('is called once with the empty chunk', function() collect_write_chunks() eq({'0000000000'}, chunks) end) end) describe('with filled buffer in one contiguous chunk', function() - before_each(function() + itp('is not called', function() write('abcdefghijklmnopq') - end) - - it('is not called', function() 
collect_write_chunks() eq({}, chunks) end) end) describe('with buffer partially empty in two contiguous chunks', function() - before_each(function() + itp('is called twice with each filled chunk', function() write('1234567890') read(8) - end) - - it('is called twice with each filled chunk', function() collect_write_chunks() eq({'000000', '12345678'}, chunks) end) end) describe('with buffer empty in two contiguous chunks', function() - before_each(function() + itp('is called twice with each filled chunk', function() write('12345678') read(8) - end) - - it('is called twice with each filled chunk', function() collect_write_chunks() eq({'00000000', '12345678'}, chunks) end) end) describe('with buffer filled in two contiguous chunks', function() - before_each(function() + itp('is not called', function() write('12345678') read(8) write('abcdefghijklmnopq') - end) - - it('is not called', function() collect_write_chunks() eq({}, chunks) end) @@ -130,55 +119,43 @@ describe('rbuffer functions', function() end) describe('with empty buffer', function() - it('is not called', function() + itp('is not called', function() collect_read_chunks() eq({}, chunks) end) end) describe('with partially filled buffer in one contiguous chunk', function() - before_each(function() + itp('is called once with the filled chunk', function() write('string') - end) - - it('is called once with the filled chunk', function() collect_read_chunks() eq({'string'}, chunks) end) end) describe('with filled buffer in one contiguous chunk', function() - before_each(function() + itp('is called once with the filled chunk', function() write('abcdefghijklmnopq') - end) - - it('is called once with the filled chunk', function() collect_read_chunks() eq({'abcdefghijklmnop'}, chunks) end) end) describe('with buffer partially filled in two contiguous chunks', function() - before_each(function() + itp('is called twice with each filled chunk', function() write('1234567890') read(10) write('long string') - end) - - it('is 
called twice with each filled chunk', function() collect_read_chunks() eq({'long s', 'tring'}, chunks) end) end) describe('with buffer filled in two contiguous chunks', function() - before_each(function() + itp('is called twice with each filled chunk', function() write('12345678') read(8) write('abcdefghijklmnopq') - end) - - it('is called twice with each filled chunk', function() collect_read_chunks() eq({'abcdefgh', 'ijklmnop'}, chunks) end) @@ -198,20 +175,17 @@ describe('rbuffer functions', function() end) describe('with empty buffer', function() - it('is not called', function() + itp('is not called', function() collect_chars() eq({}, chars) end) end) describe('with buffer filled in two contiguous chunks', function() - before_each(function() + itp('collects each character and index', function() write('1234567890') read(10) write('long string') - end) - - it('collects each character and index', function() collect_chars() eq({{'l', 0}, {'o', 1}, {'n', 2}, {'g', 3}, {' ', 4}, {'s', 5}, {'t', 6}, {'r', 7}, {'i', 8}, {'n', 9}, {'g', 10}}, chars) @@ -232,20 +206,17 @@ describe('rbuffer functions', function() end) describe('with empty buffer', function() - it('is not called', function() + itp('is not called', function() collect_chars() eq({}, chars) end) end) describe('with buffer filled in two contiguous chunks', function() - before_each(function() + itp('collects each character and index', function() write('1234567890') read(10) write('long string') - end) - - it('collects each character and index', function() collect_chars() eq({{'g', 10}, {'n', 9}, {'i', 8}, {'r', 7}, {'t', 6}, {'s', 5}, {' ', 4}, {'g', 3}, {'n', 2}, {'o', 1}, {'l', 0}}, chars) @@ -264,13 +235,10 @@ describe('rbuffer functions', function() end describe('with buffer filled in two contiguous chunks', function() - before_each(function() + itp('compares the common longest sequence', function() write('1234567890') read(10) write('long string') - end) - - it('compares the common longest sequence', 
function() eq(0, cmp('long string')) eq(0, cmp('long strin')) eq(-1, cmp('long striM')) @@ -282,31 +250,31 @@ describe('rbuffer functions', function() end) describe('with empty buffer', function() - it('returns 0 since no characters are compared', function() + itp('returns 0 since no characters are compared', function() eq(0, cmp('')) end) end) end) describe('rbuffer_write', function() - it('fills the internal buffer and returns the write count', function() + itp('fills the internal buffer and returns the write count', function() eq(12, write('short string')) eq('short string0000', inspect()) end) - it('wont write beyond capacity', function() + itp('wont write beyond capacity', function() eq(16, write('very very long string')) eq('very very long s', inspect()) end) end) describe('rbuffer_read', function() - it('reads what was previously written', function() + itp('reads what was previously written', function() write('to read') eq('to read', read(20)) end) - it('reads nothing if the buffer is empty', function() + itp('reads nothing if the buffer is empty', function() eq('', read(20)) write('empty') eq('empty', read(20)) @@ -315,7 +283,7 @@ describe('rbuffer functions', function() end) describe('rbuffer_get', function() - it('fetch the pointer at offset, wrapping if required', function() + itp('fetch the pointer at offset, wrapping if required', function() write('1234567890') read(10) write('long string') @@ -334,7 +302,7 @@ describe('rbuffer functions', function() end) describe('wrapping behavior', function() - it('writing/reading wraps across the end of the internal buffer', function() + itp('writing/reading wraps across the end of the internal buffer', function() write('1234567890') eq('1234', read(4)) eq('5678', read(4)) diff --git a/test/unit/set.lua b/test/unit/set.lua index 4e66546f32..f3d68c3042 100644 --- a/test/unit/set.lua +++ b/test/unit/set.lua @@ -26,6 +26,22 @@ function Set:new(items) return obj end +function Set:copy() + local obj = {} + obj.nelem = 
self.nelem + obj.tbl = {} + obj.items = {} + for k, v in pairs(self.tbl) do + obj.tbl[k] = v + end + for k, v in pairs(self.items) do + obj.items[k] = v + end + setmetatable(obj, Set) + obj.__index = Set + return obj +end + -- adds the argument Set to this Set function Set:union(other) for e in other:iterator() do diff --git a/test/unit/strings_spec.lua b/test/unit/strings_spec.lua index 072701ea78..3bc3dc7130 100644 --- a/test/unit/strings_spec.lua +++ b/test/unit/strings_spec.lua @@ -1,4 +1,5 @@ -local helpers = require("test.unit.helpers") +local helpers = require("test.unit.helpers")(after_each) +local itp = helpers.gen_itp(it) local cimport = helpers.cimport local eq = helpers.eq @@ -19,23 +20,23 @@ describe('vim_strsave_escaped()', function() return ret end - it('precedes by a backslash all chars from second argument', function() + itp('precedes by a backslash all chars from second argument', function() eq([[\a\b\c\d]], vim_strsave_escaped('abcd','abcd')) end) - it('precedes by a backslash chars only from second argument', function() + itp('precedes by a backslash chars only from second argument', function() eq([[\a\bcd]], vim_strsave_escaped('abcd','ab')) end) - it('returns a copy of passed string if second argument is empty', function() + itp('returns a copy of passed string if second argument is empty', function() eq('text \n text', vim_strsave_escaped('text \n text','')) end) - it('returns an empty string if first argument is empty string', function() + itp('returns an empty string if first argument is empty string', function() eq('', vim_strsave_escaped('','\r')) end) - it('returns a copy of passed string if it does not contain chars from 2nd argument', function() + itp('returns a copy of passed string if it does not contain chars from 2nd argument', function() eq('some text', vim_strsave_escaped('some text', 'a')) end) end) @@ -50,51 +51,51 @@ describe('vim_strnsave_unquoted()', function() return ret end - it('copies unquoted strings as-is', function() 
+ itp('copies unquoted strings as-is', function() eq('-c', vim_strnsave_unquoted('-c')) eq('', vim_strnsave_unquoted('')) end) - it('respects length argument', function() + itp('respects length argument', function() eq('', vim_strnsave_unquoted('-c', 0)) eq('-', vim_strnsave_unquoted('-c', 1)) eq('-', vim_strnsave_unquoted('"-c', 2)) end) - it('unquotes fully quoted word', function() + itp('unquotes fully quoted word', function() eq('/bin/sh', vim_strnsave_unquoted('"/bin/sh"')) end) - it('unquotes partially quoted word', function() + itp('unquotes partially quoted word', function() eq('/Program Files/sh', vim_strnsave_unquoted('/Program" "Files/sh')) end) - it('removes ""', function() + itp('removes ""', function() eq('/Program Files/sh', vim_strnsave_unquoted('/""Program" "Files/sh')) end) - it('performs unescaping of "', function() + itp('performs unescaping of "', function() eq('/"Program Files"/sh', vim_strnsave_unquoted('/"\\""Program Files"\\""/sh')) end) - it('performs unescaping of \\', function() + itp('performs unescaping of \\', function() eq('/\\Program Files\\foo/sh', vim_strnsave_unquoted('/"\\\\"Program Files"\\\\foo"/sh')) end) - it('strips quote when there is no pair to it', function() + itp('strips quote when there is no pair to it', function() eq('/Program Files/sh', vim_strnsave_unquoted('/Program" Files/sh')) eq('', vim_strnsave_unquoted('"')) end) - it('allows string to end with one backslash unescaped', function() + itp('allows string to end with one backslash unescaped', function() eq('/Program Files/sh\\', vim_strnsave_unquoted('/Program" Files/sh\\')) end) - it('does not perform unescaping out of quotes', function() + itp('does not perform unescaping out of quotes', function() eq('/Program\\ Files/sh\\', vim_strnsave_unquoted('/Program\\ Files/sh\\')) end) - it('does not unescape \\n', function() + itp('does not unescape \\n', function() eq('/Program\\nFiles/sh', vim_strnsave_unquoted('/Program"\\n"Files/sh')) end) end) diff --git 
a/test/unit/tempfile_spec.lua b/test/unit/tempfile_spec.lua index cf0d78b7a7..210518fe1f 100644 --- a/test/unit/tempfile_spec.lua +++ b/test/unit/tempfile_spec.lua @@ -1,58 +1,65 @@ -local lfs = require 'lfs' -local helpers = require 'test.unit.helpers' +local lfs = require('lfs') +local helpers = require('test.unit.helpers')(after_each) +local itp = helpers.gen_itp(it) -local os = helpers.cimport './src/nvim/os/os.h' -local tempfile = helpers.cimport './src/nvim/fileio.h' +local eq = helpers.eq +local neq = helpers.neq +local cimport = helpers.cimport +local child_call_once = helpers.child_call_once +local child_cleanup_once = helpers.child_cleanup_once + +local lib = cimport('./src/nvim/os/os.h', './src/nvim/fileio.h') describe('tempfile related functions', function() before_each(function() - tempfile.vim_deltempdir() - end) - after_each(function() - tempfile.vim_deltempdir() + local function vim_deltempdir() + lib.vim_deltempdir() + end + child_call_once(vim_deltempdir) + child_cleanup_once(vim_deltempdir) end) local vim_gettempdir = function() - return helpers.ffi.string(tempfile.vim_gettempdir()) + return helpers.ffi.string(lib.vim_gettempdir()) end describe('vim_gettempdir', function() - it('returns path to Neovim own temp directory', function() + itp('returns path to Neovim own temp directory', function() local dir = vim_gettempdir() assert.True(dir ~= nil and dir:len() > 0) -- os_file_is_writable returns 2 for a directory which we have rights -- to write into. - assert.equals(os.os_file_is_writable(helpers.to_cstr(dir)), 2) + eq(lib.os_file_is_writable(helpers.to_cstr(dir)), 2) for entry in lfs.dir(dir) do assert.True(entry == '.' 
or entry == '..') end end) - it('returns the same directory on each call', function() + itp('returns the same directory on each call', function() local dir1 = vim_gettempdir() local dir2 = vim_gettempdir() - assert.equals(dir1, dir2) + eq(dir1, dir2) end) end) describe('vim_tempname', function() local vim_tempname = function() - return helpers.ffi.string(tempfile.vim_tempname()) + return helpers.ffi.string(lib.vim_tempname()) end - it('generate name of non-existing file', function() + itp('generate name of non-existing file', function() local file = vim_tempname() assert.truthy(file) - assert.False(os.os_path_exists(file)) + assert.False(lib.os_path_exists(file)) end) - it('generate different names on each call', function() + itp('generate different names on each call', function() local fst = vim_tempname() local snd = vim_tempname() - assert.not_equals(fst, snd) + neq(fst, snd) end) - it('generate file name in Neovim own temp directory', function() + itp('generate file name in Neovim own temp directory', function() local dir = vim_gettempdir() local file = vim_tempname() assert.truthy(file:find('^' .. dir .. 
'[^/]*$')) diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt index 83397a72ca..809a3623e4 100644 --- a/third-party/CMakeLists.txt +++ b/third-party/CMakeLists.txt @@ -124,11 +124,11 @@ set(GPERF_SHA256 767112a204407e62dbc3106647cf839ed544f3cf5d0f0523aaa2508623aad63 set(WINTOOLS_URL https://github.com/neovim/deps/raw/2f9acbecf06365c10baa3c0087f34a54c9c6f949/opt/win32tools.zip) set(WINTOOLS_SHA256 8bfce7e3a365721a027ce842f2ec1cf878f1726233c215c05964aac07300798c) -set(WINGUI_URL https://github.com/equalsraf/neovim-qt/releases/download/v0.2.5/neovim-qt.zip) -set(WINGUI_SHA256 07e2838c713bda9221a0b8022f4c9df3f8bbe69bb0b2fbeec473a0e8f0e779fa) +set(WINGUI_URL https://github.com/equalsraf/neovim-qt/releases/download/v0.2.6/neovim-qt.zip) +set(WINGUI_SHA256 90217351e9e51c81ef5bba39066f00d05e15ef1f881882c3c682e61cd446c211) -set(WIN32YANK_URL https://github.com/equalsraf/win32yank/releases/download/v0.0.2/win32yank.zip) -set(WIN32YANK_SHA256 78869bf68565607cda1b6a3d549e2487d59d6f0f16f9b003e123c0086f90309d) +set(WIN32YANK_URL https://github.com/equalsraf/win32yank/releases/download/v0.0.3/win32yank.zip) +set(WIN32YANK_SHA256 b474439ed2854a9a24941d66970c7fcfece219401eaaa5ebc0ffcc962e69887a) set(WINPTY_URL https://github.com/rprichard/winpty/releases/download/0.4.2/winpty-0.4.2-msvc2015.zip) set(WINPTY_SHA256 b465f2584ff394b3fe27c01aa1dcfc679583c1ee951d0e83de3f859d8b8305b8) diff --git a/third-party/README.md b/third-party/README.md deleted file mode 100644 index c3ec399695..0000000000 --- a/third-party/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Third party dependencies for neovim - -This directory contains the recipes to build any third party dependencies for -neovim which, for one reason or another, we cannot rely on the system to supply. - -Most of the details are captured in the `CMakeLists.txt`, but we have a -dedicated directory in case any of the dependencies requires patching and other -support. |