diff options
-rw-r--r-- | runtime/doc/pattern.txt | 6 | ||||
-rw-r--r-- | runtime/doc/vi_diff.txt | 5 | ||||
-rw-r--r-- | src/nvim/regexp.c | 17 | ||||
-rw-r--r-- | test/old/testdir/test_search.vim | 33 |
4 files changed, 55 insertions, 6 deletions
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index be913e941e..ec1eb9a4a0 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1206,7 +1206,8 @@ x A single character, with no special meaning, matches itself \o40 octal number of character up to 0o377 \x20 hexadecimal number of character up to 0xff \u20AC hex. number of multibyte character up to 0xffff - \U1234 hex. number of multibyte character up to 0xffffffff + \U1234 hex. number of multibyte character up to 8 characters + 0xffffffff |E1541| NOTE: The other backslash codes mentioned above do not work inside []! - Matching with a collection can be slow, because each character in @@ -1246,7 +1247,8 @@ x A single character, with no special meaning, matches itself \%u20AC Matches the character specified with up to four hexadecimal characters. \%U1234abcd Matches the character specified with up to eight hexadecimal - characters, up to 0x7fffffff + characters, up to 0x7fffffff (the maximum allowed value is INT_MAX + |E1541|, but the maximum valid Unicode codepoint is U+10FFFF). ============================================================================== 7. Ignoring case in a pattern */ignorecase* diff --git a/runtime/doc/vi_diff.txt b/runtime/doc/vi_diff.txt index 0a0cbc8ec6..c6de169853 100644 --- a/runtime/doc/vi_diff.txt +++ b/runtime/doc/vi_diff.txt @@ -31,8 +31,11 @@ Maximum display width Unix and Win32: 1024 characters, otherwise 255 Maximum lhs of a mapping 50 characters. Number of different highlighting types: over 30000 Range of a Number variable: -2147483648 to 2147483647 (might be more on 64 - bit systems) + bit systems) See also: |v:numbermax|, + |v:numbermin| and |v:numbersize| Maximum length of a line in a tags file: 512 bytes. + *E1541* +Maximum value for |/\U| and |/\%U|: 2147483647 (for 32bit integer). Information for undo and text in registers is kept in memory, thus when making (big) changes the amount of (virtual) memory available limits the number of diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index de9a7e580f..7a8d963dee 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -367,6 +367,8 @@ static const char e_nfa_regexp_missing_value_in_chr[] static const char e_atom_engine_must_be_at_start_of_pattern[] = N_("E1281: Atom '\\%%#=%c' must be at the start of the pattern"); static const char e_substitute_nesting_too_deep[] = N_("E1290: substitute nesting too deep"); +static const char e_unicode_val_too_large[] + = N_("E1541: Value too large, max Unicode codepoint is U+10FFFF"); #define NOT_MULTI 0 #define MULTI_ONE 1 @@ -4796,6 +4798,11 @@ collection: || *regparse == 'u' || *regparse == 'U') { startc = coll_get_char(); + // max UTF-8 Codepoint is U+10FFFF, + // but allow values until INT_MAX + if (startc == INT_MAX) { + EMSG_RET_NULL(_(e_unicode_val_too_large)); + } if (startc == 0) { regc(0x0a); } else { @@ -5548,12 +5555,15 @@ static int coll_get_char(void) case 'U': nr = gethexchrs(8); break; } - if (nr < 0 || nr > INT_MAX) { + if (nr < 0) { // If getting the number fails be backwards compatible: the character // is a backslash. regparse--; nr = '\\'; } + if (nr > INT_MAX) { + nr = INT_MAX; + } return (int)nr; } @@ -10565,6 +10575,11 @@ collection: || *regparse == 'U') { // TODO(RE): This needs more testing startc = coll_get_char(); + // max UTF-8 Codepoint is U+10FFFF, + // but allow values until INT_MAX + if (startc == INT_MAX) { + EMSG_RET_FAIL(_(e_unicode_val_too_large)); + } got_coll_char = true; MB_PTR_BACK(old_regparse, regparse); } else { diff --git a/test/old/testdir/test_search.vim b/test/old/testdir/test_search.vim index cd36f56f17..4e5cb574bd 100644 --- a/test/old/testdir/test_search.vim +++ b/test/old/testdir/test_search.vim @@ -1499,17 +1499,46 @@ func Test_large_hex_chars2() try /[\Ufffffc1f] catch - call assert_match('E486:', v:exception) + call assert_match('E1541:', v:exception) endtry try set re=1 /[\Ufffffc1f] catch - call assert_match('E486:', v:exception) + call assert_match('E1541:', v:exception) endtry set re& endfunc +func Test_large_hex_chars3() + " Validate max number of Unicode char + try + /[\UFFFFFFFF] + catch + call assert_match('E1541:', v:exception) + endtry + try + /[\UFFFFFFF] + catch + call assert_match('E486:', v:exception) + endtry + try + /\%#=2[\d32-\UFFFFFFFF] + catch + call assert_match('E1541:', v:exception) + endtry + try + /\%#=1[\UFFFFFFFF] + catch + call assert_match('E1541:', v:exception) + endtry + try + /\%#=1[\d32-\UFFFFFFFF] + catch + call assert_match('E945:', v:exception) + endtry +endfunc + func Test_one_error_msg() " This was also giving an internal error call assert_fails('call search(" \\((\\v[[=P=]]){185}+ ")', 'E871:') |