diff options
-rw-r--r-- | runtime/doc/pattern.txt | 3 | ||||
-rw-r--r-- | src/nvim/regexp.c | 34 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 32 | ||||
-rw-r--r-- | src/nvim/testdir/test_regexp_utf8.vim | 55 |
4 files changed, 123 insertions, 1 deletion
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index adfab07758..7129c6cd58 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1111,6 +1111,9 @@ x A single character, with no special meaning, matches itself *[:tab:]* [:tab:] the <Tab> character *[:escape:]* [:escape:] the <Esc> character *[:backspace:]* [:backspace:] the <BS> character +*[:ident:]* [:ident:] identifier character (same as "\i") +*[:keyword:]* [:keyword:] keyword character (same as "\k") +*[:fname:]* [:fname:] file name character (same as "\f") The brackets in character class expressions are additional to the brackets delimiting a collection. For example, the following is a plausible pattern for a Unix filename: "[-./[:alnum:]_~]\+" That is, diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index b4e2c7d766..9705896e9b 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -602,6 +602,12 @@ static int get_char_class(char_u **pp) #define CLASS_BACKSPACE 14 "escape:]", #define CLASS_ESCAPE 15 + "ident:]", +#define CLASS_IDENT 16 + "keyword:]", +#define CLASS_KEYWORD 17 + "fname:]", +#define CLASS_FNAME 18 }; #define CLASS_NONE 99 int i; @@ -2417,6 +2423,27 @@ collection: case CLASS_ESCAPE: regc(ESC); break; + case CLASS_IDENT: + for (cu = 1; cu <= 255; cu++) { + if (vim_isIDc(cu)) { + regmbc(cu); + } + } + break; + case CLASS_KEYWORD: + for (cu = 1; cu <= 255; cu++) { + if (reg_iswordc(cu)) { + regmbc(cu); + } + } + break; + case CLASS_FNAME: + for (cu = 1; cu <= 255; cu++) { + if (vim_isfilec(cu)) { + regmbc(cu); + } + } + break; } } else { // produce a multibyte character, including any @@ -3276,6 +3303,13 @@ void free_regexp_stuff(void) #endif +// Return true if character 'c' is included in 'iskeyword' option for +// "reg_buf" buffer. +static bool reg_iswordc(int c) +{ + return vim_iswordc_buf(c, rex.reg_buf); +} + /* * Get pointer to the line "lnum", which is relative to "reg_firstlnum". 
*/ diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index a744071a6a..506c4e87db 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -230,7 +230,10 @@ enum { NFA_CLASS_TAB, NFA_CLASS_RETURN, NFA_CLASS_BACKSPACE, - NFA_CLASS_ESCAPE + NFA_CLASS_ESCAPE, + NFA_CLASS_IDENT, + NFA_CLASS_KEYWORD, + NFA_CLASS_FNAME, }; /* Keep in sync with classchars. */ @@ -1643,6 +1646,15 @@ collection: case CLASS_ESCAPE: EMIT(NFA_CLASS_ESCAPE); break; + case CLASS_IDENT: + EMIT(NFA_CLASS_IDENT); + break; + case CLASS_KEYWORD: + EMIT(NFA_CLASS_KEYWORD); + break; + case CLASS_FNAME: + EMIT(NFA_CLASS_FNAME); + break; } EMIT(NFA_CONCAT); continue; @@ -2407,6 +2419,9 @@ static void nfa_set_code(int c) case NFA_CLASS_RETURN: STRCPY(code, "NFA_CLASS_RETURN"); break; case NFA_CLASS_BACKSPACE: STRCPY(code, "NFA_CLASS_BACKSPACE"); break; case NFA_CLASS_ESCAPE: STRCPY(code, "NFA_CLASS_ESCAPE"); break; + case NFA_CLASS_IDENT: STRCPY(code, "NFA_CLASS_IDENT"); break; + case NFA_CLASS_KEYWORD: STRCPY(code, "NFA_CLASS_KEYWORD"); break; + case NFA_CLASS_FNAME: STRCPY(code, "NFA_CLASS_FNAME"); break; case NFA_ANY: STRCPY(code, "NFA_ANY"); break; case NFA_IDENT: STRCPY(code, "NFA_IDENT"); break; @@ -4474,6 +4489,21 @@ static int check_char_class(int class, int c) return OK; } break; + case NFA_CLASS_IDENT: + if (vim_isIDc(c)) { + return OK; + } + break; + case NFA_CLASS_KEYWORD: + if (reg_iswordc(c)) { + return OK; + } + break; + case NFA_CLASS_FNAME: + if (vim_isfilec(c)) { + return OK; + } + break; default: // should not be here :P diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim index 209de2c35e..4466ad436a 100644 --- a/src/nvim/testdir/test_regexp_utf8.vim +++ b/src/nvim/testdir/test_regexp_utf8.vim @@ -54,6 +54,12 @@ func s:classes_test() let tabchar = '' let upperchars = '' let xdigitchars = '' + let identchars = '' + let identchars1 = '' + let kwordchars = '' + let kwordchars1 = '' + let fnamechars = '' + let fnamechars1 = '' let i = 
1 while i <= 255 let c = nr2char(i) @@ -105,6 +111,24 @@ func s:classes_test() if c =~ '[[:xdigit:]]' let xdigitchars .= c endif + if c =~ '[[:ident:]]' + let identchars .= c + endif + if c =~ '\i' + let identchars1 .= c + endif + if c =~ '[[:keyword:]]' + let kwordchars .= c + endif + if c =~ '\k' + let kwordchars1 .= c + endif + if c =~ '[[:fname:]]' + let fnamechars .= c + endif + if c =~ '\f' + let fnamechars1 .= c + endif let i += 1 endwhile @@ -124,6 +148,37 @@ func s:classes_test() call assert_equal("\t\n\x0b\f\r ", spacechars) call assert_equal("\t", tabchar) call assert_equal('0123456789ABCDEFabcdef', xdigitchars) + + if has('win32') + let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ' + let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('ebcdic') + let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + let kwordchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + else + let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + endif + + if has('win32') + let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('amiga') + let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('vms') + let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + elseif has('ebcdic') + let fnamechars_ok = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + else + let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ' + endif + + call assert_equal(identchars_ok, identchars) + call 
assert_equal(kwordchars_ok, kwordchars) + call assert_equal(fnamechars_ok, fnamechars) + + call assert_equal(identchars1, identchars) + call assert_equal(kwordchars1, kwordchars) + call assert_equal(fnamechars1, fnamechars) endfunc func Test_classes_re1() |