diff options
author | Justin M. Keyes <justinkz@gmail.com> | 2014-08-15 08:31:13 -0400 |
---|---|---|
committer | Justin M. Keyes <justinkz@gmail.com> | 2014-08-15 08:31:13 -0400 |
commit | 6675a71fe4adb0ea21c842c6bba5e950d05a5f2c (patch) | |
tree | c146ed8ec2af0a06cc608b064c1e557ff432a5ea | |
parent | 6d4530979745aae216909f066c930893bbfbae81 (diff) | |
parent | 01d6898638851d01f6cb5812f1d6f64cda0ddc83 (diff) | |
download | rneovim-6675a71fe4adb0ea21c842c6bba5e950d05a5f2c.tar.gz rneovim-6675a71fe4adb0ea21c842c6bba5e950d05a5f2c.tar.bz2 rneovim-6675a71fe4adb0ea21c842c6bba5e950d05a5f2c.zip |
Merge pull request #972 from munshkr/p7.4.293
vim-patch:7.4.293, vim-patch:7.4.294
-rw-r--r-- | src/nvim/regexp.c | 20 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 41 | ||||
-rw-r--r-- | src/nvim/testdir/test95.in | 14 | ||||
-rw-r--r-- | src/nvim/testdir/test95.ok | 12 | ||||
-rw-r--r-- | src/nvim/version.c | 4 |
5 files changed, 75 insertions, 16 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index ba7e4eb2d3..193c68860d 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -258,6 +258,7 @@ #define RE_MARK 207 /* mark cmp Match mark position */ #define RE_VISUAL 208 /* Match Visual area */ +#define RE_COMPOSING 209 // any composing characters /* * Magic characters have a special meaning, they don't match literally. @@ -2024,6 +2025,10 @@ static char_u *regatom(int *flagp) ret = regnode(RE_VISUAL); break; + case 'C': + ret = regnode(RE_COMPOSING); + break; + /* \%[abc]: Emit as a list of branches, all ending at the last * branch which matches nothing. */ case '[': @@ -4099,10 +4104,12 @@ regmatch ( status = RA_NOMATCH; } } - // Check for following composing character. + // Check for following composing character, unless %C + // follows (skips over all composing chars). if (status != RA_NOMATCH && enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len) - && !ireg_icombine) { + && !ireg_icombine + && OP(next) != RE_COMPOSING) { // raaron: This code makes a composing character get // ignored, which is the correct behavior (sometimes) // for voweled Hebrew texts. @@ -4167,6 +4174,15 @@ regmatch ( status = RA_NOMATCH; break; + case RE_COMPOSING: + if (enc_utf8) { + // Skip composing characters. + while (utf_iscomposing(utf_ptr2char(reginput))) { + mb_cptr_adv(reginput); + } + } + break; + case NOTHING: break; diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 21581d3823..2659eac762 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -85,6 +85,7 @@ enum { NFA_COMPOSING, /* Next nodes in NFA are part of the composing multibyte char */ NFA_END_COMPOSING, /* End of a composing char in the NFA */ + NFA_ANY_COMPOSING, // \%C: Any composing characters. NFA_OPT_CHARS, /* \%[abc] */ /* The following are used only in the postfix form, not in the NFA */ @@ -1350,6 +1351,10 @@ static int nfa_regatom(void) EMIT(NFA_VISUAL); break; + case 'C': + EMIT(NFA_ANY_COMPOSING); + break; + case '[': { int n; @@ -2259,6 +2264,7 @@ static void nfa_set_code(int c) case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break; case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break; case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break; + case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING "); break; case NFA_STAR: STRCPY(code, "NFA_STAR "); break; case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; @@ -2716,6 +2722,7 @@ static int nfa_max_width(nfa_state_T *startstate, int depth) case NFA_NLOWER_IC: case NFA_UPPER_IC: case NFA_NUPPER_IC: + case NFA_ANY_COMPOSING: /* possibly non-ascii */ if (has_mbyte) len += 3; @@ -3714,6 +3721,7 @@ static int match_follows(nfa_state_T *startstate, int depth) continue; case NFA_ANY: + case NFA_ANY_COMPOSING: case NFA_IDENT: case NFA_SIDENT: case NFA_KWORD: @@ -3943,7 +3951,7 @@ skip_add: #endif switch (state->c) { case NFA_MATCH: - nfa_match = TRUE; + //nfa_match = TRUE; break; case NFA_SPLIT: @@ -4573,6 +4581,7 @@ static int failure_chance(nfa_state_T *state, int depth) case NFA_MATCH: case NFA_MCLOSE: + case NFA_ANY_COMPOSING: /* empty match works always */ return 0; @@ -4951,6 +4960,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm switch (t->state->c) { case NFA_MATCH: { + // If the match ends before a composing characters and + // ireg_icombine is not set, that is not really a match. + if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) { + break; + } nfa_match = TRUE; copy_sub(&submatch->norm, &t->subs.norm); if (nfa_has_zsubexpr) @@ -5430,6 +5444,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm } break; + case NFA_ANY_COMPOSING: + // On a composing character skip over it. Otherwise do + // nothing. Always matches. + if (enc_utf8 && utf_iscomposing(curc)) { + add_off = clen; + } else { + add_here = TRUE; + add_off = 0; + } + add_state = t->state->out; + break; + /* * Character classes like \a for alpha, \d for digit etc. */ @@ -5769,12 +5795,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm if (!result && ireg_ic) result = vim_tolower(c) == vim_tolower(curc); - /* If there is a composing character which is not being - * ignored there can be no match. Match with composing - * character uses NFA_COMPOSING above. */ - if (result && enc_utf8 && !ireg_icombine - && clen != utf_char2len(curc)) - result = FALSE; + + // If ireg_icombine is not set only skip over the character + // itself. When it is set skip over composing characters. + if (result && enc_utf8 && !ireg_icombine) { + clen = utf_char2len(curc); + } + ADD_STATE_IF_MATCH(t->state); break; } diff --git a/src/nvim/testdir/test95.in b/src/nvim/testdir/test95.in index 568563f88d..b2b9de772e 100644 --- a/src/nvim/testdir/test95.in +++ b/src/nvim/testdir/test95.in @@ -50,7 +50,11 @@ STARTTEST :call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"]) :call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"]) :call add(tl, [2, "a", "ca\u0300t"]) +:call add(tl, [2, "ca", "ca\u0300t"]) :call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"]) +:call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"]) +:call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"]) +:call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"]) :"""" Test \Z @@ -90,15 +94,15 @@ STARTTEST : try : let l = matchlist(text, pat) : catch -: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"' +: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"' : endtry :" check the match itself : if len(l) == 0 && len(t) > matchidx -: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' +: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"' : elseif len(l) > 0 && len(t) == matchidx -: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected no match' +: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected no match' : elseif len(t) > matchidx && l[0] != t[matchidx] -: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"' +: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"' : else : $put ='OK ' . engine . ' - ' . pat : endif @@ -111,7 +115,7 @@ STARTTEST : let e = t[matchidx + i] : endif : if l[i] != e -: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"' +: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"' : endif : endfor : unlet i diff --git a/src/nvim/testdir/test95.ok b/src/nvim/testdir/test95.ok index e2baee8d29..6762994c12 100644 --- a/src/nvim/testdir/test95.ok +++ b/src/nvim/testdir/test95.ok @@ -70,9 +70,21 @@ OK 2 - .ֹֻ OK 0 - a OK 1 - a OK 2 - a +OK 0 - ca +OK 1 - ca +OK 2 - ca OK 0 - à OK 1 - à OK 2 - à +OK 0 - a\%C +OK 1 - a\%C +OK 2 - a\%C +OK 0 - ca\%C +OK 1 - ca\%C +OK 2 - ca\%C +OK 0 - ca\%Ct +OK 1 - ca\%Ct +OK 2 - ca\%Ct OK 0 - ú\Z OK 1 - ú\Z OK 2 - ú\Z diff --git a/src/nvim/version.c b/src/nvim/version.c index 5416bc7473..70cfbed50e 100644 --- a/src/nvim/version.c +++ b/src/nvim/version.c @@ -255,8 +255,8 @@ static int included_patches[] = { //297, //296, 295, - //294, - //293, + 294, + 293, 292, 291, 290, |