aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Justin M. Keyes <justinkz@gmail.com> 2014-08-15 08:31:13 -0400
committer: Justin M. Keyes <justinkz@gmail.com> 2014-08-15 08:31:13 -0400
commit: 6675a71fe4adb0ea21c842c6bba5e950d05a5f2c (patch)
tree: c146ed8ec2af0a06cc608b064c1e557ff432a5ea
parent: 6d4530979745aae216909f066c930893bbfbae81 (diff)
parent: 01d6898638851d01f6cb5812f1d6f64cda0ddc83 (diff)
download: rneovim-6675a71fe4adb0ea21c842c6bba5e950d05a5f2c.tar.gz
rneovim-6675a71fe4adb0ea21c842c6bba5e950d05a5f2c.tar.bz2
rneovim-6675a71fe4adb0ea21c842c6bba5e950d05a5f2c.zip
Merge pull request #972 from munshkr/p7.4.293
vim-patch:7.4.293, vim-patch:7.4.294
-rw-r--r-- src/nvim/regexp.c 20
-rw-r--r-- src/nvim/regexp_nfa.c 41
-rw-r--r-- src/nvim/testdir/test95.in 14
-rw-r--r-- src/nvim/testdir/test95.ok 12
-rw-r--r-- src/nvim/version.c 4
5 files changed, 75 insertions, 16 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index ba7e4eb2d3..193c68860d 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -258,6 +258,7 @@
#define RE_MARK 207 /* mark cmp Match mark position */
#define RE_VISUAL 208 /* Match Visual area */
+#define RE_COMPOSING 209 // any composing characters
/*
* Magic characters have a special meaning, they don't match literally.
@@ -2024,6 +2025,10 @@ static char_u *regatom(int *flagp)
ret = regnode(RE_VISUAL);
break;
+ case 'C':
+ ret = regnode(RE_COMPOSING);
+ break;
+
/* \%[abc]: Emit as a list of branches, all ending at the last
* branch which matches nothing. */
case '[':
@@ -4099,10 +4104,12 @@ regmatch (
status = RA_NOMATCH;
}
}
- // Check for following composing character.
+ // Check for following composing character, unless %C
+ // follows (skips over all composing chars).
if (status != RA_NOMATCH && enc_utf8
&& UTF_COMPOSINGLIKE(reginput, reginput + len)
- && !ireg_icombine) {
+ && !ireg_icombine
+ && OP(next) != RE_COMPOSING) {
// raaron: This code makes a composing character get
// ignored, which is the correct behavior (sometimes)
// for voweled Hebrew texts.
@@ -4167,6 +4174,15 @@ regmatch (
status = RA_NOMATCH;
break;
+ case RE_COMPOSING:
+ if (enc_utf8) {
+ // Skip composing characters.
+ while (utf_iscomposing(utf_ptr2char(reginput))) {
+ mb_cptr_adv(reginput);
+ }
+ }
+ break;
+
case NOTHING:
break;
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 21581d3823..2659eac762 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -85,6 +85,7 @@ enum {
NFA_COMPOSING, /* Next nodes in NFA are part of the
composing multibyte char */
NFA_END_COMPOSING, /* End of a composing char in the NFA */
+ NFA_ANY_COMPOSING, // \%C: Any composing characters.
NFA_OPT_CHARS, /* \%[abc] */
/* The following are used only in the postfix form, not in the NFA */
@@ -1350,6 +1351,10 @@ static int nfa_regatom(void)
EMIT(NFA_VISUAL);
break;
+ case 'C':
+ EMIT(NFA_ANY_COMPOSING);
+ break;
+
case '[':
{
int n;
@@ -2259,6 +2264,7 @@ static void nfa_set_code(int c)
case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break;
case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break;
case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break;
+ case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING "); break;
case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
@@ -2716,6 +2722,7 @@ static int nfa_max_width(nfa_state_T *startstate, int depth)
case NFA_NLOWER_IC:
case NFA_UPPER_IC:
case NFA_NUPPER_IC:
+ case NFA_ANY_COMPOSING:
/* possibly non-ascii */
if (has_mbyte)
len += 3;
@@ -3714,6 +3721,7 @@ static int match_follows(nfa_state_T *startstate, int depth)
continue;
case NFA_ANY:
+ case NFA_ANY_COMPOSING:
case NFA_IDENT:
case NFA_SIDENT:
case NFA_KWORD:
@@ -3943,7 +3951,7 @@ skip_add:
#endif
switch (state->c) {
case NFA_MATCH:
- nfa_match = TRUE;
+ //nfa_match = TRUE;
break;
case NFA_SPLIT:
@@ -4573,6 +4581,7 @@ static int failure_chance(nfa_state_T *state, int depth)
case NFA_MATCH:
case NFA_MCLOSE:
+ case NFA_ANY_COMPOSING:
/* empty match works always */
return 0;
@@ -4951,6 +4960,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
switch (t->state->c) {
case NFA_MATCH:
{
+ // If the match ends before a composing character and
+ // ireg_icombine is not set, that is not really a match.
+ if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) {
+ break;
+ }
nfa_match = TRUE;
copy_sub(&submatch->norm, &t->subs.norm);
if (nfa_has_zsubexpr)
@@ -5430,6 +5444,18 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
}
break;
+ case NFA_ANY_COMPOSING:
+ // On a composing character skip over it. Otherwise do
+ // nothing. Always matches.
+ if (enc_utf8 && utf_iscomposing(curc)) {
+ add_off = clen;
+ } else {
+ add_here = TRUE;
+ add_off = 0;
+ }
+ add_state = t->state->out;
+ break;
+
/*
* Character classes like \a for alpha, \d for digit etc.
*/
@@ -5769,12 +5795,13 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
if (!result && ireg_ic)
result = vim_tolower(c) == vim_tolower(curc);
- /* If there is a composing character which is not being
- * ignored there can be no match. Match with composing
- * character uses NFA_COMPOSING above. */
- if (result && enc_utf8 && !ireg_icombine
- && clen != utf_char2len(curc))
- result = FALSE;
+
+ // If ireg_icombine is not set only skip over the character
+ // itself. When it is set skip over composing characters.
+ if (result && enc_utf8 && !ireg_icombine) {
+ clen = utf_char2len(curc);
+ }
+
ADD_STATE_IF_MATCH(t->state);
break;
}
diff --git a/src/nvim/testdir/test95.in b/src/nvim/testdir/test95.in
index 568563f88d..b2b9de772e 100644
--- a/src/nvim/testdir/test95.in
+++ b/src/nvim/testdir/test95.in
@@ -50,7 +50,11 @@ STARTTEST
:call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [2, "a", "ca\u0300t"])
+:call add(tl, [2, "ca", "ca\u0300t"])
:call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
+:call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
+:call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
+:call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])
:"""" Test \Z
@@ -90,15 +94,15 @@ STARTTEST
: try
: let l = matchlist(text, pat)
: catch
-: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
+: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
: endtry
:" check the match itself
: if len(l) == 0 && len(t) > matchidx
-: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
+: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
: elseif len(l) > 0 && len(t) == matchidx
-: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected no match'
+: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected no match'
: elseif len(t) > matchidx && l[0] != t[matchidx]
-: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"'
+: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"'
: else
: $put ='OK ' . engine . ' - ' . pat
: endif
@@ -111,7 +115,7 @@ STARTTEST
: let e = t[matchidx + i]
: endif
: if l[i] != e
-: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"'
+: $put ='ERROR ' . engine . ': pat: \"' . pat . '\", text: \"' . text . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"'
: endif
: endfor
: unlet i
diff --git a/src/nvim/testdir/test95.ok b/src/nvim/testdir/test95.ok
index e2baee8d29..6762994c12 100644
--- a/src/nvim/testdir/test95.ok
+++ b/src/nvim/testdir/test95.ok
@@ -70,9 +70,21 @@ OK 2 - .ֹֻ
OK 0 - a
OK 1 - a
OK 2 - a
+OK 0 - ca
+OK 1 - ca
+OK 2 - ca
OK 0 - à
OK 1 - à
OK 2 - à
+OK 0 - a\%C
+OK 1 - a\%C
+OK 2 - a\%C
+OK 0 - ca\%C
+OK 1 - ca\%C
+OK 2 - ca\%C
+OK 0 - ca\%Ct
+OK 1 - ca\%Ct
+OK 2 - ca\%Ct
OK 0 - ú\Z
OK 1 - ú\Z
OK 2 - ú\Z
diff --git a/src/nvim/version.c b/src/nvim/version.c
index 5416bc7473..70cfbed50e 100644
--- a/src/nvim/version.c
+++ b/src/nvim/version.c
@@ -255,8 +255,8 @@ static int included_patches[] = {
//297,
//296,
295,
- //294,
- //293,
+ 294,
+ 293,
292,
291,
290,