diff options
Diffstat (limited to 'src/nvim/regexp.c')
-rw-r--r-- | src/nvim/regexp.c | 233 |
1 files changed, 94 insertions, 139 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index d76da62c6d..15a701f022 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -772,13 +772,9 @@ static int get_equi_class(char_u **pp) char_u *p = *pp; if (p[1] == '=') { - if (has_mbyte) - l = (*mb_ptr2len)(p + 2); + l = (*mb_ptr2len)(p + 2); if (p[l + 2] == '=' && p[l + 3] == ']') { - if (has_mbyte) - c = mb_ptr2char(p + 2); - else - c = p[2]; + c = utf_ptr2char(p + 2); *pp += l + 4; return c; } @@ -1107,13 +1103,9 @@ static int get_coll_element(char_u **pp) char_u *p = *pp; if (p[0] != NUL && p[1] == '.') { - if (has_mbyte) - l = (*mb_ptr2len)(p + 2); + l = utfc_ptr2len(p + 2); if (p[l + 2] == '.' && p[l + 3] == ']') { - if (has_mbyte) - c = mb_ptr2char(p + 2); - else - c = p[2]; + c = utf_ptr2char(p + 2); *pp += l + 4; return c; } @@ -1299,10 +1291,7 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) } if (OP(scan) == EXACTLY) { - if (has_mbyte) - r->regstart = (*mb_ptr2char)(OPERAND(scan)); - else - r->regstart = *OPERAND(scan); + r->regstart = utf_ptr2char(OPERAND(scan)); } else if (OP(scan) == BOW || OP(scan) == EOW || OP(scan) == NOTHING @@ -1310,10 +1299,7 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags) || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) { char_u *regnext_scan = regnext(scan); if (OP(regnext_scan) == EXACTLY) { - if (has_mbyte) - r->regstart = (*mb_ptr2char)(OPERAND(regnext_scan)); - else - r->regstart = *OPERAND(regnext_scan); + r->regstart = utf_ptr2char(OPERAND(regnext_scan)); } } @@ -1678,9 +1664,8 @@ static char_u *regpiece(int *flagp) case Magic('@'): { int lop = END; - int nr; + int64_t nr = getdecchrs(); - nr = getdecchrs(); switch (no_Magic(getchr())) { case '=': lop = MATCH; break; /* \@= */ case '!': lop = NOMATCH; break; /* \@! */ @@ -1820,8 +1805,8 @@ static char_u *regatom(int *flagp) if (c == '[') goto collection; - /* "\_x" is character class plus newline */ - /*FALLTHROUGH*/ + // "\_x" is character class plus newline + FALLTHROUGH; /* * Character classes. @@ -2101,7 +2086,7 @@ static char_u *regatom(int *flagp) case 'u': /* %uabcd hex 4 */ case 'U': /* %U1234abcd hex 8 */ { - int i; + int64_t i; switch (c) { case 'd': i = getdecchrs(); break; @@ -2412,20 +2397,16 @@ collection: break; } } else { - if (has_mbyte) { - int len; - - /* produce a multibyte character, including any - * following composing characters */ - startc = mb_ptr2char(regparse); - len = (*mb_ptr2len)(regparse); - if (enc_utf8 && utf_char2len(startc) != len) - startc = -1; /* composing chars */ - while (--len >= 0) - regc(*regparse++); - } else { - startc = *regparse++; - regc(startc); + // produce a multibyte character, including any + // following composing characters. + startc = utf_ptr2char(regparse); + int len = utfc_ptr2len(regparse); + if (utf_char2len(startc) != len) { + // composing chars + startc = -1; + } + while (--len >= 0) { + regc(*regparse++); } } } @@ -2439,7 +2420,7 @@ collection: } else if (reg_strict) EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); } - /* FALLTHROUGH */ + FALLTHROUGH; default: { @@ -2557,12 +2538,11 @@ static void regc(int b) */ static void regmbc(int c) { - if (!has_mbyte && c > 0xff) - return; - if (regcode == JUST_CALC_SIZE) - regsize += (*mb_char2len)(c); - else - regcode += (*mb_char2bytes)(c, regcode); + if (regcode == JUST_CALC_SIZE) { + regsize += utf_char2len(c); + } else { + regcode += utf_char2bytes(c, regcode); + } } /* @@ -2906,17 +2886,13 @@ static int peekchr(void) * Next character can never be (made) magic? * Then backslashing it won't do anything. */ - if (has_mbyte) - curchr = (*mb_ptr2char)(regparse + 1); - else - curchr = c; + curchr = utf_ptr2char(regparse + 1); } break; } default: - if (has_mbyte) - curchr = (*mb_ptr2char)(regparse); + curchr = utf_ptr2char(regparse); } return curchr; @@ -2998,9 +2974,9 @@ static void ungetchr(void) * The parameter controls the maximum number of input characters. This will be * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. */ -static int gethexchrs(int maxinputlen) +static int64_t gethexchrs(int maxinputlen) { - int nr = 0; + int64_t nr = 0; int c; int i; @@ -3022,9 +2998,9 @@ static int gethexchrs(int maxinputlen) * Get and return the value of the decimal string immediately after the * current position. Return -1 for invalid. Consumes all digits. */ -static int getdecchrs(void) +static int64_t getdecchrs(void) { - int nr = 0; + int64_t nr = 0; int c; int i; @@ -3051,9 +3027,9 @@ static int getdecchrs(void) * blahblah\%o210asdf * before-^ ^-after */ -static int getoctchrs(void) +static int64_t getoctchrs(void) { - int nr = 0; + int64_t nr = 0; int c; int i; @@ -3077,7 +3053,7 @@ static int getoctchrs(void) */ static int coll_get_char(void) { - int nr = -1; + int64_t nr = -1; switch (*regparse++) { case 'd': nr = getdecchrs(); break; @@ -3466,12 +3442,7 @@ static long bt_regexec_both(char_u *line, /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL) { - int c; - - if (has_mbyte) - c = (*mb_ptr2char)(prog->regmust); - else - c = *prog->regmust; + int c = utf_ptr2char(prog->regmust); s = line + col; // This is used very often, esp. for ":global". Use two versions of @@ -3502,16 +3473,11 @@ static long bt_regexec_both(char_u *line, /* Simplest case: Anchored match need be tried only once. */ if (prog->reganch) { - int c; - - if (has_mbyte) - c = (*mb_ptr2char)(regline + col); - else - c = regline[col]; + int c = utf_ptr2char(regline + col); if (prog->regstart == NUL || prog->regstart == c || (rex.reg_ic - && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c))) + && (utf_fold(prog->regstart) == utf_fold(c) || (c < 255 && prog->regstart < 255 && mb_tolower(prog->regstart) == mb_tolower(c))))) { retval = regtry(prog, col); @@ -3686,8 +3652,7 @@ static long regtry(bt_regprog_T *prog, colnr_T col) static int reg_prev_class(void) { if (reginput > regline) { - return mb_get_class_tab(reginput - 1 - (*mb_head_off)(regline, - reginput - 1), + return mb_get_class_tab(reginput - 1 - utf_head_off(regline, reginput - 1), rex.reg_buf->b_chartab); } return -1; @@ -3858,12 +3823,10 @@ regmatch ( } else if (rex.reg_line_lbr && WITH_NL(op) && *reginput == '\n') { ADVANCE_REGINPUT(); } else { - if (WITH_NL(op)) + if (WITH_NL(op)) { op -= ADD_NL; - if (has_mbyte) - c = (*mb_ptr2char)(reginput); - else - c = *reginput; + } + c = utf_ptr2char(reginput); switch (op) { case BOL: if (reginput != regline) @@ -4956,13 +4919,13 @@ regmatch ( (colnr_T)STRLEN(regline); } } else { - if (has_mbyte) { - rp->rs_un.regsave.rs_u.pos.col -= - (*mb_head_off)(regline, regline - + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; - } else { - rp->rs_un.regsave.rs_u.pos.col--; - } + const char_u *const line = + reg_getline(behind_pos.rs_u.pos.lnum); + + rp->rs_un.regsave.rs_u.pos.col -= + utf_head_off(line, + line + rp->rs_un.regsave.rs_u.pos.col - 1) + + 1; } } else { if (rp->rs_un.regsave.rs_u.ptr == regline) { @@ -5191,8 +5154,8 @@ regrepeat ( case IDENT: case IDENT + ADD_NL: - testval = TRUE; - /*FALLTHROUGH*/ + testval = 1; + FALLTHROUGH; case SIDENT: case SIDENT + ADD_NL: while (count < maxcount) { @@ -5218,8 +5181,8 @@ regrepeat ( case KWORD: case KWORD + ADD_NL: - testval = TRUE; - /*FALLTHROUGH*/ + testval = 1; + FALLTHROUGH; case SKWORD: case SKWORD + ADD_NL: while (count < maxcount) { @@ -5247,8 +5210,8 @@ regrepeat ( case FNAME: case FNAME + ADD_NL: - testval = TRUE; - /*FALLTHROUGH*/ + testval = 1; + FALLTHROUGH; case SFNAME: case SFNAME + ADD_NL: while (count < maxcount) { @@ -5275,8 +5238,8 @@ regrepeat ( case PRINT: case PRINT + ADD_NL: - testval = TRUE; - /*FALLTHROUGH*/ + testval = 1; + FALLTHROUGH; case SPRINT: case SPRINT + ADD_NL: while (count < maxcount) { @@ -5454,8 +5417,8 @@ do_class: case ANYOF: case ANYOF + ADD_NL: - testval = TRUE; - /*FALLTHROUGH*/ + testval = 1; + FALLTHROUGH; case ANYBUT: case ANYBUT + ADD_NL: @@ -5473,8 +5436,8 @@ do_class: } } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) { scan++; - } else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1) { - if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval) { + } else if ((len = utfc_ptr2len(scan)) > 1) { + if ((cstrchr(opnd, utf_ptr2char(scan)) == NULL) == testval) { break; } scan += len; @@ -6782,40 +6745,36 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } c = *src++; } - } else if (has_mbyte) - c = mb_ptr2char(src - 1); - - /* Write to buffer, if copy is set. */ - if (func_one != (fptr_T)NULL) - /* Turbo C complains without the typecast */ + } else { + c = utf_ptr2char(src - 1); + } + // Write to buffer, if copy is set. + if (func_one != NULL) { func_one = (fptr_T)(func_one(&cc, c)); - else if (func_all != (fptr_T)NULL) - /* Turbo C complains without the typecast */ + } else if (func_all != NULL) { func_all = (fptr_T)(func_all(&cc, c)); - else /* just copy */ + } else { + // just copy cc = c; + } - if (has_mbyte) { - int totlen = mb_ptr2len(src - 1); + int totlen = utfc_ptr2len(src - 1); - if (copy) - mb_char2bytes(cc, dst); - dst += mb_char2len(cc) - 1; - if (enc_utf8) { - int clen = utf_ptr2len(src - 1); + if (copy) { + utf_char2bytes(cc, dst); + } + dst += utf_char2len(cc) - 1; + int clen = utf_ptr2len(src - 1); - /* If the character length is shorter than "totlen", there - * are composing characters; copy them as-is. */ - if (clen < totlen) { - if (copy) - memmove(dst + 1, src - 1 + clen, - (size_t)(totlen - clen)); - dst += totlen - clen; - } + // If the character length is shorter than "totlen", there + // are composing characters; copy them as-is. + if (clen < totlen) { + if (copy) { + memmove(dst + 1, src - 1 + clen, (size_t)(totlen - clen)); } - src += totlen - 1; - } else if (copy) - *dst = cc; + dst += totlen - clen; + } + src += totlen - 1; dst++; } else { if (REG_MULTI) { @@ -6878,10 +6837,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, } dst += 2; } else { - if (has_mbyte) - c = mb_ptr2char(s); - else - c = *s; + c = utf_ptr2char(s); if (func_one != (fptr_T)NULL) /* Turbo C complains without the typecast */ @@ -6895,20 +6851,19 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, if (has_mbyte) { int l; - /* Copy composing characters separately, one - * at a time. */ - if (enc_utf8) - l = utf_ptr2len(s) - 1; - else - l = mb_ptr2len(s) - 1; + // Copy composing characters separately, one + // at a time. + l = utf_ptr2len(s) - 1; s += l; len -= l; - if (copy) - mb_char2bytes(cc, dst); - dst += mb_char2len(cc) - 1; - } else if (copy) + if (copy) { + utf_char2bytes(cc, dst); + } + dst += utf_char2len(cc) - 1; + } else if (copy) { *dst = cc; + } dst++; } |