diff options
author | Björn Linse <bjorn.linse@gmail.com> | 2017-04-09 09:29:00 +0200 |
---|---|---|
committer | Björn Linse <bjorn.linse@gmail.com> | 2017-04-10 12:02:26 +0200 |
commit | 4c857dae1169f3b8c7b6a9740f50acfd3c16858d (patch) | |
tree | 2368cea5211cd31269b2cc3c5b7e31147e9f268e | |
parent | a3a06d02489a7a43cdd7909c92c79ef7971c8663 (diff) | |
download | rneovim-4c857dae1169f3b8c7b6a9740f50acfd3c16858d.tar.gz rneovim-4c857dae1169f3b8c7b6a9740f50acfd3c16858d.tar.bz2 rneovim-4c857dae1169f3b8c7b6a9740f50acfd3c16858d.zip |
vim-patch:8.0.0243
Problem: When making a character lower case with tolower() changes the byte
cound, it is not made lower case.
Solution: Add strlow_save(). (Dominique Pelle, closes vim/vim#1406)
https://github.com/vim/vim/commit/cc5b22b3bfdc0e9e835cf7871166badda31447bd
Join almost identical strup_save and strlow_save functions to one
Function.
-rw-r--r-- | src/nvim/eval.c | 26 | ||||
-rw-r--r-- | src/nvim/strings.c | 51 | ||||
-rw-r--r-- | src/nvim/testdir/test_functions.vim | 144 |
3 files changed, 168 insertions, 53 deletions
diff --git a/src/nvim/eval.c b/src/nvim/eval.c index c1ee33b06a..ed07de51c8 100644 --- a/src/nvim/eval.c +++ b/src/nvim/eval.c @@ -16791,30 +16791,8 @@ void timer_teardown(void) */ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr) { - char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0])); rettv->v_type = VAR_STRING; - rettv->vval.v_string = p; - - while (*p != NUL) { - int l; - - if (enc_utf8) { - int c, lc; - - c = utf_ptr2char(p); - lc = mb_tolower(c); - l = utf_ptr2len(p); - /* TODO: reallocate string when byte count changes. */ - if (utf_char2len(lc) == l) - utf_char2bytes(lc, p); - p += l; - } else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - p += l; /* skip multi-byte character */ - else { - *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */ - ++p; - } - } + rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), false); } /* @@ -16823,7 +16801,7 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr) static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr) { rettv->v_type = VAR_STRING; - rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0])); + rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), true); } /* diff --git a/src/nvim/strings.c b/src/nvim/strings.c index e7c0fb8a7d..87e066d80a 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -291,14 +291,15 @@ void vim_strup(char_u *p) } } -/// Make given string all upper-case +/// Make given string all upper-case or all lower-case /// -/// Handels multi-byte characters as good as possible. +/// Handles multi-byte characters as good as possible. /// /// @param[in] orig Input string. +/// @param[in] upper If true make uppercase, otherwise lowercase /// /// @return [allocated] upper-cased string. -char *strup_save(const char *const orig) +char *strcase_save(const char *const orig, bool upper) FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL { char *res = xstrdup(orig); @@ -307,33 +308,25 @@ char *strup_save(const char *const orig) while (*p != NUL) { int l; - if (enc_utf8) { - int c = utf_ptr2char((const char_u *)p); - int uc = mb_toupper(c); - - // Reallocate string when byte count changes. This is rare, - // thus it's OK to do another malloc()/free(). - l = utf_ptr2len((const char_u *)p); - int newl = utf_char2len(uc); - if (newl != l) { - // TODO(philix): use xrealloc() in strup_save() - char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l)); - memcpy(s, res, (size_t)(p - res)); - STRCPY(s + (p - res) + newl, p + l); - p = s + (p - res); - xfree(res); - res = s; - } - - utf_char2bytes(uc, (char_u *)p); - p += newl; - } else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) { - p += l; // Skip multi-byte character. - } else { - // note that toupper() can be a macro - *p = (char)(uint8_t)TOUPPER_LOC(*p); - p++; + int c = utf_ptr2char((const char_u *)p); + int uc = upper ? mb_toupper(c) : mb_tolower(c); + + // Reallocate string when byte count changes. This is rare, + // thus it's OK to do another malloc()/free(). + l = utf_ptr2len((const char_u *)p); + int newl = utf_char2len(uc); + if (newl != l) { + // TODO(philix): use xrealloc() in strup_save() + char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l)); + memcpy(s, res, (size_t)(p - res)); + STRCPY(s + (p - res) + newl, p + l); + p = s + (p - res); + xfree(res); + res = s; } + + utf_char2bytes(uc, (char_u *)p); + p += newl; } return res; diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim index 81cb6314ce..3c258299c1 100644 --- a/src/nvim/testdir/test_functions.vim +++ b/src/nvim/testdir/test_functions.vim @@ -29,3 +29,147 @@ func Test_setbufvar_options() bwipe! endfunc +func Test_tolower() + call assert_equal("", tolower("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', + \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few uppercase diacritics. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("bḃḇ", tolower("BḂḆ")) + call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ")) + call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("fḟ ", tolower("FḞ ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ")) + call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ")) + call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("jĵ", tolower("JĴ")) + call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ")) + call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ")) + call assert_equal("mḿṁ", tolower("MḾṀ")) + call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("pṕṗ", tolower("PṔṖ")) + call assert_equal("q", tolower("Q")) + call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ")) + call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ")) + call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("vṽ", tolower("VṼ")) + call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ")) + call assert_equal("xẋẍ", tolower("XẊẌ")) + call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ")) + call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ")) + + " Test with a few lowercase diacritics, which should remain unchanged. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả")) + call assert_equal("bḃḇ", tolower("bḃḇ")) + call assert_equal("cçćĉċč", tolower("cçćĉċč")) + call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ")) + call assert_equal("fḟ", tolower("fḟ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ")) + call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ")) + call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ")) + call assert_equal("jĵǰ", tolower("jĵǰ")) + call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ")) + call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ")) + call assert_equal("mḿṁ ", tolower("mḿṁ ")) + call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("pṕṗ", tolower("pṕṗ")) + call assert_equal("q", tolower("q")) + call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ")) + call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ")) + call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ")) + call assert_equal("vṽ", tolower("vṽ")) + call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ")) + call assert_equal("ẋẍ", tolower("ẋẍ")) + call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ")) + call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ")) + + " According to https://twitter.com/jifa/status/625776454479970304 + " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase + " in length (2 to 3 bytes) when lowercased. So let's test them. + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + +func Test_toupper() + call assert_equal("", toupper("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~', + \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few lowercase diacritics. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả")) + call assert_equal("BḂḆ", toupper("bḃḇ")) + call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč")) + call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ")) + call assert_equal("FḞ", toupper("fḟ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ")) + call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ")) + call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ")) + call assert_equal("JĴǰ", toupper("jĵǰ")) + call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ")) + call assert_equal("MḾṀ ", toupper("mḿṁ ")) + call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("PṔṖ", toupper("pṕṗ")) + call assert_equal("Q", toupper("q")) + call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ")) + call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ")) + call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ")) + call assert_equal("VṼ", toupper("vṽ")) + call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ")) + call assert_equal("ẊẌ", toupper("ẋẍ")) + call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ")) + + " Test that uppercase diacritics, which should remain unchanged. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("BḂḆ", toupper("BḂḆ")) + call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ")) + call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("FḞ ", toupper("FḞ ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ")) + call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ")) + call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("JĴ", toupper("JĴ")) + call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ")) + call assert_equal("MḾṀ", toupper("MḾṀ")) + call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("PṔṖ", toupper("PṔṖ")) + call assert_equal("Q", toupper("Q")) + call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ")) + call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ")) + call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("VṼ", toupper("VṼ")) + call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ")) + call assert_equal("XẊẌ", toupper("XẊẌ")) + call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ")) + + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + + |