aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/mbyte.c
diff options
context:
space:
mode:
author Jan Edmund Lazo <jan.lazo@mail.utoronto.ca> 2020-10-23 12:29:05 -0400
committer GitHub <noreply@github.com> 2020-10-23 12:29:05 -0400
commit 2786d96fac2d5cb44eaf7de604adfdffe3dd9895 (patch)
tree 854f215a410fc60151ab58652dcb0c9baaf958e7 /src/nvim/mbyte.c
parent 9280a69a6d1828972cb8a0f6b7f81e0a5e73c886 (diff)
parent 78ec28bca8eb868948ac4ad41b74148874f5a3bb (diff)
download rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.tar.gz
rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.tar.bz2
rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.zip
Merge pull request #13145 from janlazo/vim-8.2.0901
vim-patch:8.2.{901,912}
Diffstat (limited to 'src/nvim/mbyte.c')
-rw-r--r-- src/nvim/mbyte.c 140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 6d188c6cd0..ec4f4cbc21 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1624,6 +1624,146 @@ int utf_head_off(const char_u *base, const char_u *p)
return (int)(p - q);
}
+/// Check whether a space is NOT allowed before/after character "cc".
+///
+/// @param cc  Character (code point) to test.
+///
+/// @return true when "cc" lies inside one of the punctuation ranges listed
+///         below, false otherwise.
+bool utf_eat_space(int cc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  // Inclusive [lo, hi] ranges, in ascending order.
+  static const struct {
+    int lo;
+    int hi;
+  } punct_ranges[] = {
+    { 0x2000, 0x206f },  // General punctuations
+    { 0x2e00, 0x2e7f },  // Supplemental punctuations
+    { 0x3000, 0x303f },  // CJK symbols and punctuations
+    { 0xff01, 0xff0f },  // Full width ASCII punctuations
+    { 0xff1a, 0xff20 },  // ..
+    { 0xff3b, 0xff40 },  // ..
+    { 0xff5b, 0xff65 },  // ..
+  };
+  const int range_count = (int)(sizeof(punct_ranges) / sizeof(punct_ranges[0]));
+
+  for (int i = 0; i < range_count; i++) {
+    if (cc >= punct_ranges[i].lo && cc <= punct_ranges[i].hi) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Check whether a line break is allowed before character "cc".
+///
+/// "cc" is looked up in a table of punctuation that is prohibited at the
+/// beginning of a line.
+///
+/// @param cc  Character (code point) that would come right after the break.
+///
+/// @return false when "cc" is found in the table, true otherwise.
+bool utf_allow_break_before(int cc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  // Must stay sorted in ascending order: it is binary-searched below.
+  static const int bol_prohibited[] = {
+    '!',
+    '%',
+    ')',
+    ',',
+    ':',
+    ';',
+    '>',
+    '?',
+    ']',
+    '}',
+    0x2019,  // ’ right single quotation mark
+    0x201d,  // ” right double quotation mark
+    0x2020,  // † dagger
+    0x2021,  // ‡ double dagger
+    0x2026,  // … horizontal ellipsis
+    0x2030,  // ‰ per mille sign
+    0x2031,  // ‱ per ten thousand sign
+    0x203c,  // ‼ double exclamation mark
+    0x2047,  // ⁇ double question mark
+    0x2048,  // ⁈ question exclamation mark
+    0x2049,  // ⁉ exclamation question mark
+    0x2103,  // ℃ degree celsius
+    0x2109,  // ℉ degree fahrenheit
+    0x3001,  // 、 ideographic comma
+    0x3002,  // 。 ideographic full stop
+    0x3009,  // 〉 right angle bracket
+    0x300b,  // 》 right double angle bracket
+    0x300d,  // 」 right corner bracket
+    0x300f,  // 』 right white corner bracket
+    0x3011,  // 】 right black lenticular bracket
+    0x3015,  // 〕 right tortoise shell bracket
+    0x3017,  // 〗 right white lenticular bracket
+    0x3019,  // 〙 right white tortoise shell bracket
+    0x301b,  // 〛 right white square bracket
+    0xff01,  // ! fullwidth exclamation mark
+    0xff09,  // ) fullwidth right parenthesis
+    0xff0c,  // , fullwidth comma
+    0xff0e,  // . fullwidth full stop
+    0xff1a,  // : fullwidth colon
+    0xff1b,  // ; fullwidth semicolon
+    0xff1f,  // ? fullwidth question mark
+    0xff3d,  // ] fullwidth right square bracket
+    0xff5d,  // } fullwidth right curly bracket
+  };
+
+  // Binary search over the half-open index range [lo, hi).
+  int lo = 0;
+  int hi = (int)ARRAY_SIZE(bol_prohibited);
+
+  while (lo < hi) {
+    const int mid = lo + (hi - lo) / 2;
+    const int probe = bol_prohibited[mid];
+
+    if (probe < cc) {
+      lo = mid + 1;
+    } else if (probe > cc) {
+      hi = mid;
+    } else {
+      return false;  // listed: no break before this character
+    }
+  }
+
+  return true;
+}
+
+/// Check whether a line break is allowed after character "cc".
+///
+/// "cc" is looked up in a table of punctuation that is prohibited at the
+/// end of a line.
+///
+/// @param cc  Character (code point) that would come right before the break.
+///
+/// @return false when "cc" is found in the table, true otherwise.
+bool utf_allow_break_after(int cc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  // Must stay sorted in ascending order: it is binary-searched below.
+  static const int eol_prohibited[] = {
+    '(',
+    '<',
+    '[',
+    '`',
+    '{',
+    // 0x2014,  // — em dash
+    0x2018,  // ‘ left single quotation mark
+    0x201c,  // “ left double quotation mark
+    // 0x2053,  // ~ swung dash
+    0x3008,  // 〈 left angle bracket
+    0x300a,  // 《 left double angle bracket
+    0x300c,  // 「 left corner bracket
+    0x300e,  // 『 left white corner bracket
+    0x3010,  // 【 left black lenticular bracket
+    0x3014,  // 〔 left tortoise shell bracket
+    0x3016,  // 〖 left white lenticular bracket
+    0x3018,  // 〘 left white tortoise shell bracket
+    0x301a,  // 〚 left white square bracket
+    0xff08,  // ( fullwidth left parenthesis
+    0xff3b,  // [ fullwidth left square bracket
+    0xff5b,  // { fullwidth left curly bracket
+  };
+
+  // Binary search over the half-open index range [lo, hi).
+  int lo = 0;
+  int hi = (int)ARRAY_SIZE(eol_prohibited);
+
+  while (lo < hi) {
+    const int mid = lo + (hi - lo) / 2;
+    const int probe = eol_prohibited[mid];
+
+    if (probe < cc) {
+      lo = mid + 1;
+    } else if (probe > cc) {
+      hi = mid;
+    } else {
+      return false;  // listed: no break after this character
+    }
+  }
+
+  return true;
+}
+
+/// Check whether a line break is allowed between "cc" and "ncc".
+///
+/// @param cc   Character before the candidate break position.
+/// @param ncc  Character after the candidate break position.
+///
+/// @return false for a doubled em dash or doubled horizontal ellipsis, and
+///         whenever utf_allow_break_after(cc) or utf_allow_break_before(ncc)
+///         rejects the position; true otherwise.
+bool utf_allow_break(int cc, int ncc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  // Two-letter punctuation: the same character repeated is kept together.
+  const bool pairable = (cc == 0x2014      // em dash
+                         || cc == 0x2026);  // horizontal ellipsis
+  if (pairable && ncc == cc) {
+    return false;
+  }
+
+  if (!utf_allow_break_after(cc)) {
+    return false;
+  }
+  return utf_allow_break_before(ncc);
+}
+
/// Copy a character, advancing the pointers
///
/// @param[in,out] fp Source of the character to copy.