Merge pull request #13145 from janlazo/vim-8.2.0901

vim-patch:8.2.{901,912}
author: Jan Edmund Lazo <jan.lazo@mail.utoronto.ca> 2020-10-23 12:29:05 -0400
committer: GitHub <noreply@github.com> 2020-10-23 12:29:05 -0400
commit: 2786d96fac2d5cb44eaf7de604adfdffe3dd9895 (patch)
tree: 854f215a410fc60151ab58652dcb0c9baaf958e7 /src/nvim/mbyte.c
parent: 9280a69a6d1828972cb8a0f6b7f81e0a5e73c886 (diff)
parent: 78ec28bca8eb868948ac4ad41b74148874f5a3bb (diff)
download: rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.tar.gz
rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.tar.bz2
rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.zip
1 files changed, 140 insertions, 0 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 6d188c6cd0..ec4f4cbc21 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1624,6 +1624,146 @@ int utf_head_off(const char_u *base, const char_u *p)
   return (int)(p - q);
 }
 
+// Whether space is NOT allowed before/after 'c'.
+bool utf_eat_space(int cc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  return (cc >= 0x2000 && cc <= 0x206F)   // General punctuations
+      || (cc >= 0x2e00 && cc <= 0x2e7f)   // Supplemental punctuations
+      || (cc >= 0x3000 && cc <= 0x303f)   // CJK symbols and punctuations
+      || (cc >= 0xff01 && cc <= 0xff0f)   // Full width ASCII punctuations
+      || (cc >= 0xff1a && cc <= 0xff20)   // ..
+      || (cc >= 0xff3b && cc <= 0xff40)   // ..
+      || (cc >= 0xff5b && cc <= 0xff65);  // ..
+}
+
+// Whether line break is allowed before "cc".
+bool utf_allow_break_before(int cc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  static const int BOL_prohibition_punct[] = {
+    '!',
+    '%',
+    ')',
+    ',',
+    ':',
+    ';',
+    '>',
+    '?',
+    ']',
+    '}',
+    0x2019,  // ’ right single quotation mark
+    0x201d,  // ” right double quotation mark
+    0x2020,  // † dagger
+    0x2021,  // ‡ double dagger
+    0x2026,  // … horizontal ellipsis
+    0x2030,  // ‰ per mille sign
+    0x2031,  // ‱ per then thousand sign
+    0x203c,  // ‼ double exclamation mark
+    0x2047,  // ⁇ double question mark
+    0x2048,  // ⁈ question exclamation mark
+    0x2049,  // ⁉ exclamation question mark
+    0x2103,  // ℃ degree celsius
+    0x2109,  // ℉ degree fahrenheit
+    0x3001,  // 、 ideographic comma
+    0x3002,  // 。 ideographic full stop
+    0x3009,  // 〉 right angle bracket
+    0x300b,  // 》 right double angle bracket
+    0x300d,  // 」 right corner bracket
+    0x300f,  // 』 right white corner bracket
+    0x3011,  // 】 right black lenticular bracket
+    0x3015,  // 〕 right tortoise shell bracket
+    0x3017,  // 〗 right white lenticular bracket
+    0x3019,  // 〙 right white tortoise shell bracket
+    0x301b,  // 〛 right white square bracket
+    0xff01,  // ！ fullwidth exclamation mark
+    0xff09,  // ） fullwidth right parenthesis
+    0xff0c,  // ， fullwidth comma
+    0xff0e,  // ． fullwidth full stop
+    0xff1a,  // ： fullwidth colon
+    0xff1b,  // ； fullwidth semicolon
+    0xff1f,  // ？ fullwidth question mark
+    0xff3d,  // ］ fullwidth right square bracket
+    0xff5d,  // ｝ fullwidth right curly bracket
+  };
+
+  int first = 0;
+  int last = ARRAY_SIZE(BOL_prohibition_punct) - 1;
+
+  while (first < last) {
+    const int mid = (first + last) / 2;
+
+    if (cc == BOL_prohibition_punct[mid]) {
+      return false;
+    } else if (cc > BOL_prohibition_punct[mid]) {
+      first = mid + 1;
+    } else {
+      last = mid - 1;
+    }
+  }
+
+  return cc != BOL_prohibition_punct[first];
+}
+
+// Whether line break is allowed after "cc".
+bool utf_allow_break_after(int cc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  static const int EOL_prohibition_punct[] = {
+    '(',
+    '<',
+    '[',
+    '`',
+    '{',
+    // 0x2014,  // — em dash
+    0x2018,     // ‘ left single quotation mark
+    0x201c,     // “ left double quotation mark
+    // 0x2053,  // ～ swung dash
+    0x3008,     // 〈 left angle bracket
+    0x300a,     // 《 left double angle bracket
+    0x300c,     // 「 left corner bracket
+    0x300e,     // 『 left white corner bracket
+    0x3010,     // 【 left black lenticular bracket
+    0x3014,     // 〔 left tortoise shell bracket
+    0x3016,     // 〖 left white lenticular bracket
+    0x3018,     // 〘 left white tortoise shell bracket
+    0x301a,     // 〚 left white square bracket
+    0xff08,     // （ fullwidth left parenthesis
+    0xff3b,     // ［ fullwidth left square bracket
+    0xff5b,     // ｛ fullwidth left curly bracket
+  };
+
+  int first = 0;
+  int last = ARRAY_SIZE(EOL_prohibition_punct) - 1;
+
+  while (first < last) {
+    const int mid = (first + last)/2;
+
+    if (cc == EOL_prohibition_punct[mid]) {
+      return false;
+    } else if (cc > EOL_prohibition_punct[mid]) {
+      first = mid + 1;
+    } else {
+      last = mid - 1;
+    }
+  }
+
+  return cc != EOL_prohibition_punct[first];
+}
+
+// Whether line break is allowed between "cc" and "ncc".
+bool utf_allow_break(int cc, int ncc)
+  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
+{
+  // don't break between two-letter punctuations
+  if (cc == ncc
+      && (cc == 0x2014         // em dash
+          || cc == 0x2026)) {  // horizontal ellipsis
+    return false;
+  }
+  return utf_allow_break_after(cc) && utf_allow_break_before(ncc);
+}
+
 /// Copy a character, advancing the pointers
 ///
 /// @param[in,out]  fp  Source of the character to copy.
author	Jan Edmund Lazo <jan.lazo@mail.utoronto.ca>	2020-10-23 12:29:05 -0400
committer	GitHub <noreply@github.com>	2020-10-23 12:29:05 -0400
commit	2786d96fac2d5cb44eaf7de604adfdffe3dd9895 (patch)
tree	854f215a410fc60151ab58652dcb0c9baaf958e7 /src/nvim/mbyte.c
parent	9280a69a6d1828972cb8a0f6b7f81e0a5e73c886 (diff)
parent	78ec28bca8eb868948ac4ad41b74148874f5a3bb (diff)
download	rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.tar.gz rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.tar.bz2 rneovim-2786d96fac2d5cb44eaf7de604adfdffe3dd9895.zip