1 files changed, 256 insertions, 75 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 223b4d6845..b874f0dc94 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -39,6 +39,7 @@
 #include "nvim/arabic.h"
 #include "nvim/charset.h"
 #include "nvim/cursor.h"
+#include "nvim/drawscreen.h"
 #include "nvim/eval.h"
 #include "nvim/fileio.h"
 #include "nvim/func_attr.h"
@@ -49,7 +50,6 @@
 #include "nvim/memline.h"
 #include "nvim/memory.h"
 #include "nvim/message.h"
-#include "nvim/option.h"
 #include "nvim/os/os.h"
 #include "nvim/path.h"
 #include "nvim/screen.h"
@@ -74,6 +74,19 @@ struct interval {
 # include "unicode_tables.generated.h"
 #endif
 
+static char e_list_item_nr_is_not_list[]
+  = N_("E1109: List item %d is not a List");  // %d: 0-based index of the item
+static char e_list_item_nr_does_not_contain_3_numbers[]
+  = N_("E1110: List item %d does not contain 3 numbers");  // %d: 0-based index of the item
+static char e_list_item_nr_range_invalid[]
+  = N_("E1111: List item %d range invalid");  // %d: 0-based index of the item
+static char e_list_item_nr_cell_width_invalid[]
+  = N_("E1112: List item %d cell width invalid");  // %d: 0-based index of the item
+static char e_overlapping_ranges_for_nr[]
+  = N_("E1113: Overlapping ranges for 0x%lx");  // %lx: start of the range overlapping the one before
+static char e_only_values_of_0x100_and_higher_supported[]
+  = N_("E1114: Only values of 0x100 and higher supported");  // a range starts below 0x100
+
 // To speed up BYTELEN(); keep a lookup table to quickly get the length in
 // bytes of a UTF-8 character from the first byte of a UTF-8 string.  Bytes
 // which are illegal when used as the first byte have a 1.  The NUL byte has
@@ -472,13 +485,18 @@ static bool intable(const struct interval *table, size_t n_items, int c)
 int utf_char2cells(int c)
 {
   if (c >= 0x100) {
+    int n = cw_value(c);
+    if (n != 0) {
+      return n;
+    }
+
     if (!utf_printable(c)) {
       return 6;                 // unprintable, displays <xxxx>
     }
     if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
       return 2;
     }
-    if (p_emoji && intable(emoji_width, ARRAY_SIZE(emoji_width), c)) {
+    if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
       return 2;
     }
   } else if (c >= 0x80 && !vim_isprintc(c)) {
@@ -736,21 +754,19 @@ bool utf_composinglike(const char_u *p1, const char_u *p2)
 ///                   space at least for #MAX_MCO + 1 elements.
 ///
 /// @return leading character.
-int utfc_ptr2char(const char_u *p, int *pcc)
+int utfc_ptr2char(const char *p_in, int *pcc)
 {
-  int len;
-  int c;
-  int cc;
+  uint8_t *p = (uint8_t *)p_in;
   int i = 0;
 
-  c = utf_ptr2char((char *)p);
-  len = utf_ptr2len((char *)p);
+  int c = utf_ptr2char((char *)p);
+  int len = utf_ptr2len((char *)p);
 
   // Only accept a composing char when the first char isn't illegal.
   if ((len > 1 || *p < 0x80)
       && p[len] >= 0x80
       && utf_composinglike(p, p + len)) {
-    cc = utf_ptr2char((char *)p + len);
+    int cc = utf_ptr2char((char *)p + len);
     for (;;) {
       pcc[i++] = cc;
       if (i == MAX_MCO) {
@@ -864,7 +880,7 @@ int utf_ptr2len_len(const char_u *p, int size)
   } else {
     m = len;
   }
-  for (i = 1; i < m; ++i) {
+  for (i = 1; i < m; i++) {
     if ((p[i] & 0xc0) != 0x80) {
       return 1;
     }
@@ -872,9 +888,9 @@ int utf_ptr2len_len(const char_u *p, int size)
   return len;
 }
 
-/// Return the number of bytes occupied by a UTF-8 character in a string
-///
+/// Return the number of bytes occupied by a UTF-8 character in a string.
 /// This includes following composing characters.
+/// Returns zero for NUL.
 int utfc_ptr2len(const char *const p_in)
   FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 {
@@ -988,8 +1004,9 @@ int utf_char2len(const int c)
 
 /// Convert Unicode character to UTF-8 string
 ///
-/// @param c character to convert to \p buf
-/// @param[out] buf UTF-8 string generated from \p c, does not add \0
+/// @param c         character to convert to UTF-8 string in \p buf
+/// @param[out] buf  UTF-8 string generated from \p c, does not add \0
+///                  must have room for at least 6 bytes
 /// @return Number of bytes (1-6).
 int utf_char2bytes(const int c, char *const buf)
 {
@@ -1164,6 +1181,11 @@ int utf_class_tab(const int c, const uint64_t *const chartab)
     return 1;               // punctuation
   }
 
+  // emoji
+  if (intable(emoji_all, ARRAY_SIZE(emoji_all), c)) {
+    return 3;
+  }
+
   // binary search in table
   while (top >= bot) {
     mid = (bot + top) / 2;
@@ -1176,11 +1198,6 @@ int utf_class_tab(const int c, const uint64_t *const chartab)
     }
   }
 
-  // emoji
-  if (intable(emoji_all, ARRAY_SIZE(emoji_all), c)) {
-    return 3;
-  }
-
   // most other characters are "word" characters
   return 2;
 }
@@ -1576,7 +1593,7 @@ void show_utf8(void)
   }
 
   clen = 0;
-  for (i = 0; i < len; ++i) {
+  for (i = 0; i < len; i++) {
     if (clen == 0) {
       // start of (composing) character, get its length
       if (i > 0) {
@@ -1587,7 +1604,7 @@ void show_utf8(void)
     }
     sprintf((char *)IObuff + rlen, "%02x ",
             (line[i] == NL) ? NUL : line[i]);          // NUL is stored as NL
-    --clen;
+    clen--;
     rlen += (int)STRLEN(IObuff + rlen);
     if (rlen > IOSIZE - 20) {
       break;
@@ -1613,14 +1630,14 @@ int utf_head_off(const char_u *base, const char_u *p)
   // Skip backwards over trailing bytes: 10xx.xxxx
   // Skip backwards again if on a composing char.
   const char_u *q;
-  for (q = p;; --q) {
+  for (q = p;; q--) {
     // Move s to the last byte of this char.
     const char_u *s;
-    for (s = q; (s[1] & 0xc0) == 0x80; ++s) {}
+    for (s = q; (s[1] & 0xc0) == 0x80; s++) {}
 
     // Move q to the first byte of this char.
     while (q > base && (*q & 0xc0) == 0x80) {
-      --q;
+      q--;
     }
     // Check for illegal sequence. Do allow an illegal byte after where we
     // started.
@@ -1641,10 +1658,10 @@ int utf_head_off(const char_u *base, const char_u *p)
     if (arabic_maycombine(c)) {
       // Advance to get a sneak-peak at the next char
       const char_u *j = q;
-      --j;
+      j--;
       // Move j to the first byte of this char.
       while (j > base && (*j & 0xc0) == 0x80) {
-        --j;
+        j--;
       }
       if (arabic_combine(utf_ptr2char((char *)j), c)) {
         continue;
@@ -1800,9 +1817,9 @@ bool utf_allow_break(int cc, int ncc)
 ///
 /// @param[in,out]  fp  Source of the character to copy.
 /// @param[in,out]  tp  Destination to copy to.
-void mb_copy_char(const char_u **const fp, char_u **const tp)
+void mb_copy_char(const char **const fp, char **const tp)
 {
-  const size_t l = (size_t)utfc_ptr2len((char *)(*fp));
+  const size_t l = (size_t)utfc_ptr2len(*fp);
 
   memmove(*tp, *fp, l);
   *tp += l;
@@ -1913,7 +1930,7 @@ void utf_find_illegal(void)
   char_u *tofree = NULL;
 
   vimconv.vc_type = CONV_NONE;
-  if (enc_canon_props(curbuf->b_p_fenc) & ENC_8BIT) {
+  if (enc_canon_props((char_u *)curbuf->b_p_fenc) & ENC_8BIT) {
     // 'encoding' is "utf-8" but we are editing a 8-bit encoded file,
     // possibly a utf-8 file with illegal bytes.  Setup for conversion
     // from utf-8 to 'fileencoding'.
@@ -1925,7 +1942,7 @@ void utf_find_illegal(void)
     p = get_cursor_pos_ptr();
     if (vimconv.vc_type != CONV_NONE) {
       xfree(tofree);
-      tofree = string_convert(&vimconv, p, NULL);
+      tofree = (char_u *)string_convert(&vimconv, (char *)p, NULL);
       if (tofree == NULL) {
         break;
       }
@@ -1956,7 +1973,7 @@ void utf_find_illegal(void)
     if (curwin->w_cursor.lnum == curbuf->b_ml.ml_line_count) {
       break;
     }
-    ++curwin->w_cursor.lnum;
+    curwin->w_cursor.lnum++;
     curwin->w_cursor.col = 0;
   }
 
@@ -1970,8 +1987,7 @@ theend:
 }
 
 /// @return  true if string "s" is a valid utf-8 string.
-/// When "end" is NULL stop at the first NUL.
-/// When "end" is positive stop there.
+/// When "end" is NULL stop at the first NUL.  Otherwise stop at "end".
 bool utf_valid_string(const char_u *s, const char_u *end)
 {
   const char_u *p = s;
@@ -2128,10 +2144,8 @@ const char *mb_unescape(const char **const pp)
   return NULL;
 }
 
-/*
- * Skip the Vim specific head of a 'encoding' name.
- */
-char_u *enc_skip(char_u *p)
+/// Skip the Vim specific head of a 'encoding' name.
+char *enc_skip(char *p)
 {
   if (STRNCMP(p, "2byte-", 6) == 0) {
     return p + 6;
@@ -2142,27 +2156,25 @@ char_u *enc_skip(char_u *p)
   return p;
 }
 
-/*
- * Find the canonical name for encoding "enc".
- * When the name isn't recognized, returns "enc" itself, but with all lower
- * case characters and '_' replaced with '-'.
- * Returns an allocated string.
- */
-char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
+/// Find the canonical name for encoding "enc".
+/// When the name isn't recognized, returns "enc" itself, but with all lower
+/// case characters and '_' replaced with '-'.
+///
+/// @return  an allocated string.
+char *enc_canonize(char *enc)
+  FUNC_ATTR_NONNULL_RET
 {
   char_u *p, *s;
-  int i;
-
   if (STRCMP(enc, "default") == 0) {
     // Use the default encoding as found by set_init_1().
-    return vim_strsave(fenc_default);
+    return (char *)vim_strsave(fenc_default);
   }
 
   // copy "enc" to allocated memory, with room for two '-'
   char_u *r = xmalloc(STRLEN(enc) + 3);
   // Make it all lower case and replace '_' with '-'.
   p = r;
-  for (s = enc; *s != NUL; ++s) {
+  for (s = (char_u *)enc; *s != NUL; s++) {
     if (*s == '_') {
       *p++ = '-';
     } else {
@@ -2172,7 +2184,7 @@ char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
   *p = NUL;
 
   // Skip "2byte-" and "8bit-".
-  p = enc_skip(r);
+  p = (char_u *)enc_skip((char *)r);
 
   // Change "microsoft-cp" to "cp".  Used in some spell files.
   if (STRNCMP(p, "microsoft-cp", 12) == 0) {
@@ -2196,6 +2208,7 @@ char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
     STRMOVE(p + 5, p + 6);
   }
 
+  int i;
   if (enc_canon_search(p) >= 0) {
     // canonical name can be used unmodified
     if (p != r) {
@@ -2206,7 +2219,7 @@ char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
     xfree(r);
     r = vim_strsave((char_u *)enc_canon_table[i].name);
   }
-  return r;
+  return (char *)r;
 }
 
 /// Search for an encoding alias of "name".
@@ -2215,7 +2228,7 @@ static int enc_alias_search(const char_u *name)
 {
   int i;
 
-  for (i = 0; enc_alias_table[i].name != NULL; ++i) {
+  for (i = 0; enc_alias_table[i].name != NULL; i++) {
     if (STRCMP(name, enc_alias_table[i].name) == 0) {
       return enc_alias_table[i].canon;
     }
@@ -2291,7 +2304,7 @@ enc_locale_copy_enc:
     buf[i] = NUL;
   }
 
-  return enc_canonize((char_u *)buf);
+  return (char_u *)enc_canonize(buf);
 }
 
 #if defined(HAVE_ICONV)
@@ -2314,7 +2327,7 @@ void *my_iconv_open(char_u *to, char_u *from)
   if (iconv_working == kBroken) {
     return (void *)-1;          // detected a broken iconv() previously
   }
-  fd = iconv_open((char *)enc_skip(to), (char *)enc_skip(from));
+  fd = iconv_open(enc_skip((char *)to), enc_skip((char *)from));
 
   if (fd != (iconv_t)-1 && iconv_working == kUnknown) {
     /*
@@ -2425,18 +2438,17 @@ static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen
 
 #endif  // HAVE_ICONV
 
-/*
- * Setup "vcp" for conversion from "from" to "to".
- * The names must have been made canonical with enc_canonize().
- * vcp->vc_type must have been initialized to CONV_NONE.
- * Note: cannot be used for conversion from/to ucs-2 and ucs-4 (will use utf-8
- * instead).
- * Afterwards invoke with "from" and "to" equal to NULL to cleanup.
- * Return FAIL when conversion is not supported, OK otherwise.
- */
-int convert_setup(vimconv_T *vcp, char_u *from, char_u *to)
+/// Setup "vcp" for conversion from "from" to "to".
+/// The names must have been made canonical with enc_canonize().
+/// vcp->vc_type must have been initialized to CONV_NONE.
+/// Note: cannot be used for conversion from/to ucs-2 and ucs-4 (will use utf-8
+/// instead).
+/// Afterwards invoke with "from" and "to" equal to NULL to cleanup.
+///
+/// @return  FAIL when conversion is not supported, OK otherwise.
+int convert_setup(vimconv_T *vcp, char *from, char *to)
 {
-  return convert_setup_ext(vcp, from, true, to, true);
+  return convert_setup_ext(vcp, (char_u *)from, true, (char_u *)to, true);
 }
 
 /// As convert_setup(), but only when from_unicode_is_utf8 is true will all
@@ -2509,16 +2521,14 @@ int convert_setup_ext(vimconv_T *vcp, char_u *from, bool from_unicode_is_utf8, c
   return OK;
 }
 
-/*
- * Convert text "ptr[*lenp]" according to "vcp".
- * Returns the result in allocated memory and sets "*lenp".
- * When "lenp" is NULL, use NUL terminated strings.
- * Illegal chars are often changed to "?", unless vcp->vc_fail is set.
- * When something goes wrong, NULL is returned and "*lenp" is unchanged.
- */
-char_u *string_convert(const vimconv_T *const vcp, char_u *ptr, size_t *lenp)
+/// Convert text "ptr[*lenp]" according to "vcp".
+/// Returns the result in allocated memory and sets "*lenp".
+/// When "lenp" is NULL, use NUL terminated strings.
+/// Illegal chars are often changed to "?", unless vcp->vc_fail is set.
+/// When something goes wrong, NULL is returned and "*lenp" is unchanged.
+char *string_convert(const vimconv_T *const vcp, char *ptr, size_t *lenp)
 {
-  return string_convert_ext(vcp, ptr, lenp, NULL);
+  return (char *)string_convert_ext(vcp, (char_u *)ptr, lenp, NULL);
 }
 
 /*
@@ -2548,7 +2558,7 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
   case CONV_TO_UTF8:            // latin1 to utf-8 conversion
     retval = xmalloc(len * 2 + 1);
     d = retval;
-    for (size_t i = 0; i < len; ++i) {
+    for (size_t i = 0; i < len; i++) {
       c = ptr[i];
       if (c < 0x80) {
         *d++ = (char_u)c;
@@ -2566,7 +2576,7 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
   case CONV_9_TO_UTF8:          // latin9 to utf-8 conversion
     retval = xmalloc(len * 3 + 1);
     d = retval;
-    for (size_t i = 0; i < len; ++i) {
+    for (size_t i = 0; i < len; i++) {
       c = ptr[i];
       switch (c) {
       case 0xa4:
@@ -2678,3 +2688,174 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
 
   return retval;
 }
+
+/// One entry of the cell width table set by setcellwidths().
+typedef struct {
+  long first;  ///< first character of the range (inclusive)
+  long last;   ///< last character of the range (inclusive)
+  char width;  ///< cell width used for the range: 1 or 2
+} cw_interval_T;
+
+static cw_interval_T *cw_table = NULL;  ///< sorted on `first`; NULL when no table is set
+static size_t cw_table_size = 0;        ///< number of entries in `cw_table`
+
+/// Look up the cell width that setcellwidths() configured for character `c`.
+///
+/// @param c  The character to look up.
+/// @return  1 or 2 when `c` lies inside a range of the cellwidth table, 0 if not.
+static int cw_value(int c)
+{
+  // Without a table there is nothing to find, and a `c` below the lowest
+  // range (Latin1 etc.) can be rejected without searching.
+  if (cw_table == NULL || c < cw_table[0].first) {
+    return 0;
+  }
+
+  // Binary search; `lo` and `hi` are the inclusive bounds still to examine.
+  int lo = 0;
+  int hi = (int)cw_table_size - 1;
+  while (lo <= hi) {
+    const int mid = (lo + hi) / 2;
+    const cw_interval_T *const entry = &cw_table[mid];
+    if (c > entry->last) {
+      lo = mid + 1;  // continue above this range
+    } else if (c < entry->first) {
+      hi = mid - 1;  // continue below this range
+    } else {
+      return entry->width;
+    }
+  }
+
+  // `c` is not covered by any range.
+  return 0;
+}
+
+/// qsort() callback ordering two sub-lists by their first number, without int truncation.
+static int tv_nr_compare(const void *a1, const void *a2)
+{
+  const varnumber_T n1 = TV_LIST_ITEM_TV(tv_list_first(*(const list_T **)a1))->vval.v_number;
+  const varnumber_T n2 = TV_LIST_ITEM_TV(tv_list_first(*(const list_T **)a2))->vval.v_number;
+  return n1 == n2 ? 0 : (n1 > n2 ? 1 : -1);  // (int)(n1 - n2) could lose or flip the sign
+}
+
+/// "setcellwidths()" function
+///
+/// argvars[0] is a List of [first, last, width] Lists that replaces the cell
+/// width table; an empty List clears it.  On error the old table is kept.
+void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
+    emsg(_(e_listreq));
+    return;
+  }
+  const list_T *const l = argvars[0].vval.v_list;
+  const size_t n_ranges = (size_t)tv_list_len(l);
+  if (n_ranges == 0) {
+    // An empty list drops the table.
+    xfree(cw_table);
+    cw_table = NULL;
+    cw_table_size = 0;
+    return;
+  }
+
+  // Keep the sub-lists as list_T (not listitem_T) so that TV_LIST_ITEM_NEXT
+  // can walk their items again when the table is filled in below.
+  const list_T **sublists = xmalloc(n_ranges * sizeof(*sublists));
+
+  // Validation pass: each item must be a List of three Numbers (range + cell width).
+  int item = 0;
+  TV_LIST_ITER_CONST(l, li, {
+    const typval_T *const entry_tv = TV_LIST_ITEM_TV(li);
+    if (entry_tv->v_type != VAR_LIST || entry_tv->vval.v_list == NULL) {
+      semsg(_(e_list_item_nr_is_not_list), item);
+      xfree(sublists);
+      return;
+    }
+    const list_T *const sub = entry_tv->vval.v_list;
+    sublists[item] = sub;
+
+    int count = 0;  // Numbers accepted so far; counting stops at a non-Number
+    varnumber_T range_start = 0;
+    for (const listitem_T *it = tv_list_first(sub); it != NULL;
+         it = TV_LIST_ITEM_NEXT(sub, it)) {
+      const typval_T *const tv = TV_LIST_ITEM_TV(it);
+      if (tv->v_type != VAR_NUMBER) {
+        break;
+      }
+      const varnumber_T nr = tv->vval.v_number;
+      if (count == 0) {
+        range_start = nr;
+        if (nr < 0x100) {
+          emsg(_(e_only_values_of_0x100_and_higher_supported));
+          xfree(sublists);
+          return;
+        }
+      } else if (count == 1 && nr < range_start) {
+        semsg(_(e_list_item_nr_range_invalid), item);
+        xfree(sublists);
+        return;
+      } else if (count == 2 && (nr < 1 || nr > 2)) {
+        semsg(_(e_list_item_nr_cell_width_invalid), item);
+        xfree(sublists);
+        return;
+      }
+      count++;
+    }
+    if (count != 3) {
+      semsg(_(e_list_item_nr_does_not_contain_3_numbers), item);
+      xfree(sublists);
+      return;
+    }
+    item++;
+  });
+
+  // Sort the sub-lists on their first number.
+  qsort((void *)sublists, n_ranges, sizeof(*sublists), tv_nr_compare);
+
+  // Fill the new table; once sorted, overlaps show up between neighbours.
+  cw_interval_T *const table = xmalloc(n_ranges * sizeof(*table));
+  for (size_t idx = 0; idx < n_ranges; idx++) {
+    const list_T *const sub = sublists[idx];
+    const listitem_T *const first_li = tv_list_first(sub);
+    const listitem_T *const last_li = TV_LIST_ITEM_NEXT(sub, first_li);
+    const listitem_T *const width_li = TV_LIST_ITEM_NEXT(sub, last_li);
+    const varnumber_T first = TV_LIST_ITEM_TV(first_li)->vval.v_number;
+    if (idx > 0 && first <= table[idx - 1].last) {
+      semsg(_(e_overlapping_ranges_for_nr), (long)first);
+      xfree(sublists);
+      xfree(table);
+      return;
+    }
+    table[idx].first = first;
+    table[idx].last = TV_LIST_ITEM_TV(last_li)->vval.v_number;
+    table[idx].width = (char)TV_LIST_ITEM_TV(width_li)->vval.v_number;
+  }
+  xfree(sublists);
+
+  // Install the new table, keeping the old one at hand for a rollback.
+  cw_interval_T *const old_table = cw_table;
+  const size_t old_size = cw_table_size;
+  cw_table = table;
+  cw_table_size = n_ranges;
+
+  // The new widths must not conflict with 'listchars' or 'fillchars'.
+  const char *const error = check_chars_options();
+  if (error != NULL) {
+    emsg(_(error));
+    cw_table = old_table;
+    cw_table_size = old_size;
+    xfree(table);
+    return;
+  }
+  xfree(old_table);
+  redraw_all_later(UPD_NOT_VALID);
+}
+
+/// "charclass()" function: stores mb_get_class() of the String in argvars[0] in rettv.
+void f_charclass(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  typval_T *const arg = &argvars[0];
+  if (tv_check_for_string(arg) != FAIL && arg->vval.v_string != NULL) {
+    rettv->vval.v_number = mb_get_class((const char_u *)arg->vval.v_string);
+  }
+}