5 files changed, 205 insertions, 4 deletions
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index 6d8776d08b..a2272f0c98 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -327,6 +327,7 @@ return {
     serverstop={args=1},
     setbufline={args=3, base=3},
     setbufvar={args=3, base=3},
+    setcellwidths={args=1, base=1},
     setcharpos={args=2, base=2},
     setcharsearch={args=1, base=1},
     setcmdpos={args=1, base=1},
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua
index aa96c97bc1..36553f4649 100644
--- a/src/nvim/generators/gen_unicode_tables.lua
+++ b/src/nvim/generators/gen_unicode_tables.lua
@@ -12,8 +12,8 @@
 --    2 then interval applies only to first, third, fifth, … character in range.
 --    Fourth value is number that should be added to the codepoint to yield
 --    folded/lower/upper codepoint.
--- 4. emoji_width and emoji_all tables: sorted lists of non-overlapping closed
---    intervals of Emoji characters.  emoji_width contains all the characters
+-- 4. emoji_wide and emoji_all tables: sorted lists of non-overlapping closed
+--    intervals of Emoji characters.  emoji_wide contains all the characters
 --    which don't have ambiguous or double width, and emoji_all has all Emojis.
 if arg[1] == '--help' then
   print('Usage:')
@@ -288,7 +288,7 @@ local build_emoji_table = function(ut_fp, emojiprops, doublewidth, ambiwidth)
   end
   ut_fp:write('};\n')
 
-  ut_fp:write('static const struct interval emoji_width[] = {\n')
+  ut_fp:write('static const struct interval emoji_wide[] = {\n')
   for _, p in ipairs(emojiwidth) do
     ut_fp:write(make_range(p[1], p[2]))
   end
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 223b4d6845..66262ebfad 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -74,6 +74,19 @@ struct interval {
 # include "unicode_tables.generated.h"
 #endif
 
+static char e_list_item_nr_is_not_list[]
+  = N_("E1109: List item %d is not a List");
+static char e_list_item_nr_does_not_contain_3_numbers[]
+  = N_("E1110: List item %d does not contain 3 numbers");
+static char e_list_item_nr_range_invalid[]
+  = N_("E1111: List item %d range invalid");
+static char e_list_item_nr_cell_width_invalid[]
+  = N_("E1112: List item %d cell width invalid");
+static char e_overlapping_ranges_for_nr[]
+  = N_("E1113: Overlapping ranges for %lx");
+static char e_only_values_of_0x100_and_higher_supported[]
+  = N_("E1114: Only values of 0x100 and higher supported");
+
 // To speed up BYTELEN(); keep a lookup table to quickly get the length in
 // bytes of a UTF-8 character from the first byte of a UTF-8 string.  Bytes
 // which are illegal when used as the first byte have a 1.  The NUL byte has
@@ -472,13 +485,18 @@ static bool intable(const struct interval *table, size_t n_items, int c)
 int utf_char2cells(int c)
 {
   if (c >= 0x100) {
+    int n = cw_value(c);
+    if (n != 0) {
+      return n;
+    }
+
     if (!utf_printable(c)) {
       return 6;                 // unprintable, displays <xxxx>
     }
     if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
       return 2;
     }
-    if (p_emoji && intable(emoji_width, ARRAY_SIZE(emoji_width), c)) {
+    if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
       return 2;
     }
   } else if (c >= 0x80 && !vim_isprintc(c)) {
@@ -2678,3 +2696,149 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
 
   return retval;
 }
+
+/// Table set by setcellwidths().
+typedef struct {
+  long first;  ///< first character of the range (inclusive)
+  long last;   ///< last character of the range (inclusive)
+  char width;  ///< cell width for the range; setcellwidths() accepts 1 or 2
+} cw_interval_T;
+
+static cw_interval_T *cw_table = NULL;  ///< current table, NULL when none is set
+static size_t cw_table_size = 0;        ///< number of entries in cw_table
+
+/// Look up the cell width configured with setcellwidths() for character `c`.
+///
+/// @param c The character to look up.
+/// @return 1 or 2 when `c` is covered by the cellwidth table, 0 if not.
+static int cw_value(int c)
+{
+  // No table at all, or `c` lies before the first range (Latin1 etc.).
+  if (cw_table == NULL || c < cw_table[0].first) {
+    return 0;
+  }
+
+  // Binary search: [lo, hi] is the part of the table that may still hold `c`.
+  // NOTE(review): relies on f_setcellwidths() storing the ranges sorted.
+  int lo = 0;
+  int hi = (int)cw_table_size - 1;
+  while (lo <= hi) {
+    const int probe = (lo + hi) / 2;
+    const cw_interval_T *const range = &cw_table[probe];
+    if (c > range->last) {
+      lo = probe + 1;
+    } else if (c < range->first) {
+      hi = probe - 1;
+    } else {
+      return range->width;
+    }
+  }
+
+  // Not covered by any range.
+  return 0;
+}
+
+/// qsort() callback: order two `const list_T *` slots by the first Number of each List.
+static int tv_nr_compare(const void *a1, const void *a2)
+{
+  const listitem_T *const li1 = tv_list_first(*(const list_T *const *)a1);
+  const listitem_T *const li2 = tv_list_first(*(const list_T *const *)a2);
+  const varnumber_T n1 = TV_LIST_ITEM_TV(li1)->vval.v_number;
+  const varnumber_T n2 = TV_LIST_ITEM_TV(li2)->vval.v_number;
+  return (n1 > n2) - (n1 < n2);  // an (int) cast of the difference could truncate
+}
+
+/// "setcellwidths()" function
+///
+/// argvars[0]: List of [first, last, width] Lists; an empty List clears the table.
+void f_setcellwidths(typval_T *argvars, typval_T *rettv, FunPtr fptr)
+{
+  if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
+    emsg(_(e_listreq));
+    return;
+  }
+  const list_T *const l = argvars[0].vval.v_list;
+  if (tv_list_len(l) == 0) {
+    // Clearing the table.
+    xfree(cw_table);
+    cw_table = NULL;
+    cw_table_size = 0;
+    return;
+  }
+
+  const list_T **ptrs = xmalloc(sizeof(const list_T *) * (size_t)tv_list_len(l));
+
+  // Check each entry: a list with three numbers, a valid range and a valid cell width.
+  int item = 0;
+  TV_LIST_ITER_CONST(l, li, {
+    const typval_T *const li_tv = TV_LIST_ITEM_TV(li);
+
+    if (li_tv->v_type != VAR_LIST || li_tv->vval.v_list == NULL) {
+      semsg(_(e_list_item_nr_is_not_list), item);
+      xfree(ptrs);
+      return;
+    }
+
+    const list_T *const li_l = li_tv->vval.v_list;
+    const listitem_T *lili = tv_list_first(li_l);
+    int i = 0;
+    varnumber_T n1;
+    for (; lili != NULL; lili = TV_LIST_ITEM_NEXT(li_l, lili), i++) {
+      const typval_T *const lili_tv = TV_LIST_ITEM_TV(lili);
+      if (lili_tv->v_type != VAR_NUMBER) {
+        break;
+      }
+      if (i == 0) {
+        n1 = lili_tv->vval.v_number;
+        if (n1 < 0x100) {
+          emsg(_(e_only_values_of_0x100_and_higher_supported));
+          xfree(ptrs);
+          return;
+        }
+      } else if (i == 1 && lili_tv->vval.v_number < n1) {
+        semsg(_(e_list_item_nr_range_invalid), item);
+        xfree(ptrs);
+        return;
+      } else if (i == 2 && (lili_tv->vval.v_number < 1 || lili_tv->vval.v_number > 2)) {
+        semsg(_(e_list_item_nr_cell_width_invalid), item);
+        xfree(ptrs);
+        return;
+      }
+    }
+
+    if (i != 3) {
+      semsg(_(e_list_item_nr_does_not_contain_3_numbers), item);
+      xfree(ptrs);
+      return;
+    }
+
+    ptrs[item++] = li_l;  // keep the entry; "lili" has moved past its first number
+  });
+
+  // Sort the entries on the first number and allocate the new table.
+  qsort((void *)ptrs, (size_t)tv_list_len(l), sizeof(const list_T *), tv_nr_compare);
+  cw_interval_T *table = xmalloc(sizeof(cw_interval_T) * (size_t)tv_list_len(l));
+
+  // Store the entries in sorted order: cw_value() does a binary search on the table.
+  for (item = 0; item < tv_list_len(l); item++) {
+    const list_T *const li_l = ptrs[item];
+    const listitem_T *lili = tv_list_first(li_l);
+    const varnumber_T n1 = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    if (item > 0 && n1 <= table[item - 1].last) {
+      semsg(_(e_overlapping_ranges_for_nr), (long)n1);
+      xfree(ptrs);
+      xfree(table);
+      return;
+    }
+    table[item].first = n1;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[item].last = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[item].width = (char)TV_LIST_ITEM_TV(lili)->vval.v_number;
+  }
+
+  xfree(ptrs);
+  xfree(cw_table);
+  cw_table = table;
+  cw_table_size = (size_t)tv_list_len(l);
+}
diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h
index ffa8411675..2a9afcbd03 100644
--- a/src/nvim/mbyte.h
+++ b/src/nvim/mbyte.h
@@ -5,6 +5,7 @@
 #include <stdint.h>
 #include <string.h>
 
+#include "nvim/eval/typval.h"
 #include "nvim/func_attr.h"
 #include "nvim/mbyte_defs.h"
 #include "nvim/os/os_defs.h"  // For indirect
diff --git a/src/nvim/testdir/test_utf8.vim b/src/nvim/testdir/test_utf8.vim
index 9b010a5dbc..c5dfd85e5e 100644
--- a/src/nvim/testdir/test_utf8.vim
+++ b/src/nvim/testdir/test_utf8.vim
@@ -140,6 +140,41 @@ func Test_list2str_str2list_latin1()
   call assert_equal(s, sres)
 endfunc
 
+func Test_setcellwidths()
+  call setcellwidths([
+        \ [0x1330, 0x1330, 2],
+        \ [0x1337, 0x1339, 2],
+        \ [9999, 10000, 1],
+        \])
+  " Characters inside a range get its width, the ones outside are expected to be 1 wide.
+  call assert_equal(2, strwidth("\u1330"))
+  call assert_equal(1, strwidth("\u1336"))
+  call assert_equal(2, strwidth("\u1337"))
+  call assert_equal(2, strwidth("\u1339"))
+  call assert_equal(1, strwidth("\u133a"))
+  " An empty list is accepted; it clears the table.
+  call setcellwidths([])
+  " The argument must be a List.
+  call assert_fails('call setcellwidths(1)', 'E714:')
+  " Every item must itself be a List.
+  call assert_fails('call setcellwidths([1, 2, 0])', 'E1109:')
+  " Every item must hold exactly three Numbers.
+  call assert_fails('call setcellwidths([[0x101]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 1, 4]])', 'E1110:')
+  call assert_fails('call setcellwidths([["a"]])', 'E1110:')
+  " The end of a range must not be below its start.
+  call assert_fails('call setcellwidths([[0x102, 0x101, 1]])', 'E1111:')
+  " The cell width must be 1 or 2.
+  call assert_fails('call setcellwidths([[0x101, 0x102, 0]])', 'E1112:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 3]])', 'E1112:')
+  " Ranges must not overlap, not even in a single character.
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x115, 0x116, 2]])', 'E1113:')
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x122, 0x123, 2]])', 'E1113:')
+  " Characters below 0x100 are rejected.
+  call assert_fails('call setcellwidths([[0x33, 0x44, 2]])', 'E1114:')
+endfunc
+
 func Test_print_overlong()
   " Text with more composing characters than MB_MAXBYTES.
   new