Merge remote-tracking branch 'upstream/master' into userreg

author: Josh Rahm <joshuarahm@gmail.com> 2023-01-25 18:23:01 +0000
committer: Josh Rahm <joshuarahm@gmail.com> 2023-01-25 18:23:01 +0000
commit: 142d9041391780ac15b89886a54015fdc5c73995 (patch)
tree: 0f6b5cac1a60810a03f52826c9e67c9e2780b033 /src/nvim/mbyte.c
parent: ad86b5db74922285699ab2a1dbb2ff20e6268a33 (diff)
parent: 3c48d3c83fc21dbc0841f9210f04bdb073d73cd1 (diff)
download: rneovim-142d9041391780ac15b89886a54015fdc5c73995.tar.gz
rneovim-142d9041391780ac15b89886a54015fdc5c73995.tar.bz2
rneovim-142d9041391780ac15b89886a54015fdc5c73995.zip
1 files changed, 188 insertions, 177 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 33d652a51f..8b50ba719a 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -25,36 +25,51 @@
 /// Vim scripts may contain an ":scriptencoding" command. This has an effect
 /// for some commands, like ":menutrans".
 
-#include <inttypes.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <iconv.h>
 #include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <wchar.h>
 #include <wctype.h>
 
-#include "nvim/ascii.h"
-#include "nvim/vim.h"
-#ifdef HAVE_LOCALE_H
-# include <locale.h>
-#endif
+#include "auto/config.h"
 #include "nvim/arabic.h"
+#include "nvim/ascii.h"
+#include "nvim/buffer_defs.h"
 #include "nvim/charset.h"
 #include "nvim/cursor.h"
 #include "nvim/drawscreen.h"
-#include "nvim/eval.h"
-#include "nvim/fileio.h"
-#include "nvim/func_attr.h"
+#include "nvim/eval/typval.h"
+#include "nvim/eval/typval_defs.h"
 #include "nvim/getchar.h"
+#include "nvim/gettext.h"
+#include "nvim/globals.h"
+#include "nvim/grid_defs.h"
 #include "nvim/iconv.h"
+#include "nvim/keycodes.h"
+#include "nvim/macros.h"
 #include "nvim/mark.h"
 #include "nvim/mbyte.h"
+#include "nvim/mbyte_defs.h"
 #include "nvim/memline.h"
 #include "nvim/memory.h"
 #include "nvim/message.h"
+#include "nvim/option_defs.h"
 #include "nvim/os/os.h"
-#include "nvim/path.h"
+#include "nvim/os/os_defs.h"
+#include "nvim/pos.h"
 #include "nvim/screen.h"
-#include "nvim/spell.h"
 #include "nvim/strings.h"
+#include "nvim/types.h"
+#include "nvim/vim.h"
+
+#ifdef HAVE_LOCALE_H
+# include <locale.h>
+#endif
 
 typedef struct {
   int rangeStart;
@@ -68,11 +83,12 @@ struct interval {
   long last;
 };
 
+// uncrustify:off
 #ifdef INCLUDE_GENERATED_DECLARATIONS
 # include "mbyte.c.generated.h"
-
 # include "unicode_tables.generated.h"
 #endif
+// uncrustify:on
 
 static char e_list_item_nr_is_not_list[]
   = N_("E1109: List item %d is not a List");
@@ -84,8 +100,8 @@ static char e_list_item_nr_cell_width_invalid[]
   = N_("E1112: List item %d cell width invalid");
 static char e_overlapping_ranges_for_nr[]
   = N_("E1113: Overlapping ranges for 0x%lx");
-static char e_only_values_of_0x100_and_higher_supported[]
-  = N_("E1114: Only values of 0x100 and higher supported");
+static char e_only_values_of_0x80_and_higher_supported[]
+  = N_("E1114: Only values of 0x80 and higher supported");
 
 // To speed up BYTELEN(); keep a lookup table to quickly get the length in
 // bytes of a UTF-8 character from the first byte of a UTF-8 string.  Bytes
@@ -136,8 +152,7 @@ const uint8_t utf8len_tab_zero[] = {
 // "iso-8859-n" is handled by enc_canonize() directly.
 static struct
 {   const char *name;   int prop;              int codepage; }
-enc_canon_table[] =
-{
+enc_canon_table[] = {
 #define IDX_LATIN_1     0
   { "latin1",          ENC_8BIT + ENC_LATIN1,  1252 },
 #define IDX_ISO_2       1
@@ -270,8 +285,7 @@ enc_canon_table[] =
 // Aliases for encoding names.
 static struct
 {   const char *name; int canon; }
-enc_alias_table[] =
-{
+enc_alias_table[] = {
   { "ansi",            IDX_LATIN_1 },
   { "iso-8859-1",      IDX_LATIN_1 },
   { "latin2",          IDX_ISO_2 },
@@ -353,15 +367,15 @@ static int enc_canon_search(const char *name)
 
 // Find canonical encoding "name" in the list and return its properties.
 // Returns 0 if not found.
-int enc_canon_props(const char_u *name)
+int enc_canon_props(const char *name)
   FUNC_ATTR_PURE
 {
   int i = enc_canon_search((char *)name);
   if (i >= 0) {
     return enc_canon_table[i].prop;
-  } else if (STRNCMP(name, "2byte-", 6) == 0) {
+  } else if (strncmp(name, "2byte-", 6) == 0) {
     return ENC_DBCS;
-  } else if (STRNCMP(name, "8bit-", 5) == 0 || STRNCMP(name, "iso-8859-", 9) == 0) {
+  } else if (strncmp(name, "8bit-", 5) == 0 || strncmp(name, "iso-8859-", 9) == 0) {
     return ENC_8BIT;
   }
   return 0;
@@ -381,10 +395,10 @@ int bomb_size(void)
     if (*curbuf->b_p_fenc == NUL
         || strcmp(curbuf->b_p_fenc, "utf-8") == 0) {
       n = 3;
-    } else if (STRNCMP(curbuf->b_p_fenc, "ucs-2", 5) == 0
-               || STRNCMP(curbuf->b_p_fenc, "utf-16", 6) == 0) {
+    } else if (strncmp(curbuf->b_p_fenc, "ucs-2", 5) == 0
+               || strncmp(curbuf->b_p_fenc, "utf-16", 6) == 0) {
       n = 2;
-    } else if (STRNCMP(curbuf->b_p_fenc, "ucs-4", 5) == 0) {
+    } else if (strncmp(curbuf->b_p_fenc, "ucs-4", 5) == 0) {
       n = 4;
     }
   }
@@ -392,9 +406,9 @@ int bomb_size(void)
 }
 
 // Remove all BOM from "s" by moving remaining text.
-void remove_bom(char_u *s)
+void remove_bom(char *s)
 {
-  char *p = (char *)s;
+  char *p = s;
 
   while ((p = strchr(p, 0xef)) != NULL) {
     if ((uint8_t)p[1] == 0xbb && (uint8_t)p[2] == 0xbf) {
@@ -410,25 +424,25 @@ void remove_bom(char_u *s)
 // 1 for punctuation
 // 2 for an (ASCII) word character
 // >2 for other word characters
-int mb_get_class(const char_u *p)
+int mb_get_class(const char *p)
   FUNC_ATTR_PURE
 {
   return mb_get_class_tab(p, curbuf->b_chartab);
 }
 
-int mb_get_class_tab(const char_u *p, const uint64_t *const chartab)
+int mb_get_class_tab(const char *p, const uint64_t *const chartab)
   FUNC_ATTR_PURE
 {
-  if (MB_BYTE2LEN(p[0]) == 1) {
+  if (MB_BYTE2LEN((uint8_t)p[0]) == 1) {
     if (p[0] == NUL || ascii_iswhite(p[0])) {
       return 0;
     }
-    if (vim_iswordc_tab(p[0], chartab)) {
+    if (vim_iswordc_tab((uint8_t)p[0], chartab)) {
       return 2;
     }
     return 1;
   }
-  return utf_class_tab(utf_ptr2char((char *)p), chartab);
+  return utf_class_tab(utf_ptr2char(p), chartab);
 }
 
 // Return true if "c" is in "table".
@@ -468,12 +482,16 @@ static bool intable(const struct interval *table, size_t n_items, int c)
 ///       gen_unicode_tables.lua, which must be manually invoked as needed.
 int utf_char2cells(int c)
 {
-  if (c >= 0x100) {
+  // Use the value from setcellwidths() at 0x80 and higher, unless the
+  // character is not printable.
+  if (c >= 0x80 && vim_isprintc(c)) {
     int n = cw_value(c);
     if (n != 0) {
       return n;
     }
+  }
 
+  if (c >= 0x100) {
     if (!utf_printable(c)) {
       return 6;                 // unprintable, displays <xxxx>
     }
@@ -520,13 +538,13 @@ int utf_ptr2cells(const char *p)
 
 /// Like utf_ptr2cells(), but limit string length to "size".
 /// For an empty string or truncated character returns 1.
-int utf_ptr2cells_len(const char_u *p, int size)
+int utf_ptr2cells_len(const char *p, int size)
 {
   int c;
 
   // Need to convert to a wide character.
-  if (size > 0 && *p >= 0x80) {
-    if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) {
+  if (size > 0 && (uint8_t)(*p) >= 0x80) {
+    if (utf_ptr2len_len(p, size) < utf8len_tab[(uint8_t)(*p)]) {
       return 1;        // truncated
     }
     c = utf_ptr2char((char *)p);
@@ -552,8 +570,8 @@ size_t mb_string2cells(const char *str)
 {
   size_t clen = 0;
 
-  for (const char_u *p = (char_u *)str; *p != NUL; p += utfc_ptr2len((char *)p)) {
-    clen += (size_t)utf_ptr2cells((char *)p);
+  for (const char *p = str; *p != NUL; p += utfc_ptr2len(p)) {
+    clen += (size_t)utf_ptr2cells(p);
   }
 
   return clen;
@@ -570,9 +588,9 @@ size_t mb_string2cells_len(const char *str, size_t size)
 {
   size_t clen = 0;
 
-  for (const char_u *p = (char_u *)str; *p != NUL && p < (char_u *)str + size;
-       p += utfc_ptr2len_len((char *)p, (int)size + (int)(p - (char_u *)str))) {
-    clen += (size_t)utf_ptr2cells((char *)p);
+  for (const char *p = str; *p != NUL && p < str + size;
+       p += utfc_ptr2len_len(p, (int)size + (int)(p - str))) {
+    clen += (size_t)utf_ptr2cells(p);
   }
 
   return clen;
@@ -585,7 +603,7 @@ size_t mb_string2cells_len(const char *str, size_t size)
 /// For an overlong sequence this may return zero.
 /// Does not include composing characters for obvious reasons.
 ///
-/// @param[in]  p  String to convert.
+/// @param[in]  p_in  String to convert.
 ///
 /// @return Unicode codepoint or byte value.
 int utf_ptr2char(const char *const p_in)
@@ -685,23 +703,23 @@ static int utf_safe_read_char_adv(const char_u **s, size_t *n)
 
 // Get character at **pp and advance *pp to the next character.
 // Note: composing characters are skipped!
-int mb_ptr2char_adv(const char_u **const pp)
+int mb_ptr2char_adv(const char **const pp)
 {
   int c;
 
-  c = utf_ptr2char((char *)(*pp));
-  *pp += utfc_ptr2len((char *)(*pp));
+  c = utf_ptr2char(*pp);
+  *pp += utfc_ptr2len(*pp);
   return c;
 }
 
 // Get character at **pp and advance *pp to the next character.
 // Note: composing characters are returned as separate characters.
-int mb_cptr2char_adv(const char_u **pp)
+int mb_cptr2char_adv(const char **pp)
 {
   int c;
 
-  c = utf_ptr2char((char *)(*pp));
-  *pp += utf_ptr2len((char *)(*pp));
+  c = utf_ptr2char(*pp);
+  *pp += utf_ptr2len(*pp);
   return c;
 }
 
@@ -730,26 +748,25 @@ bool utf_composinglike(const char *p1, const char *p2)
 ///                   space at least for #MAX_MCO + 1 elements.
 ///
 /// @return leading character.
-int utfc_ptr2char(const char *p_in, int *pcc)
+int utfc_ptr2char(const char *p, int *pcc)
 {
-  uint8_t *p = (uint8_t *)p_in;
   int i = 0;
 
-  int c = utf_ptr2char((char *)p);
-  int len = utf_ptr2len((char *)p);
+  int c = utf_ptr2char(p);
+  int len = utf_ptr2len(p);
 
   // Only accept a composing char when the first char isn't illegal.
-  if ((len > 1 || *p < 0x80)
-      && p[len] >= 0x80
-      && utf_composinglike((char *)p, (char *)p + len)) {
-    int cc = utf_ptr2char((char *)p + len);
+  if ((len > 1 || (uint8_t)(*p) < 0x80)
+      && (uint8_t)p[len] >= 0x80
+      && utf_composinglike(p, p + len)) {
+    int cc = utf_ptr2char(p + len);
     for (;;) {
       pcc[i++] = cc;
       if (i == MAX_MCO) {
         break;
       }
-      len += utf_ptr2len((char *)p + len);
-      if (p[len] < 0x80 || !utf_iscomposing(cc = utf_ptr2char((char *)p + len))) {
+      len += utf_ptr2len(p + len);
+      if ((uint8_t)p[len] < 0x80 || !utf_iscomposing(cc = utf_ptr2char(p + len))) {
         break;
       }
     }
@@ -766,7 +783,7 @@ int utfc_ptr2char(const char *p_in, int *pcc)
 // composing characters.  Use no more than p[maxlen].
 //
 // @param [out] pcc: composing chars, last one is 0
-int utfc_ptr2char_len(const char_u *p, int *pcc, int maxlen)
+int utfc_ptr2char_len(const char *p, int *pcc, int maxlen)
 {
   assert(maxlen > 0);
 
@@ -775,15 +792,15 @@ int utfc_ptr2char_len(const char_u *p, int *pcc, int maxlen)
   int len = utf_ptr2len_len(p, maxlen);
   // Is it safe to use utf_ptr2char()?
   bool safe = len > 1 && len <= maxlen;
-  int c = safe ? utf_ptr2char((char *)p) : *p;
+  int c = safe ? utf_ptr2char(p) : (uint8_t)(*p);
 
   // Only accept a composing char when the first char isn't illegal.
-  if ((safe || c < 0x80) && len < maxlen && p[len] >= 0x80) {
+  if ((safe || c < 0x80) && len < maxlen && (uint8_t)p[len] >= 0x80) {
     for (; i < MAX_MCO; i++) {
       int len_cc = utf_ptr2len_len(p + len, maxlen - len);
       safe = len_cc > 1 && len_cc <= maxlen - len;
-      if (!safe || (pcc[i] = utf_ptr2char((char *)p + len)) < 0x80
-          || !(i == 0 ? utf_composinglike((char *)p, (char *)p + len) : utf_iscomposing(pcc[i]))) {
+      if (!safe || (pcc[i] = utf_ptr2char(p + len)) < 0x80
+          || !(i == 0 ? utf_composinglike(p, p + len) : utf_iscomposing(pcc[i]))) {
         break;
       }
       len += len_cc;
@@ -835,13 +852,13 @@ int utf_byte2len(int b)
 // Returns 1 for an illegal byte sequence (also in incomplete byte seq.).
 // Returns number > "size" for an incomplete byte sequence.
 // Never returns zero.
-int utf_ptr2len_len(const char_u *p, int size)
+int utf_ptr2len_len(const char *p, int size)
 {
   int len;
   int i;
   int m;
 
-  len = utf8len_tab[*p];
+  len = utf8len_tab[(uint8_t)(*p)];
   if (len == 1) {
     return 1;           // NUL, ascii or illegal lead byte
   }
@@ -861,21 +878,20 @@ int utf_ptr2len_len(const char_u *p, int size)
 /// Return the number of bytes occupied by a UTF-8 character in a string.
 /// This includes following composing characters.
 /// Returns zero for NUL.
-int utfc_ptr2len(const char *const p_in)
+int utfc_ptr2len(const char *const p)
   FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 {
-  uint8_t *p = (uint8_t *)p_in;
-  uint8_t b0 = *p;
+  uint8_t b0 = (uint8_t)(*p);
 
   if (b0 == NUL) {
     return 0;
   }
-  if (b0 < 0x80 && p[1] < 0x80) {  // be quick for ASCII
+  if (b0 < 0x80 && (uint8_t)p[1] < 0x80) {  // be quick for ASCII
     return 1;
   }
 
   // Skip over first UTF-8 char, stopping at a NUL byte.
-  int len = utf_ptr2len((char *)p);
+  int len = utf_ptr2len(p);
 
   // Check for illegal byte.
   if (len == 1 && b0 >= 0x80) {
@@ -886,13 +902,13 @@ int utfc_ptr2len(const char *const p_in)
   // skip all of them (otherwise the cursor would get stuck).
   int prevlen = 0;
   for (;;) {
-    if (p[len] < 0x80 || !utf_composinglike((char *)p + prevlen, (char *)p + len)) {
+    if ((uint8_t)p[len] < 0x80 || !utf_composinglike(p + prevlen, p + len)) {
       return len;
     }
 
     // Skip over composing char.
     prevlen = len;
-    len += utf_ptr2len((char *)p + len);
+    len += utf_ptr2len(p + len);
   }
 }
 
@@ -913,7 +929,7 @@ int utfc_ptr2len_len(const char *p, int size)
   }
 
   // Skip over first UTF-8 char, stopping at a NUL byte.
-  len = utf_ptr2len_len((char_u *)p, size);
+  len = utf_ptr2len_len(p, size);
 
   // Check for illegal byte and incomplete byte sequence.
   if ((len == 1 && (uint8_t)p[0] >= 0x80) || len > size) {
@@ -932,7 +948,7 @@ int utfc_ptr2len_len(const char *p, int size)
 
     // Next character length should not go beyond size to ensure that
     // utf_composinglike(...) does not read beyond size.
-    len_next_char = utf_ptr2len_len((char_u *)p + len, size - len);
+    len_next_char = utf_ptr2len_len(p + len, size - len);
     if (len_next_char > size - len) {
       break;
     }
@@ -1024,8 +1040,7 @@ bool utf_printable(int c)
 {
   // Sorted list of non-overlapping intervals.
   // 0xd800-0xdfff is reserved for UTF-16, actually illegal.
-  static struct interval nonprint[] =
-  {
+  static struct interval nonprint[] = {
     { 0x070f, 0x070f }, { 0x180b, 0x180e }, { 0x200b, 0x200f }, { 0x202a, 0x202e },
     { 0x2060, 0x206f }, { 0xd800, 0xdfff }, { 0xfeff, 0xfeff }, { 0xfff9, 0xfffb },
     { 0xfffe, 0xffff }
@@ -1189,9 +1204,8 @@ static int utf_convert(int a, const convertStruct *const table, size_t n_items)
       && a <= table[start].rangeEnd
       && (a - table[start].rangeStart) % table[start].step == 0) {
     return a + table[start].offset;
-  } else {
-    return a;
   }
+  return a;
 }
 
 // Return the folded-case equivalent of "a", which is a UCS-4 character.  Uses
@@ -1219,12 +1233,9 @@ int mb_toupper(int a)
     return TOUPPER_ASC(a);
   }
 
-#if defined(__STDC_ISO_10646__)
-  // If towupper() is available and handles Unicode, use it.
   if (!(cmp_flags & CMP_INTERNAL)) {
     return (int)towupper((wint_t)a);
   }
-#endif
 
   // For characters below 128 use locale sensitive toupper().
   if (a < 128) {
@@ -1250,12 +1261,9 @@ int mb_tolower(int a)
     return TOLOWER_ASC(a);
   }
 
-#if defined(__STDC_ISO_10646__)
-  // If towlower() is available and handles Unicode, use it.
   if (!(cmp_flags & CMP_INTERNAL)) {
     return (int)towlower((wint_t)a);
   }
-#endif
 
   // For characters below 128 use locale sensitive tolower().
   if (a < 128) {
@@ -1452,7 +1460,7 @@ int utf16_to_utf8(const wchar_t *utf16, int utf16len, char **utf8)
 /// @param len maximum length (an earlier NUL terminates)
 /// @param[out] codepoints incremented with UTF-32 code point size
 /// @param[out] codeunits incremented with UTF-16 code unit size
-void mb_utflen(const char_u *s, size_t len, size_t *codepoints, size_t *codeunits)
+void mb_utflen(const char *s, size_t len, size_t *codepoints, size_t *codeunits)
   FUNC_ATTR_NONNULL_ALL
 {
   size_t count = 0, extra = 0;
@@ -1461,7 +1469,7 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints, size_t *codeunit
     clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i));
     // NB: gets the byte value of invalid sequence bytes.
     // we only care whether the char fits in the BMP or not
-    int c = (clen > 1) ? utf_ptr2char((char *)s + i) : s[i];
+    int c = (clen > 1) ? utf_ptr2char(s + i) : (uint8_t)s[i];
     count++;
     if (c > 0xFFFF) {
       extra++;
@@ -1471,7 +1479,7 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints, size_t *codeunit
   *codeunits += count + extra;
 }
 
-ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len, size_t index, bool use_utf16_units)
+ssize_t mb_utf_index_to_bytes(const char *s, size_t len, size_t index, bool use_utf16_units)
   FUNC_ATTR_NONNULL_ALL
 {
   size_t count = 0;
@@ -1483,7 +1491,7 @@ ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len, size_t index, bool us
     clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i));
     // NB: gets the byte value of invalid sequence bytes.
     // we only care whether the char fits in the BMP or not
-    int c = (clen > 1) ? utf_ptr2char((char *)s + i) : s[i];
+    int c = (clen > 1) ? utf_ptr2char(s + i) : (uint8_t)s[i];
     count++;
     if (use_utf16_units && c > 0xFFFF) {
       count++;
@@ -1530,14 +1538,14 @@ void show_utf8(void)
 {
   int len;
   int rlen = 0;
-  char_u *line;
+  char *line;
   int clen;
   int i;
 
   // Get the byte length of the char under the cursor, including composing
   // characters.
-  line = (char_u *)get_cursor_pos_ptr();
-  len = utfc_ptr2len((char *)line);
+  line = get_cursor_pos_ptr();
+  len = utfc_ptr2len(line);
   if (len == 0) {
     msg("NUL");
     return;
@@ -1551,10 +1559,10 @@ void show_utf8(void)
         STRCPY(IObuff + rlen, "+ ");
         rlen += 2;
       }
-      clen = utf_ptr2len((char *)line + i);
+      clen = utf_ptr2len(line + i);
     }
-    sprintf((char *)IObuff + rlen, "%02x ",
-            (line[i] == NL) ? NUL : line[i]);          // NUL is stored as NL
+    sprintf(IObuff + rlen, "%02x ",  // NOLINT(runtime/printf)
+            (line[i] == NL) ? NUL : (uint8_t)line[i]);          // NUL is stored as NL
     clen--;
     rlen += (int)strlen(IObuff + rlen);
     if (rlen > IOSIZE - 20) {
@@ -1562,7 +1570,7 @@ void show_utf8(void)
     }
   }
 
-  msg((char *)IObuff);
+  msg(IObuff);
 }
 
 /// Return offset from "p" to the start of a character, including composing characters.
@@ -1780,11 +1788,12 @@ void mb_copy_char(const char **const fp, char **const tp)
   *fp += l;
 }
 
-/// Return the offset from "p" to the first byte of a character.  When "p" is
+/// Return the offset from "p_in" to the first byte of a character.  When "p_in" is
 /// at the start of a character 0 is returned, otherwise the offset to the next
 /// character.  Can start anywhere in a stream of bytes.
-int mb_off_next(const char_u *base, const char_u *p)
+int mb_off_next(const char *base, const char *p_in)
 {
+  const uint8_t *p = (uint8_t *)p_in;
   int i;
   int j;
 
@@ -1796,7 +1805,7 @@ int mb_off_next(const char_u *base, const char_u *p)
   for (i = 0; (p[i] & 0xc0) == 0x80; i++) {}
   if (i > 0) {
     // Check for illegal sequence.
-    for (j = 0; p - j > base; j++) {
+    for (j = 0; p - j > (uint8_t *)base; j++) {
       if ((p[-j] & 0xc0) != 0x80) {
         break;
       }
@@ -1876,13 +1885,13 @@ int utf_cp_head_off(const char_u *base, const char_u *p)
 void utf_find_illegal(void)
 {
   pos_T pos = curwin->w_cursor;
-  char_u *p;
+  char *p;
   int len;
   vimconv_T vimconv;
-  char_u *tofree = NULL;
+  char *tofree = NULL;
 
   vimconv.vc_type = CONV_NONE;
-  if (enc_canon_props((char_u *)curbuf->b_p_fenc) & ENC_8BIT) {
+  if (enc_canon_props(curbuf->b_p_fenc) & ENC_8BIT) {
     // 'encoding' is "utf-8" but we are editing a 8-bit encoded file,
     // possibly a utf-8 file with illegal bytes.  Setup for conversion
     // from utf-8 to 'fileencoding'.
@@ -1891,10 +1900,10 @@ void utf_find_illegal(void)
 
   curwin->w_cursor.coladd = 0;
   for (;;) {
-    p = (char_u *)get_cursor_pos_ptr();
+    p = get_cursor_pos_ptr();
     if (vimconv.vc_type != CONV_NONE) {
       xfree(tofree);
-      tofree = (char_u *)string_convert(&vimconv, (char *)p, NULL);
+      tofree = string_convert(&vimconv, p, NULL);
       if (tofree == NULL) {
         break;
       }
@@ -1904,17 +1913,16 @@ void utf_find_illegal(void)
     while (*p != NUL) {
       // Illegal means that there are not enough trail bytes (checked by
       // utf_ptr2len()) or too many of them (overlong sequence).
-      len = utf_ptr2len((char *)p);
-      if (*p >= 0x80 && (len == 1
-                         || utf_char2len(utf_ptr2char((char *)p)) != len)) {
+      len = utf_ptr2len(p);
+      if ((uint8_t)(*p) >= 0x80 && (len == 1 || utf_char2len(utf_ptr2char(p)) != len)) {
         if (vimconv.vc_type == CONV_NONE) {
-          curwin->w_cursor.col += (colnr_T)(p - (char_u *)get_cursor_pos_ptr());
+          curwin->w_cursor.col += (colnr_T)(p - get_cursor_pos_ptr());
         } else {
           int l;
 
           len = (int)(p - tofree);
-          for (p = (char_u *)get_cursor_pos_ptr(); *p != NUL && len-- > 0; p += l) {
-            l = utf_ptr2len((char *)p);
+          for (p = get_cursor_pos_ptr(); *p != NUL && len-- > 0; p += l) {
+            l = utf_ptr2len(p);
             curwin->w_cursor.col += l;
           }
         }
@@ -1981,7 +1989,7 @@ void mb_check_adjust_col(void *win_)
   // Column 0 is always valid.
   if (oldcol != 0) {
     char *p = ml_get_buf(win->w_buffer, win->w_cursor.lnum, false);
-    colnr_T len = (colnr_T)STRLEN(p);
+    colnr_T len = (colnr_T)strlen(p);
 
     // Empty line or invalid column?
     if (len == 0 || oldcol < 0) {
@@ -2008,7 +2016,7 @@ void mb_check_adjust_col(void *win_)
 /// @param line  start of the string
 ///
 /// @return      a pointer to the character before "*p", if there is one.
-char_u *mb_prevptr(char_u *line, char_u *p)
+char *mb_prevptr(char *line, char *p)
 {
   if (p > line) {
     MB_PTR_BACK(line, p);
@@ -2018,9 +2026,9 @@ char_u *mb_prevptr(char_u *line, char_u *p)
 
 /// Return the character length of "str".  Each multi-byte character (with
 /// following composing characters) counts as one.
-int mb_charlen(const char_u *str)
+int mb_charlen(const char *str)
 {
-  const char_u *p = str;
+  const char *p = str;
   int count;
 
   if (p == NULL) {
@@ -2028,20 +2036,20 @@ int mb_charlen(const char_u *str)
   }
 
   for (count = 0; *p != NUL; count++) {
-    p += utfc_ptr2len((char *)p);
+    p += utfc_ptr2len(p);
   }
 
   return count;
 }
 
 /// Like mb_charlen() but for a string with specified length.
-int mb_charlen_len(const char_u *str, int len)
+int mb_charlen_len(const char *str, int len)
 {
-  const char_u *p = str;
+  const char *p = str;
   int count;
 
   for (count = 0; *p != NUL && p < str + len; count++) {
-    p += utfc_ptr2len((char *)p);
+    p += utfc_ptr2len(p);
   }
 
   return count;
@@ -2097,10 +2105,10 @@ const char *mb_unescape(const char **const pp)
 /// Skip the Vim specific head of a 'encoding' name.
 char *enc_skip(char *p)
 {
-  if (STRNCMP(p, "2byte-", 6) == 0) {
+  if (strncmp(p, "2byte-", 6) == 0) {
     return p + 6;
   }
-  if (STRNCMP(p, "8bit-", 5) == 0) {
+  if (strncmp(p, "8bit-", 5) == 0) {
     return p + 5;
   }
   return p;
@@ -2121,7 +2129,7 @@ char *enc_canonize(char *enc)
   }
 
   // copy "enc" to allocated memory, with room for two '-'
-  char *r = xmalloc(STRLEN(enc) + 3);
+  char *r = xmalloc(strlen(enc) + 3);
   // Make it all lower case and replace '_' with '-'.
   p = r;
   for (s = enc; *s != NUL; s++) {
@@ -2137,24 +2145,24 @@ char *enc_canonize(char *enc)
   p = enc_skip(r);
 
   // Change "microsoft-cp" to "cp".  Used in some spell files.
-  if (STRNCMP(p, "microsoft-cp", 12) == 0) {
+  if (strncmp(p, "microsoft-cp", 12) == 0) {
     STRMOVE(p, p + 10);
   }
 
   // "iso8859" -> "iso-8859"
-  if (STRNCMP(p, "iso8859", 7) == 0) {
+  if (strncmp(p, "iso8859", 7) == 0) {
     STRMOVE(p + 4, p + 3);
     p[3] = '-';
   }
 
   // "iso-8859n" -> "iso-8859-n"
-  if (STRNCMP(p, "iso-8859", 8) == 0 && p[8] != '-') {
+  if (strncmp(p, "iso-8859", 8) == 0 && p[8] != '-') {
     STRMOVE(p + 9, p + 8);
     p[8] = '-';
   }
 
   // "latin-N" -> "latinN"
-  if (STRNCMP(p, "latin-", 6) == 0) {
+  if (strncmp(p, "latin-", 6) == 0) {
     STRMOVE(p + 5, p + 6);
   }
 
@@ -2192,7 +2200,7 @@ static int enc_alias_search(const char *name)
 
 // Get the canonicalized encoding of the current locale.
 // Returns an allocated string when successful, NULL when not.
-char_u *enc_locale(void)
+char *enc_locale(void)
 {
   int i;
   char buf[50];
@@ -2228,7 +2236,7 @@ char_u *enc_locale(void)
   const char *p = vim_strchr(s, '.');
   if (p != NULL) {
     if (p > s + 2 && !STRNICMP(p + 1, "EUC", 3)
-        && !isalnum((int)p[4]) && p[4] != '-' && p[-3] == '_') {
+        && !isalnum((uint8_t)p[4]) && p[4] != '-' && p[-3] == '_') {
       // Copy "XY.EUC" to "euc-XY" to buf[10].
       memmove(buf, "euc-", 4);
       buf[4] = (char)(ASCII_ISALNUM(p[-2]) ? TOLOWER_ASC(p[-2]) : 0);
@@ -2252,20 +2260,18 @@ enc_locale_copy_enc:
     buf[i] = NUL;
   }
 
-  return (char_u *)enc_canonize(buf);
+  return enc_canonize(buf);
 }
 
-#if defined(HAVE_ICONV)
-
 // Call iconv_open() with a check if iconv() works properly (there are broken
 // versions).
 // Returns (void *)-1 if failed.
 // (should return iconv_t, but that causes problems with prototypes).
-void *my_iconv_open(char_u *to, char_u *from)
+void *my_iconv_open(char *to, char *from)
 {
   iconv_t fd;
-# define ICONV_TESTLEN 400
-  char_u tobuf[ICONV_TESTLEN];
+#define ICONV_TESTLEN 400
+  char tobuf[ICONV_TESTLEN];
   char *p;
   size_t tolen;
   static WorkingStatus iconv_working = kUnknown;
@@ -2273,7 +2279,7 @@ void *my_iconv_open(char_u *to, char_u *from)
   if (iconv_working == kBroken) {
     return (void *)-1;          // detected a broken iconv() previously
   }
-  fd = iconv_open(enc_skip((char *)to), enc_skip((char *)from));
+  fd = iconv_open(enc_skip(to), enc_skip(from));
 
   if (fd != (iconv_t)-1 && iconv_working == kUnknown) {
     // Do a dummy iconv() call to check if it actually works.  There is a
@@ -2281,7 +2287,7 @@ void *my_iconv_open(char_u *to, char_u *from)
     // because it's wide-spread.  The symptoms are that after outputting
     // the initial shift state the "to" pointer is NULL and conversion
     // stops for no apparent reason after about 8160 characters.
-    p = (char *)tobuf;
+    p = tobuf;
     tolen = ICONV_TESTLEN;
     (void)iconv(fd, NULL, NULL, &p, &tolen);
     if (p == NULL) {
@@ -2301,8 +2307,8 @@ void *my_iconv_open(char_u *to, char_u *from)
 // sequence and set "*unconvlenp" to the length of it.
 // Returns the converted string in allocated memory.  NULL for an error.
 // If resultlenp is not NULL, sets it to the result length in bytes.
-static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen,
-                            size_t *unconvlenp, size_t *resultlenp)
+static char *iconv_string(const vimconv_T *const vcp, const char *str, size_t slen,
+                          size_t *unconvlenp, size_t *resultlenp)
 {
   const char *from;
   size_t fromlen;
@@ -2310,11 +2316,11 @@ static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen
   size_t tolen;
   size_t len = 0;
   size_t done = 0;
-  char_u *result = NULL;
-  char_u *p;
+  char *result = NULL;
+  char *p;
   int l;
 
-  from = (char *)str;
+  from = str;
   fromlen = slen;
   for (;;) {
     if (len == 0 || ICONV_ERRNO == ICONV_E2BIG) {
@@ -2329,7 +2335,7 @@ static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen
       result = p;
     }
 
-    to = (char *)result + done;
+    to = result + done;
     tolen = len - done - 2;
     // Avoid a warning for systems with a wrong iconv() prototype by
     // casting the second argument to void *.
@@ -2369,17 +2375,15 @@ static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen
       break;
     }
     // Not enough room or skipping illegal sequence.
-    done = (size_t)(to - (char *)result);
+    done = (size_t)(to - result);
   }
 
   if (resultlenp != NULL && result != NULL) {
-    *resultlenp = (size_t)(to - (char *)result);
+    *resultlenp = (size_t)(to - result);
   }
   return result;
 }
 
-#endif  // HAVE_ICONV
-
 /// Setup "vcp" for conversion from "from" to "to".
 /// The names must have been made canonical with enc_canonize().
 /// vcp->vc_type must have been initialized to CONV_NONE.
@@ -2404,11 +2408,9 @@ int convert_setup_ext(vimconv_T *vcp, char *from, bool from_unicode_is_utf8, cha
   int to_is_utf8;
 
   // Reset to no conversion.
-#ifdef HAVE_ICONV
   if (vcp->vc_type == CONV_ICONV && vcp->vc_fd != (iconv_t)-1) {
     iconv_close(vcp->vc_fd);
   }
-#endif
   *vcp = (vimconv_T)MBYTE_NONE_CONV;
 
   // No conversion when one of the names is empty or they are equal.
@@ -2417,8 +2419,8 @@ int convert_setup_ext(vimconv_T *vcp, char *from, bool from_unicode_is_utf8, cha
     return OK;
   }
 
-  from_prop = enc_canon_props((char_u *)from);
-  to_prop = enc_canon_props((char_u *)to);
+  from_prop = enc_canon_props(from);
+  to_prop = enc_canon_props(to);
   if (from_unicode_is_utf8) {
     from_is_utf8 = from_prop & ENC_UNICODE;
   } else {
@@ -2444,18 +2446,15 @@ int convert_setup_ext(vimconv_T *vcp, char *from, bool from_unicode_is_utf8, cha
   } else if (from_is_utf8 && (to_prop & ENC_LATIN9)) {
     // Internal utf-8 -> latin9 conversion.
     vcp->vc_type = CONV_TO_LATIN9;
-  }
-#ifdef HAVE_ICONV
-  else {  // NOLINT(readability/braces)
+  } else {
     // Use iconv() for conversion.
-    vcp->vc_fd = (iconv_t)my_iconv_open(to_is_utf8 ? (char_u *)"utf-8" : (char_u *)to,
-                                        from_is_utf8 ? (char_u *)"utf-8" : (char_u *)from);
+    vcp->vc_fd = (iconv_t)my_iconv_open(to_is_utf8 ? "utf-8" : to,
+                                        from_is_utf8 ? "utf-8" : from);
     if (vcp->vc_fd != (iconv_t)-1) {
       vcp->vc_type = CONV_ICONV;
       vcp->vc_factor = 4;       // could be longer too...
     }
   }
-#endif
   if (vcp->vc_type == CONV_NONE) {
     return FAIL;
   }
@@ -2470,14 +2469,13 @@ int convert_setup_ext(vimconv_T *vcp, char *from, bool from_unicode_is_utf8, cha
 /// When something goes wrong, NULL is returned and "*lenp" is unchanged.
 char *string_convert(const vimconv_T *const vcp, char *ptr, size_t *lenp)
 {
-  return (char *)string_convert_ext(vcp, (char_u *)ptr, lenp, NULL);
+  return string_convert_ext(vcp, ptr, lenp, NULL);
 }
 
 // Like string_convert(), but when "unconvlenp" is not NULL and there are is
 // an incomplete sequence at the end it is not converted and "*unconvlenp" is
 // set to the number of remaining bytes.
-char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp,
-                           size_t *unconvlenp)
+char *string_convert_ext(const vimconv_T *const vcp, char *ptr, size_t *lenp, size_t *unconvlenp)
 {
   char_u *retval = NULL;
   char_u *d;
@@ -2486,12 +2484,12 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
 
   size_t len;
   if (lenp == NULL) {
-    len = STRLEN(ptr);
+    len = strlen(ptr);
   } else {
     len = *lenp;
   }
   if (len == 0) {
-    return (char_u *)xstrdup("");
+    return xstrdup("");
   }
 
   switch (vcp->vc_type) {
@@ -2499,7 +2497,7 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
     retval = xmalloc(len * 2 + 1);
     d = retval;
     for (size_t i = 0; i < len; i++) {
-      c = ptr[i];
+      c = (uint8_t)ptr[i];
       if (c < 0x80) {
         *d++ = (char_u)c;
       } else {
@@ -2517,7 +2515,7 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
     retval = xmalloc(len * 3 + 1);
     d = retval;
     for (size_t i = 0; i < len; i++) {
-      c = ptr[i];
+      c = (uint8_t)ptr[i];
       switch (c) {
       case 0xa4:
         c = 0x20ac; break;                 // euro
@@ -2553,7 +2551,7 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
       if (l == 0) {
         *d++ = NUL;
       } else if (l == 1) {
-        uint8_t l_w = utf8len_tab_zero[ptr[i]];
+        uint8_t l_w = utf8len_tab_zero[(uint8_t)ptr[i]];
 
         if (l_w == 0) {
           // Illegal utf-8 byte cannot be converted
@@ -2565,9 +2563,9 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
           *unconvlenp = len - i;
           break;
         }
-        *d++ = ptr[i];
+        *d++ = (uint8_t)ptr[i];
       } else {
-        c = utf_ptr2char((char *)ptr + i);
+        c = utf_ptr2char(ptr + i);
         if (vcp->vc_type == CONV_TO_LATIN9) {
           switch (c) {
           case 0x20ac:
@@ -2619,14 +2617,12 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
     }
     break;
 
-#ifdef HAVE_ICONV
   case CONV_ICONV:  // conversion with vcp->vc_fd
-    retval = iconv_string(vcp, ptr, len, unconvlenp, lenp);
+    retval = (char_u *)iconv_string(vcp, ptr, len, unconvlenp, lenp);
     break;
-#endif
   }
 
-  return retval;
+  return (char *)retval;
 }
 
 /// Table set by setcellwidths().
@@ -2705,7 +2701,7 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
 
     if (li_tv->v_type != VAR_LIST || li_tv->vval.v_list == NULL) {
       semsg(_(e_list_item_nr_is_not_list), item);
-      xfree(ptrs);
+      xfree((void *)ptrs);
       return;
     }
 
@@ -2721,25 +2717,25 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
       }
       if (i == 0) {
         n1 = lili_tv->vval.v_number;
-        if (n1 < 0x100) {
-          emsg(_(e_only_values_of_0x100_and_higher_supported));
-          xfree(ptrs);
+        if (n1 < 0x80) {
+          emsg(_(e_only_values_of_0x80_and_higher_supported));
+          xfree((void *)ptrs);
           return;
         }
       } else if (i == 1 && lili_tv->vval.v_number < n1) {
         semsg(_(e_list_item_nr_range_invalid), item);
-        xfree(ptrs);
+        xfree((void *)ptrs);
         return;
       } else if (i == 2 && (lili_tv->vval.v_number < 1 || lili_tv->vval.v_number > 2)) {
         semsg(_(e_list_item_nr_cell_width_invalid), item);
-        xfree(ptrs);
+        xfree((void *)ptrs);
         return;
       }
     }
 
     if (i != 3) {
       semsg(_(e_list_item_nr_does_not_contain_3_numbers), item);
-      xfree(ptrs);
+      xfree((void *)ptrs);
       return;
     }
 
@@ -2758,7 +2754,7 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
     const varnumber_T n1 = TV_LIST_ITEM_TV(lili)->vval.v_number;
     if (item > 0 && n1 <= table[item - 1].last) {
       semsg(_(e_overlapping_ranges_for_nr), (long)n1);
-      xfree(ptrs);
+      xfree((void *)ptrs);
       xfree(table);
       return;
     }
@@ -2769,7 +2765,7 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
     table[item].width = (char)TV_LIST_ITEM_TV(lili)->vval.v_number;
   }
 
-  xfree(ptrs);
+  xfree((void *)ptrs);
 
   cw_interval_T *const cw_table_save = cw_table;
   const size_t cw_table_size_save = cw_table_size;
@@ -2791,11 +2787,26 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   redraw_all_later(UPD_NOT_VALID);
 }
 
+/// "getcellwidths()" function
+void f_getcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+  tv_list_alloc_ret(rettv, (ptrdiff_t)cw_table_size);
+
+  for (size_t i = 0; i < cw_table_size; i++) {
+    list_T *entry = tv_list_alloc(3);
+    tv_list_append_number(entry, (varnumber_T)cw_table[i].first);
+    tv_list_append_number(entry, (varnumber_T)cw_table[i].last);
+    tv_list_append_number(entry, (varnumber_T)cw_table[i].width);
+
+    tv_list_append_list(rettv->vval.v_list, entry);
+  }
+}
+
 void f_charclass(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
 {
   if (tv_check_for_string_arg(argvars, 0) == FAIL
       || argvars[0].vval.v_string == NULL) {
     return;
   }
-  rettv->vval.v_number = mb_get_class((const char_u *)argvars[0].vval.v_string);
+  rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string);
 }
author	Josh Rahm <joshuarahm@gmail.com>	2023-01-25 18:23:01 +0000
committer	Josh Rahm <joshuarahm@gmail.com>	2023-01-25 18:23:01 +0000
commit	142d9041391780ac15b89886a54015fdc5c73995 (patch)
tree	0f6b5cac1a60810a03f52826c9e67c9e2780b033 /src/nvim/mbyte.c
parent	ad86b5db74922285699ab2a1dbb2ff20e6268a33 (diff)
parent	3c48d3c83fc21dbc0841f9210f04bdb073d73cd1 (diff)
download	rneovim-142d9041391780ac15b89886a54015fdc5c73995.tar.gz rneovim-142d9041391780ac15b89886a54015fdc5c73995.tar.bz2 rneovim-142d9041391780ac15b89886a54015fdc5c73995.zip