From 63a956112aefc18de86197edc05a76eae0da024a Mon Sep 17 00:00:00 2001
From: Nicolas Hillegeer <nicolas@hillegeer.com>
Date: Thu, 29 May 2014 10:09:36 +0200
Subject: text/encoding: constify arguments

Most of these functions don't modify their strings, so let's make the
contract a bit clearer. In some cases I've also tried to get rid of C89-style
variable declarations at the start of functions while I was there.
---
 src/nvim/globals.h | 12 +++----
 src/nvim/mbyte.c   | 96 ++++++++++++++++++++++++++----------------------------
 2 files changed, 53 insertions(+), 55 deletions(-)

(limited to 'src')

diff --git a/src/nvim/globals.h b/src/nvim/globals.h
index 331d7ec5a4..903a02ca4e 100644
--- a/src/nvim/globals.h
+++ b/src/nvim/globals.h
@@ -654,21 +654,21 @@ EXTERN vimconv_T output_conv;                   /* type of output conversion */
  * The value is set in mb_init();
  */
 /* length of char in bytes, including following composing chars */
-EXTERN int (*mb_ptr2len)(char_u *p) INIT(= latin_ptr2len);
+EXTERN int (*mb_ptr2len)(const char_u *p) INIT(= latin_ptr2len);
 /* idem, with limit on string length */
-EXTERN int (*mb_ptr2len_len)(char_u *p, int size) INIT(= latin_ptr2len_len);
+EXTERN int (*mb_ptr2len_len)(const char_u *p, int size) INIT(= latin_ptr2len_len);
 /* byte length of char */
 EXTERN int (*mb_char2len)(int c) INIT(= latin_char2len);
 /* convert char to bytes, return the length */
 EXTERN int (*mb_char2bytes)(int c, char_u *buf) INIT(= latin_char2bytes);
-EXTERN int (*mb_ptr2cells)(char_u *p) INIT(= latin_ptr2cells);
-EXTERN int (*mb_ptr2cells_len)(char_u *p, int size) INIT(
+EXTERN int (*mb_ptr2cells)(const char_u *p) INIT(= latin_ptr2cells);
+EXTERN int (*mb_ptr2cells_len)(const char_u *p, int size) INIT(
       = latin_ptr2cells_len);
 EXTERN int (*mb_char2cells)(int c) INIT(= latin_char2cells);
 EXTERN int (*mb_off2cells)(unsigned off, unsigned max_off) INIT(
       = latin_off2cells);
-EXTERN int (*mb_ptr2char)(char_u *p) INIT(= latin_ptr2char);
-EXTERN int (*mb_head_off)(char_u *base, char_u *p) INIT(= latin_head_off);
+EXTERN int (*mb_ptr2char)(const char_u *p) INIT(= latin_ptr2char);
+EXTERN int (*mb_head_off)(const char_u *base, const char_u *p) INIT(= latin_head_off);
 
 # if defined(USE_ICONV) && defined(DYNAMIC_ICONV)
 /* Pointers to functions and variables to be loaded at runtime */
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index fd929cd0ae..8a669b0bc8 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -871,12 +871,12 @@ static int dbcs_char2bytes(int c, char_u *buf)
  * For UTF-8 this includes following composing characters.
  * Returns 0 when *p is NUL.
  */
-int latin_ptr2len(char_u *p)
+int latin_ptr2len(const char_u *p)
 {
   return MB_BYTE2LEN(*p);
 }
 
-static int dbcs_ptr2len(char_u *p)
+static int dbcs_ptr2len(const char_u *p)
 {
   int len;
 
@@ -893,14 +893,14 @@ static int dbcs_ptr2len(char_u *p)
  * Returns 0 for an empty string.
  * Returns 1 for an illegal char or an incomplete byte sequence.
  */
-int latin_ptr2len_len(char_u *p, int size)
+int latin_ptr2len_len(const char_u *p, int size)
 {
   if (size < 1 || *p == NUL)
     return 0;
   return 1;
 }
 
-static int dbcs_ptr2len_len(char_u *p, int size)
+static int dbcs_ptr2len_len(const char_u *p, int size)
 {
   int len;
 
@@ -1214,12 +1214,12 @@ int utf_char2cells(int c)
  * Return the number of display cells character at "*p" occupies.
  * This doesn't take care of unprintable characters, use ptr2cells() for that.
  */
-int latin_ptr2cells(char_u *p)
+int latin_ptr2cells(const char_u *p)
 {
   return 1;
 }
 
-int utf_ptr2cells(char_u *p)
+int utf_ptr2cells(const char_u *p)
 {
   int c;
 
@@ -1237,7 +1237,7 @@ int utf_ptr2cells(char_u *p)
   return 1;
 }
 
-int dbcs_ptr2cells(char_u *p)
+int dbcs_ptr2cells(const char_u *p)
 {
   /* Number of cells is equal to number of bytes, except for euc-jp when
    * the first byte is 0x8e. */
@@ -1251,12 +1251,12 @@ int dbcs_ptr2cells(char_u *p)
  * Like mb_ptr2cells(), but limit string length to "size".
  * For an empty string or truncated character returns 1.
  */
-int latin_ptr2cells_len(char_u *p, int size)
+int latin_ptr2cells_len(const char_u *p, int size)
 {
   return 1;
 }
 
-static int utf_ptr2cells_len(char_u *p, int size)
+static int utf_ptr2cells_len(const char_u *p, int size)
 {
   int c;
 
@@ -1276,7 +1276,7 @@ static int utf_ptr2cells_len(char_u *p, int size)
   return 1;
 }
 
-static int dbcs_ptr2cells_len(char_u *p, int size)
+static int dbcs_ptr2cells_len(const char_u *p, int size)
 {
   /* Number of cells is equal to number of bytes, except for euc-jp when
    * the first byte is 0x8e. */
@@ -1309,7 +1309,7 @@ static int dbcs_char2cells(int c)
  * Return the number of cells occupied by string "p".
  * Stop at a NUL character.  When "len" >= 0 stop at character "p[len]".
  */
-int mb_string2cells(char_u *p, int len)
+int mb_string2cells(const char_u *p, int len)
 {
   int i;
   int clen = 0;
@@ -1351,12 +1351,12 @@ int utf_off2cells(unsigned off, unsigned max_off)
  * mb_ptr2char() function pointer.
  * Convert a byte sequence into a character.
  */
-int latin_ptr2char(char_u *p)
+int latin_ptr2char(const char_u *p)
 {
   return *p;
 }
 
-static int dbcs_ptr2char(char_u *p)
+static int dbcs_ptr2char(const char_u *p)
 {
   if (MB_BYTE2LEN(*p) > 1 && p[1] != NUL)
     return (p[0] << 8) + p[1];
@@ -1369,7 +1369,7 @@ static int dbcs_ptr2char(char_u *p)
  * returned.
  * Does not include composing characters, of course.
  */
-int utf_ptr2char(char_u *p)
+int utf_ptr2char(const char_u *p)
 {
   int len;
 
@@ -1493,7 +1493,7 @@ int mb_cptr2char_adv(char_u **pp)
  * comes after "p1".  For Arabic sometimes "ab" is replaced with "c", which
  * behaves like a composing character.
  */
-int utf_composinglike(char_u *p1, char_u *p2)
+int utf_composinglike(const char_u *p1, const char_u *p2)
 {
   int c2;
 
@@ -1506,13 +1506,12 @@ int utf_composinglike(char_u *p1, char_u *p2)
 }
 
 /*
- * Convert a UTF-8 byte string to a wide character.  Also get up to MAX_MCO
+ * Convert a UTF-8 byte string to a wide character. Also get up to MAX_MCO
  * composing characters.
+ *
+ * @param [out] pcc: composing chars, last one is 0
  */
-int utfc_ptr2char(
-    char_u      *p,
-    int         *pcc        /* return: composing chars, last one is 0 */
-    )
+int utfc_ptr2char(const char_u *p, int *pcc)
 {
   int len;
   int c;
@@ -1546,12 +1545,10 @@ int utfc_ptr2char(
 /*
  * Convert a UTF-8 byte string to a wide character.  Also get up to MAX_MCO
  * composing characters.  Use no more than p[maxlen].
+ *
+ * @param [out] pcc: composing chars, last one is 0
  */
-int utfc_ptr2char_len(
-    char_u      *p,
-    int         *pcc,       /* return: composing chars, last one is 0 */
-    int maxlen
-    )
+int utfc_ptr2char_len(const char_u *p, int *pcc, int maxlen)
 {
   int len;
   int c;
@@ -1611,7 +1608,7 @@ int utfc_char2bytes(int off, char_u *buf)
  * Returns 0 for "".
  * Returns 1 for an illegal byte sequence.
  */
-int utf_ptr2len(char_u *p)
+int utf_ptr2len(const char_u *p)
 {
   int len;
   int i;
@@ -1643,7 +1640,7 @@ int utf_byte2len(int b)
  * Returns number > "size" for an incomplete byte sequence.
  * Never returns zero.
  */
-int utf_ptr2len_len(char_u *p, int size)
+int utf_ptr2len_len(const char_u *p, int size)
 {
   int len;
   int i;
@@ -1666,7 +1663,7 @@ int utf_ptr2len_len(char_u *p, int size)
  * Return the number of bytes the UTF-8 encoding of the character at "p" takes.
  * This includes following composing characters.
  */
-int utfc_ptr2len(char_u *p)
+int utfc_ptr2len(const char_u *p)
 {
   int len;
   int b0 = *p;
@@ -1705,7 +1702,7 @@ int utfc_ptr2len(char_u *p)
  * Returns 0 for an empty string.
  * Returns 1 for an illegal char or an incomplete byte sequence.
  */
-int utfc_ptr2len_len(char_u *p, int size)
+int utfc_ptr2len_len(const char_u *p, int size)
 {
   int len;
   int prevlen;
@@ -2886,25 +2883,26 @@ void show_utf8()
  * If "p" points to the NUL at the end of the string return 0.
  * Returns 0 when already at the first byte of a character.
  */
-int latin_head_off(char_u *base, char_u *p)
+int latin_head_off(const char_u *base, const char_u *p)
 {
   return 0;
 }
 
-int dbcs_head_off(char_u *base, char_u *p)
+int dbcs_head_off(const char_u *base, const char_u *p)
 {
-  char_u      *q;
-
   /* It can't be a trailing byte when not using DBCS, at the start of the
    * string or the previous byte can't start a double-byte. */
-  if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL)
+  if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL) {
     return 0;
+  }
 
   /* This is slow: need to start at the base and go forward until the
    * byte we are looking for.  Return 1 when we went past it, 0 otherwise. */
-  q = base;
-  while (q < p)
+  const char_u *q = base;
+  while (q < p) {
     q += dbcs_ptr2len(q);
+  }
+
   return (q == p) ? 0 : 1;
 }
 
@@ -2912,10 +2910,8 @@ int dbcs_head_off(char_u *base, char_u *p)
  * Special version of dbcs_head_off() that works for ScreenLines[], where
  * single-width DBCS_JPNU characters are stored separately.
  */
-int dbcs_screen_head_off(char_u *base, char_u *p)
+int dbcs_screen_head_off(const char_u *base, const char_u *p)
 {
-  char_u      *q;
-
   /* It can't be a trailing byte when not using DBCS, at the start of the
    * string or the previous byte can't start a double-byte.
    * For euc-jp an 0x8e byte in the previous cell always means we have a
@@ -2930,33 +2926,35 @@ int dbcs_screen_head_off(char_u *base, char_u *p)
    * byte we are looking for.  Return 1 when we went past it, 0 otherwise.
    * For DBCS_JPNU look out for 0x8e, which means the second byte is not
    * stored as the next byte. */
-  q = base;
+  const char_u *q = base;
   while (q < p) {
-    if (enc_dbcs == DBCS_JPNU && *q == 0x8e)
+    if (enc_dbcs == DBCS_JPNU && *q == 0x8e) {
       ++q;
-    else
+    }
+    else {
       q += dbcs_ptr2len(q);
+    }
   }
+
   return (q == p) ? 0 : 1;
 }
 
-int utf_head_off(char_u *base, char_u *p)
+int utf_head_off(const char_u *base, const char_u *p)
 {
-  char_u      *q;
-  char_u      *s;
   int c;
   int len;
-  char_u      *j;
 
   if (*p < 0x80)                /* be quick for ASCII */
     return 0;
 
   /* Skip backwards over trailing bytes: 10xx.xxxx
    * Skip backwards again if on a composing char. */
+  const char_u *q;
   for (q = p;; --q) {
     /* Move s to the last byte of this char. */
-    for (s = q; (s[1] & 0xc0) == 0x80; ++s)
-      ;
+    const char_u *s;
+    for (s = q; (s[1] & 0xc0) == 0x80; ++s);
+
     /* Move q to the first byte of this char. */
     while (q > base && (*q & 0xc0) == 0x80)
       --q;
@@ -2975,7 +2973,7 @@ int utf_head_off(char_u *base, char_u *p)
 
     if (arabic_maycombine(c)) {
       /* Advance to get a sneak-peak at the next char */
-      j = q;
+      const char_u *j = q;
       --j;
       /* Move j to the first byte of this char. */
       while (j > base && (*j & 0xc0) == 0x80)
-- 
cgit 


From 46e4bc04819935327636e918cc8fdd2f8b3c9ddf Mon Sep 17 00:00:00 2001
From: Nicolas Hillegeer <nicolas@hillegeer.com>
Date: Thu, 29 May 2014 10:13:08 +0200
Subject: text: remove useless arg from mb_string2cells

mb_string2cells was always called like mb_string2cells(..., -1) so that was
the only codepath that was tested. @tarruda was the first to try to input an
actual length, after which valgrind detected that funny business was going
on.

It's not even possible to do the right thing with the current text codec
infrastructure: the codecs all assume they are working with C strings, meaning
that if there is no NUL terminator, they will happily keep on reading past the
end of Pascal strings. Ergo, passing the length parameter is moot. The
condition in the for-loop was wrong as well (but that's no longer relevant).

Also change the return value to size_t, by analogy with strlen.

ref:
https://github.com/neovim/neovim/commit/677d30d7966dd2766bbf20665791c568dacc427a
---
 src/nvim/api/vim.c |  2 +-
 src/nvim/eval.c    |  4 +---
 src/nvim/mbyte.c   | 22 ++++++++++++----------
 src/nvim/message.c |  2 +-
 src/nvim/screen.c  |  2 +-
 5 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/nvim/api/vim.c b/src/nvim/api/vim.c
index 59f4721da2..f07bfb196e 100644
--- a/src/nvim/api/vim.c
+++ b/src/nvim/api/vim.c
@@ -90,7 +90,7 @@ Integer vim_strwidth(String str, Error *err)
   }
 
   char *buf = xstrndup(str.data, str.size);
-  Integer rv = mb_string2cells((char_u *)buf, -1);
+  Integer rv = (Integer) mb_string2cells((char_u *) buf);
   free(buf);
   return rv;
 }
diff --git a/src/nvim/eval.c b/src/nvim/eval.c
index 31070c1118..c209f5c4af 100644
--- a/src/nvim/eval.c
+++ b/src/nvim/eval.c
@@ -13696,9 +13696,7 @@ static void f_strwidth(typval_T *argvars, typval_T *rettv)
 {
   char_u      *s = get_tv_string(&argvars[0]);
 
-  rettv->vval.v_number = (varnumber_T)(
-    mb_string2cells(s, -1)
-    );
+  rettv->vval.v_number = (varnumber_T) mb_string2cells(s);
 }
 
 /*
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 8a669b0bc8..899b94e0fb 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1305,17 +1305,19 @@ static int dbcs_char2cells(int c)
   return MB_BYTE2LEN((unsigned)c >> 8);
 }
 
-/*
- * Return the number of cells occupied by string "p".
- * Stop at a NUL character.  When "len" >= 0 stop at character "p[len]".
- */
-int mb_string2cells(const char_u *p, int len)
+/// Calculate the number of cells occupied by string `str`.
+///
+/// @param str The source string, may not be NULL, must be a NUL-terminated
+///            string.
+/// @return The number of cells occupied by string `str`
+size_t mb_string2cells(const char_u *str)
 {
-  int i;
-  int clen = 0;
+  size_t clen = 0;
+
+  for (const char_u *p = str; *p != NUL; p += (*mb_ptr2len)(p)) {
+    clen += (*mb_ptr2cells)(p);
+  }
 
-  for (i = 0; (len < 0 || i < len) && p[i] != NUL; i += (*mb_ptr2len)(p + i))
-    clen += (*mb_ptr2cells)(p + i);
   return clen;
 }
 
@@ -2953,7 +2955,7 @@ int utf_head_off(const char_u *base, const char_u *p)
   for (q = p;; --q) {
     /* Move s to the last byte of this char. */
     const char_u *s;
-    for (s = q; (s[1] & 0xc0) == 0x80; ++s);
+    for (s = q; (s[1] & 0xc0) == 0x80; ++s) {}
 
     /* Move q to the first byte of this char. */
     while (q > base && (*q & 0xc0) == 0x80)
diff --git a/src/nvim/message.c b/src/nvim/message.c
index 41a2345171..96be3bb17d 100644
--- a/src/nvim/message.c
+++ b/src/nvim/message.c
@@ -3352,7 +3352,7 @@ int vim_vsnprintf(char *str, size_t str_m, char *fmt, va_list ap, typval_T *tvs)
           if (fmt_spec == 'S') {
             if (min_field_width != 0)
               min_field_width += STRLEN(str_arg)
-                                 - mb_string2cells((char_u *)str_arg, -1);
+                                 - mb_string2cells((char_u *) str_arg);
             if (precision) {
               char_u *p1 = (char_u *)str_arg;
               size_t i;
diff --git a/src/nvim/screen.c b/src/nvim/screen.c
index d36ce6ca2b..18b1797c36 100644
--- a/src/nvim/screen.c
+++ b/src/nvim/screen.c
@@ -4834,7 +4834,7 @@ void win_redr_status(win_T *wp)
       int clen = 0, i;
 
       /* Count total number of display cells. */
-      clen = mb_string2cells(p, -1);
+      clen = (int) mb_string2cells(p);
 
       /* Find first character that will fit.
        * Going from start to end is much faster for DBCS. */
-- 
cgit