1 files changed, 150 insertions, 68 deletions
diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c
index c108ae4a2c..1f7f616782 100644
--- a/src/nvim/spellfile.c
+++ b/src/nvim/spellfile.c
@@ -1,3 +1,6 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check
+// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
 // spellfile.c: code for reading and writing spell files.
 //
 // See spell.c for information about spell checking.
@@ -223,7 +226,9 @@
 //                          few bytes as possible, see offset2bytes())
 
 #include <stdio.h>
+#include <stdint.h>
 #include <wctype.h>
+#include <strings.h>
 
 #include "nvim/vim.h"
 #include "nvim/spell_defs.h"
@@ -266,7 +271,7 @@
 #define SAL_REM_ACCENTS         4
 
 #define VIMSPELLMAGIC "VIMspell"  // string at start of Vim spell file
-#define VIMSPELLMAGICL 8
+#define VIMSPELLMAGICL (sizeof(VIMSPELLMAGIC) - 1)
 #define VIMSPELLVERSION 50
 
 // Section IDs.  Only renumber them when VIMSPELLVERSION changes!
@@ -493,6 +498,64 @@ typedef struct spellinfo_S {
 # include "spellfile.c.generated.h"
 #endif
 
+/// Read n bytes from fd to buf, returning on errors
+///
+/// @param[out]  buf  Buffer to read to, must be at least n bytes long.
+/// @param[in]  n  Amount of bytes to read.
+/// @param  fd  FILE* to read from.
+/// @param  exit_code  Code to run before returning.
+///
+/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if
+///         there are not enough bytes, returns SP_OTHERERROR if reading failed.
+#define SPELL_READ_BYTES(buf, n, fd, exit_code) \
+    do { \
+      const size_t n__SPRB = (n); \
+      FILE *const fd__SPRB = (fd); \
+      char *const buf__SPRB = (buf); \
+      const size_t read_bytes__SPRB = fread(buf__SPRB, 1, n__SPRB, fd__SPRB); \
+      if (read_bytes__SPRB != n__SPRB) { \
+        exit_code; \
+        return feof(fd__SPRB) ? SP_TRUNCERROR : SP_OTHERERROR; \
+      } \
+    } while (0)
+
+/// Like #SPELL_READ_BYTES, but also error out if NUL byte was read
+///
+/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if
+///         there are not enough bytes, returns SP_OTHERERROR if reading failed,
+///         returns SP_FORMERROR if read out a NUL byte.
+#define SPELL_READ_NONNUL_BYTES(buf, n, fd, exit_code) \
+    do { \
+      const size_t n__SPRNB = (n); \
+      FILE *const fd__SPRNB = (fd); \
+      char *const buf__SPRNB = (buf); \
+      SPELL_READ_BYTES(buf__SPRNB, n__SPRNB, fd__SPRNB, exit_code); \
+      if (memchr(buf__SPRNB, NUL, (size_t)n__SPRNB)) { \
+        exit_code; \
+        return SP_FORMERROR; \
+      } \
+    } while (0)
+
+/// Check that spell file starts with a magic string
+///
+/// Does not check for version of the file.
+///
+/// @param  fd  File to check.
+///
+/// @return 0 in case of success, SP_TRUNCERROR if file contains not enough
+///         bytes, SP_FORMERROR if it does not match magic string and
+///         SP_OTHERERROR if reading file failed.
+static inline int spell_check_magic_string(FILE *const fd)
+  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
+{
+  char buf[VIMSPELLMAGICL];
+  SPELL_READ_BYTES(buf, VIMSPELLMAGICL, fd, ;);
+  if (memcmp(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) {
+    return SP_FORMERROR;
+  }
+  return 0;
+}
+
 // Load one spell file and store the info into a slang_T.
 //
 // This is invoked in three ways:
@@ -513,9 +576,7 @@ spell_load_file (
 )
 {
   FILE        *fd;
-  char_u buf[VIMSPELLMAGICL];
   char_u      *p;
-  int i;
   int n;
   int len;
   char_u      *save_sourcing_name = sourcing_name;
@@ -557,11 +618,20 @@ spell_load_file (
   sourcing_lnum = 0;
 
   // <HEADER>: <fileID>
-  for (i = 0; i < VIMSPELLMAGICL; ++i)
-    buf[i] = getc(fd);                                  // <fileID>
-  if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) {
-    EMSG(_("E757: This does not look like a spell file"));
-    goto endFAIL;
+  const int scms_ret = spell_check_magic_string(fd);
+  switch (scms_ret) {
+    case SP_FORMERROR:
+    case SP_TRUNCERROR: {
+      emsgf(_("E757: This does not look like a spell file"));
+      goto endFAIL;
+    }
+    case SP_OTHERERROR: {
+      emsgf(_("E5042: Failed to read spell file %s: %s"),
+            fname, strerror(ferror(fd)));
+    }
+    case 0: {
+      break;
+    }
   }
   c = getc(fd);                                         // <versionnr>
   if (c < VIMSPELLVERSION) {
@@ -934,12 +1004,10 @@ static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
 // Return SP_*ERROR flags.
 static int read_region_section(FILE *fd, slang_T *lp, int len)
 {
-  int i;
-
-  if (len > 16)
+  if (len > 16) {
     return SP_FORMERROR;
-  for (i = 0; i < len; ++i)
-    lp->sl_regions[i] = getc(fd);                       // <regionname>
+  }
+  SPELL_READ_NONNUL_BYTES((char *)lp->sl_regions, (size_t)len, fd, ;);
   lp->sl_regions[len] = NUL;
   return 0;
 }
@@ -982,35 +1050,30 @@ static int read_charflags_section(FILE *fd)
 // Return SP_*ERROR flags.
 static int read_prefcond_section(FILE *fd, slang_T *lp)
 {
-  int cnt;
-  int i;
-  int n;
-  char_u      *p;
-  char_u buf[MAXWLEN + 1];
-
   // <prefcondcnt> <prefcond> ...
-  cnt = get2c(fd);                                      // <prefcondcnt>
-  if (cnt <= 0)
+  const int cnt = get2c(fd);  // <prefcondcnt>
+  if (cnt <= 0) {
     return SP_FORMERROR;
+  }
 
   lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *));
   lp->sl_prefixcnt = cnt;
 
-  for (i = 0; i < cnt; ++i) {
+  for (int i = 0; i < cnt; i++) {
     // <prefcond> : <condlen> <condstr>
-    n = getc(fd);                                       // <condlen>
-    if (n < 0 || n >= MAXWLEN)
+    const int n = getc(fd);  // <condlen>
+    if (n < 0 || n >= MAXWLEN) {
       return SP_FORMERROR;
+    }
 
     // When <condlen> is zero we have an empty condition.  Otherwise
     // compile the regexp program used to check for the condition.
     if (n > 0) {
-      buf[0] = '^';                 // always match at one position only
-      p = buf + 1;
-      while (n-- > 0)
-        *p++ = getc(fd);                                // <condstr>
-      *p = NUL;
-      lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
+      char buf[MAXWLEN + 1];
+      buf[0] = '^';  // always match at one position only
+      SPELL_READ_NONNUL_BYTES(buf + 1, (size_t)n, fd, ;);
+      buf[n + 1] = NUL;
+      lp->sl_prefprog[i] = vim_regcomp((char_u *)buf, RE_MAGIC | RE_STRING);
     }
   }
   return 0;
@@ -1063,7 +1126,6 @@ static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first)
 // Return SP_*ERROR flags.
 static int read_sal_section(FILE *fd, slang_T *slang)
 {
-  int i;
   int cnt;
   garray_T    *gap;
   salitem_T   *smp;
@@ -1073,13 +1135,16 @@ static int read_sal_section(FILE *fd, slang_T *slang)
 
   slang->sl_sofo = false;
 
-  i = getc(fd);                                 // <salflags>
-  if (i & SAL_F0LLOWUP)
+  const int flags = getc(fd);                   // <salflags>
+  if (flags & SAL_F0LLOWUP) {
     slang->sl_followup = true;
-  if (i & SAL_COLLAPSE)
+  }
+  if (flags & SAL_COLLAPSE) {
     slang->sl_collapse = true;
-  if (i & SAL_REM_ACCENTS)
+  }
+  if (flags & SAL_REM_ACCENTS) {
     slang->sl_rem_accents = true;
+  }
 
   cnt = get2c(fd);                              // <salcount>
   if (cnt < 0)
@@ -1099,7 +1164,8 @@ static int read_sal_section(FILE *fd, slang_T *slang)
     smp->sm_lead = p;
 
     // Read up to the first special char into sm_lead.
-    for (i = 0; i < ccnt; ++i) {
+    int i = 0;
+    for (; i < ccnt; ++i) {
       c = getc(fd);                             // <salfrom>
       if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
         break;
@@ -1125,11 +1191,17 @@ static int read_sal_section(FILE *fd, slang_T *slang)
 
     // Any following chars go in sm_rules.
     smp->sm_rules = p;
-    if (i < ccnt)
+    if (i < ccnt) {
       // store the char we got while checking for end of sm_lead
       *p++ = c;
-    for (++i; i < ccnt; ++i)
-      *p++ = getc(fd);                          // <salfrom>
+    }
+    i++;
+    if (i < ccnt) {
+      SPELL_READ_NONNUL_BYTES(                  // <salfrom>
+          (char *)p, (size_t)(ccnt - i), fd, xfree(smp->sm_lead));
+      p += (ccnt - i);
+      i = ccnt;
+    }
     *p++ = NUL;
 
     // <saltolen> <salto>
@@ -1435,7 +1507,7 @@ static int set_sofo(slang_T *lp, char_u *from, char_u *to)
     // First count the number of items for each list.  Temporarily use
     // sl_sal_first[] for this.
     for (p = from, s = to; *p != NUL && *s != NUL; ) {
-      c = mb_cptr2char_adv(&p);
+      c = mb_cptr2char_adv((const char_u **)&p);
       mb_cptr_adv(s);
       if (c >= 256)
         ++lp->sl_sal_first[c & 0xff];
@@ -1455,8 +1527,8 @@ static int set_sofo(slang_T *lp, char_u *from, char_u *to)
     // list.
     memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
     for (p = from, s = to; *p != NUL && *s != NUL; ) {
-      c = mb_cptr2char_adv(&p);
-      i = mb_cptr2char_adv(&s);
+      c = mb_cptr2char_adv((const char_u **)&p);
+      i = mb_cptr2char_adv((const char_u **)&s);
       if (c >= 256) {
         // Append the from-to chars at the end of the list with
         // the low byte.
@@ -1542,8 +1614,9 @@ static int *mb_str2wide(char_u *s)
   int i = 0;
 
   int *res = xmalloc((mb_charlen(s) + 1) * sizeof(int));
-  for (char_u *p = s; *p != NUL; )
-    res[i++] = mb_ptr2char_adv(&p);
+  for (char_u *p = s; *p != NUL; ) {
+    res[i++] = mb_ptr2char_adv((const char_u **)&p);
+  }
   res[i] = NUL;
 
   return res;
@@ -1568,9 +1641,14 @@ spell_read_tree (
 
   // The tree size was computed when writing the file, so that we can
   // allocate it as one long block. <nodecount>
-  int len = get4c(fd);
-  if (len < 0)
+  long len = get4c(fd);
+  if (len < 0) {
     return SP_TRUNCERROR;
+  }
+  if ((size_t)len >= SIZE_MAX / sizeof(int)) {
+    // Invalid length, multiply with sizeof(int) would overflow.
+    return SP_FORMERROR;
+  }
   if (len > 0) {
     // Allocate the byte array.
     bp = xmalloc(len);
@@ -2390,8 +2468,7 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
               }
             }
 
-            if (aff_entry->ae_chop == NULL
-                && aff_entry->ae_flags == NULL) {
+            if (aff_entry->ae_chop == NULL) {
               int idx;
               char_u      **pp;
               int n;
@@ -2486,13 +2563,14 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
 
           // Check that every character appears only once.
           for (p = items[1]; *p != NUL; ) {
-            c = mb_ptr2char_adv(&p);
+            c = mb_ptr2char_adv((const char_u **)&p);
             if ((!GA_EMPTY(&spin->si_map)
                  && vim_strchr(spin->si_map.ga_data, c)
                  != NULL)
-                || vim_strchr(p, c) != NULL)
+                || vim_strchr(p, c) != NULL) {
               smsg(_("Duplicate character in MAP in %s line %d"),
                    fname, lnum);
+            }
           }
 
           // We simply concatenate all the MAP strings, separated by
@@ -2717,12 +2795,12 @@ static unsigned get_affitem(int flagtype, char_u **pp)
     }
     res = getdigits_int(pp);
   } else {
-    res = mb_ptr2char_adv(pp);
+    res = mb_ptr2char_adv((const char_u **)pp);
     if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
                                  && res >= 'A' && res <= 'Z')) {
       if (**pp == NUL)
         return 0;
-      res = mb_ptr2char_adv(pp) + (res << 16);
+      res = mb_ptr2char_adv((const char_u **)pp) + (res << 16);
     }
   }
   return res;
@@ -2823,12 +2901,14 @@ static bool flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
   case AFT_CAPLONG:
   case AFT_LONG:
     for (p = afflist; *p != NUL; ) {
-      n = mb_ptr2char_adv(&p);
+      n = mb_ptr2char_adv((const char_u **)&p);
       if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
-          && *p != NUL)
-        n = mb_ptr2char_adv(&p) + (n << 16);
-      if (n == flag)
+          && *p != NUL) {
+        n = mb_ptr2char_adv((const char_u **)&p) + (n << 16);
+      }
+      if (n == flag) {
         return true;
+      }
     }
     break;
 
@@ -5436,10 +5516,11 @@ static void init_spellfile(void)
         fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
                 ->lp_slang->sl_fname;
         vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
-            fname != NULL
-            && strstr((char *)path_tail(fname), ".ascii.") != NULL
-            ? (char_u *)"ascii" : spell_enc());
-        set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
+                     ((fname != NULL
+                       && strstr((char *)path_tail(fname), ".ascii.") != NULL)
+                      ? "ascii"
+                      : (const char *)spell_enc()));
+        set_option_value("spellfile", 0L, (const char *)buf, OPT_LOCAL);
         break;
       }
       aspath = false;
@@ -5465,9 +5546,9 @@ static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp)
       EMSG(_(e_affform));
       return FAIL;
     }
-    f = mb_ptr2char_adv(&pf);
-    l = mb_ptr2char_adv(&pl);
-    u = mb_ptr2char_adv(&pu);
+    f = mb_ptr2char_adv((const char_u **)&pf);
+    l = mb_ptr2char_adv((const char_u **)&pl);
+    u = mb_ptr2char_adv((const char_u **)&pu);
     // Every character that appears is a word character.
     if (f < 256)
       new_st.st_isw[f] = true;
@@ -5532,7 +5613,7 @@ set_spell_charflags (
     }
 
     if (*p != NUL) {
-      c = mb_ptr2char_adv(&p);
+      c = mb_ptr2char_adv((const char_u **)&p);
       new_st.st_fold[i + 128] = c;
       if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
         new_st.st_upper[c] = i + 128;
@@ -5619,12 +5700,13 @@ static void set_map_str(slang_T *lp, char_u *map)
   // "aaa/bbb/ccc/".  Fill sl_map_array[c] with the character before c and
   // before the same slash.  For characters above 255 sl_map_hash is used.
   for (p = map; *p != NUL; ) {
-    c = mb_cptr2char_adv(&p);
-    if (c == '/')
+    c = mb_cptr2char_adv((const char_u **)&p);
+    if (c == '/') {
       headc = 0;
-    else {
-      if (headc == 0)
+    } else {
+      if (headc == 0) {
         headc = c;
+      }
 
       // Characters above 255 don't fit in sl_map_array[], put them in
       // the hash table.  Each entry is the char, a NUL the headchar and