1 files changed, 146 insertions, 95 deletions
diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c
index 4d7ff558ad..dab9a2aacd 100644
--- a/src/nvim/spellfile.c
+++ b/src/nvim/spellfile.c
@@ -1,3 +1,6 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check
+// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
 // spellfile.c: code for reading and writing spell files.
 //
 // See spell.c for information about spell checking.
@@ -223,6 +226,7 @@
 //                          few bytes as possible, see offset2bytes())
 
 #include <stdio.h>
+#include <stdint.h>
 #include <wctype.h>
 
 #include "nvim/vim.h"
@@ -266,7 +270,7 @@
 #define SAL_REM_ACCENTS         4
 
 #define VIMSPELLMAGIC "VIMspell"  // string at start of Vim spell file
-#define VIMSPELLMAGICL 8
+#define VIMSPELLMAGICL (sizeof(VIMSPELLMAGIC) - 1)
 #define VIMSPELLVERSION 50
 
 // Section IDs.  Only renumber them when VIMSPELLVERSION changes!
@@ -493,6 +497,64 @@ typedef struct spellinfo_S {
 # include "spellfile.c.generated.h"
 #endif
 
+/// Read n bytes from fd to buf, returning on errors
+///
+/// @param[out]  buf  Buffer to read to, must be at least n bytes long.
+/// @param[in]  n  Amount of bytes to read.
+/// @param  fd  FILE* to read from.
+/// @param  exit_code  Code to run before returning.
+///
+/// @return Continues if n bytes were read. If not, the enclosing function
+///         returns SP_TRUNCERROR at end of file and SP_OTHERERROR otherwise.
+#define SPELL_READ_BYTES(buf, n, fd, exit_code) \
+    do { \
+      const size_t n__SPRB = (n); \
+      FILE *const fd__SPRB = (fd); \
+      char *const buf__SPRB = (buf); \
+      const size_t read_bytes__SPRB = fread(buf__SPRB, 1, n__SPRB, fd__SPRB); \
+      if (read_bytes__SPRB != n__SPRB) { \
+        exit_code; \
+        return feof(fd__SPRB) ? SP_TRUNCERROR : SP_OTHERERROR; \
+      } \
+    } while (0)
+
+/// Like #SPELL_READ_BYTES, but also error out if NUL byte was read
+///
+/// @return Continues if n bytes without NUL were read. If not, the enclosing
+///         function returns SP_TRUNCERROR at end of file, SP_FORMERROR if
+///         a NUL byte was read and SP_OTHERERROR if reading failed.
+#define SPELL_READ_NONNUL_BYTES(buf, n, fd, exit_code) \
+    do { \
+      const size_t n__SPRNB = (n); \
+      FILE *const fd__SPRNB = (fd); \
+      char *const buf__SPRNB = (buf); \
+      SPELL_READ_BYTES(buf__SPRNB, n__SPRNB, fd__SPRNB, exit_code); \
+      if (memchr(buf__SPRNB, NUL, (size_t)n__SPRNB)) { \
+        exit_code; \
+        return SP_FORMERROR; \
+      } \
+    } while (0)
+
+/// Check that spell file starts with a magic string
+///
+/// Does not check for version of the file.
+///
+/// @param  fd  File to check.
+///
+/// @return 0 on success, SP_TRUNCERROR if the file is too short, SP_FORMERROR
+///         if the magic string does not match and SP_OTHERERROR if reading
+///         the file failed.
+static inline int spell_check_magic_string(FILE *const fd)
+  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
+{
+  char buf[VIMSPELLMAGICL];
+  SPELL_READ_BYTES(buf, VIMSPELLMAGICL, fd, ;);
+  if (memcmp(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) {
+    return SP_FORMERROR;
+  }
+  return 0;
+}
+
 // Load one spell file and store the info into a slang_T.
 //
 // This is invoked in three ways:
@@ -513,9 +575,7 @@ spell_load_file (
 )
 {
   FILE        *fd;
-  char_u buf[VIMSPELLMAGICL];
   char_u      *p;
-  int i;
   int n;
   int len;
   char_u      *save_sourcing_name = sourcing_name;
@@ -557,11 +617,23 @@ spell_load_file (
   sourcing_lnum = 0;
 
   // <HEADER>: <fileID>
-  for (i = 0; i < VIMSPELLMAGICL; ++i)
-    buf[i] = getc(fd);                                  // <fileID>
-  if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) {
-    EMSG(_("E757: This does not look like a spell file"));
-    goto endFAIL;
+  const int scms_ret = spell_check_magic_string(fd);
+  switch (scms_ret) {
+    case SP_FORMERROR:
+    case SP_TRUNCERROR: {
+      emsgf(_("E757: This does not look like a spell file"));
+      goto endFAIL;
+    }
+    case SP_OTHERERROR: {
+      // NOTE(review): ferror() is a flag, not an errno; verify strerror() arg.
+      emsgf(_("E5042: Failed to read spell file %s: %s"),
+            fname, strerror(ferror(fd)));
+      // A read error is fatal: do not fall through to the success case.
+      goto endFAIL;
+    }
+    case 0: {
+      break;
+    }
   }
   c = getc(fd);                                         // <versionnr>
   if (c < VIMSPELLVERSION) {
@@ -934,12 +1003,11 @@ static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
 // Return SP_*ERROR flags.
 static int read_region_section(FILE *fd, slang_T *lp, int len)
 {
-  int i;
-
-  if (len > 16)
+  // len is cast to size_t for the read below, so reject negative values too.
+  if (len < 0 || len > 16) {
     return SP_FORMERROR;
-  for (i = 0; i < len; ++i)
-    lp->sl_regions[i] = getc(fd);                       // <regionname>
+  }
+  SPELL_READ_NONNUL_BYTES((char *)lp->sl_regions, (size_t)len, fd, ;);
   lp->sl_regions[len] = NUL;
   return 0;
 }
@@ -982,35 +1049,30 @@ static int read_charflags_section(FILE *fd)
 // Return SP_*ERROR flags.
 static int read_prefcond_section(FILE *fd, slang_T *lp)
 {
-  int cnt;
-  int i;
-  int n;
-  char_u      *p;
-  char_u buf[MAXWLEN + 1];
-
   // <prefcondcnt> <prefcond> ...
-  cnt = get2c(fd);                                      // <prefcondcnt>
-  if (cnt <= 0)
+  const int cnt = get2c(fd);  // <prefcondcnt>
+  if (cnt <= 0) {
     return SP_FORMERROR;
+  }
 
   lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *));
   lp->sl_prefixcnt = cnt;
 
-  for (i = 0; i < cnt; ++i) {
+  for (int i = 0; i < cnt; i++) {
     // <prefcond> : <condlen> <condstr>
-    n = getc(fd);                                       // <condlen>
-    if (n < 0 || n >= MAXWLEN)
+    const int n = getc(fd);  // <condlen>
+    if (n < 0 || n >= MAXWLEN) {
       return SP_FORMERROR;
+    }
 
     // When <condlen> is zero we have an empty condition.  Otherwise
     // compile the regexp program used to check for the condition.
     if (n > 0) {
-      buf[0] = '^';                 // always match at one position only
-      p = buf + 1;
-      while (n-- > 0)
-        *p++ = getc(fd);                                // <condstr>
-      *p = NUL;
-      lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
+      char buf[MAXWLEN + 1];
+      buf[0] = '^';  // always match at one position only
+      SPELL_READ_NONNUL_BYTES(buf + 1, (size_t)n, fd, ;);
+      buf[n + 1] = NUL;
+      lp->sl_prefprog[i] = vim_regcomp((char_u *)buf, RE_MAGIC | RE_STRING);
     }
   }
   return 0;
@@ -1063,7 +1125,6 @@ static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first)
 // Return SP_*ERROR flags.
 static int read_sal_section(FILE *fd, slang_T *slang)
 {
-  int i;
   int cnt;
   garray_T    *gap;
   salitem_T   *smp;
@@ -1073,13 +1134,16 @@ static int read_sal_section(FILE *fd, slang_T *slang)
 
   slang->sl_sofo = false;
 
-  i = getc(fd);                                 // <salflags>
-  if (i & SAL_F0LLOWUP)
+  const int flags = getc(fd);                   // <salflags>
+  if (flags & SAL_F0LLOWUP) {
     slang->sl_followup = true;
-  if (i & SAL_COLLAPSE)
+  }
+  if (flags & SAL_COLLAPSE) {
     slang->sl_collapse = true;
-  if (i & SAL_REM_ACCENTS)
+  }
+  if (flags & SAL_REM_ACCENTS) {
     slang->sl_rem_accents = true;
+  }
 
   cnt = get2c(fd);                              // <salcount>
   if (cnt < 0)
@@ -1099,7 +1163,8 @@ static int read_sal_section(FILE *fd, slang_T *slang)
     smp->sm_lead = p;
 
     // Read up to the first special char into sm_lead.
-    for (i = 0; i < ccnt; ++i) {
+    int i = 0;
+    for (; i < ccnt; ++i) {
       c = getc(fd);                             // <salfrom>
       if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
         break;
@@ -1125,11 +1190,17 @@ static int read_sal_section(FILE *fd, slang_T *slang)
 
     // Any following chars go in sm_rules.
     smp->sm_rules = p;
-    if (i < ccnt)
+    if (i < ccnt) {
       // store the char we got while checking for end of sm_lead
       *p++ = c;
-    for (++i; i < ccnt; ++i)
-      *p++ = getc(fd);                          // <salfrom>
+    }
+    i++;
+    if (i < ccnt) {
+      SPELL_READ_NONNUL_BYTES(                  // <salfrom>
+          (char *)p, (size_t)(ccnt - i), fd, xfree(smp->sm_lead));
+      p += (ccnt - i);
+      i = ccnt;
+    }
     *p++ = NUL;
 
     // <saltolen> <salto>
@@ -1569,9 +1640,14 @@ spell_read_tree (
 
   // The tree size was computed when writing the file, so that we can
   // allocate it as one long block. <nodecount>
-  int len = get4c(fd);
-  if (len < 0)
+  long len = get4c(fd);
+  if (len < 0) {
     return SP_TRUNCERROR;
+  }
+  if ((size_t)len >= SIZE_MAX / sizeof(int)) {  // -V547
+    // Invalid length, multiply with sizeof(int) would overflow.
+    return SP_FORMERROR;
+  }
   if (len > 0) {
     // Allocate the byte array.
     bp = xmalloc(len);
@@ -1873,7 +1949,6 @@ static void spell_print_tree(wordnode_T *root)
 static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
 {
   FILE        *fd;
-  afffile_T   *aff;
   char_u rline[MAXLINELEN];
   char_u      *line;
   char_u      *pc = NULL;
@@ -1930,11 +2005,7 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
   do_mapline = GA_EMPTY(&spin->si_map);
 
   // Allocate and init the afffile_T structure.
-  aff = (afffile_T *)getroom(spin, sizeof(afffile_T), true);
-  if (aff == NULL) {
-    fclose(fd);
-    return NULL;
-  }
+  afffile_T *aff = getroom(spin, sizeof(*aff), true);
   hash_init(&aff->af_pref);
   hash_init(&aff->af_suff);
   hash_init(&aff->af_comp);
@@ -2022,20 +2093,18 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
           smsg(_("FLAG after using flags in %s line %d: %s"),
                fname, lnum, items[1]);
       } else if (spell_info_item(items[0]) && itemcnt > 1)   {
-        p = (char_u *)getroom(spin,
-            (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
-            + STRLEN(items[0])
-            + STRLEN(items[1]) + 3, false);
-        if (p != NULL) {
-          if (spin->si_info != NULL) {
-            STRCPY(p, spin->si_info);
-            STRCAT(p, "\n");
-          }
-          STRCAT(p, items[0]);
-          STRCAT(p, " ");
-          STRCAT(p, items[1]);
-          spin->si_info = p;
+        p = getroom(spin,
+                    (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
+                    + STRLEN(items[0])
+                    + STRLEN(items[1]) + 3, false);
+        if (spin->si_info != NULL) {
+          STRCPY(p, spin->si_info);
+          STRCAT(p, "\n");
         }
+        STRCAT(p, items[0]);
+        STRCAT(p, " ");
+        STRCAT(p, items[1]);
+        spin->si_info = p;
       } else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
                  && midword == NULL) {
         midword = getroom_save(spin, items[1]);
@@ -2215,14 +2284,12 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
                  fname, lnum, items[1]);
         } else {
           // New affix letter.
-          cur_aff = (affheader_T *)getroom(spin,
-              sizeof(affheader_T), true);
-          if (cur_aff == NULL)
-            break;
+          cur_aff = getroom(spin, sizeof(*cur_aff), true);
           cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
-              fname, lnum);
-          if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
+                                          fname, lnum);
+          if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) {
             break;
+          }
           if (cur_aff->ah_flag == aff->af_bad
               || cur_aff->ah_flag == aff->af_rare
               || cur_aff->ah_flag == aff->af_keepcase
@@ -2230,11 +2297,12 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
               || cur_aff->ah_flag == aff->af_circumfix
               || cur_aff->ah_flag == aff->af_nosuggest
               || cur_aff->ah_flag == aff->af_needcomp
-              || cur_aff->ah_flag == aff->af_comproot)
+              || cur_aff->ah_flag == aff->af_comproot) {
             smsg(_("Affix also used for "
                    "BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST"
                    "in %s line %d: %s"),
-                fname, lnum, items[1]);
+                 fname, lnum, items[1]);
+          }
           STRCPY(cur_aff->ah_key, items[1]);
           hash_add(tp, cur_aff->ah_key);
 
@@ -2296,11 +2364,8 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
           smsg(_(e_afftrailing), fname, lnum, items[lasti]);
 
         // New item for an affix letter.
-        --aff_todo;
-        aff_entry = (affentry_T *)getroom(spin,
-            sizeof(affentry_T), true);
-        if (aff_entry == NULL)
-          break;
+        aff_todo--;
+        aff_entry = getroom(spin, sizeof(*aff_entry), true);
 
         if (STRCMP(items[2], "0") != 0)
           aff_entry->ae_chop = getroom_save(spin, items[2]);
@@ -2391,8 +2456,7 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
               }
             }
 
-            if (aff_entry->ae_chop == NULL
-                && aff_entry->ae_flags == NULL) {
+            if (aff_entry->ae_chop == NULL) {
               int idx;
               char_u      **pp;
               int n;
@@ -2773,12 +2837,10 @@ static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compfla
         // the existing ID.  Otherwise add a new entry.
         STRLCPY(key, prevp, p - prevp + 1);
         hi = hash_find(&aff->af_comp, key);
-        if (!HASHITEM_EMPTY(hi))
+        if (!HASHITEM_EMPTY(hi)) {
           id = HI2CI(hi)->ci_newID;
-        else {
-          ci = (compitem_T *)getroom(spin, sizeof(compitem_T), true);
-          if (ci == NULL)
-            break;
+        } else {
+          ci = getroom(spin, sizeof(*ci), true);
           STRCPY(ci->ci_key, key);
           ci->ci_flag = flag;
           // Avoid using a flag ID that has a special meaning in a
@@ -3580,7 +3642,7 @@ static int spell_read_wordfile(spellinfo_T *spin, char_u *fname)
           flags |= WF_REGION;
 
           l = *p - '0';
-          if (l > spin->si_region_count) {
+          if (l == 0 || l > spin->si_region_count) {
             smsg(_("Invalid region nr in %s line %d: %s"),
                  fname, lnum, p);
             break;
@@ -3662,12 +3724,8 @@ static void *getroom(spellinfo_T *spin, size_t len, bool align)
-// Returns NULL when out of memory.
+// Never returns NULL.
 static char_u *getroom_save(spellinfo_T *spin, char_u *s)
 {
-  char_u      *sc;
-
-  sc = (char_u *)getroom(spin, STRLEN(s) + 1, false);
-  if (sc != NULL)
-    STRCPY(sc, s);
-  return sc;
+  const size_t s_size = STRLEN(s) + 1;
+  return memcpy(getroom(spin, s_size, false), s, s_size);
 }
 
 
@@ -3686,6 +3744,7 @@ static void free_blocks(sblock_T *bl)
 // Allocate the root of a word tree.
-// Returns NULL when out of memory.
+// Never returns NULL.
 static wordnode_T *wordtree_alloc(spellinfo_T *spin)
+  FUNC_ATTR_NONNULL_RET
 {
   return (wordnode_T *)getroom(spin, sizeof(wordnode_T), true);
 }
@@ -4719,8 +4778,6 @@ static int sug_filltree(spellinfo_T *spin, slang_T *slang)
 
   // We use si_foldroot for the soundfolded trie.
   spin->si_foldroot = wordtree_alloc(spin);
-  if (spin->si_foldroot == NULL)
-    return FAIL;
 
   // Let tree_add_word() know we're adding to the soundfolded tree
   spin->si_sugtree = true;
@@ -5108,12 +5165,6 @@ mkspell (
     spin.si_foldroot = wordtree_alloc(&spin);
     spin.si_keeproot = wordtree_alloc(&spin);
     spin.si_prefroot = wordtree_alloc(&spin);
-    if (spin.si_foldroot == NULL
-        || spin.si_keeproot == NULL
-        || spin.si_prefroot == NULL) {
-      free_blocks(spin.si_blocks);
-      goto theend;
-    }
 
     // When not producing a .add.spl file clear the character table when
     // we encounter one in the .aff file.  This means we dump the current