vim-patch:8.2.4494: the find_tags() function is much too long

Problem:    The find_tags() function is much too long.
Solution:   Refactor the function. (Yegappan Lakshmanan, closes vim/vim#9869)

https://github.com/vim/vim/commit/2f87a99b6e9b559d51e130769e7f8377db6749f8

Co-authored-by: Yegappan Lakshmanan <yegappan@yahoo.com>
author: zeertzjq <zeertzjq@outlook.com> 2022-11-18 13:25:21 +0800
committer: zeertzjq <zeertzjq@outlook.com> 2022-11-18 14:59:18 +0800
commit: 245035d6db7399cc077de0eaa8e97e2eecc08a9b (patch)
tree: 8d07e90dc1f7ff3cc7dda29255aa29cc5310395b
parent: 282cbc2350986c3fc1edb507c4facc8d8fe8cd97 (diff)
download: rneovim-245035d6db7399cc077de0eaa8e97e2eecc08a9b.tar.gz
rneovim-245035d6db7399cc077de0eaa8e97e2eecc08a9b.tar.bz2
rneovim-245035d6db7399cc077de0eaa8e97e2eecc08a9b.zip
3 files changed, 804 insertions, 743 deletions
diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c
index d7590f6f57..27a0842016 100644
--- a/src/nvim/quickfix.c
+++ b/src/nvim/quickfix.c
@@ -4308,10 +4308,17 @@ void ex_make(exarg_T *eap)
 
   incr_quickfix_busy();
 
-  int res = qf_init(wp, fname, (eap->cmdidx != CMD_make
-                                && eap->cmdidx != CMD_lmake) ? p_gefm : p_efm,
-                    (eap->cmdidx != CMD_grepadd && eap->cmdidx != CMD_lgrepadd),
-                    qf_cmdtitle(*eap->cmdlinep), enc);
+  char *errorformat = p_efm;
+  bool newlist = true;
+
+  if (eap->cmdidx != CMD_make && eap->cmdidx != CMD_lmake) {
+    errorformat = p_gefm;
+  }
+  if (eap->cmdidx == CMD_grepadd || eap->cmdidx == CMD_lgrepadd) {
+    newlist = false;
+  }
+
+  int res = qf_init(wp, fname, errorformat, newlist, qf_cmdtitle(*eap->cmdlinep), enc);
 
   qf_info_T *qi = &ql_info;
   if (wp != NULL) {
diff --git a/src/nvim/tag.c b/src/nvim/tag.c
index 36d6432c06..e309f05f62 100644
--- a/src/nvim/tag.c
+++ b/src/nvim/tag.c
@@ -110,6 +110,23 @@ static char *mt_names[MT_COUNT/2] =
 #define NOTAGFILE       99              // return value for jumpto_tag
 static char *nofile_fname = NULL;       // fname for NOTAGFILE error
 
+/// State information used during a tag search (not all set by the init function)
+typedef struct {
+  pat_T orgpat;                  ///< holds unconverted pattern info
+  char *help_lang_find;          ///< help language to restrict to, NULL: any
+  bool is_txt;                   ///< flag of file extension (.txt: keep "en")
+  bool did_open;                 ///< set once a tags file could be opened
+  int mincount;                  ///< MAXCOL: find all matches
+                                 ///< other: minimal number of matches
+  bool linear;                   ///< do a linear search instead of a binary one
+  char *lbuf;                    ///< line buffer, reallocated for long lines
+  int lbuf_size;                 ///< allocated size of lbuf in bytes
+  int match_count;               ///< number of matches found so far
+  garray_T ga_match[MT_COUNT];   ///< stores matches in sequence, per match type
+  hashtab_T ht_match[MT_COUNT];  ///< stores matches by key, to drop duplicates
+  bool stop_searching;           ///< stop when match found or error
+} findtags_state_T;
+
 #ifdef INCLUDE_GENERATED_DECLARATIONS
 # include "tag.c.generated.h"
 #endif
@@ -1355,6 +1372,740 @@ static int find_tagfunc_tags(char_u *pat, garray_T *ga, int *match_count, int fl
   return result;
 }
 
+/// Set up "st" for a new tag search for "pat": store the pattern, allocate
+/// the line buffer and create the empty match tables.
+static void findtags_state_init(findtags_state_T *st, char *pat, int mincount)
+{
+  pat_T *const orgpat = &st->orgpat;
+  orgpat->pat = (char_u *)pat;
+  orgpat->len = (int)strlen(pat);
+  orgpat->regmatch.regprog = NULL;
+  st->mincount = mincount;
+  st->help_lang_find = NULL;
+  st->is_txt = st->did_open = st->stop_searching = false;
+  st->match_count = 0;
+  st->lbuf_size = LSIZE;
+  st->lbuf = xmalloc((size_t)st->lbuf_size);
+
+  for (int idx = 0; idx < MT_COUNT; idx++) {
+    ga_init(&st->ga_match[idx], sizeof(char *), 100);
+    hash_init(&st->ht_match[idx]);
+  }
+}
+
+/// Search for tags in the "tag_fname" tags file.
+/// Information needed to search for the tags is in the "st" state structure.
+/// The matching tags are returned in "st".
+///
+/// @param flags       TAG_* flags (TAG_NOIC, TAG_REGEXP, TAG_HELP, TAG_NAMES, ...)
+/// @param buf_ffname  passed to test_for_current() to detect tags in the current file
+static void find_tags_in_file(char *tag_fname, findtags_state_T *st, int flags, char *buf_ffname)
+{
+  FILE *fp;
+  tagptrs_T tagp;
+  int is_static;                        // current tag line is static
+  int is_current;                       // file name matches
+  bool eof = false;                     // found end-of-file
+  char *p;
+  char_u *s;
+  int i;
+  int help_pri = 0;
+  char_u help_lang[3];                  // lang of current tags file
+  int tag_file_sorted = NUL;            // !_TAG_FILE_SORTED value
+  int tagcmp;
+  off_T offset;
+  enum {
+    TS_START,         ///< at start of file
+    TS_LINEAR,        ///< linear searching forward, till EOF
+    TS_BINARY,        ///< binary searching
+    TS_SKIP_BACK,     ///< skipping backwards
+    TS_STEP_FORWARD,  ///< stepping forwards
+  } state;  // Current search state
+  struct tag_search_info {      // Binary search file offsets
+    off_T low_offset;           // offset for first char of first line that
+                                // could match
+    off_T high_offset;          // offset of char after last line that could
+                                // match
+    off_T curr_offset;          // Current file offset in search range
+    off_T curr_offset_used;     // curr_offset used when skipping back
+    off_T match_offset;         // Where the binary search found a tag
+    int low_char;               // first char at low_offset
+    int high_char;              // first char at high_offset
+  } search_info;
+
+  int cmplen;
+  int match;                    // matches
+  int match_no_ic = 0;          // matches with rm_ic == false
+  int match_re;                 // match with regexp
+  int matchoff = 0;
+
+  char *mfp;
+  int mtt;
+  hash_T hash = 0;
+
+  bool sort_error = false;                      // tags file not sorted
+  bool sortic = false;                          // tag file sorted in nocase
+  int noic = (flags & TAG_NOIC);
+  bool line_error = false;                      // syntax error
+  int has_re = (flags & TAG_REGEXP);            // regexp used
+  int help_only = (flags & TAG_HELP);
+  int name_only = (flags & TAG_NAMES);
+  int get_it_again = false;
+  vimconv_T vimconv;
+
+  vimconv.vc_type = CONV_NONE;
+
+  // This is only to avoid a compiler warning for using search_info
+  // uninitialised.
+  CLEAR_FIELD(search_info);
+
+  // A file that doesn't exist is silently ignored.  Only when not a
+  // single file is found, an error message is given (further on).
+  if (curbuf->b_help) {
+    // Keep en if the file extension is .txt
+    if (st->is_txt) {
+      STRCPY(help_lang, "en");
+    } else {
+      // Prefer help tags according to 'helplang'.  Put the
+      // two-letter language name in help_lang[].
+      i = (int)STRLEN(tag_fname);
+      if (i > 3 && tag_fname[i - 3] == '-') {
+        STRCPY(help_lang, tag_fname + i - 2);
+      } else {
+        STRCPY(help_lang, "en");
+      }
+    }
+
+    // When searching for a specific language skip tags files
+    // for other languages.
+    if (st->help_lang_find != NULL
+        && STRICMP(help_lang, st->help_lang_find) != 0) {
+      return;
+    }
+
+    // For CTRL-] in a help file prefer a match with the same
+    // language.
+    if ((flags & TAG_KEEP_LANG)
+        && st->help_lang_find == NULL
+        && curbuf->b_fname != NULL
+        && (i = (int)strlen(curbuf->b_fname)) > 4
+        && curbuf->b_fname[i - 1] == 'x'
+        && curbuf->b_fname[i - 4] == '.'
+        && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0) {
+      help_pri = 0;
+    } else {
+      help_pri = 1;
+      for (s = p_hlg; *s != NUL; s++) {
+        if (STRNICMP(s, help_lang, 2) == 0) {
+          break;
+        }
+        help_pri++;
+        if ((s = (char_u *)vim_strchr((char *)s, ',')) == NULL) {
+          break;
+        }
+      }
+      if (s == NULL || *s == NUL) {
+        // Language not in 'helplang': use last, prefer English,
+        // unless found already.
+        help_pri++;
+        if (STRICMP(help_lang, "en") != 0) {
+          help_pri++;
+        }
+      }
+    }
+  }
+
+  if ((fp = os_fopen(tag_fname, "r")) == NULL) {
+    return;
+  }
+
+  if (p_verbose >= 5) {
+    verbose_enter();
+    smsg(_("Searching tags file %s"), tag_fname);
+    verbose_leave();
+  }
+
+  st->did_open = true;  // remember that we found at least one file
+
+  state = TS_START;     // we're at the start of the file
+
+  // Read and parse the lines in the file one by one
+  for (;;) {
+    // check for CTRL-C typed, more often when jumping around
+    if (state == TS_BINARY || state == TS_SKIP_BACK) {
+      line_breakcheck();
+    } else {
+      fast_breakcheck();
+    }
+    if ((flags & TAG_INS_COMP)) {   // Double brackets for gcc
+      ins_compl_check_keys(30, false);
+    }
+    if (got_int || ins_compl_interrupted()) {
+      st->stop_searching = true;
+      break;
+    }
+    // When mincount is TAG_MANY, stop when enough matches have been
+    // found (for completion).  Leave through the cleanup code after the
+    // loop, so that "fp" is closed and "vimconv" is released.
+    if (st->mincount == TAG_MANY && st->match_count >= TAG_MANY) {
+      st->stop_searching = true;
+      break;
+    }
+    if (get_it_again) {
+      goto line_read_in;
+    }
+    // For binary search: compute the next offset to use.
+    if (state == TS_BINARY) {
+      offset = search_info.low_offset + ((search_info.high_offset
+                                          - search_info.low_offset) / 2);
+      if (offset == search_info.curr_offset) {
+        break;  // End the binary search without a match.
+      }
+      search_info.curr_offset = offset;
+    } else if (state == TS_SKIP_BACK) {
+      // Skipping back (after a match during binary search).
+      search_info.curr_offset -= st->lbuf_size * 2;
+      if (search_info.curr_offset < 0) {
+        search_info.curr_offset = 0;
+        rewind(fp);
+        state = TS_STEP_FORWARD;
+      }
+    }
+
+    // When jumping around in the file, first read a line to find the
+    // start of the next line.
+    if (state == TS_BINARY || state == TS_SKIP_BACK) {
+      // Adjust the search file offset to the correct position
+      search_info.curr_offset_used = search_info.curr_offset;
+      vim_fseek(fp, search_info.curr_offset, SEEK_SET);
+      eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp);
+      if (!eof && search_info.curr_offset != 0) {
+        search_info.curr_offset = vim_ftell(fp);
+        if (search_info.curr_offset == search_info.high_offset) {
+          // oops, gone a bit too far; try from low offset
+          vim_fseek(fp, search_info.low_offset, SEEK_SET);
+          search_info.curr_offset = search_info.low_offset;
+        }
+        eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp);
+      }
+      // skip empty and blank lines
+      while (!eof && vim_isblankline(st->lbuf)) {
+        search_info.curr_offset = vim_ftell(fp);
+        eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp);
+      }
+      if (eof) {
+        // Hit end of file.  Skip backwards.
+        state = TS_SKIP_BACK;
+        search_info.match_offset = vim_ftell(fp);
+        search_info.curr_offset = search_info.curr_offset_used;
+        continue;
+      }
+    } else {
+      // Not jumping around in the file: Read the next line.
+
+      // skip empty and blank lines
+      do {
+        search_info.curr_offset = vim_ftell(fp);
+        eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp);
+      } while (!eof && vim_isblankline(st->lbuf));
+
+      if (eof) {
+        break;                                  // end of file
+      }
+    }
+line_read_in:
+
+    if (vimconv.vc_type != CONV_NONE) {
+      char *conv_line;
+      int len;
+
+      // Convert every line.  Converting the pattern from 'enc' to
+      // the tags file encoding doesn't work, because characters are
+      // not recognized.
+      conv_line = string_convert(&vimconv, st->lbuf, NULL);
+      if (conv_line != NULL) {
+        // Copy or swap lbuf and conv_line.
+        len = (int)strlen(conv_line) + 1;
+        if (len > st->lbuf_size) {
+          xfree(st->lbuf);
+          st->lbuf = conv_line;
+          st->lbuf_size = len;
+        } else {
+          STRCPY(st->lbuf, conv_line);
+          xfree(conv_line);
+        }
+      }
+    }
+
+    // When still at the start of the file, check for Emacs tags file
+    // format, and for "not sorted" flag.
+    if (state == TS_START) {
+      // The header ends when the line sorts below "!_TAG_".  When
+      // case is folded lower case letters sort before "_".
+      if (strncmp(st->lbuf, "!_TAG_", 6) <= 0
+          || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1]))) {
+        if (strncmp(st->lbuf, "!_TAG_", 6) != 0) {
+          // Non-header item before the header, e.g. "!" itself.
+          goto parse_line;
+        }
+
+        // Read header line.
+        if (strncmp(st->lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) {
+          tag_file_sorted = (uint8_t)st->lbuf[18];
+        }
+        if (strncmp(st->lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) {
+          // Prepare to convert every line from the specified
+          // encoding to 'encoding'.
+          for (p = st->lbuf + 20; *p > ' ' && *p < 127; p++) {}
+          *p = NUL;
+          convert_setup(&vimconv, st->lbuf + 20, p_enc);
+        }
+
+        // Read the next line.  Unrecognized flags are ignored.
+        continue;
+      }
+
+      // The header ends here.
+
+      // When there is no tag head, or ignoring case, need to do a
+      // linear search.
+      // When no "!_TAG_" is found, default to binary search.  If
+      // the tag file isn't sorted, the second loop will find it.
+      // When "!_TAG_FILE_SORTED" found: start binary search if
+      // flag set.
+      if (st->linear) {
+        state = TS_LINEAR;
+      } else if (tag_file_sorted == NUL) {
+        state = TS_BINARY;
+      } else if (tag_file_sorted == '1') {
+        state = TS_BINARY;
+      } else if (tag_file_sorted == '2') {
+        state = TS_BINARY;
+        sortic = true;
+        st->orgpat.regmatch.rm_ic = (p_ic || !noic);
+      } else {
+        state = TS_LINEAR;
+      }
+
+      if (state == TS_BINARY && st->orgpat.regmatch.rm_ic && !sortic) {
+        // Binary search won't work for ignoring case, use linear
+        // search.
+        st->linear = true;
+        state = TS_LINEAR;
+      }
+
+      // When starting a binary search, get the size of the file and
+      // compute the first offset.
+      if (state == TS_BINARY) {
+        if (vim_fseek(fp, 0, SEEK_END) != 0) {
+          // can't seek, don't use binary search
+          state = TS_LINEAR;
+        } else {
+          // Get the tag file size.
+          // Don't use lseek(), it doesn't work
+          // properly on MacOS Catalina.
+          const off_T filesize = vim_ftell(fp);
+          vim_fseek(fp, 0, SEEK_SET);
+
+          // Calculate the first read offset in the file.  Start
+          // the search in the middle of the file.
+          search_info.low_offset = 0;
+          search_info.low_char = 0;
+          search_info.high_offset = filesize;
+          search_info.curr_offset = 0;
+          search_info.high_char = 0xff;
+        }
+        continue;
+      }
+    }
+
+parse_line:
+    // When the line is too long the NUL will not be in the
+    // last-but-one byte (see vim_fgets()).
+    // Has been reported for Mozilla JS with extremely long names.
+    // In that case we need to increase lbuf_size.
+    if (st->lbuf[st->lbuf_size - 2] != NUL) {
+      st->lbuf_size *= 2;
+      xfree(st->lbuf);
+      st->lbuf = xmalloc((size_t)st->lbuf_size);
+
+      if (state == TS_STEP_FORWARD) {
+        // Seek to the same position to read the same line again
+        vim_fseek(fp, search_info.curr_offset, SEEK_SET);
+      }
+      // this will try the same thing again, make sure the offset is
+      // different
+      search_info.curr_offset = 0;
+      continue;
+    }
+
+    // Figure out where the different strings are in this line.
+    // For "normal" tags: Do a quick check if the tag matches.
+    // This speeds up tag searching a lot!
+    if (st->orgpat.headlen) {
+      CLEAR_FIELD(tagp);
+      tagp.tagname = st->lbuf;
+      tagp.tagname_end = (char_u *)vim_strchr(st->lbuf, TAB);
+      if (tagp.tagname_end == NULL) {
+        // Corrupted tag line.
+        line_error = true;
+        break;
+      }
+
+      // Skip this line if the length of the tag is different and
+      // there is no regexp, or the tag is too short.
+      cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname);
+      if (p_tl != 0 && cmplen > p_tl) {         // adjust for 'taglength'
+        cmplen = (int)p_tl;
+      }
+      if (has_re && st->orgpat.headlen < cmplen) {
+        cmplen = st->orgpat.headlen;
+      } else if (state == TS_LINEAR && st->orgpat.headlen != cmplen) {
+        continue;
+      }
+
+      if (state == TS_BINARY) {
+        // Simplistic check for unsorted tags file.
+        i = (int)tagp.tagname[0];
+        if (sortic) {
+          i = TOUPPER_ASC(tagp.tagname[0]);
+        }
+        if (i < search_info.low_char || i > search_info.high_char) {
+          sort_error = true;
+        }
+
+        // Compare the current tag with the searched tag.
+        if (sortic) {
+          tagcmp = tag_strnicmp((char_u *)tagp.tagname, st->orgpat.head,
+                                (size_t)cmplen);
+        } else {
+          tagcmp = STRNCMP(tagp.tagname, st->orgpat.head, cmplen);
+        }
+
+        // A match with a shorter tag means to search forward.
+        // A match with a longer tag means to search backward.
+        if (tagcmp == 0) {
+          if (cmplen < st->orgpat.headlen) {
+            tagcmp = -1;
+          } else if (cmplen > st->orgpat.headlen) {
+            tagcmp = 1;
+          }
+        }
+
+        if (tagcmp == 0) {
+          // We've located the tag, now skip back and search
+          // forward until the first matching tag is found.
+          state = TS_SKIP_BACK;
+          search_info.match_offset = search_info.curr_offset;
+          continue;
+        }
+        if (tagcmp < 0) {
+          search_info.curr_offset = vim_ftell(fp);
+          if (search_info.curr_offset < search_info.high_offset) {
+            search_info.low_offset = search_info.curr_offset;
+            if (sortic) {
+              search_info.low_char = TOUPPER_ASC(tagp.tagname[0]);
+            } else {
+              search_info.low_char = (uint8_t)tagp.tagname[0];
+            }
+            continue;
+          }
+        }
+        if (tagcmp > 0
+            && search_info.curr_offset != search_info.high_offset) {
+          search_info.high_offset = search_info.curr_offset;
+          if (sortic) {
+            search_info.high_char = TOUPPER_ASC(tagp.tagname[0]);
+          } else {
+            search_info.high_char = (uint8_t)tagp.tagname[0];
+          }
+          continue;
+        }
+
+        // No match yet and are at the end of the binary search.
+        break;
+      } else if (state == TS_SKIP_BACK) {
+        assert(cmplen >= 0);
+        if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) {
+          state = TS_STEP_FORWARD;
+        } else {
+          // Have to skip back more.  Restore the curr_offset
+          // used, otherwise we get stuck at a long line.
+          search_info.curr_offset = search_info.curr_offset_used;
+        }
+        continue;
+      } else if (state == TS_STEP_FORWARD) {
+        assert(cmplen >= 0);
+        if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) {
+          if ((off_T)vim_ftell(fp) > search_info.match_offset) {
+            break;                  // past last match
+          } else {
+            continue;               // before first match
+          }
+        }
+      } else {
+        // skip this match if it can't match
+        assert(cmplen >= 0);
+        if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) {
+          continue;
+        }
+      }
+
+      // Can be a matching tag, isolate the file name and command.
+      tagp.fname = tagp.tagname_end + 1;
+      tagp.fname_end = (char_u *)vim_strchr((char *)tagp.fname, TAB);
+      if (tagp.fname_end == NULL) {
+        i = FAIL;
+      } else {
+        tagp.command = tagp.fname_end + 1;
+        i = OK;
+      }
+    } else {
+      i = parse_tag_line((char_u *)st->lbuf, &tagp);
+    }
+    if (i == FAIL) {
+      line_error = true;
+      break;
+    }
+
+    // First try matching with the pattern literally (also when it is
+    // a regexp).
+    cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname);
+    if (p_tl != 0 && cmplen > p_tl) {           // adjust for 'taglength'
+      cmplen = (int)p_tl;
+    }
+    // if tag length does not match, don't try comparing
+    if (st->orgpat.len != cmplen) {
+      match = false;
+    } else {
+      if (st->orgpat.regmatch.rm_ic) {
+        assert(cmplen >= 0);
+        match = mb_strnicmp(tagp.tagname, (char *)st->orgpat.pat, (size_t)cmplen) == 0;
+        if (match) {
+          match_no_ic = (STRNCMP(tagp.tagname, st->orgpat.pat,
+                                 cmplen) == 0);
+        }
+      } else {
+        match = (STRNCMP(tagp.tagname, st->orgpat.pat, cmplen) == 0);
+      }
+    }
+
+    // Has a regexp: Also find tags matching regexp.
+    match_re = false;
+    if (!match && st->orgpat.regmatch.regprog != NULL) {
+      int cc;
+
+      cc = *tagp.tagname_end;
+      *tagp.tagname_end = NUL;
+      match = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0);
+      if (match) {
+        matchoff = (int)(st->orgpat.regmatch.startp[0] - tagp.tagname);
+        if (st->orgpat.regmatch.rm_ic) {
+          st->orgpat.regmatch.rm_ic = false;
+          match_no_ic = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0);
+          st->orgpat.regmatch.rm_ic = true;
+        }
+      }
+      *tagp.tagname_end = (char_u)cc;
+      match_re = true;
+    }
+
+    // If a match is found, add it to ht_match[] and ga_match[].
+    if (match) {
+      size_t len = 0;
+
+      // Decide in which array to store this match.
+      is_current = test_for_current((char *)tagp.fname, (char *)tagp.fname_end,
+                                    tag_fname,
+                                    buf_ffname);
+      is_static = test_for_static(&tagp);
+
+      // Decide in which of the sixteen tables to store this match.
+      if (is_static) {
+        if (is_current) {
+          mtt = MT_ST_CUR;
+        } else {
+          mtt = MT_ST_OTH;
+        }
+      } else {
+        if (is_current) {
+          mtt = MT_GL_CUR;
+        } else {
+          mtt = MT_GL_OTH;
+        }
+      }
+      if (st->orgpat.regmatch.rm_ic && !match_no_ic) {
+        mtt += MT_IC_OFF;
+      }
+      if (match_re) {
+        mtt += MT_RE_OFF;
+      }
+
+      // Add the found match in ht_match[mtt] and ga_match[mtt].
+      // Store the info we need later, which depends on the kind of
+      // tags we are dealing with.
+      if (help_only) {
+#define ML_EXTRA 3
+        // Append the help-heuristic number after the tagname, for
+        // sorting it later.  The heuristic is ignored for
+        // detecting duplicates.
+        // The format is {tagname}@{lang}NUL{heuristic}NUL
+        *tagp.tagname_end = NUL;
+        len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname);
+        mfp = xmalloc(sizeof(char) + len + 10 + ML_EXTRA + 1);
+
+        p = mfp;
+        STRCPY(p, tagp.tagname);
+        p[len] = '@';
+        STRCPY(p + len + 1, help_lang);
+        // 10 + 1 bytes of the allocation remain for the heuristic and its NUL.
+        snprintf(p + len + 1 + ML_EXTRA, 10 + 1, "%06d",
+                 help_heuristic(tagp.tagname,
+                                match_re ? matchoff : 0, !match_no_ic) + help_pri);
+
+        *tagp.tagname_end = TAB;
+      } else if (name_only) {
+        if (get_it_again) {
+          char_u *temp_end = tagp.command;
+
+          if (*temp_end == '/') {
+            while (*temp_end && *temp_end != '\r'
+                   && *temp_end != '\n'
+                   && *temp_end != '$') {
+              temp_end++;
+            }
+          }
+
+          if (tagp.command + 2 < temp_end) {
+            len = (size_t)(temp_end - tagp.command - 2);
+            mfp = xmalloc(len + 2);
+            STRLCPY(mfp, tagp.command + 2, len + 1);
+          } else {
+            mfp = NULL;
+          }
+          get_it_again = false;
+        } else {
+          len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname);
+          mfp = xmalloc(sizeof(char) + len + 1);
+          STRLCPY(mfp, tagp.tagname, len + 1);
+
+          // if wanted, re-read line to get long form too
+          if (State & MODE_INSERT) {
+            get_it_again = p_sft;
+          }
+        }
+      } else {
+        size_t tag_fname_len = strlen(tag_fname);
+        // Save the tag in a buffer.
+        // Use 0x02 to separate fields (Can't use NUL, because the
+        // hash key is terminated by NUL).
+        // Emacs tag: <mtt><tag_fname><0x02><ebuf><0x02><lbuf><NUL>
+        // other tag: <mtt><tag_fname><0x02><0x02><lbuf><NUL>
+        // without Emacs tags: <mtt><tag_fname><0x02><lbuf><NUL>
+        // Here <mtt> is the "mtt" value plus 1 to avoid NUL.
+        len = tag_fname_len + strlen(st->lbuf) + 3;
+        mfp = xmalloc(sizeof(char) + len + 1);
+        p = mfp;
+        p[0] = (char)(mtt + 1);
+        STRCPY(p + 1, tag_fname);
+#ifdef BACKSLASH_IN_FILENAME
+        // Ignore differences in slashes, avoid adding
+        // both path/file and path\file.
+        slash_adjust(p + 1);
+#endif
+        p[tag_fname_len + 1] = TAG_SEP;
+        s = (char_u *)p + 1 + tag_fname_len + 1;
+        STRCPY(s, st->lbuf);
+      }
+
+      if (mfp != NULL) {
+        hashitem_T *hi;
+
+        // Don't add identical matches.
+        // "mfp" is used as a hash key, there is a NUL byte to end
+        // the part that matters for comparing, more bytes may
+        // follow after it.  E.g. help tags store the priority
+        // after the NUL.
+        hash = hash_hash((char_u *)mfp);
+        hi = hash_lookup(&st->ht_match[mtt], (const char *)mfp, strlen(mfp), hash);
+        if (HASHITEM_EMPTY(hi)) {
+          hash_add_item(&st->ht_match[mtt], hi, (char_u *)mfp, hash);
+          GA_APPEND(char *, &st->ga_match[mtt], mfp);
+          st->match_count++;
+        } else {
+          // duplicate tag, drop it
+          xfree(mfp);
+        }
+      }
+    }
+  }   // forever
+
+  if (line_error) {
+    semsg(_("E431: Format error in tags file \"%s\""), tag_fname);
+    semsg(_("Before byte %" PRId64), (int64_t)vim_ftell(fp));
+    st->stop_searching = true;
+  }
+
+  fclose(fp);
+  if (vimconv.vc_type != CONV_NONE) {
+    convert_setup(&vimconv, NULL, NULL);
+  }
+
+  if (sort_error) {
+    semsg(_("E432: Tags file not sorted: %s"), tag_fname);
+  }
+
+  // Stop searching if sufficient tags have been found.
+  if (st->match_count >= st->mincount) {
+    st->stop_searching = true;
+  }
+}
+
+/// Hand the matches collected in "st" over to the caller as one flat array in
+/// "matchesp" ("num_matches" entries) and clear the per-type tables in "st".
+static void findtags_copy_matches(findtags_state_T *st, char ***matchesp, int *num_matches,
+                                  int name_only)
+{
+  char **matches = NULL;
+  int count = 0;
+
+  if (st->match_count > 0) {
+    matches = xmalloc((size_t)st->match_count * sizeof(char *));
+  }
+
+  for (int mtt = 0; mtt < MT_COUNT; mtt++) {
+    garray_T *const gap = &st->ga_match[mtt];
+    char **const items = (char **)gap->ga_data;
+
+    for (int i = 0; i < gap->ga_len; i++) {
+      char *const mfp = items[i];
+      if (matches == NULL) {
+        xfree(mfp);
+        continue;
+      }
+      if (!name_only) {
+        // Undo the "+ 1" that was applied to the leading mtt byte.
+        *mfp = (char)(*mfp - 1);
+
+        // Turn each TAG_SEP field separator back into NUL.
+        for (char *p = mfp + 1; *p != NUL; p++) {
+          if (*p == TAG_SEP) {
+            *p = NUL;
+          }
+        }
+      }
+      matches[count++] = mfp;
+    }
+
+    ga_clear(gap);
+    hash_clear(&st->ht_match[mtt]);
+  }
+
+  st->match_count = count;
+  *matchesp = matches;
+  *num_matches = count;
+}
+
 /// find_tags() - search for tags in tags files
 ///
 /// Return FAIL if search completely failed (*num_matches will be 0, *matchesp
@@ -1383,85 +2134,30 @@ static int find_tagfunc_tags(char_u *pat, garray_T *ga, int *match_count, int fl
 /// @param pat  pattern to search for
 /// @param num_matches  return: number of matches found
 /// @param matchesp  return: array of matches found
-/// @param mincount  MAXCOL: find all matches other: minimal number of matches */
+/// @param mincount  MAXCOL: find all matches
+///                  other: minimal number of matches
 /// @param buf_ffname  name of buffer for priority
 int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int mincount,
               char *buf_ffname)
 {
-  FILE *fp;
-  char *lbuf;                           // line buffer
-  int lbuf_size = LSIZE;                // length of lbuf
+  findtags_state_T st;
   char *tag_fname;                      // name of tag file
   tagname_T tn;                         // info for get_tagfname()
   int first_file;                       // trying first tag file
-  tagptrs_T tagp;
-  bool did_open = false;                // did open a tag file
-  bool stop_searching = false;          // stop when match found or error
   int retval = FAIL;                    // return value
-  int is_static;                        // current tag line is static
-  int is_current;                       // file name matches
-  bool eof = false;                     // found end-of-file
-  char *p;
-  char_u *s;
-  int i;
-  int tag_file_sorted = NUL;            // !_TAG_FILE_SORTED value
-  struct tag_search_info {      // Binary search file offsets
-    off_T low_offset;           // offset for first char of first line that
-                                // could match
-    off_T high_offset;          // offset of char after last line that could
-                                // match
-    off_T curr_offset;          // Current file offset in search range
-    off_T curr_offset_used;     // curr_offset used when skipping back
-    off_T match_offset;         // Where the binary search found a tag
-    int low_char;               // first char at low_offset
-    int high_char;              // first char at high_offset
-  } search_info;
-  int tagcmp;
-  off_T offset;
   int round;
-  enum {
-    TS_START,                   // at start of file
-    TS_LINEAR,                  // linear searching forward, till EOF
-    TS_BINARY,                // binary searching
-    TS_SKIP_BACK,               // skipping backwards
-    TS_STEP_FORWARD,  // stepping forwards
-  }   state;                    // Current search state
 
-  int cmplen;
-  int match;                    // matches
-  int match_no_ic = 0;          // matches with rm_ic == false
-  int match_re;                 // match with regexp
-  int matchoff = 0;
   int save_emsg_off;
 
-  char *mfp;
-  garray_T ga_match[MT_COUNT];   // stores matches in sequence
-  hashtab_T ht_match[MT_COUNT];  // stores matches by key
-  hash_T hash = 0;
-  int match_count = 0;                          // number of matches found
-  char **matches;
-  int mtt;
   int help_save;
-  int help_pri = 0;
-  char_u *help_lang_find = NULL;           // lang to be found
-  char_u help_lang[3];                          // lang of current tags file
+  int i;
   char *saved_pat = NULL;                // copy of pat[]
-  bool is_txt = false;
-
-  pat_T orgpat;                         // holds unconverted pattern info
-  vimconv_T vimconv;
 
-  int findall = (mincount == MAXCOL || mincount == TAG_MANY);
-  // find all matching tags
-  bool sort_error = false;                      // tags file not sorted
-  int linear;                                   // do a linear search
-  bool sortic = false;                          // tag file sorted in nocase
-  bool line_error = false;                      // syntax error
+  int findall = (mincount == MAXCOL || mincount == TAG_MANY);  // find all matching tags
   int has_re = (flags & TAG_REGEXP);            // regexp used
   int help_only = (flags & TAG_HELP);
   int name_only = (flags & TAG_NAMES);
   int noic = (flags & TAG_NOIC);
-  int get_it_again = false;
   int verbose = (flags & TAG_VERBOSE);
   int use_tfu = ((flags & TAG_NO_TAGFUNC) == 0);
   int save_p_ic = p_ic;
@@ -1488,55 +2184,44 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc
   }
 
   help_save = curbuf->b_help;
-  orgpat.pat = (char_u *)pat;
-  orgpat.regmatch.regprog = NULL;
-  vimconv.vc_type = CONV_NONE;
 
   // Allocate memory for the buffers that are used
-  lbuf = xmalloc((size_t)lbuf_size);
   tag_fname = xmalloc(MAXPATHL + 1);
-  for (mtt = 0; mtt < MT_COUNT; mtt++) {
-    ga_init(&ga_match[mtt], sizeof(char *), 100);
-    hash_init(&ht_match[mtt]);
-  }
+
+  findtags_state_init(&st, pat, mincount);
 
   // Initialize a few variables
   if (help_only) {                              // want tags from help file
     curbuf->b_help = true;                      // will be restored later
   }
 
-  orgpat.len = (int)strlen(pat);
   if (curbuf->b_help) {
     // When "@ab" is specified use only the "ab" language, otherwise
     // search all languages.
-    if (orgpat.len > 3 && pat[orgpat.len - 3] == '@'
-        && ASCII_ISALPHA(pat[orgpat.len - 2])
-        && ASCII_ISALPHA(pat[orgpat.len - 1])) {
-      saved_pat = xstrnsave(pat, (size_t)orgpat.len - 3);
-      help_lang_find = (char_u *)&pat[orgpat.len - 2];
-      orgpat.pat = (char_u *)saved_pat;
-      orgpat.len -= 3;
+    if (st.orgpat.len > 3 && pat[st.orgpat.len - 3] == '@'
+        && ASCII_ISALPHA(pat[st.orgpat.len - 2])
+        && ASCII_ISALPHA(pat[st.orgpat.len - 1])) {
+      saved_pat = xstrnsave(pat, (size_t)st.orgpat.len - 3);
+      st.help_lang_find = &pat[st.orgpat.len - 2];
+      st.orgpat.pat = (char_u *)saved_pat;
+      st.orgpat.len -= 3;
     }
   }
-  if (p_tl != 0 && orgpat.len > p_tl) {         // adjust for 'taglength'
-    orgpat.len = (int)p_tl;
+  if (p_tl != 0 && st.orgpat.len > p_tl) {  // adjust for 'taglength'
+    st.orgpat.len = (int)p_tl;
   }
 
   save_emsg_off = emsg_off;
   emsg_off = true;    // don't want error for invalid RE here
-  prepare_pats(&orgpat, has_re);
+  prepare_pats(&st.orgpat, has_re);
   emsg_off = save_emsg_off;
-  if (has_re && orgpat.regmatch.regprog == NULL) {
+  if (has_re && st.orgpat.regmatch.regprog == NULL) {
     goto findtag_end;
   }
 
-  // This is only to avoid a compiler warning for using search_info
-  // uninitialised.
-  CLEAR_FIELD(search_info);
-
   if (*curbuf->b_p_tfu != NUL && use_tfu && !tfu_in_use) {
     tfu_in_use = true;
-    retval = find_tagfunc_tags((char_u *)pat, &ga_match[0], &match_count, flags,
+    retval = find_tagfunc_tags((char_u *)pat, &st.ga_match[0], &st.match_count, flags,
                                (char_u *)buf_ffname);
     tfu_in_use = false;
     if (retval != NOTDONE) {
@@ -1555,631 +2240,24 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc
 
   // Set a flag if the file extension is .txt
   if ((flags & TAG_KEEP_LANG)
-      && help_lang_find == NULL
+      && st.help_lang_find == NULL
       && curbuf->b_fname != NULL
       && (i = (int)strlen(curbuf->b_fname)) > 4
       && STRICMP(curbuf->b_fname + i - 4, ".txt") == 0) {
-    is_txt = true;
+    st.is_txt = true;
   }
-  orgpat.regmatch.rm_ic = ((p_ic || !noic)
-                           && (findall || orgpat.headlen == 0 || !p_tbs));
+  st.orgpat.regmatch.rm_ic = ((p_ic || !noic)
+                              && (findall || st.orgpat.headlen == 0 || !p_tbs));
   for (round = 1; round <= 2; round++) {
-    linear = (orgpat.headlen == 0 || !p_tbs || round == 2);
+    st.linear = (st.orgpat.headlen == 0 || !p_tbs || round == 2);
 
     // Try tag file names from tags option one by one.
     for (first_file = true;
          get_tagfname(&tn, first_file, tag_fname) == OK;
          first_file = false) {
-      // A file that doesn't exist is silently ignored.  Only when not a
-      // single file is found, an error message is given (further on).
-      if (curbuf->b_help) {
-        // Keep en if the file extension is .txt
-        if (is_txt) {
-          STRCPY(help_lang, "en");
-        } else {
-          // Prefer help tags according to 'helplang'.  Put the
-          // two-letter language name in help_lang[].
-          i = (int)STRLEN(tag_fname);
-          if (i > 3 && tag_fname[i - 3] == '-') {
-            STRCPY(help_lang, tag_fname + i - 2);
-          } else {
-            STRCPY(help_lang, "en");
-          }
-        }
-
-        // When searching for a specific language skip tags files
-        // for other languages.
-        if (help_lang_find != NULL
-            && STRICMP(help_lang, help_lang_find) != 0) {
-          continue;
-        }
-
-        // For CTRL-] in a help file prefer a match with the same
-        // language.
-        if ((flags & TAG_KEEP_LANG)
-            && help_lang_find == NULL
-            && curbuf->b_fname != NULL
-            && (i = (int)strlen(curbuf->b_fname)) > 4
-            && curbuf->b_fname[i - 1] == 'x'
-            && curbuf->b_fname[i - 4] == '.'
-            && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0) {
-          help_pri = 0;
-        } else {
-          help_pri = 1;
-          for (s = p_hlg; *s != NUL; s++) {
-            if (STRNICMP(s, help_lang, 2) == 0) {
-              break;
-            }
-            help_pri++;
-            if ((s = (char_u *)vim_strchr((char *)s, ',')) == NULL) {
-              break;
-            }
-          }
-          if (s == NULL || *s == NUL) {
-            // Language not in 'helplang': use last, prefer English,
-            // unless found already.
-            help_pri++;
-            if (STRICMP(help_lang, "en") != 0) {
-              help_pri++;
-            }
-          }
-        }
-      }
-
-      if ((fp = os_fopen(tag_fname, "r")) == NULL) {
-        continue;
-      }
-
-      if (p_verbose >= 5) {
-        verbose_enter();
-        smsg(_("Searching tags file %s"), tag_fname);
-        verbose_leave();
-      }
-
-      did_open = true;      // remember that we found at least one file
-
-      state = TS_START;     // we're at the start of the file
-
-      // Read and parse the lines in the file one by one
-      for (;;) {
-        // check for CTRL-C typed, more often when jumping around
-        if (state == TS_BINARY || state == TS_SKIP_BACK) {
-          line_breakcheck();
-        } else {
-          fast_breakcheck();
-        }
-        if ((flags & TAG_INS_COMP)) {   // Double brackets for gcc
-          ins_compl_check_keys(30, false);
-        }
-        if (got_int || ins_compl_interrupted()) {
-          stop_searching = true;
-          break;
-        }
-        // When mincount is TAG_MANY, stop when enough matches have been
-        // found (for completion).
-        if (mincount == TAG_MANY && match_count >= TAG_MANY) {
-          stop_searching = true;
-          retval = OK;
-          break;
-        }
-        if (get_it_again) {
-          goto line_read_in;
-        }
-        // For binary search: compute the next offset to use.
-        if (state == TS_BINARY) {
-          offset = search_info.low_offset + ((search_info.high_offset
-                                              - search_info.low_offset) / 2);
-          if (offset == search_info.curr_offset) {
-            break;              // End the binary search without a match.
-          }
-          search_info.curr_offset = offset;
-        } else if (state == TS_SKIP_BACK) {
-          // Skipping back (after a match during binary search).
-          search_info.curr_offset -= lbuf_size * 2;
-          if (search_info.curr_offset < 0) {
-            search_info.curr_offset = 0;
-            rewind(fp);
-            state = TS_STEP_FORWARD;
-          }
-        }
-
-        // When jumping around in the file, first read a line to find the
-        // start of the next line.
-        if (state == TS_BINARY || state == TS_SKIP_BACK) {
-          // Adjust the search file offset to the correct position
-          search_info.curr_offset_used = search_info.curr_offset;
-          vim_fseek(fp, search_info.curr_offset, SEEK_SET);
-          eof = vim_fgets((char_u *)lbuf, lbuf_size, fp);
-          if (!eof && search_info.curr_offset != 0) {
-            search_info.curr_offset = vim_ftell(fp);
-            if (search_info.curr_offset == search_info.high_offset) {
-              // oops, gone a bit too far; try from low offset
-              vim_fseek(fp, search_info.low_offset, SEEK_SET);
-              search_info.curr_offset = search_info.low_offset;
-            }
-            eof = vim_fgets((char_u *)lbuf, lbuf_size, fp);
-          }
-          // skip empty and blank lines
-          while (!eof && vim_isblankline(lbuf)) {
-            search_info.curr_offset = vim_ftell(fp);
-            eof = vim_fgets((char_u *)lbuf, lbuf_size, fp);
-          }
-          if (eof) {
-            // Hit end of file.  Skip backwards.
-            state = TS_SKIP_BACK;
-            search_info.match_offset = vim_ftell(fp);
-            search_info.curr_offset = search_info.curr_offset_used;
-            continue;
-          }
-        } else {
-          // Not jumping around in the file: Read the next line.
-
-          // skip empty and blank lines
-          do {
-            search_info.curr_offset = vim_ftell(fp);
-            eof = vim_fgets((char_u *)lbuf, lbuf_size, fp);
-          } while (!eof && vim_isblankline(lbuf));
-
-          if (eof) {
-            break;                                  // end of file
-          }
-        }
-line_read_in:
-
-        if (vimconv.vc_type != CONV_NONE) {
-          char *conv_line;
-          int len;
-
-          // Convert every line.  Converting the pattern from 'enc' to
-          // the tags file encoding doesn't work, because characters are
-          // not recognized.
-          conv_line = string_convert(&vimconv, lbuf, NULL);
-          if (conv_line != NULL) {
-            // Copy or swap lbuf and conv_line.
-            len = (int)strlen(conv_line) + 1;
-            if (len > lbuf_size) {
-              xfree(lbuf);
-              lbuf = conv_line;
-              lbuf_size = len;
-            } else {
-              STRCPY(lbuf, conv_line);
-              xfree(conv_line);
-            }
-          }
-        }
-
-        // When still at the start of the file, check for Emacs tags file
-        // format, and for "not sorted" flag.
-        if (state == TS_START) {
-          // The header ends when the line sorts below "!_TAG_".  When
-          // case is folded lower case letters sort before "_".
-          if (STRNCMP(lbuf, "!_TAG_", 6) <= 0
-              || (lbuf[0] == '!' && ASCII_ISLOWER(lbuf[1]))) {
-            if (STRNCMP(lbuf, "!_TAG_", 6) != 0) {
-              // Non-header item before the header, e.g. "!" itself.
-              goto parse_line;
-            }
-
-            // Read header line.
-            if (STRNCMP(lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) {
-              tag_file_sorted = (uint8_t)lbuf[18];
-            }
-            if (STRNCMP(lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) {
-              // Prepare to convert every line from the specified
-              // encoding to 'encoding'.
-              for (p = lbuf + 20; *p > ' ' && *p < 127; p++) {}
-              *p = NUL;
-              convert_setup(&vimconv, lbuf + 20, p_enc);
-            }
-
-            // Read the next line.  Unrecognized flags are ignored.
-            continue;
-          }
-
-          // Headers ends.
-
-          // When there is no tag head, or ignoring case, need to do a
-          // linear search.
-          // When no "!_TAG_" is found, default to binary search.  If
-          // the tag file isn't sorted, the second loop will find it.
-          // When "!_TAG_FILE_SORTED" found: start binary search if
-          // flag set.
-          if (linear) {
-            state = TS_LINEAR;
-          } else if (tag_file_sorted == NUL) {
-            state = TS_BINARY;
-          } else if (tag_file_sorted == '1') {
-            state = TS_BINARY;
-          } else if (tag_file_sorted == '2') {
-            state = TS_BINARY;
-            sortic = true;
-            orgpat.regmatch.rm_ic = (p_ic || !noic);
-          } else {
-            state = TS_LINEAR;
-          }
-
-          if (state == TS_BINARY && orgpat.regmatch.rm_ic && !sortic) {
-            // Binary search won't work for ignoring case, use linear
-            // search.
-            linear = true;
-            state = TS_LINEAR;
-          }
-
-          // When starting a binary search, get the size of the file and
-          // compute the first offset.
-          if (state == TS_BINARY) {
-            if (vim_fseek(fp, 0, SEEK_END) != 0) {
-              // can't seek, don't use binary search
-              state = TS_LINEAR;
-            } else {
-              // Get the tag file size.
-              // Don't use lseek(), it doesn't work
-              // properly on MacOS Catalina.
-              const off_T filesize = vim_ftell(fp);
-              vim_fseek(fp, 0, SEEK_SET);
-
-              // Calculate the first read offset in the file.  Start
-              // the search in the middle of the file.
-              search_info.low_offset = 0;
-              search_info.low_char = 0;
-              search_info.high_offset = filesize;
-              search_info.curr_offset = 0;
-              search_info.high_char = 0xff;
-            }
-            continue;
-          }
-        }
-
-parse_line:
-        // When the line is too long the NUL will not be in the
-        // last-but-one byte (see vim_fgets()).
-        // Has been reported for Mozilla JS with extremely long names.
-        // In that case we need to increase lbuf_size.
-        if (lbuf[lbuf_size - 2] != NUL) {
-          lbuf_size *= 2;
-          xfree(lbuf);
-          lbuf = xmalloc((size_t)lbuf_size);
-
-          if (state == TS_STEP_FORWARD) {
-            // Seek to the same position to read the same line again
-            vim_fseek(fp, search_info.curr_offset, SEEK_SET);
-          }
-          // this will try the same thing again, make sure the offset is
-          // different
-          search_info.curr_offset = 0;
-          continue;
-        }
-
-        // Figure out where the different strings are in this line.
-        // For "normal" tags: Do a quick check if the tag matches.
-        // This speeds up tag searching a lot!
-        if (orgpat.headlen) {
-          CLEAR_FIELD(tagp);
-          tagp.tagname = lbuf;
-          tagp.tagname_end = (char_u *)vim_strchr(lbuf, TAB);
-          if (tagp.tagname_end == NULL) {
-            // Corrupted tag line.
-            line_error = true;
-            break;
-          }
-
-          // Skip this line if the length of the tag is different and
-          // there is no regexp, or the tag is too short.
-          cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname);
-          if (p_tl != 0 && cmplen > p_tl) {         // adjust for 'taglength'
-            cmplen = (int)p_tl;
-          }
-          if (has_re && orgpat.headlen < cmplen) {
-            cmplen = orgpat.headlen;
-          } else if (state == TS_LINEAR && orgpat.headlen != cmplen) {
-            continue;
-          }
-
-          if (state == TS_BINARY) {
-            // Simplistic check for unsorted tags file.
-            i = (int)tagp.tagname[0];
-            if (sortic) {
-              i = TOUPPER_ASC(tagp.tagname[0]);
-            }
-            if (i < search_info.low_char || i > search_info.high_char) {
-              sort_error = true;
-            }
-
-            // Compare the current tag with the searched tag.
-            if (sortic) {
-              tagcmp = tag_strnicmp((char_u *)tagp.tagname, orgpat.head,
-                                    (size_t)cmplen);
-            } else {
-              tagcmp = STRNCMP(tagp.tagname, orgpat.head, cmplen);
-            }
-
-            // A match with a shorter tag means to search forward.
-            // A match with a longer tag means to search backward.
-            if (tagcmp == 0) {
-              if (cmplen < orgpat.headlen) {
-                tagcmp = -1;
-              } else if (cmplen > orgpat.headlen) {
-                tagcmp = 1;
-              }
-            }
-
-            if (tagcmp == 0) {
-              // We've located the tag, now skip back and search
-              // forward until the first matching tag is found.
-              state = TS_SKIP_BACK;
-              search_info.match_offset = search_info.curr_offset;
-              continue;
-            }
-            if (tagcmp < 0) {
-              search_info.curr_offset = vim_ftell(fp);
-              if (search_info.curr_offset < search_info.high_offset) {
-                search_info.low_offset = search_info.curr_offset;
-                if (sortic) {
-                  search_info.low_char =
-                    TOUPPER_ASC(tagp.tagname[0]);
-                } else {
-                  search_info.low_char = (uint8_t)tagp.tagname[0];
-                }
-                continue;
-              }
-            }
-            if (tagcmp > 0
-                && search_info.curr_offset != search_info.high_offset) {
-              search_info.high_offset = search_info.curr_offset;
-              if (sortic) {
-                search_info.high_char =
-                  TOUPPER_ASC(tagp.tagname[0]);
-              } else {
-                search_info.high_char = (uint8_t)tagp.tagname[0];
-              }
-              continue;
-            }
-
-            // No match yet and are at the end of the binary search.
-            break;
-          } else if (state == TS_SKIP_BACK) {
-            assert(cmplen >= 0);
-            if (mb_strnicmp(tagp.tagname, (char *)orgpat.head, (size_t)cmplen) != 0) {
-              state = TS_STEP_FORWARD;
-            } else {
-              // Have to skip back more.  Restore the curr_offset
-              // used, otherwise we get stuck at a long line.
-              search_info.curr_offset = search_info.curr_offset_used;
-            }
-            continue;
-          } else if (state == TS_STEP_FORWARD) {
-            assert(cmplen >= 0);
-            if (mb_strnicmp(tagp.tagname, (char *)orgpat.head, (size_t)cmplen) != 0) {
-              if ((off_T)vim_ftell(fp) > search_info.match_offset) {
-                break;                  // past last match
-              } else {
-                continue;               // before first match
-              }
-            }
-          } else {
-            // skip this match if it can't match
-            assert(cmplen >= 0);
-          }
-          if (mb_strnicmp(tagp.tagname, (char *)orgpat.head, (size_t)cmplen) != 0) {
-            continue;
-          }
-
-          // Can be a matching tag, isolate the file name and command.
-          tagp.fname = tagp.tagname_end + 1;
-          tagp.fname_end = (char_u *)vim_strchr((char *)tagp.fname, TAB);
-          tagp.command = tagp.fname_end + 1;
-          if (tagp.fname_end == NULL) {
-            i = FAIL;
-          } else {
-            i = OK;
-          }
-        } else {
-          i = parse_tag_line((char_u *)lbuf, &tagp);
-        }
-        if (i == FAIL) {
-          line_error = true;
-          break;
-        }
-
-        // First try matching with the pattern literally (also when it is
-        // a regexp).
-        cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname);
-        if (p_tl != 0 && cmplen > p_tl) {           // adjust for 'taglength'
-          cmplen = (int)p_tl;
-        }
-        // if tag length does not match, don't try comparing
-        if (orgpat.len != cmplen) {
-          match = false;
-        } else {
-          if (orgpat.regmatch.rm_ic) {
-            assert(cmplen >= 0);
-            match = mb_strnicmp(tagp.tagname, (char *)orgpat.pat, (size_t)cmplen) == 0;
-            if (match) {
-              match_no_ic = (STRNCMP(tagp.tagname, orgpat.pat,
-                                     cmplen) == 0);
-            }
-          } else {
-            match = (STRNCMP(tagp.tagname, orgpat.pat, cmplen) == 0);
-          }
-        }
-
-        // Has a regexp: Also find tags matching regexp.
-        match_re = false;
-        if (!match && orgpat.regmatch.regprog != NULL) {
-          int cc;
-
-          cc = *tagp.tagname_end;
-          *tagp.tagname_end = NUL;
-          match = vim_regexec(&orgpat.regmatch, tagp.tagname, (colnr_T)0);
-          if (match) {
-            matchoff = (int)(orgpat.regmatch.startp[0] - tagp.tagname);
-            if (orgpat.regmatch.rm_ic) {
-              orgpat.regmatch.rm_ic = false;
-              match_no_ic = vim_regexec(&orgpat.regmatch, tagp.tagname, (colnr_T)0);
-              orgpat.regmatch.rm_ic = true;
-            }
-          }
-          *tagp.tagname_end = (char_u)cc;
-          match_re = true;
-        }
-
-        // If a match is found, add it to ht_match[] and ga_match[].
-        if (match) {
-          size_t len = 0;
-
-          // Decide in which array to store this match.
-          is_current = test_for_current((char *)tagp.fname, (char *)tagp.fname_end,
-                                        tag_fname,
-                                        buf_ffname);
-          is_static = test_for_static(&tagp);
-
-          // Decide in which of the sixteen tables to store this match.
-          if (is_static) {
-            if (is_current) {
-              mtt = MT_ST_CUR;
-            } else {
-              mtt = MT_ST_OTH;
-            }
-          } else {
-            if (is_current) {
-              mtt = MT_GL_CUR;
-            } else {
-              mtt = MT_GL_OTH;
-            }
-          }
-          if (orgpat.regmatch.rm_ic && !match_no_ic) {
-            mtt += MT_IC_OFF;
-          }
-          if (match_re) {
-            mtt += MT_RE_OFF;
-          }
-
-          // Add the found match in ht_match[mtt] and ga_match[mtt].
-          // Store the info we need later, which depends on the kind of
-          // tags we are dealing with.
-          if (help_only) {
-#define ML_EXTRA 3
-            // Append the help-heuristic number after the tagname, for
-            // sorting it later.  The heuristic is ignored for
-            // detecting duplicates.
-            // The format is {tagname}@{lang}NUL{heuristic}NUL
-            *tagp.tagname_end = NUL;
-            len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname);
-            mfp = xmalloc(sizeof(char) + len + 10 + ML_EXTRA + 1);
-
-            p = mfp;
-            STRCPY(p, tagp.tagname);
-            p[len] = '@';
-            STRCPY(p + len + 1, help_lang);
-            snprintf(p + len + 1 + ML_EXTRA, strlen(p) + len + 1 + ML_EXTRA, "%06d",
-                     help_heuristic(tagp.tagname,
-                                    match_re ? matchoff : 0, !match_no_ic)
-                     + help_pri);
-
-            *tagp.tagname_end = TAB;
-          } else if (name_only) {
-            if (get_it_again) {
-              char_u *temp_end = tagp.command;
-
-              if (*temp_end == '/') {
-                while (*temp_end && *temp_end != '\r'
-                       && *temp_end != '\n'
-                       && *temp_end != '$') {
-                  temp_end++;
-                }
-              }
-
-              if (tagp.command + 2 < temp_end) {
-                len = (size_t)(temp_end - tagp.command - 2);
-                mfp = xmalloc(len + 2);
-                STRLCPY(mfp, tagp.command + 2, len + 1);
-              } else {
-                mfp = NULL;
-              }
-              get_it_again = false;
-            } else {
-              len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname);
-              mfp = xmalloc(sizeof(char) + len + 1);
-              STRLCPY(mfp, tagp.tagname, len + 1);
-
-              // if wanted, re-read line to get long form too
-              if (State & MODE_INSERT) {
-                get_it_again = p_sft;
-              }
-            }
-          } else {
-            size_t tag_fname_len = strlen(tag_fname);
-            // Save the tag in a buffer.
-            // Use 0x02 to separate fields (Can't use NUL, because the
-            // hash key is terminated by NUL).
-            // Emacs tag: <mtt><tag_fname><0x02><ebuf><0x02><lbuf><NUL>
-            // other tag: <mtt><tag_fname><0x02><0x02><lbuf><NUL>
-            // without Emacs tags: <mtt><tag_fname><0x02><lbuf><NUL>
-            // Here <mtt> is the "mtt" value plus 1 to avoid NUL.
-            len = tag_fname_len + strlen(lbuf) + 3;
-            mfp = xmalloc(sizeof(char) + len + 1);
-            p = mfp;
-            p[0] = (char)(mtt + 1);
-            STRCPY(p + 1, tag_fname);
-#ifdef BACKSLASH_IN_FILENAME
-            // Ignore differences in slashes, avoid adding
-            // both path/file and path\file.
-            slash_adjust(p + 1);
-#endif
-            p[tag_fname_len + 1] = TAG_SEP;
-            s = (char_u *)p + 1 + tag_fname_len + 1;
-            STRCPY(s, lbuf);
-          }
-
-          if (mfp != NULL) {
-            hashitem_T *hi;
-
-            // Don't add identical matches.
-            // "mfp" is used as a hash key, there is a NUL byte to end
-            // the part that matters for comparing, more bytes may
-            // follow after it.  E.g. help tags store the priority
-            // after the NUL.
-            hash = hash_hash((char_u *)mfp);
-            hi = hash_lookup(&ht_match[mtt], (const char *)mfp,
-                             strlen(mfp), hash);
-            if (HASHITEM_EMPTY(hi)) {
-              hash_add_item(&ht_match[mtt], hi, (char_u *)mfp, hash);
-              ga_grow(&ga_match[mtt], 1);
-              ((char **)(ga_match[mtt].ga_data))[ga_match[mtt].ga_len++] = mfp;
-              match_count++;
-            } else {
-              // duplicate tag, drop it
-              xfree(mfp);
-            }
-          }
-        }
-      }   // forever
-
-      if (line_error) {
-        semsg(_("E431: Format error in tags file \"%s\""), tag_fname);
-        semsg(_("Before byte %" PRId64), (int64_t)vim_ftell(fp));
-        stop_searching = true;
-        line_error = false;
-      }
-
-      fclose(fp);
-      if (vimconv.vc_type != CONV_NONE) {
-        convert_setup(&vimconv, NULL, NULL);
-      }
-
-      tag_file_sorted = NUL;
-      if (sort_error) {
-        semsg(_("E432: Tags file not sorted: %s"), tag_fname);
-        sort_error = false;
-      }
-
-      // Stop searching if sufficient tags have been found.
-      if (match_count >= mincount) {
+      find_tags_in_file(tag_fname, &st, flags, buf_ffname);
+      if (st.stop_searching) {
         retval = OK;
-        stop_searching = true;
-      }
-
-      if (stop_searching) {
         break;
       }
     }   // end of for-each-file loop
@@ -2188,63 +2266,34 @@ parse_line:
 
     // stop searching when already did a linear search, or when TAG_NOIC
     // used, and 'ignorecase' not set or already did case-ignore search
-    if (stop_searching || linear || (!p_ic && noic) || orgpat.regmatch.rm_ic) {
+    if (st.stop_searching || st.linear || (!p_ic && noic)
+        || st.orgpat.regmatch.rm_ic) {
       break;
     }
-    orgpat.regmatch.rm_ic = true;       // try another time while ignoring case
+
+    // try another time while ignoring case
+    st.orgpat.regmatch.rm_ic = true;
   }
 
-  if (!stop_searching) {
-    if (!did_open && verbose) {         // never opened any tags file
+  if (!st.stop_searching) {
+    if (!st.did_open && verbose) {  // never opened any tags file
       emsg(_("E433: No tags file"));
     }
     retval = OK;                // It's OK even when no tag found
   }
 
 findtag_end:
-  xfree(lbuf);
-  vim_regfree(orgpat.regmatch.regprog);
+  xfree(st.lbuf);
+  vim_regfree(st.orgpat.regmatch.regprog);
   xfree(tag_fname);
 
   // Move the matches from the ga_match[] arrays into one list of
   // matches.  When retval == FAIL, free the matches.
   if (retval == FAIL) {
-    match_count = 0;
-  }
-
-  if (match_count > 0) {
-    matches = xmalloc((size_t)match_count * sizeof(char *));
-  } else {
-    matches = NULL;
-  }
-  match_count = 0;
-  for (mtt = 0; mtt < MT_COUNT; mtt++) {
-    for (i = 0; i < ga_match[mtt].ga_len; i++) {
-      mfp = ((char **)(ga_match[mtt].ga_data))[i];
-      if (matches == NULL) {
-        xfree(mfp);
-      } else {
-        if (!name_only) {
-          // Change mtt back to zero-based.
-          *mfp = (char)(*mfp - 1);
-
-          // change the TAG_SEP back to NUL
-          for (p = mfp + 1; *p != NUL; p++) {
-            if (*p == TAG_SEP) {
-              *p = NUL;
-            }
-          }
-        }
-        matches[match_count++] = mfp;
-      }
-    }
-
-    ga_clear(&ga_match[mtt]);
-    hash_clear(&ht_match[mtt]);
+    st.match_count = 0;
   }
 
-  *matchesp = matches;
-  *num_matches = match_count;
+  findtags_copy_matches(&st, matchesp, num_matches, name_only);
 
   curbuf->b_help = help_save;
   xfree(saved_pat);
diff --git a/src/nvim/testdir/test_tagjump.vim b/src/nvim/testdir/test_tagjump.vim
index bfc61e7b48..361aa23291 100644
--- a/src/nvim/testdir/test_tagjump.vim
+++ b/src/nvim/testdir/test_tagjump.vim
@@ -1448,6 +1448,11 @@ func Test_tagfile_errors()
   endtry
   call assert_equal(v:true, caught_431)
 
+  " tag name and file name are not separated by a tab
+  call writefile(["!_TAG_FILE_ENCODING\tutf-8\t//",
+        \ "foo Xfile 1"], 'Xtags')
+  call assert_fails('tag foo', 'E431:')
+
   call delete('Xtags')
   call delete('Xfile')
   set tags&
author	zeertzjq <zeertzjq@outlook.com>	2022-11-18 13:25:21 +0800
committer	zeertzjq <zeertzjq@outlook.com>	2022-11-18 14:59:18 +0800
commit	245035d6db7399cc077de0eaa8e97e2eecc08a9b (patch)
tree	8d07e90dc1f7ff3cc7dda29255aa29cc5310395b
parent	282cbc2350986c3fc1edb507c4facc8d8fe8cd97 (diff)
download	rneovim-245035d6db7399cc077de0eaa8e97e2eecc08a9b.tar.gz rneovim-245035d6db7399cc077de0eaa8e97e2eecc08a9b.tar.bz2 rneovim-245035d6db7399cc077de0eaa8e97e2eecc08a9b.zip