diff options
-rw-r--r-- | src/nvim/tag.c | 1368 | ||||
-rw-r--r-- | src/nvim/testdir/test_tagjump.vim | 145 | ||||
-rw-r--r-- | src/nvim/testdir/test_taglist.vim | 11 |
3 files changed, 921 insertions, 603 deletions
diff --git a/src/nvim/tag.c b/src/nvim/tag.c index 6dce3c500b..c5df68d451 100644 --- a/src/nvim/tag.c +++ b/src/nvim/tag.c @@ -110,10 +110,66 @@ static char *mt_names[MT_COUNT/2] = #define NOTAGFILE 99 // return value for jumpto_tag static char *nofile_fname = NULL; // fname for NOTAGFILE error +/// Return values used when reading lines from a tags file. +typedef enum { + TAGS_READ_SUCCESS = 1, + TAGS_READ_EOF, + TAGS_READ_IGNORE, +} tags_read_status_T; + +/// States used during a tags search +typedef enum { + TS_START, ///< at start of file + TS_LINEAR, ///< linear searching forward, till EOF + TS_BINARY, ///< binary searching + TS_SKIP_BACK, ///< skipping backwards + TS_STEP_FORWARD, ///< stepping forwards +} tagsearch_state_T; + +/// Binary search file offsets in a tags file +typedef struct { + off_T low_offset; ///< offset for first char of first line that + ///< could match + off_T high_offset; ///< offset of char after last line that could + ///< match + off_T curr_offset; ///< Current file offset in search range + off_T curr_offset_used; ///< curr_offset used when skipping back + off_T match_offset; ///< Where the binary search found a tag + int low_char; ///< first char at low_offset + int high_char; ///< first char at high_offset +} tagsearch_info_T; + +/// Return values used when matching tags against a pattern. +typedef enum { + TAG_MATCH_SUCCESS = 1, + TAG_MATCH_FAIL, + TAG_MATCH_STOP, + TAG_MATCH_NEXT, +} tagmatch_status_T; + +/// Arguments used for matching tags read from a tags file against a pattern. +typedef struct { + int matchoff; ///< tag match offset + bool match_re; ///< true if the tag matches a regexp + bool match_no_ic; ///< true if the tag matches with case + bool has_re; ///< regular expression used + bool sortic; ///< tags file sorted ignoring case (foldcase) + bool sort_error; ///< tags file not sorted +} findtags_match_args_T; + /// State information used during a tag search typedef struct { - char *tag_fname; ///< name of tag file + tagsearch_state_T state; ///< tag search state + char *tag_fname; ///< name of the tag file + FILE *fp; ///< current tags file pointer pat_T orgpat; ///< holds unconverted pattern info + int flags; ///< flags used for tag search + int tag_file_sorted; ///< !_TAG_FILE_SORTED value + bool get_searchpat; ///< used for 'showfulltag' + bool help_only; ///< only search for help tags + vimconv_T vimconv; + char help_lang[3]; ///< lang of current tags file + int help_pri; ///< help language priority char *help_lang_find; ///< lang to be found bool is_txt; ///< flag of file extension bool did_open; ///< did open a tag file @@ -1374,15 +1430,22 @@ static int find_tagfunc_tags(char_u *pat, garray_T *ga, int *match_count, int fl } /// Initialize the state used by find_tags() -static void findtags_state_init(findtags_state_T *st, char *pat, int mincount) +static void findtags_state_init(findtags_state_T *st, char *pat, int flags, int mincount) { st->tag_fname = xmalloc(MAXPATHL + 1); + st->fp = NULL; st->orgpat.pat = (char_u *)pat; st->orgpat.len = (int)strlen(pat); st->orgpat.regmatch.regprog = NULL; + st->flags = flags; + st->tag_file_sorted = NUL; st->help_lang_find = NULL; st->is_txt = false; st->did_open = false; + st->help_only = (flags & TAG_HELP); + st->get_searchpat = false; + st->help_lang[0] = NUL; + st->help_pri = 0; st->mincount = mincount; st->lbuf_size = LSIZE; st->lbuf = xmalloc((size_t)st->lbuf_size); @@ -1395,148 +1458,662 @@ static void findtags_state_init(findtags_state_T *st, char *pat, int mincount) } } -/// Search for tags matching "st->orgpat.pat" in the "st->tag_fname" tags file. -/// Information needed to search for the tags is in the "st" state structure. -/// The matching tags are returned in "st". -static void find_tags_in_file(findtags_state_T *st, int flags, char *buf_ffname) +/// Free the state used by find_tags() +static void findtags_state_free(findtags_state_T *st) +{ + xfree(st->tag_fname); + xfree(st->lbuf); + vim_regfree(st->orgpat.regmatch.regprog); +} + +/// Initialize the language and priority used for searching tags in a Vim help +/// file. +/// Returns true to process the help file for tags and false to skip the file. +static bool findtags_in_help_init(findtags_state_T *st) { - FILE *fp = NULL; - tagptrs_T tagp; - int is_static; // current tag line is static - int is_current; // file name matches - bool eof = false; // found end-of-file - char *p; - char_u *s; int i; - int help_pri = 0; - char_u help_lang[3] = ""; // lang of current tags file - int tag_file_sorted = NUL; // !_TAG_FILE_SORTED value - int tagcmp; + + // Keep 'en' as the language if the file extension is '.txt' + if (st->is_txt) { + STRCPY(st->help_lang, "en"); + } else { + // Prefer help tags according to 'helplang'. Put the two-letter + // language name in help_lang[]. + i = (int)STRLEN(st->tag_fname); + if (i > 3 && st->tag_fname[i - 3] == '-') { + STRLCPY(st->help_lang, st->tag_fname + i - 2, 3); + } else { + STRCPY(st->help_lang, "en"); + } + } + // When searching for a specific language skip tags files for other + // languages. + if (st->help_lang_find != NULL + && STRICMP(st->help_lang, st->help_lang_find) != 0) { + return false; + } + + // For CTRL-] in a help file prefer a match with the same language. + if ((st->flags & TAG_KEEP_LANG) + && st->help_lang_find == NULL + && curbuf->b_fname != NULL + && (i = (int)strlen(curbuf->b_fname)) > 4 + && curbuf->b_fname[i - 1] == 'x' + && curbuf->b_fname[i - 4] == '.' + && STRNICMP(curbuf->b_fname + i - 3, st->help_lang, 2) == 0) { + st->help_pri = 0; + } else { + st->help_pri = 1; + char *s; + for (s = (char *)p_hlg; *s != NUL; s++) { + if (STRNICMP(s, st->help_lang, 2) == 0) { + break; + } + st->help_pri++; + if ((s = vim_strchr(s, ',')) == NULL) { + break; + } + } + if (s == NULL || *s == NUL) { + // Language not in 'helplang': use last, prefer English, unless + // found already. + st->help_pri++; + if (STRICMP(st->help_lang, "en") != 0) { + st->help_pri++; + } + } + } + + return true; +} + +/// Use the function set in 'tagfunc' (if configured and enabled) to get the +/// tags. +/// Return OK if at least 1 tag has been successfully found, NOTDONE if the +/// 'tagfunc' is not used or the 'tagfunc' returns v:null and FAIL otherwise. +static int findtags_apply_tfu(findtags_state_T *st, char *pat, char *buf_ffname) +{ + const bool use_tfu = ((st->flags & TAG_NO_TAGFUNC) == 0); + + if (!use_tfu || tfu_in_use || *curbuf->b_p_tfu == NUL) { + return NOTDONE; + } + + tfu_in_use = true; + int retval = find_tagfunc_tags((char_u *)pat, st->ga_match, &st->match_count, + st->flags, (char_u *)buf_ffname); + tfu_in_use = false; + + return retval; +} + +/// Read the next line from a tags file. +/// Returns TAGS_READ_SUCCESS if a tags line is successfully read and should be +/// processed. +/// Returns TAGS_READ_EOF if the end of file is reached. +/// Returns TAGS_READ_IGNORE if the current line should be ignored (used when +/// reached end of a emacs included tags file) +static tags_read_status_T findtags_get_next_line(findtags_state_T *st, tagsearch_info_T *sinfo_p) +{ + int eof; off_T offset; - enum { - TS_START, ///< at start of file - TS_LINEAR, ///< linear searching forward, till EOF - TS_BINARY, ///< binary searching - TS_SKIP_BACK, ///< skipping backwards - TS_STEP_FORWARD, ///< stepping forwards - } state; // Current search state - struct tag_search_info { // Binary search file offsets - off_T low_offset; // offset for first char of first line that - // could match - off_T high_offset; // offset of char after last line that could - // match - off_T curr_offset; // Current file offset in search range - off_T curr_offset_used; // curr_offset used when skipping back - off_T match_offset; // Where the binary search found a tag - int low_char; // first char at low_offset - int high_char; // first char at high_offset - } search_info; - int cmplen; - int match; // matches - int match_no_ic = 0; // matches with rm_ic == false - int match_re; // match with regexp - int matchoff = 0; + // For binary search: compute the next offset to use. + if (st->state == TS_BINARY) { + offset = sinfo_p->low_offset + ((sinfo_p->high_offset - sinfo_p->low_offset) / 2); + if (offset == sinfo_p->curr_offset) { + return TAGS_READ_EOF; // End the binary search without a match. + } else { + sinfo_p->curr_offset = offset; + } + } else if (st->state == TS_SKIP_BACK) { + // Skipping back (after a match during binary search). + sinfo_p->curr_offset -= st->lbuf_size * 2; + if (sinfo_p->curr_offset < 0) { + sinfo_p->curr_offset = 0; + rewind(st->fp); + st->state = TS_STEP_FORWARD; + } + } + + // When jumping around in the file, first read a line to find the + // start of the next line. + if (st->state == TS_BINARY || st->state == TS_SKIP_BACK) { + // Adjust the search file offset to the correct position + sinfo_p->curr_offset_used = sinfo_p->curr_offset; + vim_ignored = vim_fseek(st->fp, sinfo_p->curr_offset, SEEK_SET); + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, st->fp); + if (!eof && sinfo_p->curr_offset != 0) { + sinfo_p->curr_offset = vim_ftell(st->fp); + if (sinfo_p->curr_offset == sinfo_p->high_offset) { + // oops, gone a bit too far; try from low offset + vim_ignored = vim_fseek(st->fp, sinfo_p->low_offset, SEEK_SET); + sinfo_p->curr_offset = sinfo_p->low_offset; + } + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, st->fp); + } + // skip empty and blank lines + while (!eof && vim_isblankline(st->lbuf)) { + sinfo_p->curr_offset = vim_ftell(st->fp); + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, st->fp); + } + if (eof) { + // Hit end of file. Skip backwards. + st->state = TS_SKIP_BACK; + sinfo_p->match_offset = vim_ftell(st->fp); + sinfo_p->curr_offset = sinfo_p->curr_offset_used; + return TAGS_READ_IGNORE; + } + } else { + // Not jumping around in the file: Read the next line. - char *mfp; - int mtt; - hash_T hash = 0; + // skip empty and blank lines + do { + sinfo_p->curr_offset = vim_ftell(st->fp); + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, st->fp); + } while (!eof && vim_isblankline(st->lbuf)); - bool sort_error = false; // tags file not sorted - bool sortic = false; // tag file sorted in nocase - int noic = (flags & TAG_NOIC); - bool line_error = false; // syntax error - int has_re = (flags & TAG_REGEXP); // regexp used - int help_only = (flags & TAG_HELP); - int name_only = (flags & TAG_NAMES); - int get_it_again = false; - vimconv_T vimconv; + if (eof) { + return TAGS_READ_EOF; + } + } - vimconv.vc_type = CONV_NONE; + return TAGS_READ_SUCCESS; +} - // This is only to avoid a compiler warning for using search_info - // uninitialised. - CLEAR_FIELD(search_info); +/// Parse a tags file header line in "st->lbuf". +/// Returns true if the current line in st->lbuf is not a tags header line and +/// should be parsed as a regular tag line. Returns false if the line is a +/// header line and the next header line should be read. +static bool findtags_hdr_parse(findtags_state_T *st) +{ + // Header lines in a tags file start with "!_TAG_" + if (strncmp(st->lbuf, "!_TAG_", 6) != 0) { + // Non-header item before the header, e.g. "!" itself. + return true; + } - // A file that doesn't exist is silently ignored. Only when not a - // single file is found, an error message is given (further on). - if (curbuf->b_help) { - // Keep en if the file extension is .txt - if (st->is_txt) { - STRCPY(help_lang, "en"); + // Process the header line. + if (strncmp(st->lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) { + st->tag_file_sorted = (uint8_t)st->lbuf[18]; + } + if (strncmp(st->lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) { + // Prepare to convert every line from the specified encoding to + // 'encoding'. + char *p; + for (p = st->lbuf + 20; *p > ' ' && *p < 127; p++) {} + *p = NUL; + convert_setup(&st->vimconv, st->lbuf + 20, p_enc); + } + + // Read the next line. Unrecognized flags are ignored. + return false; +} + +/// Handler to initialize the state when starting to process a new tags file. +/// Called in the TS_START state when finding tags from a tags file. +/// Returns true if the line read from the tags file should be parsed and +/// false if the line should be ignored. +static bool findtags_start_state_handler(findtags_state_T *st, bool *sortic, + tagsearch_info_T *sinfo_p) +{ + const bool noic = (st->flags & TAG_NOIC); + + // The header ends when the line sorts below "!_TAG_". When case is + // folded lower case letters sort before "_". + if (strncmp(st->lbuf, "!_TAG_", 6) <= 0 + || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1]))) { + return findtags_hdr_parse(st); + } + + // Headers ends. + + // When there is no tag head, or ignoring case, need to do a + // linear search. + // When no "!_TAG_" is found, default to binary search. If + // the tag file isn't sorted, the second loop will find it. + // When "!_TAG_FILE_SORTED" found: start binary search if + // flag set. + if (st->linear) { + st->state = TS_LINEAR; + } else if (st->tag_file_sorted == NUL) { + st->state = TS_BINARY; + } else if (st->tag_file_sorted == '1') { + st->state = TS_BINARY; + } else if (st->tag_file_sorted == '2') { + st->state = TS_BINARY; + *sortic = true; + st->orgpat.regmatch.rm_ic = (p_ic || !noic); + } else { + st->state = TS_LINEAR; + } + + if (st->state == TS_BINARY && st->orgpat.regmatch.rm_ic && !*sortic) { + // Binary search won't work for ignoring case, use linear + // search. + st->linear = true; + st->state = TS_LINEAR; + } + + // When starting a binary search, get the size of the file and + // compute the first offset. + if (st->state == TS_BINARY) { + if (vim_fseek(st->fp, 0, SEEK_END) != 0) { + // can't seek, don't use binary search + st->state = TS_LINEAR; } else { - // Prefer help tags according to 'helplang'. Put the - // two-letter language name in help_lang[]. - i = (int)STRLEN(st->tag_fname); - if (i > 3 && st->tag_fname[i - 3] == '-') { - STRCPY(help_lang, st->tag_fname + i - 2); - } else { - STRCPY(help_lang, "en"); - } + // Get the tag file size. + // Don't use lseek(), it doesn't work + // properly on MacOS Catalina. + const off_T filesize = vim_ftell(st->fp); + vim_ignored = vim_fseek(st->fp, 0, SEEK_SET); + + // Calculate the first read offset in the file. Start + // the search in the middle of the file. + sinfo_p->low_offset = 0; + sinfo_p->low_char = 0; + sinfo_p->high_offset = filesize; + sinfo_p->curr_offset = 0; + sinfo_p->high_char = 0xff; } + return false; + } - // When searching for a specific language skip tags files - // for other languages. - if (st->help_lang_find != NULL - && STRICMP(help_lang, st->help_lang_find) != 0) { - return; + return true; +} + +/// Parse a tag line read from a tags file. +/// Returns OK if a tags line is successfully parsed. +/// Returns FAIL if a format error is encountered. +static int findtags_parse_line(findtags_state_T *st, tagptrs_T *tagpp) +{ + int status; + + // Figure out where the different strings are in this line. + // For "normal" tags: Do a quick check if the tag matches. + // This speeds up tag searching a lot! + if (st->orgpat.headlen) { + CLEAR_FIELD(*tagpp); + tagpp->tagname = st->lbuf; + tagpp->tagname_end = (char_u *)vim_strchr(st->lbuf, TAB); + if (tagpp->tagname_end == NULL) { + // Corrupted tag line. + return FAIL; } - // For CTRL-] in a help file prefer a match with the same - // language. - if ((flags & TAG_KEEP_LANG) - && st->help_lang_find == NULL - && curbuf->b_fname != NULL - && (i = (int)strlen(curbuf->b_fname)) > 4 - && curbuf->b_fname[i - 1] == 'x' - && curbuf->b_fname[i - 4] == '.' - && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0) { - help_pri = 0; + // Can be a matching tag, isolate the file name and command. + tagpp->fname = tagpp->tagname_end + 1; + tagpp->fname_end = (char_u *)vim_strchr((char *)tagpp->fname, TAB); + if (tagpp->fname_end == NULL) { + status = FAIL; } else { - help_pri = 1; - for (s = p_hlg; *s != NUL; s++) { - if (STRNICMP(s, help_lang, 2) == 0) { - break; + tagpp->command = tagpp->fname_end + 1; + status = OK; + } + } else { + status = parse_tag_line((char_u *)st->lbuf, tagpp); + } + + if (status == FAIL) { + return FAIL; + } + + return OK; +} + +/// Initialize the structure used for tag matching. +static void findtags_matchargs_init(findtags_match_args_T *margs, int flags) +{ + margs->matchoff = 0; // match offset + margs->match_re = false; // match with regexp + margs->match_no_ic = false; // matches with case + margs->has_re = (flags & TAG_REGEXP); // regexp used + margs->sortic = false; // tag file sorted in nocase + margs->sort_error = false; // tags file not sorted +} + +/// Compares the tag name in "tagpp->tagname" with a search pattern in +/// "st->orgpat.head". +/// Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag +/// doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a +/// binary search) and TAG_MATCH_STOP if all the tags are processed without a +/// match. Uses the values in "margs" for doing the comparison. +static tagmatch_status_T findtags_match_tag(findtags_state_T *st, tagptrs_T *tagpp, + findtags_match_args_T *margs, tagsearch_info_T *sinfo_p) +{ + bool match = false; + int cmplen; + int i; + int tagcmp; + + // Skip this line if the length of the tag is different and + // there is no regexp, or the tag is too short. + if (st->orgpat.headlen) { + cmplen = (int)(tagpp->tagname_end - (char_u *)tagpp->tagname); + if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' + cmplen = (int)p_tl; + } + if (margs->has_re && st->orgpat.headlen < cmplen) { + cmplen = st->orgpat.headlen; + } else if (st->state == TS_LINEAR && st->orgpat.headlen != cmplen) { + return TAG_MATCH_FAIL; + } + + if (st->state == TS_BINARY) { + // Simplistic check for unsorted tags file. + i = (int)tagpp->tagname[0]; + if (margs->sortic) { + i = TOUPPER_ASC(tagpp->tagname[0]); + } + if (i < sinfo_p->low_char || i > sinfo_p->high_char) { + margs->sort_error = true; + } + + // Compare the current tag with the searched tag. + if (margs->sortic) { + tagcmp = tag_strnicmp((char_u *)tagpp->tagname, st->orgpat.head, + (size_t)cmplen); + } else { + tagcmp = STRNCMP(tagpp->tagname, st->orgpat.head, cmplen); + } + + // A match with a shorter tag means to search forward. + // A match with a longer tag means to search backward. + if (tagcmp == 0) { + if (cmplen < st->orgpat.headlen) { + tagcmp = -1; + } else if (cmplen > st->orgpat.headlen) { + tagcmp = 1; } - help_pri++; - if ((s = (char_u *)vim_strchr((char *)s, ',')) == NULL) { - break; + } + + if (tagcmp == 0) { + // We've located the tag, now skip back and search + // forward until the first matching tag is found. + st->state = TS_SKIP_BACK; + sinfo_p->match_offset = sinfo_p->curr_offset; + return TAG_MATCH_NEXT; + } + if (tagcmp < 0) { + sinfo_p->curr_offset = vim_ftell(st->fp); + if (sinfo_p->curr_offset < sinfo_p->high_offset) { + sinfo_p->low_offset = sinfo_p->curr_offset; + if (margs->sortic) { + sinfo_p->low_char = TOUPPER_ASC(tagpp->tagname[0]); + } else { + sinfo_p->low_char = (uint8_t)tagpp->tagname[0]; + } + return TAG_MATCH_NEXT; } } - if (s == NULL || *s == NUL) { - // Language not in 'helplang': use last, prefer English, - // unless found already. - help_pri++; - if (STRICMP(help_lang, "en") != 0) { - help_pri++; + if (tagcmp > 0 && sinfo_p->curr_offset != sinfo_p->high_offset) { + sinfo_p->high_offset = sinfo_p->curr_offset; + if (margs->sortic) { + sinfo_p->high_char = TOUPPER_ASC(tagpp->tagname[0]); + } else { + sinfo_p->high_char = (uint8_t)tagpp->tagname[0]; + } + return TAG_MATCH_NEXT; + } + + // No match yet and are at the end of the binary search. + return TAG_MATCH_STOP; + } else if (st->state == TS_SKIP_BACK) { + assert(cmplen >= 0); + if (mb_strnicmp(tagpp->tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { + st->state = TS_STEP_FORWARD; + } else { + // Have to skip back more. Restore the curr_offset + // used, otherwise we get stuck at a long line. + sinfo_p->curr_offset = sinfo_p->curr_offset_used; + } + return TAG_MATCH_NEXT; + } else if (st->state == TS_STEP_FORWARD) { + assert(cmplen >= 0); + if (mb_strnicmp(tagpp->tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { + if ((off_T)vim_ftell(st->fp) > sinfo_p->match_offset) { + return TAG_MATCH_STOP; // past last match + } else { + return TAG_MATCH_NEXT; // before first match } } + } else { + // skip this match if it can't match + assert(cmplen >= 0); + if (mb_strnicmp(tagpp->tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { + return TAG_MATCH_FAIL; + } + } + } + + // First try matching with the pattern literally (also when it is + // a regexp). + cmplen = (int)(tagpp->tagname_end - (char_u *)tagpp->tagname); + if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' + cmplen = (int)p_tl; + } + // if tag length does not match, don't try comparing + if (st->orgpat.len != cmplen) { + match = false; + } else { + if (st->orgpat.regmatch.rm_ic) { + assert(cmplen >= 0); + match = mb_strnicmp(tagpp->tagname, (char *)st->orgpat.pat, (size_t)cmplen) == 0; + if (match) { + margs->match_no_ic = STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0; + } + } else { + match = STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0; + } + } + + // Has a regexp: Also find tags matching regexp. + margs->match_re = false; + if (!match && st->orgpat.regmatch.regprog != NULL) { + int cc; + + cc = *tagpp->tagname_end; + *tagpp->tagname_end = NUL; + match = vim_regexec(&st->orgpat.regmatch, tagpp->tagname, (colnr_T)0); + if (match) { + margs->matchoff = (int)(st->orgpat.regmatch.startp[0] - tagpp->tagname); + if (st->orgpat.regmatch.rm_ic) { + st->orgpat.regmatch.rm_ic = false; + margs->match_no_ic = vim_regexec(&st->orgpat.regmatch, + tagpp->tagname, (colnr_T)0); + st->orgpat.regmatch.rm_ic = true; + } } + *tagpp->tagname_end = (char_u)cc; + margs->match_re = true; } - if ((fp = os_fopen(st->tag_fname, "r")) == NULL) { + return match ? TAG_MATCH_SUCCESS : TAG_MATCH_FAIL; +} + +/// Convert the encoding of a line read from a tags file in "st->lbuf". +/// Converting the pattern from 'enc' to the tags file encoding doesn't work, +/// because characters are not recognized. The converted line is saved in +/// st->lbuf. +static void findtags_string_convert(findtags_state_T *st) +{ + char *conv_line = string_convert(&st->vimconv, st->lbuf, NULL); + if (conv_line == NULL) { return; } - if (p_verbose >= 5) { - verbose_enter(); - smsg(_("Searching tags file %s"), st->tag_fname); - verbose_leave(); + // Copy or swap lbuf and conv_line. + int len = (int)strlen(conv_line) + 1; + if (len > st->lbuf_size) { + xfree(st->lbuf); + st->lbuf = conv_line; + st->lbuf_size = len; + } else { + STRCPY(st->lbuf, conv_line); + xfree(conv_line); } +} - st->did_open = true; // remember that we found at least one file +/// Add a matching tag found in a tags file to st->ht_match and st->ga_match. +static void findtags_add_match(findtags_state_T *st, tagptrs_T *tagpp, findtags_match_args_T *margs, + char *buf_ffname, hash_T *hash) +{ + const bool name_only = (st->flags & TAG_NAMES); + int mtt; + size_t len = 0; + bool is_current; // file name matches + bool is_static; // current tag line is static + char *mfp; + char *p; + char_u *s; - state = TS_START; // we're at the start of the file + // Decide in which array to store this match. + is_current = test_for_current((char *)tagpp->fname, (char *)tagpp->fname_end, + st->tag_fname, buf_ffname); + is_static = test_for_static(tagpp); + + // Decide in which of the sixteen tables to store this match. + if (is_static) { + if (is_current) { + mtt = MT_ST_CUR; + } else { + mtt = MT_ST_OTH; + } + } else { + if (is_current) { + mtt = MT_GL_CUR; + } else { + mtt = MT_GL_OTH; + } + } + if (st->orgpat.regmatch.rm_ic && !margs->match_no_ic) { + mtt += MT_IC_OFF; + } + if (margs->match_re) { + mtt += MT_RE_OFF; + } + + // Add the found match in ht_match[mtt] and ga_match[mtt]. + // Store the info we need later, which depends on the kind of + // tags we are dealing with. + if (st->help_only) { +#define ML_EXTRA 3 + // Append the help-heuristic number after the tagname, for + // sorting it later. The heuristic is ignored for + // detecting duplicates. + // The format is {tagname}@{lang}NUL{heuristic}NUL + *tagpp->tagname_end = NUL; + len = (size_t)(tagpp->tagname_end - (char_u *)tagpp->tagname); + mfp = xmalloc(sizeof(char) + len + 10 + ML_EXTRA + 1); + + p = mfp; + STRCPY(p, tagpp->tagname); + p[len] = '@'; + STRCPY(p + len + 1, st->help_lang); + snprintf(p + len + 1 + ML_EXTRA, strlen(p) + len + 1 + ML_EXTRA, "%06d", + help_heuristic(tagpp->tagname, + margs->match_re ? margs->matchoff : 0, + !margs->match_no_ic) + st->help_pri); + + *tagpp->tagname_end = TAB; + } else if (name_only) { + if (st->get_searchpat) { + char_u *temp_end = tagpp->command; + + if (*temp_end == '/') { + while (*temp_end && *temp_end != '\r' + && *temp_end != '\n' + && *temp_end != '$') { + temp_end++; + } + } + + if (tagpp->command + 2 < temp_end) { + len = (size_t)(temp_end - tagpp->command - 2); + mfp = xmalloc(len + 2); + STRLCPY(mfp, tagpp->command + 2, len + 1); + } else { + mfp = NULL; + } + st->get_searchpat = false; + } else { + len = (size_t)((char *)tagpp->tagname_end - tagpp->tagname); + mfp = xmalloc(sizeof(char) + len + 1); + STRLCPY(mfp, tagpp->tagname, len + 1); + + // if wanted, re-read line to get long form too + if (State & MODE_INSERT) { + st->get_searchpat = p_sft; + } + } + } else { + size_t tag_fname_len = strlen(st->tag_fname); + // Save the tag in a buffer. + // Use 0x02 to separate fields (Can't use NUL, because the + // hash key is terminated by NUL). + // Emacs tag: <mtt><tag_fname><0x02><ebuf><0x02><lbuf><NUL> + // other tag: <mtt><tag_fname><0x02><0x02><lbuf><NUL> + // without Emacs tags: <mtt><tag_fname><0x02><lbuf><NUL> + // Here <mtt> is the "mtt" value plus 1 to avoid NUL. + len = tag_fname_len + strlen(st->lbuf) + 3; + mfp = xmalloc(sizeof(char) + len + 1); + p = mfp; + p[0] = (char)(mtt + 1); + STRCPY(p + 1, st->tag_fname); +#ifdef BACKSLASH_IN_FILENAME + // Ignore differences in slashes, avoid adding + // both path/file and path\file. + slash_adjust(p + 1); +#endif + p[tag_fname_len + 1] = TAG_SEP; + s = (char_u *)p + 1 + tag_fname_len + 1; + STRCPY(s, st->lbuf); + } + + if (mfp != NULL) { + hashitem_T *hi; + + // Don't add identical matches. + // "mfp" is used as a hash key, there is a NUL byte to end + // the part that matters for comparing, more bytes may + // follow after it. E.g. help tags store the priority + // after the NUL. + *hash = hash_hash((char_u *)mfp); + hi = hash_lookup(&st->ht_match[mtt], (const char *)mfp, strlen(mfp), *hash); + if (HASHITEM_EMPTY(hi)) { + hash_add_item(&st->ht_match[mtt], hi, (char_u *)mfp, *hash); + GA_APPEND(char *, &st->ga_match[mtt], mfp); + st->match_count++; + } else { + // duplicate tag, drop it + xfree(mfp); + } + } +} + +/// Read and get all the tags from file st->tag_fname. +/// Sets "st->stop_searching" to true to stop searching for additional tags. +static void findtags_get_all_tags(findtags_state_T *st, findtags_match_args_T *margs, + char *buf_ffname) +{ + tagptrs_T tagp; + tagsearch_info_T search_info; + int retval; + hash_T hash = 0; + + // This is only to avoid a compiler warning for using search_info + // uninitialised. + CLEAR_FIELD(search_info); // Read and parse the lines in the file one by one for (;;) { // check for CTRL-C typed, more often when jumping around - if (state == TS_BINARY || state == TS_SKIP_BACK) { + if (st->state == TS_BINARY || st->state == TS_SKIP_BACK) { line_breakcheck(); } else { fast_breakcheck(); } - if ((flags & TAG_INS_COMP)) { // Double brackets for gcc + if ((st->flags & TAG_INS_COMP)) { // Double brackets for gcc ins_compl_check_keys(30, false); } if (got_int || ins_compl_interrupted()) { @@ -1547,177 +2124,34 @@ static void find_tags_in_file(findtags_state_T *st, int flags, char *buf_ffname) // found (for completion). if (st->mincount == TAG_MANY && st->match_count >= TAG_MANY) { st->stop_searching = true; - return; + break; } - if (get_it_again) { + if (st->get_searchpat) { goto line_read_in; } - // For binary search: compute the next offset to use. - if (state == TS_BINARY) { - offset = search_info.low_offset + ((search_info.high_offset - - search_info.low_offset) / 2); - if (offset == search_info.curr_offset) { - break; // End the binary search without a match. - } - search_info.curr_offset = offset; - } else if (state == TS_SKIP_BACK) { - // Skipping back (after a match during binary search). - search_info.curr_offset -= st->lbuf_size * 2; - if (search_info.curr_offset < 0) { - search_info.curr_offset = 0; - rewind(fp); - state = TS_STEP_FORWARD; - } - } - - // When jumping around in the file, first read a line to find the - // start of the next line. - if (state == TS_BINARY || state == TS_SKIP_BACK) { - // Adjust the search file offset to the correct position - search_info.curr_offset_used = search_info.curr_offset; - vim_fseek(fp, search_info.curr_offset, SEEK_SET); - eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); - if (!eof && search_info.curr_offset != 0) { - search_info.curr_offset = vim_ftell(fp); - if (search_info.curr_offset == search_info.high_offset) { - // oops, gone a bit too far; try from low offset - vim_fseek(fp, search_info.low_offset, SEEK_SET); - search_info.curr_offset = search_info.low_offset; - } - eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); - } - // skip empty and blank lines - while (!eof && vim_isblankline(st->lbuf)) { - search_info.curr_offset = vim_ftell(fp); - eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); - } - if (eof) { - // Hit end of file. Skip backwards. - state = TS_SKIP_BACK; - search_info.match_offset = vim_ftell(fp); - search_info.curr_offset = search_info.curr_offset_used; - continue; - } - } else { - // Not jumping around in the file: Read the next line. - - // skip empty and blank lines - do { - search_info.curr_offset = vim_ftell(fp); - eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); - } while (!eof && vim_isblankline(st->lbuf)); - if (eof) { - break; // end of file - } + retval = (int)findtags_get_next_line(st, &search_info); + if (retval == TAGS_READ_IGNORE) { + continue; } + if (retval == TAGS_READ_EOF) { + break; + } + line_read_in: - if (vimconv.vc_type != CONV_NONE) { - char *conv_line; - int len; - - // Convert every line. Converting the pattern from 'enc' to - // the tags file encoding doesn't work, because characters are - // not recognized. - conv_line = string_convert(&vimconv, st->lbuf, NULL); - if (conv_line != NULL) { - // Copy or swap lbuf and conv_line. - len = (int)strlen(conv_line) + 1; - if (len > st->lbuf_size) { - xfree(st->lbuf); - st->lbuf = conv_line; - st->lbuf_size = len; - } else { - STRCPY(st->lbuf, conv_line); - xfree(conv_line); - } - } + if (st->vimconv.vc_type != CONV_NONE) { + findtags_string_convert(st); } // When still at the start of the file, check for Emacs tags file // format, and for "not sorted" flag. - if (state == TS_START) { - // The header ends when the line sorts below "!_TAG_". When - // case is folded lower case letters sort before "_". - if (strncmp(st->lbuf, "!_TAG_", 6) <= 0 - || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1]))) { - if (strncmp(st->lbuf, "!_TAG_", 6) != 0) { - // Non-header item before the header, e.g. "!" itself. - goto parse_line; - } - - // Read header line. - if (strncmp(st->lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) { - tag_file_sorted = (uint8_t)st->lbuf[18]; - } - if (strncmp(st->lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) { - // Prepare to convert every line from the specified - // encoding to 'encoding'. - for (p = st->lbuf + 20; *p > ' ' && *p < 127; p++) {} - *p = NUL; - convert_setup(&vimconv, st->lbuf + 20, p_enc); - } - - // Read the next line. Unrecognized flags are ignored. - continue; - } - - // Headers ends. - - // When there is no tag head, or ignoring case, need to do a - // linear search. - // When no "!_TAG_" is found, default to binary search. If - // the tag file isn't sorted, the second loop will find it. - // When "!_TAG_FILE_SORTED" found: start binary search if - // flag set. - if (st->linear) { - state = TS_LINEAR; - } else if (tag_file_sorted == NUL) { - state = TS_BINARY; - } else if (tag_file_sorted == '1') { - state = TS_BINARY; - } else if (tag_file_sorted == '2') { - state = TS_BINARY; - sortic = true; - st->orgpat.regmatch.rm_ic = (p_ic || !noic); - } else { - state = TS_LINEAR; - } - - if (state == TS_BINARY && st->orgpat.regmatch.rm_ic && !sortic) { - // Binary search won't work for ignoring case, use linear - // search. - st->linear = true; - state = TS_LINEAR; - } - - // When starting a binary search, get the size of the file and - // compute the first offset. - if (state == TS_BINARY) { - if (vim_fseek(fp, 0, SEEK_END) != 0) { - // can't seek, don't use binary search - state = TS_LINEAR; - } else { - // Get the tag file size. - // Don't use lseek(), it doesn't work - // properly on MacOS Catalina. - const off_T filesize = vim_ftell(fp); - vim_fseek(fp, 0, SEEK_SET); - - // Calculate the first read offset in the file. Start - // the search in the middle of the file. - search_info.low_offset = 0; - search_info.low_char = 0; - search_info.high_offset = filesize; - search_info.curr_offset = 0; - search_info.high_char = 0xff; - } + if (st->state == TS_START) { + if (!findtags_start_state_handler(st, &margs->sortic, &search_info)) { continue; } } -parse_line: // When the line is too long the NUL will not be in the // last-but-one byte (see vim_fgets()). // Has been reported for Mozilla JS with extremely long names. @@ -1727,9 +2161,9 @@ parse_line: xfree(st->lbuf); st->lbuf = xmalloc((size_t)st->lbuf_size); - if (state == TS_STEP_FORWARD) { + if (st->state == TS_STEP_FORWARD) { // Seek to the same position to read the same line again - vim_fseek(fp, search_info.curr_offset, SEEK_SET); + vim_ignored = vim_fseek(st->fp, search_info.curr_offset, SEEK_SET); } // this will try the same thing again, make sure the offset is // different @@ -1737,324 +2171,76 @@ parse_line: continue; } - // Figure out where the different strings are in this line. - // For "normal" tags: Do a quick check if the tag matches. - // This speeds up tag searching a lot! - if (st->orgpat.headlen) { - CLEAR_FIELD(tagp); - tagp.tagname = st->lbuf; - tagp.tagname_end = (char_u *)vim_strchr(st->lbuf, TAB); - if (tagp.tagname_end == NULL) { - // Corrupted tag line. - line_error = true; - break; - } - - // Skip this line if the length of the tag is different and - // there is no regexp, or the tag is too short. - cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname); - if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' - cmplen = (int)p_tl; - } - if (has_re && st->orgpat.headlen < cmplen) { - cmplen = st->orgpat.headlen; - } else if (state == TS_LINEAR && st->orgpat.headlen != cmplen) { - continue; - } - - if (state == TS_BINARY) { - // Simplistic check for unsorted tags file. - i = (int)tagp.tagname[0]; - if (sortic) { - i = TOUPPER_ASC(tagp.tagname[0]); - } - if (i < search_info.low_char || i > search_info.high_char) { - sort_error = true; - } - - // Compare the current tag with the searched tag. - if (sortic) { - tagcmp = tag_strnicmp((char_u *)tagp.tagname, st->orgpat.head, - (size_t)cmplen); - } else { - tagcmp = STRNCMP(tagp.tagname, st->orgpat.head, cmplen); - } - - // A match with a shorter tag means to search forward. - // A match with a longer tag means to search backward. - if (tagcmp == 0) { - if (cmplen < st->orgpat.headlen) { - tagcmp = -1; - } else if (cmplen > st->orgpat.headlen) { - tagcmp = 1; - } - } - - if (tagcmp == 0) { - // We've located the tag, now skip back and search - // forward until the first matching tag is found. - state = TS_SKIP_BACK; - search_info.match_offset = search_info.curr_offset; - continue; - } - if (tagcmp < 0) { - search_info.curr_offset = vim_ftell(fp); - if (search_info.curr_offset < search_info.high_offset) { - search_info.low_offset = search_info.curr_offset; - if (sortic) { - search_info.low_char = - TOUPPER_ASC(tagp.tagname[0]); - } else { - search_info.low_char = (uint8_t)tagp.tagname[0]; - } - continue; - } - } - if (tagcmp > 0 - && search_info.curr_offset != search_info.high_offset) { - search_info.high_offset = search_info.curr_offset; - if (sortic) { - search_info.high_char = - TOUPPER_ASC(tagp.tagname[0]); - } else { - search_info.high_char = (uint8_t)tagp.tagname[0]; - } - continue; - } - - // No match yet and are at the end of the binary search. - break; - } else if (state == TS_SKIP_BACK) { - assert(cmplen >= 0); - if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { - state = TS_STEP_FORWARD; - } else { - // Have to skip back more. Restore the curr_offset - // used, otherwise we get stuck at a long line. - search_info.curr_offset = search_info.curr_offset_used; - } - continue; - } else if (state == TS_STEP_FORWARD) { - assert(cmplen >= 0); - if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { - if ((off_T)vim_ftell(fp) > search_info.match_offset) { - break; // past last match - } else { - continue; // before first match - } - } - } else { - // skip this match if it can't match - assert(cmplen >= 0); - if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { - continue; - } - } + if (findtags_parse_line(st, &tagp) == FAIL) { + semsg(_("E431: Format error in tags file \"%s\""), st->tag_fname); + semsg(_("Before byte %" PRId64), (int64_t)vim_ftell(st->fp)); + st->stop_searching = true; + return; + } - // Can be a matching tag, isolate the file name and command. - tagp.fname = tagp.tagname_end + 1; - tagp.fname_end = (char_u *)vim_strchr((char *)tagp.fname, TAB); - tagp.command = tagp.fname_end + 1; - if (tagp.fname_end == NULL) { - i = FAIL; - } else { - i = OK; - } - } else { - i = parse_tag_line((char_u *)st->lbuf, &tagp); + retval = (int)findtags_match_tag(st, &tagp, margs, &search_info); + if (retval == TAG_MATCH_NEXT) { + continue; } - if (i == FAIL) { - line_error = true; + if (retval == TAG_MATCH_STOP) { break; } - // First try matching with the pattern literally (also when it is - // a regexp). - cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname); - if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' - cmplen = (int)p_tl; - } - // if tag length does not match, don't try comparing - if (st->orgpat.len != cmplen) { - match = false; - } else { - if (st->orgpat.regmatch.rm_ic) { - assert(cmplen >= 0); - match = mb_strnicmp(tagp.tagname, (char *)st->orgpat.pat, (size_t)cmplen) == 0; - if (match) { - match_no_ic = (STRNCMP(tagp.tagname, st->orgpat.pat, - cmplen) == 0); - } - } else { - match = (STRNCMP(tagp.tagname, st->orgpat.pat, cmplen) == 0); - } + // If a match is found, add it to ht_match[] and ga_match[]. + if (retval == TAG_MATCH_SUCCESS) { + findtags_add_match(st, &tagp, margs, buf_ffname, &hash); } + } // forever +} - // Has a regexp: Also find tags matching regexp. - match_re = false; - if (!match && st->orgpat.regmatch.regprog != NULL) { - int cc; +/// Search for tags matching "st->orgpat.pat" in the "st->tag_fname" tags file. +/// Information needed to search for the tags is in the "st" state structure. +/// The matching tags are returned in "st". If an error is encountered, then +/// "st->stop_searching" is set to true. +static void findtags_in_file(findtags_state_T *st, int flags, char *buf_ffname) +{ + findtags_match_args_T margs; - cc = *tagp.tagname_end; - *tagp.tagname_end = NUL; - match = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0); - if (match) { - matchoff = (int)(st->orgpat.regmatch.startp[0] - tagp.tagname); - if (st->orgpat.regmatch.rm_ic) { - st->orgpat.regmatch.rm_ic = false; - match_no_ic = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0); - st->orgpat.regmatch.rm_ic = true; - } - } - *tagp.tagname_end = (char_u)cc; - match_re = true; + st->vimconv.vc_type = CONV_NONE; + st->tag_file_sorted = NUL; + st->fp = NULL; + findtags_matchargs_init(&margs, st->flags); + + // A file that doesn't exist is silently ignored. Only when not a + // single file is found, an error message is given (further on). + if (curbuf->b_help) { + if (!findtags_in_help_init(st)) { + return; } + } - // If a match is found, add it to ht_match[] and ga_match[]. - if (match) { - size_t len = 0; - - // Decide in which array to store this match. - is_current = test_for_current((char *)tagp.fname, (char *)tagp.fname_end, - st->tag_fname, - buf_ffname); - is_static = test_for_static(&tagp); - - // Decide in which of the sixteen tables to store this match. - if (is_static) { - if (is_current) { - mtt = MT_ST_CUR; - } else { - mtt = MT_ST_OTH; - } - } else { - if (is_current) { - mtt = MT_GL_CUR; - } else { - mtt = MT_GL_OTH; - } - } - if (st->orgpat.regmatch.rm_ic && !match_no_ic) { - mtt += MT_IC_OFF; - } - if (match_re) { - mtt += MT_RE_OFF; - } + st->fp = os_fopen(st->tag_fname, "r"); + if (st->fp == NULL) { + return; + } - // Add the found match in ht_match[mtt] and ga_match[mtt]. - // Store the info we need later, which depends on the kind of - // tags we are dealing with. - if (help_only) { -#define ML_EXTRA 3 - // Append the help-heuristic number after the tagname, for - // sorting it later. The heuristic is ignored for - // detecting duplicates. - // The format is {tagname}@{lang}NUL{heuristic}NUL - *tagp.tagname_end = NUL; - len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname); - mfp = xmalloc(sizeof(char) + len + 10 + ML_EXTRA + 1); - - p = mfp; - STRCPY(p, tagp.tagname); - p[len] = '@'; - STRCPY(p + len + 1, help_lang); - snprintf(p + len + 1 + ML_EXTRA, strlen(p) + len + 1 + ML_EXTRA, "%06d", - help_heuristic(tagp.tagname, - match_re ? matchoff : 0, !match_no_ic) + help_pri); - - *tagp.tagname_end = TAB; - } else if (name_only) { - if (get_it_again) { - char_u *temp_end = tagp.command; - - if (*temp_end == '/') { - while (*temp_end && *temp_end != '\r' - && *temp_end != '\n' - && *temp_end != '$') { - temp_end++; - } - } + if (p_verbose >= 5) { + verbose_enter(); + smsg(_("Searching tags file %s"), st->tag_fname); + verbose_leave(); + } + st->did_open = true; // remember that we found at least one file - if (tagp.command + 2 < temp_end) { - len = (size_t)(temp_end - tagp.command - 2); - mfp = xmalloc(len + 2); - STRLCPY(mfp, tagp.command + 2, len + 1); - } else { - mfp = NULL; - } - get_it_again = false; - } else { - len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname); - mfp = xmalloc(sizeof(char) + len + 1); - STRLCPY(mfp, tagp.tagname, len + 1); + st->state = TS_START; // we're at the start of the file - // if wanted, re-read line to get long form too - if (State & MODE_INSERT) { - get_it_again = p_sft; - } - } - } else { - size_t tag_fname_len = strlen(st->tag_fname); - // Save the tag in a buffer. - // Use 0x02 to separate fields (Can't use NUL, because the - // hash key is terminated by NUL). - // Emacs tag: <mtt><tag_fname><0x02><ebuf><0x02><lbuf><NUL> - // other tag: <mtt><tag_fname><0x02><0x02><lbuf><NUL> - // without Emacs tags: <mtt><tag_fname><0x02><lbuf><NUL> - // Here <mtt> is the "mtt" value plus 1 to avoid NUL. - len = tag_fname_len + strlen(st->lbuf) + 3; - mfp = xmalloc(sizeof(char) + len + 1); - p = mfp; - p[0] = (char)(mtt + 1); - STRCPY(p + 1, st->tag_fname); -#ifdef BACKSLASH_IN_FILENAME - // Ignore differences in slashes, avoid adding - // both path/file and path\file. - slash_adjust(p + 1); -#endif - p[tag_fname_len + 1] = TAG_SEP; - s = (char_u *)p + 1 + tag_fname_len + 1; - STRCPY(s, st->lbuf); - } - - if (mfp != NULL) { - hashitem_T *hi; - - // Don't add identical matches. - // "mfp" is used as a hash key, there is a NUL byte to end - // the part that matters for comparing, more bytes may - // follow after it. E.g. help tags store the priority - // after the NUL. - hash = hash_hash((char_u *)mfp); - hi = hash_lookup(&st->ht_match[mtt], (const char *)mfp, strlen(mfp), hash); - if (HASHITEM_EMPTY(hi)) { - hash_add_item(&st->ht_match[mtt], hi, (char_u *)mfp, hash); - GA_APPEND(char *, &st->ga_match[mtt], mfp); - st->match_count++; - } else { - // duplicate tag, drop it - xfree(mfp); - } - } - } - } // forever + // Read and parse the lines in the file one by one + findtags_get_all_tags(st, &margs, buf_ffname); - if (line_error) { - semsg(_("E431: Format error in tags file \"%s\""), st->tag_fname); - semsg(_("Before byte %" PRId64), (int64_t)vim_ftell(fp)); - st->stop_searching = true; - line_error = false; + if (st->fp != NULL) { + fclose(st->fp); + st->fp = NULL; } - - fclose(fp); - if (vimconv.vc_type != CONV_NONE) { - convert_setup(&vimconv, NULL, NULL); + if (st->vimconv.vc_type != CONV_NONE) { + convert_setup(&st->vimconv, NULL, NULL); } - tag_file_sorted = NUL; - if (sort_error) { + if (margs.sort_error) { semsg(_("E432: Tags file not sorted: %s"), st->tag_fname); - sort_error = false; } // Stop searching if sufficient tags have been found. @@ -2064,9 +2250,10 @@ parse_line: } /// Copy the tags found by find_tags() to "matchesp". -static void findtags_copy_matches(findtags_state_T *st, char ***matchesp, int *num_matches, - int name_only) +/// Returns the number of matches copied. +static int findtags_copy_matches(findtags_state_T *st, char ***matchesp) { + const bool name_only = (st->flags & TAG_NAMES); char **matches; int mtt; int i; @@ -2105,7 +2292,7 @@ static void findtags_copy_matches(findtags_state_T *st, char ***matchesp, int *n } *matchesp = matches; - *num_matches = st->match_count; + return st->match_count; } /// find_tags() - search for tags in tags files @@ -2156,11 +2343,8 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc int findall = (mincount == MAXCOL || mincount == TAG_MANY); // find all matching tags int has_re = (flags & TAG_REGEXP); // regexp used - int help_only = (flags & TAG_HELP); - int name_only = (flags & TAG_NAMES); int noic = (flags & TAG_NOIC); int verbose = (flags & TAG_VERBOSE); - int use_tfu = ((flags & TAG_NO_TAGFUNC) == 0); int save_p_ic = p_ic; // Change the value of 'ignorecase' according to 'tagcase' for the @@ -2186,10 +2370,10 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc help_save = curbuf->b_help; - findtags_state_init(&st, pat, mincount); + findtags_state_init(&st, pat, flags, mincount); // Initialize a few variables - if (help_only) { // want tags from help file + if (st.help_only) { // want tags from help file curbuf->b_help = true; // will be restored later } @@ -2217,24 +2401,13 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc goto findtag_end; } - if (*curbuf->b_p_tfu != NUL && use_tfu && !tfu_in_use) { - tfu_in_use = true; - retval = find_tagfunc_tags((char_u *)pat, &st.ga_match[0], &st.match_count, flags, - (char_u *)buf_ffname); - tfu_in_use = false; - if (retval != NOTDONE) { - goto findtag_end; - } + retval = findtags_apply_tfu(&st, pat, buf_ffname); + if (retval != NOTDONE) { + goto findtag_end; } - // When finding a specified number of matches, first try with matching - // case, so binary search can be used, and try ignore-case matches in a - // second loop. - // When finding all matches, 'tagbsearch' is off, or there is no fixed - // string to look for, ignore case right away to avoid going though the - // tags files twice. - // When the tag file is case-fold sorted, it is either one or the other. - // Only ignore case when TAG_NOIC not used or 'ignorecase' set. + // re-initialize the default return value + retval = FAIL; // Set a flag if the file extension is .txt if ((flags & TAG_KEEP_LANG) @@ -2244,6 +2417,15 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc && STRICMP(curbuf->b_fname + i - 4, ".txt") == 0) { st.is_txt = true; } + + // When finding a specified number of matches, first try with matching + // case, so binary search can be used, and try ignore-case matches in a + // second loop. + // When finding all matches, 'tagbsearch' is off, or there is no fixed + // string to look for, ignore case right away to avoid going though the + // tags files twice. + // When the tag file is case-fold sorted, it is either one or the other. + // Only ignore case when TAG_NOIC not used or 'ignorecase' set. st.orgpat.regmatch.rm_ic = ((p_ic || !noic) && (findall || st.orgpat.headlen == 0 || !p_tbs)); for (round = 1; round <= 2; round++) { @@ -2253,7 +2435,7 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc for (first_file = true; get_tagfname(&tn, first_file, st.tag_fname) == OK; first_file = false) { - find_tags_in_file(&st, flags, buf_ffname); + findtags_in_file(&st, flags, buf_ffname); if (st.stop_searching) { retval = OK; break; @@ -2281,9 +2463,7 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc } findtag_end: - xfree(st.tag_fname); - xfree(st.lbuf); - vim_regfree(st.orgpat.regmatch.regprog); + findtags_state_free(&st); // Move the matches from the ga_match[] arrays into one list of // matches. When retval == FAIL, free the matches. @@ -2291,7 +2471,7 @@ findtag_end: st.match_count = 0; } - findtags_copy_matches(&st, matchesp, num_matches, name_only); + *num_matches = findtags_copy_matches(&st, matchesp); curbuf->b_help = help_save; xfree(saved_pat); diff --git a/src/nvim/testdir/test_tagjump.vim b/src/nvim/testdir/test_tagjump.vim index 361aa23291..be60a3535c 100644 --- a/src/nvim/testdir/test_tagjump.vim +++ b/src/nvim/testdir/test_tagjump.vim @@ -231,15 +231,13 @@ func Test_tag_symbolic() endfunc " Tests for tag search with !_TAG_FILE_ENCODING. -" Depends on the test83-tags2 and test83-tags3 files. func Test_tag_file_encoding() - throw 'skipped: Nvim removed test83-tags2, test83-tags3' if has('vms') - return + throw 'Skipped: does not work on VMS' endif if !has('iconv') || iconv("\x82\x60", "cp932", "utf-8") != "\uff21" - return + throw 'Skipped: iconv does not work' endif let save_enc = &encoding @@ -264,18 +262,31 @@ func Test_tag_file_encoding() " case2: new - set tags=test83-tags2 + let content = ['!_TAG_FILE_ENCODING cp932 //', + \ "\x82`\x82a\x82b Xtags2.txt /\x82`\x82a\x82b"] + call writefile(content, 'Xtags') + set tags=Xtags tag /.BC call assert_equal('Xtags2.txt', expand('%:t')) call assert_equal('ABC', getline('.')) + call delete('Xtags') close " case3: new - set tags=test83-tags3 + let contents = [ + \ "!_TAG_FILE_SORTED 1 //", + \ "!_TAG_FILE_ENCODING cp932 //"] + for i in range(1, 100) + call add(contents, 'abc' .. i + \ .. " Xtags3.txt /\x82`\x82a\x82b") + endfor + call writefile(contents, 'Xtags') + set tags=Xtags tag abc50 call assert_equal('Xtags3.txt', expand('%:t')) call assert_equal('ABC', getline('.')) + call delete('Xtags') close set tags& @@ -327,6 +338,7 @@ func Test_tagjump_etags() \ ], 'Xtags2') tag main call assert_equal(2, line('.')) + call assert_fails('tag bar', 'E426:') " corrupted tag line call writefile([ @@ -352,6 +364,27 @@ func Test_tagjump_etags() \ ], 'Xtags') call assert_fails('tag foo', 'E431:') + " end of file after a CTRL-L line + call writefile([ + \ "\x0c", + \ "Xmain.c,64", + \ "void foo() {}\x7ffoo\x011,0", + \ "\x0c", + \ ], 'Xtags') + call assert_fails('tag main', 'E426:') + + " error in an included tags file + call writefile([ + \ "\x0c", + \ "Xtags2,include" + \ ], 'Xtags') + call writefile([ + \ "\x0c", + \ "Xmain.c,64", + \ "void foo() {}", + \ ], 'Xtags2') + call assert_fails('tag foo', 'E431:') + call delete('Xtags') call delete('Xtags2') call delete('Xmain.c') @@ -772,11 +805,11 @@ endfunc " Test for an unsorted tags file func Test_tag_sort() - call writefile([ + let l = [ \ "first\tXfoo\t1", \ "ten\tXfoo\t3", - \ "six\tXfoo\t2"], - \ 'Xtags') + \ "six\tXfoo\t2"] + call writefile(l, 'Xtags') set tags=Xtags let code =<< trim [CODE] int first() {} @@ -787,7 +820,14 @@ func Test_tag_sort() call assert_fails('tag first', 'E432:') + " When multiple tag files are not sorted, then message should be displayed + " multiple times + call writefile(l, 'Xtags2') + set tags=Xtags,Xtags2 + call assert_fails('tag first', ['E432:', 'E432:']) + call delete('Xtags') + call delete('Xtags2') call delete('Xfoo') set tags& %bwipe @@ -1453,6 +1493,11 @@ func Test_tagfile_errors() \ "foo Xfile 1"], 'Xtags') call assert_fails('tag foo', 'E431:') + " file name and search pattern are not separated by a tab + call writefile(["!_TAG_FILE_ENCODING\tutf-8\t//", + \ "foo\tXfile 1;"], 'Xtags') + call assert_fails('tag foo', 'E431:') + call delete('Xtags') call delete('Xfile') set tags& @@ -1485,4 +1530,86 @@ func Test_stag_close_window_on_error() set tags& endfunc +" Test for 'tagbsearch' (binary search) +func Test_tagbsearch() + " If a tags file header says the tags are sorted, but the tags are actually + " unsorted, then binary search should fail and linear search should work. + call writefile([ + \ "!_TAG_FILE_ENCODING\tutf-8\t//", + \ "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/", + \ "third\tXfoo\t3", + \ "second\tXfoo\t2", + \ "first\tXfoo\t1"], + \ 'Xtags') + set tags=Xtags + let code =<< trim [CODE] + int first() {} + int second() {} + int third() {} + [CODE] + call writefile(code, 'Xfoo') + + enew + set tagbsearch + call assert_fails('tag first', 'E426:') + call assert_equal('', bufname()) + call assert_fails('tag second', 'E426:') + call assert_equal('', bufname()) + tag third + call assert_equal('Xfoo', bufname()) + call assert_equal(3, line('.')) + %bw! + + set notagbsearch + tag first + call assert_equal('Xfoo', bufname()) + call assert_equal(1, line('.')) + enew + tag second + call assert_equal('Xfoo', bufname()) + call assert_equal(2, line('.')) + enew + tag third + call assert_equal('Xfoo', bufname()) + call assert_equal(3, line('.')) + %bw! + + " If a tags file header says the tags are unsorted, but the tags are + " actually sorted, then binary search should work. + call writefile([ + \ "!_TAG_FILE_ENCODING\tutf-8\t//", + \ "!_TAG_FILE_SORTED\t0\t/0=unsorted, 1=sorted, 2=foldcase/", + \ "first\tXfoo\t1", + \ "second\tXfoo\t2", + \ "third\tXfoo\t3"], + \ 'Xtags') + + set tagbsearch + tag first + call assert_equal('Xfoo', bufname()) + call assert_equal(1, line('.')) + enew + tag second + call assert_equal('Xfoo', bufname()) + call assert_equal(2, line('.')) + enew + tag third + call assert_equal('Xfoo', bufname()) + call assert_equal(3, line('.')) + %bw! + + " Binary search fails on EOF + call writefile([ + \ "!_TAG_FILE_ENCODING\tutf-8\t//", + \ "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/", + \ "bar\tXfoo\t1", + \ "foo\tXfoo\t2"], + \ 'Xtags') + call assert_fails('tag bbb', 'E426:') + + call delete('Xtags') + call delete('Xfoo') + set tags& tagbsearch& +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/nvim/testdir/test_taglist.vim b/src/nvim/testdir/test_taglist.vim index 68f9982e36..1a3d1986bf 100644 --- a/src/nvim/testdir/test_taglist.vim +++ b/src/nvim/testdir/test_taglist.vim @@ -36,6 +36,12 @@ func Test_taglist() call assert_equal('d', cmd[0]['kind']) call assert_equal('call cursor(3, 4)', cmd[0]['cmd']) + " Use characters with value > 127 in the tag extra field. + call writefile([ + \ "vFoo\tXfoo\t4" .. ';"' .. "\ttypename:int\ta£££\tv", + \ ], 'Xtags') + call assert_equal('v', taglist('vFoo')[0].kind) + call assert_fails("let l=taglist([])", 'E730:') call delete('Xtags') @@ -221,6 +227,11 @@ func Test_format_error() endtry call assert_true(caught_exception) + " no field after the filename for a tag + call writefile(["!_TAG_FILE_ENCODING\tutf-8\t//", + \ "foo\tXfile"], 'Xtags') + call assert_fails("echo taglist('foo')", 'E431:') + set tags& call delete('Xtags') endfunc |