diff options
author | James McCoy <jamessan@jamessan.com> | 2017-03-19 13:24:45 -0400 |
---|---|---|
committer | James McCoy <jamessan@jamessan.com> | 2017-03-19 23:42:01 -0400 |
commit | 058516aaf9c29b41bf27c8d4adfa2773c3896205 (patch) | |
tree | af7d07bd6d90a50c1e32a37348efc57176de4c44 | |
parent | 934137fb489f0c99c32abe32e32f683e0ba3d40f (diff) | |
download | rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.tar.gz rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.tar.bz2 rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.zip |
vim-patch:8.0.0190
Problem: Detecting duplicate tags uses a slow linear search.
Solution: Use a much faster hash table solution. (James McCoy, closes vim/vim#1046)
But don't add hi_keylen, it makes hash tables 50% bigger.
https://github.com/vim/vim/commit/810f9c361c83afb36b9f1cdadca2b93f1201d039
-rw-r--r-- | src/nvim/tag.c | 243 |
1 files changed, 118 insertions, 125 deletions
diff --git a/src/nvim/tag.c b/src/nvim/tag.c index 9c9f24b9c0..c3c2634598 100644 --- a/src/nvim/tag.c +++ b/src/nvim/tag.c @@ -73,9 +73,9 @@ typedef struct { } pat_T; /* - * The matching tags are first stored in ga_match[]. In which one depends on - * the priority of the match. - * At the end, the matches from ga_match[] are concatenated, to make a list + * The matching tags are first stored in one of the ht_match[] hash tables. In + * which one depends on the priority of the match. + * At the end, the matches from ht_match[] are concatenated, to make a list * sorted on priority. */ #define MT_ST_CUR 0 /* static match in current file */ @@ -1122,11 +1122,9 @@ find_tags ( int save_emsg_off; - struct match_found { - int len; /* nr of chars of match[] to be compared */ - char_u match[1]; /* actually longer */ - } *mfp, *mfp2; - garray_T ga_match[MT_COUNT]; + char_u *mfp; + hashtab_T ht_match[MT_COUNT]; + hash_T hash = 0; int match_count = 0; /* number of matches found */ char_u **matches; int mtt; @@ -1186,7 +1184,7 @@ find_tags ( lbuf = xmalloc(lbuf_size); tag_fname = xmalloc(MAXPATHL + 1); for (mtt = 0; mtt < MT_COUNT; ++mtt) - ga_init(&ga_match[mtt], (int)sizeof(struct match_found *), 100); + hash_init(&ht_match[mtt]); STRCPY(tag_fname, "from cscope"); /* for error messages */ @@ -1752,9 +1750,11 @@ parse_line: } /* - * If a match is found, add it to ga_match[]. + * If a match is found, add it to ht_match[]. */ if (match) { + int len = 0; + if (use_cscope) { /* Don't change the ordering, always use the same table. */ mtt = MT_GL_OTH; @@ -1790,116 +1790,102 @@ parse_line: } /* - * Add the found match in ga_match[mtt], avoiding duplicates. + * Add the found match in ht_match[mtt]. * Store the info we need later, which depends on the kind of * tags we are dealing with. */ - ga_grow(&ga_match[mtt], 1); - { - int len; - - if (help_only) { + if (help_only) { # define ML_EXTRA 3 - /* - * Append the help-heuristic number after the - * tagname, for sorting it later. - */ - *tagp.tagname_end = NUL; - len = (int)(tagp.tagname_end - tagp.tagname); - mfp = xmalloc(sizeof(struct match_found) + len + 10 + ML_EXTRA); - /* "len" includes the language and the NUL, but - * not the priority. */ - mfp->len = len + ML_EXTRA + 1; -#define ML_HELP_LEN 6 - p = mfp->match; - STRCPY(p, tagp.tagname); - p[len] = '@'; - STRCPY(p + len + 1, help_lang); - sprintf((char *)p + len + 1 + ML_EXTRA, "%06d", - help_heuristic(tagp.tagname, - match_re ? matchoff : 0, !match_no_ic) - + help_pri - ); - - *tagp.tagname_end = TAB; - } else if (name_only) { - if (get_it_again) { - char_u *temp_end = tagp.command; - - if (*temp_end == '/') - while (*temp_end && *temp_end != '\r' - && *temp_end != '\n' - && *temp_end != '$') - temp_end++; - - if (tagp.command + 2 < temp_end) { - len = (int)(temp_end - tagp.command - 2); - mfp = xmalloc(sizeof(struct match_found) + len); - mfp->len = len + 1; /* include the NUL */ - p = mfp->match; - STRLCPY(p, tagp.command + 2, len + 1); - } else - mfp = NULL; - get_it_again = FALSE; - } else { - len = (int)(tagp.tagname_end - tagp.tagname); - mfp = xmalloc(sizeof(struct match_found) + len); - mfp->len = len + 1; /* include the NUL */ - p = mfp->match; - STRLCPY(p, tagp.tagname, len + 1); - - /* if wanted, re-read line to get long form too */ - if (State & INSERT) - get_it_again = p_sft; - } + // Append the help-heuristic number after the tagname, for + // sorting it later. The heuristic is ignored for + // detecting duplicates. + // The format is {tagname}@{lang}NUL{heuristic}NUL + *tagp.tagname_end = NUL; + len = (int)(tagp.tagname_end - tagp.tagname); + mfp = xmalloc(sizeof(char_u) + len + 10 + ML_EXTRA + 1); + + p = mfp; + STRCPY(p, tagp.tagname); + p[len] = '@'; + STRCPY(p + len + 1, help_lang); + sprintf((char *)p + len + 1 + ML_EXTRA, "%06d", + help_heuristic(tagp.tagname, + match_re ? matchoff : 0, !match_no_ic) + + help_pri); + + *tagp.tagname_end = TAB; + } else if (name_only) { + if (get_it_again) { + char_u *temp_end = tagp.command; + + if (*temp_end == '/') + while (*temp_end && *temp_end != '\r' + && *temp_end != '\n' + && *temp_end != '$') + temp_end++; + + if (tagp.command + 2 < temp_end) { + len = (int)(temp_end - tagp.command - 2); + mfp = xmalloc(sizeof(char_u) + len + 1); + STRLCPY(mfp, tagp.command + 2, len + 1); + } else + mfp = NULL; + get_it_again = FALSE; } else { - /* Save the tag in a buffer. - * Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf> - * other tag: <mtt><tag_fname><NUL><NUL><lbuf> - * without Emacs tags: <mtt><tag_fname><NUL><lbuf> - */ - len = (int)STRLEN(tag_fname) - + (int)STRLEN(lbuf) + 3; - mfp = xmalloc(sizeof(struct match_found) + len); - mfp->len = len; - p = mfp->match; - p[0] = mtt; - STRCPY(p + 1, tag_fname); + len = (int)(tagp.tagname_end - tagp.tagname); + mfp = xmalloc(sizeof(char_u) + len + 1); + STRLCPY(mfp, tagp.tagname, len + 1); + + /* if wanted, re-read line to get long form too */ + if (State & INSERT) + get_it_again = p_sft; + } + } else { +#define TAG_SEP 0x01 + size_t tag_fname_len = STRLEN(tag_fname); + /* Save the tag in a buffer. + * Use 0x01 to separate fields (Can't use NUL, because the + * hash key is terminated by NUL). + * Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf> + * other tag: <mtt><tag_fname><NUL><NUL><lbuf> + * without Emacs tags: <mtt><tag_fname><NUL><lbuf> + */ + len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3; + mfp = xmalloc(sizeof(char_u) + len + 1); + p = mfp; + p[0] = mtt; + STRCPY(p + 1, tag_fname); #ifdef BACKSLASH_IN_FILENAME - /* Ignore differences in slashes, avoid adding - * both path/file and path\file. */ - slash_adjust(p + 1); + /* Ignore differences in slashes, avoid adding + * both path/file and path\file. */ + slash_adjust(p + 1); #endif - s = p + 1 + STRLEN(tag_fname) + 1; - STRCPY(s, lbuf); - } + p[tag_fname_len + 1] = TAG_SEP; + s = p + 1 + tag_fname_len + 1; + STRCPY(s, lbuf); + } - if (mfp != NULL) { - /* - * Don't add identical matches. - * This can take a lot of time when finding many - * matches, check for CTRL-C now and then. - * Add all cscope tags, because they are all listed. - */ - if (use_cscope) - i = -1; - else - for (i = ga_match[mtt].ga_len; --i >= 0 && !got_int; ) { - mfp2 = ((struct match_found **) - (ga_match[mtt].ga_data))[i]; - if (mfp2->len == mfp->len - && memcmp(mfp2->match, mfp->match, - (size_t)mfp->len) == 0) - break; - fast_breakcheck(); - } - if (i < 0) { - ((struct match_found **)(ga_match[mtt].ga_data)) - [ga_match[mtt].ga_len++] = mfp; - ++match_count; - } else - xfree(mfp); - } + if (mfp != NULL) { + hashitem_T *hi; + + // Don't add identical matches. + // Add all cscope tags, because they are all listed. + // "mfp" is used as a hash key, there is a NUL byte to end + // the part matters for comparing, more bytes may follow + // after it. E.g. help tags store the priority after the + // NUL. + if (use_cscope) + hash++; + else + hash = hash_hash(mfp); + hi = hash_lookup(&ht_match[mtt], (const char *)mfp, + STRLEN(mfp), hash); + if (HASHITEM_EMPTY(hi)) { + hash_add_item(&ht_match[mtt], hi, mfp, hash); + ++match_count; + } else + // duplicate tag, drop it + xfree(mfp); } } if (use_cscope && eof) @@ -1962,7 +1948,7 @@ findtag_end: xfree(tag_fname); /* - * Move the matches from the ga_match[] arrays into one list of + * Move the matches from the ht_match[] arrays into one list of * matches. When retval == FAIL, free the matches. */ if (retval == FAIL) @@ -1974,20 +1960,27 @@ findtag_end: matches = NULL; match_count = 0; for (mtt = 0; mtt < MT_COUNT; ++mtt) { - for (int i = 0; i < ga_match[mtt].ga_len; ++i) { - mfp = ((struct match_found **)(ga_match[mtt].ga_data))[i]; - if (matches == NULL) - xfree(mfp); - else { - /* To avoid allocating memory again we turn the struct - * match_found into a string. For help the priority was not - * included in the length. */ - memmove(mfp, mfp->match, - (size_t)(mfp->len + (help_only ? ML_HELP_LEN : 0))); - matches[match_count++] = (char_u *)mfp; + hashitem_T *hi; + long todo = (long)ht_match[mtt].ht_used; + + for (hi = ht_match[mtt].ht_array; todo > 0; hi++) { + if (!HASHITEM_EMPTY(hi)) { + mfp = hi->hi_key; + if (matches == NULL) { + xfree(mfp); + } else { + // now change the TAG_SEP back to NUL + for (p = mfp; *p != NUL; p++) { + if (*p == TAG_SEP) { + *p = NUL; + } + } + matches[match_count++] = (char_u *)mfp; + } + todo--; } } - ga_clear(&ga_match[mtt]); + hash_clear(&ht_match[mtt]); } *matchesp = matches; |