vim-patch:8.0.0190

Problem: Detecting duplicate tags uses a slow linear search. Solution: Use a much faster hash table solution. (James McCoy, closes vim/vim#1046) But don't add hi_keylen, it makes hash tables 50% bigger. https://github.com/vim/vim/commit/810f9c361c83afb36b9f1cdadca2b93f1201d039
author: James McCoy <jamessan@jamessan.com> 2017-03-19 13:24:45 -0400
committer: James McCoy <jamessan@jamessan.com> 2017-03-19 23:42:01 -0400
commit: 058516aaf9c29b41bf27c8d4adfa2773c3896205 (patch)
tree: af7d07bd6d90a50c1e32a37348efc57176de4c44
parent: 934137fb489f0c99c32abe32e32f683e0ba3d40f (diff)
download: rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.tar.gz
rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.tar.bz2
rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.zip
1 files changed, 118 insertions, 125 deletions
diff --git a/src/nvim/tag.c b/src/nvim/tag.c
index 9c9f24b9c0..c3c2634598 100644
--- a/src/nvim/tag.c
+++ b/src/nvim/tag.c
@@ -73,9 +73,9 @@ typedef struct {
 } pat_T;
 
 /*
- * The matching tags are first stored in ga_match[].  In which one depends on
- * the priority of the match.
- * At the end, the matches from ga_match[] are concatenated, to make a list
+ * The matching tags are first stored in one of the ht_match[] hash tables.  In
+ * which one depends on the priority of the match.
+ * At the end, the matches from ht_match[] are concatenated, to make a list
  * sorted on priority.
  */
 #define MT_ST_CUR       0               /* static match in current file */
@@ -1122,11 +1122,9 @@ find_tags (
   int save_emsg_off;
 
 
-  struct match_found {
-    int len;                    /* nr of chars of match[] to be compared */
-    char_u match[1];            /* actually longer */
-  } *mfp, *mfp2;
-  garray_T ga_match[MT_COUNT];
+  char_u *mfp;
+  hashtab_T ht_match[MT_COUNT];
+  hash_T hash = 0;
   int match_count = 0;                          /* number of matches found */
   char_u      **matches;
   int mtt;
@@ -1186,7 +1184,7 @@ find_tags (
   lbuf = xmalloc(lbuf_size);
   tag_fname = xmalloc(MAXPATHL + 1);
   for (mtt = 0; mtt < MT_COUNT; ++mtt)
-    ga_init(&ga_match[mtt], (int)sizeof(struct match_found *), 100);
+    hash_init(&ht_match[mtt]);
 
   STRCPY(tag_fname, "from cscope");             /* for error messages */
 
@@ -1752,9 +1750,11 @@ parse_line:
         }
 
         /*
-         * If a match is found, add it to ga_match[].
+         * If a match is found, add it to ht_match[].
          */
         if (match) {
+          int len = 0;
+
           if (use_cscope) {
             /* Don't change the ordering, always use the same table. */
             mtt = MT_GL_OTH;
@@ -1790,116 +1790,102 @@ parse_line:
           }
 
           /*
-           * Add the found match in ga_match[mtt], avoiding duplicates.
+           * Add the found match in ht_match[mtt].
            * Store the info we need later, which depends on the kind of
            * tags we are dealing with.
            */
-          ga_grow(&ga_match[mtt], 1);
-          {
-            int len;
-
-            if (help_only) {
+          if (help_only) {
 # define ML_EXTRA 3
-              /*
-               * Append the help-heuristic number after the
-               * tagname, for sorting it later.
-               */
-              *tagp.tagname_end = NUL;
-              len = (int)(tagp.tagname_end - tagp.tagname);
-              mfp = xmalloc(sizeof(struct match_found) + len + 10 + ML_EXTRA);
-              /* "len" includes the language and the NUL, but
-               * not the priority. */
-              mfp->len = len + ML_EXTRA + 1;
-#define ML_HELP_LEN 6
-              p = mfp->match;
-              STRCPY(p, tagp.tagname);
-              p[len] = '@';
-              STRCPY(p + len + 1, help_lang);
-              sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
-                  help_heuristic(tagp.tagname,
-                      match_re ? matchoff : 0, !match_no_ic)
-                  + help_pri
-                  );
-
-              *tagp.tagname_end = TAB;
-            } else if (name_only)   {
-              if (get_it_again) {
-                char_u *temp_end = tagp.command;
-
-                if (*temp_end == '/')
-                  while (*temp_end && *temp_end != '\r'
-                         && *temp_end != '\n'
-                         && *temp_end != '$')
-                    temp_end++;
-
-                if (tagp.command + 2 < temp_end) {
-                  len = (int)(temp_end - tagp.command - 2);
-                  mfp = xmalloc(sizeof(struct match_found) + len);
-                  mfp->len = len + 1;                 /* include the NUL */
-                  p = mfp->match;
-                  STRLCPY(p, tagp.command + 2, len + 1);
-                } else
-                  mfp = NULL;
-                get_it_again = FALSE;
-              } else {
-                len = (int)(tagp.tagname_end - tagp.tagname);
-                mfp = xmalloc(sizeof(struct match_found) + len);
-                mfp->len = len + 1;               /* include the NUL */
-                p = mfp->match;
-                STRLCPY(p, tagp.tagname, len + 1);
-
-                /* if wanted, re-read line to get long form too */
-                if (State & INSERT)
-                  get_it_again = p_sft;
-              }
+            // Append the help-heuristic number after the tagname, for
+            // sorting it later.  The heuristic is ignored for
+            // detecting duplicates.
+            // The format is {tagname}@{lang}NUL{heuristic}NUL
+            *tagp.tagname_end = NUL;
+            len = (int)(tagp.tagname_end - tagp.tagname);
+            mfp = xmalloc(sizeof(char_u) + len + 10 + ML_EXTRA + 1);
+
+            p = mfp;
+            STRCPY(p, tagp.tagname);
+            p[len] = '@';
+            STRCPY(p + len + 1, help_lang);
+            sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
+                    help_heuristic(tagp.tagname,
+                                   match_re ? matchoff : 0, !match_no_ic)
+                    + help_pri);
+
+            *tagp.tagname_end = TAB;
+          } else if (name_only)   {
+            if (get_it_again) {
+              char_u *temp_end = tagp.command;
+
+              if (*temp_end == '/')
+                while (*temp_end && *temp_end != '\r'
+                       && *temp_end != '\n'
+                       && *temp_end != '$')
+                  temp_end++;
+
+              if (tagp.command + 2 < temp_end) {
+                len = (int)(temp_end - tagp.command - 2);
+                mfp = xmalloc(sizeof(char_u) + len + 1);
+                STRLCPY(mfp, tagp.command + 2, len + 1);
+              } else
+                mfp = NULL;
+              get_it_again = FALSE;
             } else {
-              /* Save the tag in a buffer.
-               * Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf>
-               * other tag: <mtt><tag_fname><NUL><NUL><lbuf>
-               * without Emacs tags: <mtt><tag_fname><NUL><lbuf>
-               */
-              len = (int)STRLEN(tag_fname)
-                    + (int)STRLEN(lbuf) + 3;
-              mfp = xmalloc(sizeof(struct match_found) + len);
-              mfp->len = len;
-              p = mfp->match;
-              p[0] = mtt;
-              STRCPY(p + 1, tag_fname);
+              len = (int)(tagp.tagname_end - tagp.tagname);
+              mfp = xmalloc(sizeof(char_u) + len + 1);
+              STRLCPY(mfp, tagp.tagname, len + 1);
+
+              /* if wanted, re-read line to get long form too */
+              if (State & INSERT)
+                get_it_again = p_sft;
+            }
+          } else {
+#define TAG_SEP 0x01
+            size_t tag_fname_len = STRLEN(tag_fname);
+            /* Save the tag in a buffer.
+             * Use 0x01 to separate fields (Can't use NUL, because the
+             * hash key is terminated by NUL).
+             * Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf>
+             * other tag: <mtt><tag_fname><NUL><NUL><lbuf>
+             * without Emacs tags: <mtt><tag_fname><NUL><lbuf>
+             */
+            len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3;
+            mfp = xmalloc(sizeof(char_u) + len + 1);
+            p = mfp;
+            p[0] = mtt;
+            STRCPY(p + 1, tag_fname);
 #ifdef BACKSLASH_IN_FILENAME
-              /* Ignore differences in slashes, avoid adding
-               * both path/file and path\file. */
-              slash_adjust(p + 1);
+            /* Ignore differences in slashes, avoid adding
+             * both path/file and path\file. */
+            slash_adjust(p + 1);
 #endif
-              s = p + 1 + STRLEN(tag_fname) + 1;
-              STRCPY(s, lbuf);
-            }
+            p[tag_fname_len + 1] = TAG_SEP;
+            s = p + 1 + tag_fname_len + 1;
+            STRCPY(s, lbuf);
+          }
 
-            if (mfp != NULL) {
-              /*
-               * Don't add identical matches.
-               * This can take a lot of time when finding many
-               * matches, check for CTRL-C now and then.
-               * Add all cscope tags, because they are all listed.
-               */
-              if (use_cscope)
-                i = -1;
-              else
-                for (i = ga_match[mtt].ga_len; --i >= 0 && !got_int; ) {
-                  mfp2 = ((struct match_found **)
-                          (ga_match[mtt].ga_data))[i];
-                  if (mfp2->len == mfp->len
-                      && memcmp(mfp2->match, mfp->match,
-                          (size_t)mfp->len) == 0)
-                    break;
-                  fast_breakcheck();
-                }
-              if (i < 0) {
-                ((struct match_found **)(ga_match[mtt].ga_data))
-                [ga_match[mtt].ga_len++] = mfp;
-                ++match_count;
-              } else
-                xfree(mfp);
-            }
+          if (mfp != NULL) {
+            hashitem_T *hi;
+
+            // Don't add identical matches.
+            // Add all cscope tags, because they are all listed.
+            // "mfp" is used as a hash key, there is a NUL byte to end
+            // the part matters for comparing, more bytes may follow
+            // after it.  E.g. help tags store the priority after the
+            // NUL.
+            if (use_cscope)
+              hash++;
+            else
+              hash = hash_hash(mfp);
+            hi = hash_lookup(&ht_match[mtt], (const char *)mfp,
+                             STRLEN(mfp), hash);
+            if (HASHITEM_EMPTY(hi)) {
+              hash_add_item(&ht_match[mtt], hi, mfp, hash);
+              ++match_count;
+            } else
+              // duplicate tag, drop it
+              xfree(mfp);
           }
         }
         if (use_cscope && eof)
@@ -1962,7 +1948,7 @@ findtag_end:
   xfree(tag_fname);
 
   /*
-   * Move the matches from the ga_match[] arrays into one list of
+   * Move the matches from the ht_match[] arrays into one list of
    * matches.  When retval == FAIL, free the matches.
    */
   if (retval == FAIL)
@@ -1974,20 +1960,27 @@ findtag_end:
     matches = NULL;
   match_count = 0;
   for (mtt = 0; mtt < MT_COUNT; ++mtt) {
-    for (int i = 0; i < ga_match[mtt].ga_len; ++i) {
-      mfp = ((struct match_found **)(ga_match[mtt].ga_data))[i];
-      if (matches == NULL)
-        xfree(mfp);
-      else {
-        /* To avoid allocating memory again we turn the struct
-         * match_found into a string.  For help the priority was not
-         * included in the length. */
-        memmove(mfp, mfp->match,
-            (size_t)(mfp->len + (help_only ? ML_HELP_LEN : 0)));
-        matches[match_count++] = (char_u *)mfp;
+    hashitem_T *hi;
+    long todo = (long)ht_match[mtt].ht_used;
+
+    for (hi = ht_match[mtt].ht_array; todo > 0; hi++) {
+      if (!HASHITEM_EMPTY(hi)) {
+        mfp = hi->hi_key;
+        if (matches == NULL) {
+          xfree(mfp);
+        } else {
+          // now change the TAG_SEP back to NUL
+          for (p = mfp; *p != NUL; p++) {
+            if (*p == TAG_SEP) {
+              *p = NUL;
+            }
+          }
+          matches[match_count++] = (char_u *)mfp;
+        }
+        todo--;
       }
     }
-    ga_clear(&ga_match[mtt]);
+    hash_clear(&ht_match[mtt]);
   }
 
   *matchesp = matches;
author	James McCoy <jamessan@jamessan.com>	2017-03-19 13:24:45 -0400
committer	James McCoy <jamessan@jamessan.com>	2017-03-19 23:42:01 -0400
commit	058516aaf9c29b41bf27c8d4adfa2773c3896205 (patch)
tree	af7d07bd6d90a50c1e32a37348efc57176de4c44
parent	934137fb489f0c99c32abe32e32f683e0ba3d40f (diff)
download	rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.tar.gz rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.tar.bz2 rneovim-058516aaf9c29b41bf27c8d4adfa2773c3896205.zip