diff options
Diffstat (limited to 'src/nvim/spellsuggest.c')
-rw-r--r-- | src/nvim/spellsuggest.c | 236 |
1 files changed, 124 insertions, 112 deletions
diff --git a/src/nvim/spellsuggest.c b/src/nvim/spellsuggest.c index aef26ef288..54b6f552b5 100644 --- a/src/nvim/spellsuggest.c +++ b/src/nvim/spellsuggest.c @@ -276,26 +276,27 @@ static int score_wordcount_adj(slang_T *slang, int score, char_u *word, bool spl int newscore; hashitem_T *hi = hash_find(&slang->sl_wordcount, (char *)word); - if (!HASHITEM_EMPTY(hi)) { - wc = HI2WC(hi); - if (wc->wc_count < SCORE_THRES2) { - bonus = SCORE_COMMON1; - } else if (wc->wc_count < SCORE_THRES3) { - bonus = SCORE_COMMON2; - } else { - bonus = SCORE_COMMON3; - } - if (split) { - newscore = score - bonus / 2; - } else { - newscore = score - bonus; - } - if (newscore < 0) { - return 0; - } - return newscore; + if (HASHITEM_EMPTY(hi)) { + return score; + } + + wc = HI2WC(hi); + if (wc->wc_count < SCORE_THRES2) { + bonus = SCORE_COMMON1; + } else if (wc->wc_count < SCORE_THRES3) { + bonus = SCORE_COMMON2; + } else { + bonus = SCORE_COMMON3; } - return score; + if (split) { + newscore = score - bonus / 2; + } else { + newscore = score - bonus; + } + if (newscore < 0) { + return 0; + } + return newscore; } /// Like captype() but for a KEEPCAP word add ONECAP if the word starts with a @@ -304,42 +305,45 @@ static int score_wordcount_adj(slang_T *slang, int score, char_u *word, bool spl static int badword_captype(char_u *word, char_u *end) FUNC_ATTR_NONNULL_ALL { - int flags = captype(word, end); + int flags = captype((char *)word, (char *)end); int c; int l, u; bool first; char_u *p; - if (flags & WF_KEEPCAP) { - // Count the number of UPPER and lower case letters. - l = u = 0; - first = false; - for (p = word; p < end; MB_PTR_ADV(p)) { - c = utf_ptr2char((char *)p); - if (SPELL_ISUPPER(c)) { - u++; - if (p == word) { - first = true; - } - } else { - l++; + if (!(flags & WF_KEEPCAP)) { + return flags; + } + + // Count the number of UPPER and lower case letters. + l = u = 0; + first = false; + for (p = word; p < end; MB_PTR_ADV(p)) { + c = utf_ptr2char((char *)p); + if (SPELL_ISUPPER(c)) { + u++; + if (p == word) { + first = true; } + } else { + l++; } + } - // If there are more UPPER than lower case letters suggest an - // ALLCAP word. Otherwise, if the first letter is UPPER then - // suggest ONECAP. Exception: "ALl" most likely should be "All", - // require three upper case letters. - if (u > l && u > 2) { - flags |= WF_ALLCAP; - } else if (first) { - flags |= WF_ONECAP; - } + // If there are more UPPER than lower case letters suggest an + // ALLCAP word. Otherwise, if the first letter is UPPER then + // suggest ONECAP. Exception: "ALl" most likely should be "All", + // require three upper case letters. + if (u > l && u > 2) { + flags |= WF_ALLCAP; + } else if (first) { + flags |= WF_ONECAP; + } - if (u >= 2 && l >= 2) { // maCARONI maCAroni - flags |= WF_MIXCAP; - } + if (u >= 2 && l >= 2) { // maCARONI maCAroni + flags |= WF_MIXCAP; } + return flags; } @@ -483,6 +487,11 @@ void spell_suggest(int count) } badlen++; end_visual_mode(); + // make sure we don't include the NUL at the end of the line + line = get_cursor_line_ptr(); + if (badlen > (int)strlen(line) - (int)curwin->w_cursor.col) { + badlen = (int)strlen(line) - (int)curwin->w_cursor.col; + } // Find the start of the badly spelled word. } else if (spell_move_to(curwin, FORWARD, true, true, NULL) == 0 || curwin->w_cursor.col > prev_cursor.col) { @@ -492,15 +501,15 @@ void spell_suggest(int count) line = get_cursor_line_ptr(); p = (char_u *)line + curwin->w_cursor.col; // Backup to before start of word. - while (p > (char_u *)line && spell_iswordp_nmw(p, curwin)) { + while (p > (char_u *)line && spell_iswordp_nmw((char *)p, curwin)) { MB_PTR_BACK(line, p); } // Forward to start of word. - while (*p != NUL && !spell_iswordp_nmw(p, curwin)) { + while (*p != NUL && !spell_iswordp_nmw((char *)p, curwin)) { MB_PTR_ADV(p); } - if (!spell_iswordp_nmw(p, curwin)) { // No word found. + if (!spell_iswordp_nmw((char *)p, curwin)) { // No word found. beep_flush(); return; } @@ -673,13 +682,13 @@ void spell_suggest(int count) /// /// @param maxcount maximum nr of suggestions /// @param need_cap 'spellcapcheck' matched -void spell_suggest_list(garray_T *gap, char_u *word, int maxcount, bool need_cap, bool interactive) +void spell_suggest_list(garray_T *gap, char *word, int maxcount, bool need_cap, bool interactive) { suginfo_T sug; suggest_T *stp; char_u *wcopy; - spell_find_suggest(word, 0, &sug, maxcount, false, need_cap, interactive); + spell_find_suggest((char_u *)word, 0, &sug, maxcount, false, need_cap, interactive); // Make room in "gap". ga_init(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); @@ -733,7 +742,7 @@ static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int ma if (badlen != 0) { su->su_badlen = badlen; } else { - size_t tmplen = spell_check(curwin, (char_u *)su->su_badptr, &attr, NULL, false); + size_t tmplen = spell_check(curwin, su->su_badptr, &attr, NULL, false); assert(tmplen <= INT_MAX); su->su_badlen = (int)tmplen; } @@ -744,7 +753,7 @@ static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int ma su->su_badlen = MAXWLEN - 1; // just in case } xstrlcpy((char *)su->su_badword, su->su_badptr, (size_t)su->su_badlen + 1); - (void)spell_casefold(curwin, (char_u *)su->su_badptr, su->su_badlen, (char_u *)su->su_fbadword, + (void)spell_casefold(curwin, su->su_badptr, su->su_badlen, su->su_fbadword, MAXWLEN); // TODO(vim): make this work if the case-folded text is longer than the @@ -783,7 +792,7 @@ static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int ma // for that. c = utf_ptr2char(su->su_badptr); if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) { - make_case_word((char_u *)su->su_badword, (char_u *)buf, WF_ONECAP); + make_case_word((char *)su->su_badword, buf, WF_ONECAP); add_suggestion(su, &su->su_ga, (char *)buf, su->su_badlen, SCORE_ICASE, 0, true, su->su_sallang, false); } @@ -895,8 +904,8 @@ static void spell_suggest_file(suginfo_T *su, char_u *fname) // If the suggestion doesn't have specific case duplicate the case // of the bad word. - if (captype(p, NULL) == 0) { - make_case_word(p, cword, su->su_badflags); + if (captype((char *)p, NULL) == 0) { + make_case_word((char *)p, (char *)cword, su->su_badflags); p = cword; } @@ -1016,7 +1025,7 @@ static void suggest_try_special(suginfo_T *su) // use that for the goodword too: "The the" -> "The". c = su->su_fbadword[len]; su->su_fbadword[len] = NUL; - make_case_word((char_u *)su->su_fbadword, word, su->su_badflags); + make_case_word(su->su_fbadword, (char *)word, su->su_badflags); su->su_fbadword[len] = c; // Give a soundalike score of 0, compute the score as if deleting one @@ -1085,7 +1094,7 @@ static void suggest_try_change(suginfo_T *su) STRCPY(fword, su->su_fbadword); n = (int)strlen(fword); p = su->su_badptr + su->su_badlen; - (void)spell_casefold(curwin, (char_u *)p, (int)strlen(p), (char_u *)fword + n, MAXWLEN - n); + (void)spell_casefold(curwin, p, (int)strlen(p), fword + n, MAXWLEN - n); // Make sure the resulting text is not longer than the original text. n = (int)strlen(su->su_badptr); @@ -1193,7 +1202,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun if (soundfold) { // Going through the soundfold tree. - byts = fbyts = slang->sl_sbyts; + byts = fbyts = (char_u *)slang->sl_sbyts; idxs = fidxs = slang->sl_sidxs; pbyts = NULL; pidxs = NULL; @@ -1202,9 +1211,9 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun } else { // When there are postponed prefixes we need to use these first. At // the end of the prefix we continue in the case-fold tree. - fbyts = slang->sl_fbyts; + fbyts = (char_u *)slang->sl_fbyts; fidxs = slang->sl_fidxs; - pbyts = slang->sl_pbyts; + pbyts = (char_u *)slang->sl_pbyts; pidxs = slang->sl_pidxs; if (pbyts != NULL) { byts = pbyts; @@ -1258,7 +1267,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun if (depth < MAXWLEN - 1 && (byts[arridx] == 0 || n == STATE_NOPREFIX)) { // Set su->su_badflags to the caps type at this position. // Use the caps type until here for the prefix itself. - n = nofold_len((char_u *)fword, sp->ts_fidx, (char_u *)su->su_badptr); + n = nofold_len(fword, sp->ts_fidx, su->su_badptr); flags = badword_captype((char_u *)su->su_badptr, (char_u *)su->su_badptr + n); su->su_badflags = badword_captype((char_u *)su->su_badptr + n, (char_u *)su->su_badptr + su->su_badlen); @@ -1276,8 +1285,8 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun // Move the prefix to preword[] with the right case // and make find_keepcap_word() works. tword[sp->ts_twordlen] = NUL; - make_case_word((char_u *)tword + sp->ts_splitoff, - (char_u *)preword + sp->ts_prewordlen, flags); + make_case_word(tword + sp->ts_splitoff, + preword + sp->ts_prewordlen, flags); sp->ts_prewordlen = (char_u)strlen(preword); sp->ts_splitoff = sp->ts_twordlen; } @@ -1305,7 +1314,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun fword_ends = (fword[sp->ts_fidx] == NUL || (soundfold ? ascii_iswhite(fword[sp->ts_fidx]) - : !spell_iswordp((char_u *)fword + sp->ts_fidx, curwin))); + : !spell_iswordp(fword + sp->ts_fidx, curwin))); tword[sp->ts_twordlen] = NUL; if (sp->ts_prefixdepth <= PFD_NOTSPECIAL @@ -1320,7 +1329,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun for (c = 0; c < len && pbyts[n + c] == 0; c++) {} if (c > 0) { c = valid_word_prefix(c, n, flags, - (char_u *)tword + sp->ts_splitoff, slang, false); + tword + sp->ts_splitoff, slang, false); if (c == 0) { break; } @@ -1359,7 +1368,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun == sp->ts_twordlen - sp->ts_splitoff && strncmp(fword + sp->ts_splitfidx, tword + sp->ts_splitoff, - sp->ts_fidx - sp->ts_splitfidx) == 0) { + (size_t)(sp->ts_fidx - sp->ts_splitfidx)) == 0) { preword[sp->ts_prewordlen] = NUL; newscore = score_wordcount_adj(slang, sp->ts_score, (char_u *)preword + sp->ts_prewordlen, @@ -1441,11 +1450,11 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun // When appending a compound word after a word character don't // use Onecap. - if (p != NULL && spell_iswordp_nmw((char_u *)p, curwin)) { + if (p != NULL && spell_iswordp_nmw(p, curwin)) { c &= ~WF_ONECAP; } - make_case_word((char_u *)tword + sp->ts_splitoff, - (char_u *)preword + sp->ts_prewordlen, c); + make_case_word(tword + sp->ts_splitoff, + preword + sp->ts_prewordlen, c); } if (!soundfold) { @@ -1477,7 +1486,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun } if (!spell_valid_case(su->su_badflags, - captype((char_u *)preword + sp->ts_prewordlen, NULL))) { + captype(preword + sp->ts_prewordlen, NULL))) { newscore += SCORE_ICASE; } } @@ -1508,10 +1517,10 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun // char, e.g., "thes," -> "these". p = fword + sp->ts_fidx; MB_PTR_BACK(fword, p); - if (!spell_iswordp((char_u *)p, curwin) && *preword != NUL) { + if (!spell_iswordp(p, curwin) && *preword != NUL) { p = preword + strlen(preword); MB_PTR_BACK(preword, p); - if (spell_iswordp((char_u *)p, curwin)) { + if (spell_iswordp(p, curwin)) { newscore += SCORE_NONWORD; } } @@ -1531,10 +1540,10 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun if (su->su_badflags & WF_MIXCAP) { // We really don't know if the word should be // upper or lower case, add both. - c = captype((char_u *)preword, NULL); + c = captype(preword, NULL); if (c == 0 || c == WF_ALLCAP) { - make_case_word((char_u *)tword + sp->ts_splitoff, - (char_u *)preword + sp->ts_prewordlen, + make_case_word(tword + sp->ts_splitoff, + preword + sp->ts_prewordlen, c == 0 ? WF_ALLCAP : 0); add_suggestion(su, &su->su_ga, (char *)preword, @@ -1673,7 +1682,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun // non-word character with a space. Always skip a // character when the word ends. But only when the // good word can end. - if (((!try_compound && !spell_iswordp_nmw((char_u *)fword + if (((!try_compound && !spell_iswordp_nmw(fword + sp->ts_fidx, curwin)) || fword_ends) @@ -1705,7 +1714,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun // set su->su_badflags to the caps type at this // position - n = nofold_len((char_u *)fword, sp->ts_fidx, (char_u *)su->su_badptr); + n = nofold_len(fword, sp->ts_fidx, su->su_badptr); su->su_badflags = badword_captype((char_u *)su->su_badptr + n, (char_u *)su->su_badptr + su->su_badlen); @@ -2029,7 +2038,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun // Don't swap if the first character is not a word character. // SWAP3 etc. also don't make sense then. - if (!soundfold && !spell_iswordp((char_u *)p, curwin)) { + if (!soundfold && !spell_iswordp(p, curwin)) { PROF_STORE(sp->ts_state) sp->ts_state = STATE_REP_INI; break; @@ -2039,7 +2048,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun c = utf_ptr2char(p); if (p[n] == NUL) { c2 = NUL; - } else if (!soundfold && !spell_iswordp((char_u *)p + n, curwin)) { + } else if (!soundfold && !spell_iswordp(p + n, curwin)) { c2 = c; // don't swap non-word char } else { c2 = utf_ptr2char(p + n); @@ -2099,7 +2108,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun c = utf_ptr2char(p); fl = utf_ptr2len(p + n); c2 = utf_ptr2char(p + n); - if (!soundfold && !spell_iswordp((char_u *)p + n + fl, curwin)) { + if (!soundfold && !spell_iswordp(p + n + fl, curwin)) { c3 = c; // don't swap non-word char } else { c3 = utf_ptr2char(p + n + fl); @@ -2150,7 +2159,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char *fword, bool soun utf_char2bytes(c2, p + tl); p = p + tl; - if (!soundfold && !spell_iswordp((char_u *)p, curwin)) { + if (!soundfold && !spell_iswordp(p, curwin)) { // Middle char is not a word char, skip the rotate. First and // third char were already checked at swap and swap3. PROF_STORE(sp->ts_state) @@ -2394,7 +2403,7 @@ static void find_keepcap_word(slang_T *slang, char *fword, char *kword) int c; idx_T lo, hi, m; char_u *p; - char_u *byts = slang->sl_kbyts; // array with bytes of the words + char_u *byts = (char_u *)slang->sl_kbyts; // array with bytes of the words idx_T *idxs = slang->sl_kidxs; // array with indexes if (byts == NULL) { @@ -2404,7 +2413,7 @@ static void find_keepcap_word(slang_T *slang, char *fword, char *kword) } // Make an all-cap version of "fword". - allcap_copy((char_u *)fword, (char_u *)uword); + allcap_copy(fword, uword); // Each character needs to be tried both case-folded and upper-case. // All this gets very complicated if we keep in mind that changing case @@ -2661,7 +2670,7 @@ static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u * pbad = badsound; } else { // soundfold the bad word with more characters following - (void)spell_casefold(curwin, (char_u *)su->su_badptr, stp->st_orglen, fword, MAXWLEN); + (void)spell_casefold(curwin, su->su_badptr, stp->st_orglen, (char *)fword, MAXWLEN); // When joining two words the sound often changes a lot. E.g., "t he" // sounds like "t h" while "the" sounds like "@". Avoid that by @@ -2814,15 +2823,15 @@ static void add_sound_suggest(suginfo_T *su, char *goodword, int score, langp_T // times with different scores. Since the following is quite slow only do // the words that have a better score than before. Use a hashtable to // remember the words that have been done. - hash = hash_hash((char_u *)goodword); + hash = hash_hash(goodword); const size_t goodword_len = strlen(goodword); hi = hash_lookup(&slang->sl_sounddone, (const char *)goodword, goodword_len, hash); if (HASHITEM_EMPTY(hi)) { - sft = xmalloc(sizeof(sftword_T) + goodword_len); + sft = xmalloc(offsetof(sftword_T, sft_word) + goodword_len + 1); sft->sft_score = (int16_t)score; memcpy(sft->sft_word, goodword, goodword_len + 1); - hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); + hash_add_item(&slang->sl_sounddone, hi, (char *)sft->sft_word, hash); } else { sft = HI2SFT(hi); if (score >= sft->sft_score) { @@ -2846,7 +2855,7 @@ static void add_sound_suggest(suginfo_T *su, char *goodword, int score, langp_T // previous wordnr. orgnr += bytes2offset(&nrline); - byts = slang->sl_fbyts; + byts = (char_u *)slang->sl_fbyts; idxs = slang->sl_fidxs; // Lookup the word "orgnr" one of the two tries. @@ -2904,7 +2913,7 @@ badword: flags |= su->su_badflags; if ((flags & WF_CAPMASK) != 0) { // Need to fix case according to "flags". - make_case_word(theword, cword, flags); + make_case_word((char *)theword, (char *)cword, flags); p = cword; } else { p = theword; @@ -2984,7 +2993,7 @@ static int soundfold_find(slang_T *slang, char_u *word) idx_T *idxs; int wordnr = 0; - byts = slang->sl_sbyts; + byts = (char_u *)slang->sl_sbyts; idxs = slang->sl_sidxs; for (;;) { @@ -3220,7 +3229,7 @@ static void check_suggestions(suginfo_T *su, garray_T *gap) xstrlcpy(longword + len, su->su_badptr + stp[i].st_orglen, (size_t)(MAXWLEN - len + 1)); attr = HLF_COUNT; - (void)spell_check(curwin, (char_u *)longword, &attr, NULL, false); + (void)spell_check(curwin, longword, &attr, NULL, false); if (attr != HLF_COUNT) { // Remove this entry. xfree(stp[i].st_word); @@ -3239,13 +3248,14 @@ static void add_banned(suginfo_T *su, char *word) hash_T hash; hashitem_T *hi; - hash = hash_hash((char_u *)word); + hash = hash_hash(word); const size_t word_len = strlen(word); hi = hash_lookup(&su->su_banned, word, word_len, hash); - if (HASHITEM_EMPTY(hi)) { - s = xmemdupz(word, word_len); - hash_add_item(&su->su_banned, hi, s, hash); + if (!HASHITEM_EMPTY(hi)) { // already present + return; } + s = xmemdupz(word, word_len); + hash_add_item(&su->su_banned, hi, (char *)s, hash); } /// Recompute the score for all suggestions if sound-folding is possible. This @@ -3312,21 +3322,23 @@ static int sug_compare(const void *s1, const void *s2) static int cleanup_suggestions(garray_T *gap, int maxscore, int keep) FUNC_ATTR_NONNULL_ALL { - if (gap->ga_len > 0) { - // Sort the list. - qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); + if (gap->ga_len <= 0) { + return maxscore; + } - // Truncate the list to the number of suggestions that will be displayed. - if (gap->ga_len > keep) { - suggest_T *const stp = &SUG(*gap, 0); + // Sort the list. + qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); - for (int i = keep; i < gap->ga_len; i++) { - xfree(stp[i].st_word); - } - gap->ga_len = keep; - if (keep >= 1) { - return stp[keep - 1].st_score; - } + // Truncate the list to the number of suggestions that will be displayed. + if (gap->ga_len > keep) { + suggest_T *const stp = &SUG(*gap, 0); + + for (int i = keep; i < gap->ga_len; i++) { + xfree(stp[i].st_word); + } + gap->ga_len = keep; + if (keep >= 1) { + return stp[keep - 1].st_score; } } return maxscore; @@ -3580,12 +3592,12 @@ static int spell_edit_score(slang_T *slang, const char_u *badword, const char_u // Get the characters from the multi-byte strings and put them in an // int array for easy access. badlen = 0; - for (const char_u *p = badword; *p != NUL;) { + for (const char *p = (char *)badword; *p != NUL;) { wbadword[badlen++] = mb_cptr2char_adv(&p); } wbadword[badlen++] = 0; goodlen = 0; - for (const char_u *p = goodword; *p != NUL;) { + for (const char *p = (char *)goodword; *p != NUL;) { wgoodword[goodlen++] = mb_cptr2char_adv(&p); } wgoodword[goodlen++] = 0; @@ -3686,12 +3698,12 @@ static int spell_edit_score_limit_w(slang_T *slang, const char_u *badword, const // Get the characters from the multi-byte strings and put them in an // int array for easy access. bi = 0; - for (const char_u *p = badword; *p != NUL;) { + for (const char *p = (char *)badword; *p != NUL;) { wbadword[bi++] = mb_cptr2char_adv(&p); } wbadword[bi++] = 0; gi = 0; - for (const char_u *p = goodword; *p != NUL;) { + for (const char *p = (char *)goodword; *p != NUL;) { wgoodword[gi++] = mb_cptr2char_adv(&p); } wgoodword[gi++] = 0; |