aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/spell.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvim/spell.c')
-rw-r--r--src/nvim/spell.c181
1 files changed, 153 insertions, 28 deletions
diff --git a/src/nvim/spell.c b/src/nvim/spell.c
index 0acaa9ae2b..d2401b6776 100644
--- a/src/nvim/spell.c
+++ b/src/nvim/spell.c
@@ -45,6 +45,9 @@
// Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
// Only use it for small word lists!
+// Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after
+// adding a word. Only use it for small word lists!
+
// Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
// specific word.
@@ -156,6 +159,8 @@
//
// sectionID == SN_NOSPLITSUGS: nothing
//
+// sectionID == SN_NOCOMPOUNDSUGS: nothing
+//
// sectionID == SN_WORDS: <word> ...
// <word> N bytes NUL terminated common word
//
@@ -482,7 +487,7 @@ struct slang_S {
regprog_T **sl_prefprog; // table with regprogs for prefixes
garray_T sl_rep; // list of fromto_T entries from REP lines
- short sl_rep_first[256]; // indexes where byte first appears, -1 if
+ int16_t sl_rep_first[256]; // indexes where byte first appears, -1 if
// there is none
garray_T sl_sal; // list of salitem_T entries from SAL lines
salfirst_T sl_sal_first[256]; // indexes where byte first appears, -1 if
@@ -494,8 +499,9 @@ struct slang_S {
// "sl_sal_first" maps chars, when has_mbyte
// "sl_sal" is a list of wide char lists.
garray_T sl_repsal; // list of fromto_T entries from REPSAL lines
- short sl_repsal_first[256]; // sl_rep_first for REPSAL lines
- bool sl_nosplitsugs; // don't suggest splitting a word
+ int16_t sl_repsal_first[256]; // sl_rep_first for REPSAL lines
+ bool sl_nosplitsugs; // don't suggest splitting a word
+ bool sl_nocompoundsugs; // don't suggest compounding
// Info from the .sug file. Loaded on demand.
time_t sl_sugtime; // timestamp for .sug file
@@ -558,6 +564,7 @@ typedef struct langp_S {
#define SN_WORDS 13 // common words
#define SN_NOSPLITSUGS 14 // don't split word for suggestions
#define SN_INFO 15 // info section
+#define SN_NOCOMPOUNDSUGS 16 // don't compound for suggestions
#define SN_END 255 // end of sections
#define SNF_REQUIRED 1 // <sectionflags>: required section
@@ -948,6 +955,7 @@ typedef struct spellinfo_S {
char_u *si_sofoto; // SOFOTO text
int si_nosugfile; // NOSUGFILE item found
int si_nosplitsugs; // NOSPLITSUGS item found
+ int si_nocompoundsugs; // NOCOMPOUNDSUGS item found
int si_followup; // soundsalike: ?
int si_collapse; // soundsalike: ?
hashtab_T si_commonwords; // hashtable for common words
@@ -2666,7 +2674,11 @@ spell_load_file (
break;
case SN_NOSPLITSUGS:
- lp->sl_nosplitsugs = true; // <timestamp>
+ lp->sl_nosplitsugs = true;
+ break;
+
+ case SN_NOCOMPOUNDSUGS:
+ lp->sl_nocompoundsugs = true;
break;
case SN_COMPOUND:
@@ -2868,7 +2880,7 @@ static int read_prefcond_section(FILE *fd, slang_T *lp)
// Read REP or REPSAL items section from "fd": <repcount> <rep> ...
// Return SP_*ERROR flags.
-static int read_rep_section(FILE *fd, garray_T *gap, short *first)
+static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first)
{
int cnt;
fromto_T *ftp;
@@ -4266,9 +4278,9 @@ static void spell_print_node(wordnode_T *node, int depth)
PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
PRINTSOME(line2, depth, " ", 0, 0);
PRINTSOME(line3, depth, " ", 0, 0);
- msg(line1);
- msg(line2);
- msg(line3);
+ msg((char_u *)line1);
+ msg((char_u *)line2);
+ msg((char_u *)line3);
} else {
node->wn_u1.index = TRUE;
@@ -4289,9 +4301,9 @@ static void spell_print_node(wordnode_T *node, int depth)
PRINTSOME(line3, depth, " ", 0, 0);
if (node->wn_byte == NUL) {
- msg(line1);
- msg(line2);
- msg(line3);
+ msg((char_u *)line1);
+ msg((char_u *)line2);
+ msg((char_u *)line3);
}
// do the children
@@ -4633,6 +4645,8 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
spin->si_nobreak = true;
} else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) {
spin->si_nosplitsugs = true;
+ } else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) {
+ spin->si_nocompoundsugs = true;
} else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) {
spin->si_nosugfile = true;
} else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) {
@@ -6289,7 +6303,7 @@ static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int
node = *prev;
}
#ifdef SPELL_PRINTTREE
- smsg("Added \"%s\"", word);
+ smsg((char_u *)"Added \"%s\"", word);
spell_print_tree(root->wn_sibling);
#endif
@@ -6312,8 +6326,8 @@ static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int
// 3. When compressed before, added "compress_added" words
// (si_compress_cnt == 1) and the number of free nodes drops below the
// maximum word length.
-#ifndef SPELL_PRINTTREE
- if (spin->si_compress_cnt == 1
+#ifndef SPELL_COMPRESS_ALLWAYS
+ if (spin->si_compress_cnt == 1 // NOLINT(readability/braces)
? spin->si_free_count < MAXWLEN
: spin->si_blocks_cnt >= compress_start)
#endif
@@ -6857,6 +6871,15 @@ static int write_vim_spell(spellinfo_T *spin, char_u *fname)
put_bytes(fd, 0, 4); // <sectionlen>
}
+ // SN_NOCOMPUNDSUGS: nothing
+ // This is used to notify that no suggestions with compounds are to be
+ // made.
+ if (spin->si_nocompoundsugs) {
+ putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID>
+ putc(0, fd); // <sectionflags>
+ put_bytes(fd, 0, 4); // <sectionlen>
+ }
+
// SN_COMPOUND: compound info.
// We don't mark it required, when not supported all compound words will
// be bad words.
@@ -9296,7 +9319,56 @@ static void suggest_try_special(suginfo_T *su)
}
}
+// Measure how much time is spent in each state.
+// Output is dumped in "suggestprof".
+
+#ifdef SUGGEST_PROFILE
+proftime_T current;
+proftime_T total;
+proftime_T times[STATE_FINAL + 1];
+long counts[STATE_FINAL + 1];
+
+ static void
+prof_init(void)
+{
+ for (int i = 0; i <= STATE_FINAL; i++) {
+ profile_zero(&times[i]);
+ counts[i] = 0;
+ }
+ profile_start(&current);
+ profile_start(&total);
+}
+
+// call before changing state
+ static void
+prof_store(state_T state)
+{
+ profile_end(&current);
+ profile_add(&times[state], &current);
+ counts[state]++;
+ profile_start(&current);
+}
+# define PROF_STORE(state) prof_store(state);
+
+ static void
+prof_report(char *name)
+{
+ FILE *fd = fopen("suggestprof", "a");
+
+ profile_end(&total);
+ fprintf(fd, "-----------------------\n");
+ fprintf(fd, "%s: %s\n", name, profile_msg(&total));
+ for (int i = 0; i <= STATE_FINAL; i++) {
+ fprintf(fd, "%d: %s ("%" PRId64)\n", i, profile_msg(&times[i]), counts[i]);
+ }
+ fclose(fd);
+}
+#else
+# define PROF_STORE(state)
+#endif
+
// Try finding suggestions by adding/removing/swapping letters.
+
static void suggest_try_change(suginfo_T *su)
{
char_u fword[MAXWLEN]; // copy of the bad word, case-folded
@@ -9321,7 +9393,14 @@ static void suggest_try_change(suginfo_T *su)
continue;
// Try it for this language. Will add possible suggestions.
+ //
+#ifdef SUGGEST_PROFILE
+ prof_init();
+#endif
suggest_trie_walk(su, lp, fword, false);
+#ifdef SUGGEST_PROFILE
+ prof_report("try_change");
+#endif
}
}
@@ -9455,6 +9534,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// Always past NUL bytes now.
n = (int)sp->ts_state;
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_ENDNUL;
sp->ts_save_badflags = su->su_badflags;
@@ -9494,6 +9574,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
if (sp->ts_curi > len || byts[arridx] != 0) {
// Past bytes in node and/or past NUL bytes.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_ENDNUL;
sp->ts_save_badflags = su->su_badflags;
break;
@@ -9771,6 +9852,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// be possible to compound another (short) word.
try_compound = false;
if (!soundfold
+ && !slang->sl_nocompoundsugs
&& slang->sl_compprog != NULL
&& ((unsigned)flags >> 24) != 0
&& sp->ts_twordlen - sp->ts_splitoff
@@ -9791,21 +9873,21 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// For NOBREAK we never try splitting, it won't make any word
// valid.
- if (slang->sl_nobreak)
+ if (slang->sl_nobreak && !slang->sl_nocompoundsugs) {
try_compound = true;
-
- // If we could add a compound word, and it's also possible to
- // split at this point, do the split first and set
- // TSF_DIDSPLIT to avoid doing it again.
- else if (!fword_ends
- && try_compound
- && (sp->ts_flags & TSF_DIDSPLIT) == 0) {
+ } else if (!fword_ends
+ && try_compound
+ && (sp->ts_flags & TSF_DIDSPLIT) == 0) {
+ // If we could add a compound word, and it's also possible to
+ // split at this point, do the split first and set
+ // TSF_DIDSPLIT to avoid doing it again.
try_compound = false;
sp->ts_flags |= TSF_DIDSPLIT;
--sp->ts_curi; // do the same NUL again
compflags[sp->ts_complen] = NUL;
- } else
+ } else {
sp->ts_flags &= ~TSF_DIDSPLIT;
+ }
if (try_split || try_compound) {
if (!try_compound && (!fword_ends || !goodword_ends)) {
@@ -9846,6 +9928,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
#endif
// Save things to be restored at STATE_SPLITUNDO.
sp->ts_save_badflags = su->su_badflags;
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_SPLITUNDO;
++depth;
@@ -9912,6 +9995,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
byts = pbyts;
idxs = pidxs;
sp->ts_prefixdepth = PFD_PREFIXTREE;
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_NOPREFIX;
}
}
@@ -9924,6 +10008,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
su->su_badflags = sp->ts_save_badflags;
// Continue looking for NUL bytes.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_START;
// In case we went into the prefix tree.
@@ -9938,9 +10023,11 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
&& sp->ts_tcharlen == 0
) {
// The badword ends, can't use STATE_PLAIN.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_DEL;
break;
}
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_PLAIN;
// FALLTHROUGH
@@ -9951,10 +10038,12 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
if (sp->ts_curi > byts[arridx]) {
// Done all bytes at this node, do next state. When still at
// already changed bytes skip the other tricks.
- if (sp->ts_fidx >= sp->ts_fidxtry)
+ PROF_STORE(sp->ts_state)
+ if (sp->ts_fidx >= sp->ts_fidxtry) {
sp->ts_state = STATE_DEL;
- else
+ } else {
sp->ts_state = STATE_FINAL;
+ }
} else {
arridx += sp->ts_curi++;
c = byts[arridx];
@@ -10086,10 +10175,12 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// When past the first byte of a multi-byte char don't try
// delete/insert/swap a character.
if (has_mbyte && sp->ts_tcharlen > 0) {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_FINAL;
break;
}
// Try skipping one character in the bad word (delete it).
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_INS_PREP;
sp->ts_curi = 1;
if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
@@ -10137,6 +10228,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
if (sp->ts_flags & TSF_DIDDEL) {
// If we just deleted a byte then inserting won't make sense,
// a substitute is always cheaper.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_SWAP;
break;
}
@@ -10146,11 +10238,13 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
for (;; ) {
if (sp->ts_curi > byts[n]) {
// Only NUL bytes at this node, go to next state.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_SWAP;
break;
}
if (byts[n + sp->ts_curi] != NUL) {
// Found a byte to insert.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_INS;
break;
}
@@ -10166,6 +10260,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
n = sp->ts_arridx;
if (sp->ts_curi > byts[n]) {
// Done all bytes at this node, go to next state.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_SWAP;
break;
}
@@ -10226,6 +10321,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
c = *p;
if (c == NUL) {
// End of word, can't swap or replace.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_FINAL;
break;
}
@@ -10233,6 +10329,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// Don't swap if the first character is not a word character.
// SWAP3 etc. also don't make sense then.
if (!soundfold && !spell_iswordp(p, curwin)) {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
break;
}
@@ -10257,6 +10354,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// When the second character is NUL we can't swap.
if (c2 == NUL) {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
break;
}
@@ -10264,6 +10362,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// When characters are identical, swap won't do anything.
// Also get here if the second char is not a word character.
if (c == c2) {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_SWAP3;
break;
}
@@ -10274,6 +10373,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
sp->ts_twordlen, tword, fword + sp->ts_fidx,
c, c2);
#endif
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_UNSWAP;
++depth;
if (has_mbyte) {
@@ -10288,6 +10388,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
}
} else
// If this swap doesn't work then SWAP3 won't either.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
break;
@@ -10335,6 +10436,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// Also get here when the third character is not a word character.
// Second character may any char: "a.b" -> "b.a"
if (c == c3 || c3 == NUL) {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
break;
}
@@ -10345,6 +10447,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
sp->ts_twordlen, tword, fword + sp->ts_fidx,
c, c3);
#endif
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_UNSWAP3;
++depth;
if (has_mbyte) {
@@ -10358,8 +10461,10 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
p[2] = c;
stack[depth].ts_fidxtry = sp->ts_fidx + 3;
}
- } else
+ } else {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
+ }
break;
case STATE_UNSWAP3:
@@ -10385,6 +10490,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
if (!soundfold && !spell_iswordp(p, curwin)) {
// Middle char is not a word char, skip the rotate. First and
// third char were already checked at swap and swap3.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
break;
}
@@ -10399,6 +10505,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
sp->ts_twordlen, tword, fword + sp->ts_fidx,
p[0], p[1], p[2]);
#endif
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_UNROT3L;
++depth;
p = fword + sp->ts_fidx;
@@ -10417,8 +10524,10 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
p[2] = c;
stack[depth].ts_fidxtry = sp->ts_fidx + 3;
}
- } else
+ } else {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
+ }
break;
case STATE_UNROT3L:
@@ -10448,6 +10557,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
sp->ts_twordlen, tword, fword + sp->ts_fidx,
p[0], p[1], p[2]);
#endif
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_UNROT3R;
++depth;
p = fword + sp->ts_fidx;
@@ -10466,8 +10576,10 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
*p = c;
stack[depth].ts_fidxtry = sp->ts_fidx + 3;
}
- } else
+ } else {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_INI;
+ }
break;
case STATE_UNROT3R:
@@ -10497,6 +10609,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
if ((lp->lp_replang == NULL && !soundfold)
|| sp->ts_score + SCORE_REP >= su->su_maxscore
|| sp->ts_fidx < sp->ts_fidxtry) {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_FINAL;
break;
}
@@ -10509,10 +10622,12 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
if (sp->ts_curi < 0) {
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_FINAL;
break;
}
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP;
// FALLTHROUGH
@@ -10542,6 +10657,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
ftp->ft_from, ftp->ft_to);
#endif
// Need to undo this afterwards.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP_UNDO;
// Change the "from" to the "to" string.
@@ -10561,6 +10677,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
// No (more) matches.
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_FINAL;
break;
@@ -10580,6 +10697,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
repextra -= tl - fl;
}
memmove(p, ftp->ft_from, fl);
+ PROF_STORE(sp->ts_state)
sp->ts_state = STATE_REP;
break;
@@ -10990,7 +11108,13 @@ static void suggest_try_soundalike(suginfo_T *su)
// try all kinds of inserts/deletes/swaps/etc.
// TODO: also soundfold the next words, so that we can try joining
// and splitting
+#ifdef SUGGEST_PROFILE
+ prof_init();
+#endif
suggest_trie_walk(su, lp, salword, true);
+#ifdef SUGGEST_PROFILE
+ prof_report("soundalike");
+#endif
}
}
}
@@ -13340,3 +13464,4 @@ int expand_spelling(linenr_T lnum, char_u *pat, char_u ***matchp)
return ga.ga_len;
}
+