aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/spellfile.c
diff options
context:
space:
mode:
authorJames McCoy <jamessan@jamessan.com>2020-08-08 08:57:35 -0400
committerJames McCoy <jamessan@jamessan.com>2020-08-08 08:57:35 -0400
commit840c12c10741d8f70e1787534fb6ea6d2b70edee (patch)
treef89ad27acbbf0b36db7ac08eeae0b8362da1fabb /src/nvim/spellfile.c
parente813ec79c201c85c5af3b10c051ae92ab5cb8606 (diff)
parentf26df8bb66158baacb79c79822babaf137607cd6 (diff)
downloadrneovim-840c12c10741d8f70e1787534fb6ea6d2b70edee.tar.gz
rneovim-840c12c10741d8f70e1787534fb6ea6d2b70edee.tar.bz2
rneovim-840c12c10741d8f70e1787534fb6ea6d2b70edee.zip
Merge remote-tracking branch 'upstream/master' into libcallnr
Diffstat (limited to 'src/nvim/spellfile.c')
-rw-r--r--src/nvim/spellfile.c207
1 files changed, 76 insertions, 131 deletions
diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c
index 01daafa09e..6b9348e55d 100644
--- a/src/nvim/spellfile.c
+++ b/src/nvim/spellfile.c
@@ -265,6 +265,8 @@
// follow; never used in prefix tree
#define BY_SPECIAL BY_FLAGS2 // highest special byte value
+#define ZERO_FLAG 65009 // used when flag is zero: "0"
+
// Flags used in .spl file for soundsalike flags.
#define SAL_F0LLOWUP 1
#define SAL_COLLAPSE 2
@@ -302,9 +304,6 @@
static char *e_spell_trunc = N_("E758: Truncated spell file");
static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
static char *e_affname = N_("Affix name too long in %s line %d: %s");
-static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
-static char *e_affrange = N_(
- "E762: Character in FOL, LOW or UPP is out of range");
static char *msg_compressing = N_("Compressing word tree...");
#define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff
@@ -623,7 +622,7 @@ spell_load_file (
switch (scms_ret) {
case SP_FORMERROR:
case SP_TRUNCERROR: {
- emsgf(_("E757: This does not look like a spell file"));
+ emsgf("%s", _("E757: This does not look like a spell file"));
goto endFAIL;
}
case SP_OTHERERROR: {
@@ -1135,7 +1134,6 @@ static int read_sal_section(FILE *fd, slang_T *slang)
salitem_T *smp;
int ccnt;
char_u *p;
- int c = NUL;
slang->sl_sofo = false;
@@ -1159,7 +1157,9 @@ static int read_sal_section(FILE *fd, slang_T *slang)
ga_grow(gap, cnt + 1);
// <sal> : <salfromlen> <salfrom> <saltolen> <salto>
- for (; gap->ga_len < cnt; ++gap->ga_len) {
+ for (; gap->ga_len < cnt; gap->ga_len++) {
+ int c = NUL;
+
smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
ccnt = getc(fd); // <salfromlen>
if (ccnt < 0)
@@ -1236,17 +1236,14 @@ static int read_sal_section(FILE *fd, slang_T *slang)
p = xmalloc(1);
p[0] = NUL;
smp->sm_lead = p;
+ smp->sm_lead_w = mb_str2wide(smp->sm_lead);
smp->sm_leadlen = 0;
smp->sm_oneof = NULL;
+ smp->sm_oneof_w = NULL;
smp->sm_rules = p;
smp->sm_to = NULL;
- if (has_mbyte) {
- smp->sm_lead_w = mb_str2wide(smp->sm_lead);
- smp->sm_leadlen = 0;
- smp->sm_oneof_w = NULL;
- smp->sm_to_w = NULL;
- }
- ++gap->ga_len;
+ smp->sm_to_w = NULL;
+ gap->ga_len++;
}
// Fill the first-index table.
@@ -1387,8 +1384,7 @@ static int read_compound(FILE *fd, slang_T *slang, int len)
// Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
// Conversion to utf-8 may double the size.
c = todo * 2 + 7;
- if (enc_utf8)
- c += todo * 2;
+ c += todo * 2;
pat = xmalloc(c);
// We also need a list of all flags that can appear at the start and one
@@ -1711,7 +1707,6 @@ read_tree_node (
if (c == BY_NOFLAGS && !prefixtree) {
// No flags, all regions.
idxs[idx] = 0;
- c = 0;
} else if (c != BY_INDEX) {
if (prefixtree) {
// Read the optional pflags byte, the prefix ID and the
@@ -1787,7 +1782,7 @@ spell_reload_one (
bool didit = false;
for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
- if (path_full_compare(fname, slang->sl_fname, false) == kEqualFiles) {
+ if (path_full_compare(fname, slang->sl_fname, false, true) == kEqualFiles) {
slang_clear(slang);
if (spell_load_file(fname, NULL, slang, false) == NULL)
// reloading failed, clear the language
@@ -1816,7 +1811,8 @@ spell_reload_one (
#define CONDIT_SUF 4 // add a suffix for matching flags
#define CONDIT_AFF 8 // word already has an affix
-// Tunable parameters for when the tree is compressed. See 'mkspellmem'.
+// Tunable parameters for when the tree is compressed. Filled from the
+// 'mkspellmem' option.
static long compress_start = 30000; // memory / SBLOCKSIZE
static long compress_inc = 100; // memory / SBLOCKSIZE
static long compress_added = 500000; // word count
@@ -2626,19 +2622,6 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
spin->si_clear_chartab = false;
}
- // Don't write a word table for an ASCII file, so that we don't check
- // for conflicts with a word table that matches 'encoding'.
- // Don't write one for utf-8 either, we use utf_*() and
- // mb_get_class(), the list of chars in the file will be incomplete.
- if (!spin->si_ascii
- && !enc_utf8
- ) {
- if (fol == NULL || low == NULL || upp == NULL)
- smsg(_("Missing FOL/LOW/UPP line in %s"), fname);
- else
- (void)set_spell_chartab(fol, low, upp);
- }
-
xfree(fol);
xfree(low);
xfree(upp);
@@ -2656,8 +2639,9 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
}
if (compsylmax != 0) {
- if (syllable == NULL)
- smsg(_("COMPOUNDSYLMAX used without SYLLABLE"));
+ if (syllable == NULL) {
+ smsg("%s", _("COMPOUNDSYLMAX used without SYLLABLE"));
+ }
aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
spin->si_compsylmax = compsylmax;
}
@@ -2783,6 +2767,7 @@ static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum
}
// Get one affix name from "*pp" and advance the pointer.
+// Returns ZERO_FLAG for "0".
// Returns zero for an error, still advances the pointer then.
static unsigned get_affitem(int flagtype, char_u **pp)
{
@@ -2794,6 +2779,9 @@ static unsigned get_affitem(int flagtype, char_u **pp)
return 0;
}
res = getdigits_int(pp, true, 0);
+ if (res == 0) {
+ res = ZERO_FLAG;
+ }
} else {
res = mb_ptr2char_adv((const char_u **)pp);
if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
@@ -2915,10 +2903,15 @@ static bool flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
int digits = getdigits_int(&p, true, 0);
assert(digits >= 0);
n = (unsigned int)digits;
- if (n == flag)
+ if (n == 0) {
+ n = ZERO_FLAG;
+ }
+ if (n == flag) {
return true;
- if (*p != NUL) // skip over comma
- ++p;
+ }
+ if (*p != NUL) { // skip over comma
+ p++;
+ }
}
break;
}
@@ -3024,6 +3017,7 @@ static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile)
char_u message[MAXLINELEN + MAXWLEN];
int flags;
int duplicate = 0;
+ Timestamp last_msg_time = 0;
// Open the file.
fd = os_fopen((char *)fname, "r");
@@ -3099,18 +3093,22 @@ static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile)
continue;
}
- // This takes time, print a message every 10000 words.
+ // This takes time, print a message every 10000 words, but not more
+ // often than once per second.
if (spin->si_verbose && spin->si_msg_count > 10000) {
spin->si_msg_count = 0;
- vim_snprintf((char *)message, sizeof(message),
- _("line %6d, word %6ld - %s"),
- lnum, spin->si_foldwcount + spin->si_keepwcount, w);
- msg_start();
- msg_puts_long_attr(message, 0);
- msg_clr_eos();
- msg_didout = FALSE;
- msg_col = 0;
- ui_flush();
+ if (os_time() > last_msg_time) {
+ last_msg_time = os_time();
+ vim_snprintf((char *)message, sizeof(message),
+ _("line %6d, word %6ld - %s"),
+ lnum, spin->si_foldwcount + spin->si_keepwcount, w);
+ msg_start();
+ msg_puts_long_attr(message, 0);
+ msg_clr_eos();
+ msg_didout = false;
+ msg_col = 0;
+ ui_flush();
+ }
}
// Store the word in the hashtable to be able to find duplicates.
@@ -3923,9 +3921,10 @@ static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int
++spin->si_msg_count;
if (spin->si_compress_cnt > 1) {
- if (--spin->si_compress_cnt == 1)
+ if (--spin->si_compress_cnt == 1) {
// Did enough words to lower the block count limit.
spin->si_blocks_cnt += compress_inc;
+ }
}
// When we have allocated lots of memory we need to compress the word tree
@@ -3964,9 +3963,10 @@ static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int
// compression useful, or one of them is small, which means
// compression goes fast. But when filling the soundfold word tree
// there is no keep-case tree.
- wordtree_compress(spin, spin->si_foldroot);
- if (affixID >= 0)
- wordtree_compress(spin, spin->si_keeproot);
+ wordtree_compress(spin, spin->si_foldroot, "case-folded");
+ if (affixID >= 0) {
+ wordtree_compress(spin, spin->si_keeproot, "keep-case");
+ }
}
return OK;
@@ -3999,6 +3999,7 @@ static wordnode_T *get_wordnode(spellinfo_T *spin)
// siblings.
// Returns the number of nodes actually freed.
static int deref_wordnode(spellinfo_T *spin, wordnode_T *node)
+ FUNC_ATTR_NONNULL_ALL
{
wordnode_T *np;
int cnt = 0;
@@ -4018,6 +4019,7 @@ static int deref_wordnode(spellinfo_T *spin, wordnode_T *node)
// Free a wordnode_T for re-use later.
// Only the "wn_child" field becomes invalid.
static void free_wordnode(spellinfo_T *spin, wordnode_T *n)
+ FUNC_ATTR_NONNULL_ALL
{
n->wn_child = spin->si_first_free;
spin->si_first_free = n;
@@ -4025,18 +4027,19 @@ static void free_wordnode(spellinfo_T *spin, wordnode_T *n)
}
// Compress a tree: find tails that are identical and can be shared.
-static void wordtree_compress(spellinfo_T *spin, wordnode_T *root)
+static void wordtree_compress(spellinfo_T *spin, wordnode_T *root,
+ const char *name)
+ FUNC_ATTR_NONNULL_ALL
{
hashtab_T ht;
- int n;
- int tot = 0;
- int perc;
+ long tot = 0;
+ long perc;
// Skip the root itself, it's not actually used. The first sibling is the
// start of the tree.
if (root->wn_sibling != NULL) {
hash_init(&ht);
- n = node_compress(spin, root->wn_sibling, &ht, &tot);
+ const long n = node_compress(spin, root->wn_sibling, &ht, &tot);
#ifndef SPELL_PRINTTREE
if (spin->si_verbose || p_verbose > 2)
@@ -4049,8 +4052,8 @@ static void wordtree_compress(spellinfo_T *spin, wordnode_T *root)
else
perc = (tot - n) * 100 / tot;
vim_snprintf((char *)IObuff, IOSIZE,
- _("Compressed %d of %d nodes; %d (%d%%) remaining"),
- n, tot, tot - n, perc);
+ _("Compressed %s of %ld nodes; %ld (%ld%%) remaining"),
+ name, tot, tot - n, perc);
spell_message(spin, IObuff);
}
#ifdef SPELL_PRINTTREE
@@ -4062,23 +4065,23 @@ static void wordtree_compress(spellinfo_T *spin, wordnode_T *root)
// Compress a node, its siblings and its children, depth first.
// Returns the number of compressed nodes.
-static int
-node_compress (
+static long node_compress(
spellinfo_T *spin,
wordnode_T *node,
hashtab_T *ht,
- int *tot // total count of nodes before compressing,
+ long *tot // total count of nodes before compressing,
// incremented while going through the tree
)
+ FUNC_ATTR_NONNULL_ALL
{
wordnode_T *np;
wordnode_T *tp;
wordnode_T *child;
hash_T hash;
hashitem_T *hi;
- int len = 0;
+ long len = 0;
unsigned nr, n;
- int compressed = 0;
+ long compressed = 0;
// Go through the list of siblings. Compress each child and then try
// finding an identical child to replace it.
@@ -4151,7 +4154,7 @@ node_compress (
node->wn_u1.hashkey[5] = NUL;
// Check for CTRL-C pressed now and then.
- fast_breakcheck();
+ veryfast_breakcheck();
return compressed;
}
@@ -4719,7 +4722,8 @@ static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
// of the code for the soundfolding stuff.
// It might have been done already by spell_reload_one().
for (slang = first_lang; slang != NULL; slang = slang->sl_next) {
- if (path_full_compare(wfname, slang->sl_fname, false) == kEqualFiles) {
+ if (path_full_compare(wfname, slang->sl_fname, false, true)
+ == kEqualFiles) {
break;
}
}
@@ -4757,7 +4761,7 @@ static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
// Compress the soundfold trie.
spell_message(spin, (char_u *)_(msg_compressing));
- wordtree_compress(spin, spin->si_foldroot);
+ wordtree_compress(spin, spin->si_foldroot, "case-folded");
// Write the .sug file.
// Make the file name by changing ".spl" to ".sug".
@@ -5103,7 +5107,8 @@ mkspell (
spin.si_newcompID = 127; // start compound ID at first maximum
// default: fnames[0] is output file, following are input files
- innames = &fnames[1];
+ // When "fcount" is 1 there is only one file.
+ innames = &fnames[fcount == 1 ? 0 : 1];
incount = fcount - 1;
wfname = xmalloc(MAXPATHL);
@@ -5113,12 +5118,10 @@ mkspell (
if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) {
// For ":mkspell path/en.latin1.add" output file is
// "path/en.latin1.add.spl".
- innames = &fnames[0];
incount = 1;
vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
} else if (fcount == 1) {
// For ":mkspell path/vim" output file is "path/vim.latin1.spl".
- innames = &fnames[0];
incount = 1;
vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
@@ -5228,9 +5231,9 @@ mkspell (
if (!error && !got_int) {
// Combine tails in the tree.
spell_message(&spin, (char_u *)_(msg_compressing));
- wordtree_compress(&spin, spin.si_foldroot);
- wordtree_compress(&spin, spin.si_keeproot);
- wordtree_compress(&spin, spin.si_prefroot);
+ wordtree_compress(&spin, spin.si_foldroot, "case-folded");
+ wordtree_compress(&spin, spin.si_keeproot, "keep-case");
+ wordtree_compress(&spin, spin.si_prefroot, "prefixes");
}
if (!error && !got_int) {
@@ -5282,7 +5285,8 @@ theend:
// Display a message for spell file processing when 'verbose' is set or using
// ":mkspell". "str" can be IObuff.
-static void spell_message(spellinfo_T *spin, char_u *str)
+static void spell_message(const spellinfo_T *spin, char_u *str)
+ FUNC_ATTR_NONNULL_ALL
{
if (spin->si_verbose || p_verbose > 2) {
if (!spin->si_verbose)
@@ -5524,65 +5528,6 @@ static void init_spellfile(void)
}
}
-// Set the spell character tables from strings in the affix file.
-static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp)
-{
- // We build the new tables here first, so that we can compare with the
- // previous one.
- spelltab_T new_st;
- char_u *pf = fol, *pl = low, *pu = upp;
- int f, l, u;
-
- clear_spell_chartab(&new_st);
-
- while (*pf != NUL) {
- if (*pl == NUL || *pu == NUL) {
- EMSG(_(e_affform));
- return FAIL;
- }
- f = mb_ptr2char_adv((const char_u **)&pf);
- l = mb_ptr2char_adv((const char_u **)&pl);
- u = mb_ptr2char_adv((const char_u **)&pu);
- // Every character that appears is a word character.
- if (f < 256)
- new_st.st_isw[f] = true;
- if (l < 256)
- new_st.st_isw[l] = true;
- if (u < 256)
- new_st.st_isw[u] = true;
-
- // if "LOW" and "FOL" are not the same the "LOW" char needs
- // case-folding
- if (l < 256 && l != f) {
- if (f >= 256) {
- EMSG(_(e_affrange));
- return FAIL;
- }
- new_st.st_fold[l] = f;
- }
-
- // if "UPP" and "FOL" are not the same the "UPP" char needs
- // case-folding, it's upper case and the "UPP" is the upper case of
- // "FOL" .
- if (u < 256 && u != f) {
- if (f >= 256) {
- EMSG(_(e_affrange));
- return FAIL;
- }
- new_st.st_fold[u] = f;
- new_st.st_isu[u] = true;
- new_st.st_upper[f] = u;
- }
- }
-
- if (*pl != NUL || *pu != NUL) {
- EMSG(_(e_affform));
- return FAIL;
- }
-
- return set_spell_finish(&new_st);
-}
-
// Set the spell character tables from strings in the .spl file.
static void
set_spell_charflags (