diff options
Diffstat (limited to 'src/nvim/spellfile.c')
-rw-r--r-- | src/nvim/spellfile.c | 207 |
1 files changed, 76 insertions, 131 deletions
diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c index 01daafa09e..6b9348e55d 100644 --- a/src/nvim/spellfile.c +++ b/src/nvim/spellfile.c @@ -265,6 +265,8 @@ // follow; never used in prefix tree #define BY_SPECIAL BY_FLAGS2 // highest special byte value +#define ZERO_FLAG 65009 // used when flag is zero: "0" + // Flags used in .spl file for soundsalike flags. #define SAL_F0LLOWUP 1 #define SAL_COLLAPSE 2 @@ -302,9 +304,6 @@ static char *e_spell_trunc = N_("E758: Truncated spell file"); static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); static char *e_affname = N_("Affix name too long in %s line %d: %s"); -static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); -static char *e_affrange = N_( - "E762: Character in FOL, LOW or UPP is out of range"); static char *msg_compressing = N_("Compressing word tree..."); #define MAXLINELEN 500 // Maximum length in bytes of a line in a .aff @@ -623,7 +622,7 @@ spell_load_file ( switch (scms_ret) { case SP_FORMERROR: case SP_TRUNCERROR: { - emsgf(_("E757: This does not look like a spell file")); + emsgf("%s", _("E757: This does not look like a spell file")); goto endFAIL; } case SP_OTHERERROR: { @@ -1135,7 +1134,6 @@ static int read_sal_section(FILE *fd, slang_T *slang) salitem_T *smp; int ccnt; char_u *p; - int c = NUL; slang->sl_sofo = false; @@ -1159,7 +1157,9 @@ static int read_sal_section(FILE *fd, slang_T *slang) ga_grow(gap, cnt + 1); // <sal> : <salfromlen> <salfrom> <saltolen> <salto> - for (; gap->ga_len < cnt; ++gap->ga_len) { + for (; gap->ga_len < cnt; gap->ga_len++) { + int c = NUL; + smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; ccnt = getc(fd); // <salfromlen> if (ccnt < 0) @@ -1236,17 +1236,14 @@ static int read_sal_section(FILE *fd, slang_T *slang) p = xmalloc(1); p[0] = NUL; smp->sm_lead = p; + smp->sm_lead_w = mb_str2wide(smp->sm_lead); smp->sm_leadlen = 0; smp->sm_oneof = NULL; + smp->sm_oneof_w = NULL; smp->sm_rules = p; smp->sm_to = NULL; - if (has_mbyte) { - smp->sm_lead_w = mb_str2wide(smp->sm_lead); - smp->sm_leadlen = 0; - smp->sm_oneof_w = NULL; - smp->sm_to_w = NULL; - } - ++gap->ga_len; + smp->sm_to_w = NULL; + gap->ga_len++; } // Fill the first-index table. @@ -1387,8 +1384,7 @@ static int read_compound(FILE *fd, slang_T *slang, int len) // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. // Conversion to utf-8 may double the size. c = todo * 2 + 7; - if (enc_utf8) - c += todo * 2; + c += todo * 2; pat = xmalloc(c); // We also need a list of all flags that can appear at the start and one @@ -1711,7 +1707,6 @@ read_tree_node ( if (c == BY_NOFLAGS && !prefixtree) { // No flags, all regions. idxs[idx] = 0; - c = 0; } else if (c != BY_INDEX) { if (prefixtree) { // Read the optional pflags byte, the prefix ID and the @@ -1787,7 +1782,7 @@ spell_reload_one ( bool didit = false; for (slang = first_lang; slang != NULL; slang = slang->sl_next) { - if (path_full_compare(fname, slang->sl_fname, false) == kEqualFiles) { + if (path_full_compare(fname, slang->sl_fname, false, true) == kEqualFiles) { slang_clear(slang); if (spell_load_file(fname, NULL, slang, false) == NULL) // reloading failed, clear the language @@ -1816,7 +1811,8 @@ spell_reload_one ( #define CONDIT_SUF 4 // add a suffix for matching flags #define CONDIT_AFF 8 // word already has an affix -// Tunable parameters for when the tree is compressed. See 'mkspellmem'. +// Tunable parameters for when the tree is compressed. Filled from the +// 'mkspellmem' option. static long compress_start = 30000; // memory / SBLOCKSIZE static long compress_inc = 100; // memory / SBLOCKSIZE static long compress_added = 500000; // word count @@ -2626,19 +2622,6 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) spin->si_clear_chartab = false; } - // Don't write a word table for an ASCII file, so that we don't check - // for conflicts with a word table that matches 'encoding'. - // Don't write one for utf-8 either, we use utf_*() and - // mb_get_class(), the list of chars in the file will be incomplete. - if (!spin->si_ascii - && !enc_utf8 - ) { - if (fol == NULL || low == NULL || upp == NULL) - smsg(_("Missing FOL/LOW/UPP line in %s"), fname); - else - (void)set_spell_chartab(fol, low, upp); - } - xfree(fol); xfree(low); xfree(upp); @@ -2656,8 +2639,9 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) } if (compsylmax != 0) { - if (syllable == NULL) - smsg(_("COMPOUNDSYLMAX used without SYLLABLE")); + if (syllable == NULL) { + smsg("%s", _("COMPOUNDSYLMAX used without SYLLABLE")); + } aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); spin->si_compsylmax = compsylmax; } @@ -2783,6 +2767,7 @@ static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum } // Get one affix name from "*pp" and advance the pointer. +// Returns ZERO_FLAG for "0". // Returns zero for an error, still advances the pointer then. static unsigned get_affitem(int flagtype, char_u **pp) { @@ -2794,6 +2779,9 @@ static unsigned get_affitem(int flagtype, char_u **pp) return 0; } res = getdigits_int(pp, true, 0); + if (res == 0) { + res = ZERO_FLAG; + } } else { res = mb_ptr2char_adv((const char_u **)pp); if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG @@ -2915,10 +2903,15 @@ static bool flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) int digits = getdigits_int(&p, true, 0); assert(digits >= 0); n = (unsigned int)digits; - if (n == flag) + if (n == 0) { + n = ZERO_FLAG; + } + if (n == flag) { return true; - if (*p != NUL) // skip over comma - ++p; + } + if (*p != NUL) { // skip over comma + p++; + } } break; } @@ -3024,6 +3017,7 @@ static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) char_u message[MAXLINELEN + MAXWLEN]; int flags; int duplicate = 0; + Timestamp last_msg_time = 0; // Open the file. fd = os_fopen((char *)fname, "r"); @@ -3099,18 +3093,22 @@ static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) continue; } - // This takes time, print a message every 10000 words. + // This takes time, print a message every 10000 words, but not more + // often than once per second. if (spin->si_verbose && spin->si_msg_count > 10000) { spin->si_msg_count = 0; - vim_snprintf((char *)message, sizeof(message), - _("line %6d, word %6ld - %s"), - lnum, spin->si_foldwcount + spin->si_keepwcount, w); - msg_start(); - msg_puts_long_attr(message, 0); - msg_clr_eos(); - msg_didout = FALSE; - msg_col = 0; - ui_flush(); + if (os_time() > last_msg_time) { + last_msg_time = os_time(); + vim_snprintf((char *)message, sizeof(message), + _("line %6d, word %6ld - %s"), + lnum, spin->si_foldwcount + spin->si_keepwcount, w); + msg_start(); + msg_puts_long_attr(message, 0); + msg_clr_eos(); + msg_didout = false; + msg_col = 0; + ui_flush(); + } } // Store the word in the hashtable to be able to find duplicates. @@ -3923,9 +3921,10 @@ static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int ++spin->si_msg_count; if (spin->si_compress_cnt > 1) { - if (--spin->si_compress_cnt == 1) + if (--spin->si_compress_cnt == 1) { // Did enough words to lower the block count limit. spin->si_blocks_cnt += compress_inc; + } } // When we have allocated lots of memory we need to compress the word tree @@ -3964,9 +3963,10 @@ static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *root, int // compression useful, or one of them is small, which means // compression goes fast. But when filling the soundfold word tree // there is no keep-case tree. - wordtree_compress(spin, spin->si_foldroot); - if (affixID >= 0) - wordtree_compress(spin, spin->si_keeproot); + wordtree_compress(spin, spin->si_foldroot, "case-folded"); + if (affixID >= 0) { + wordtree_compress(spin, spin->si_keeproot, "keep-case"); + } } return OK; @@ -3999,6 +3999,7 @@ static wordnode_T *get_wordnode(spellinfo_T *spin) // siblings. // Returns the number of nodes actually freed. static int deref_wordnode(spellinfo_T *spin, wordnode_T *node) + FUNC_ATTR_NONNULL_ALL { wordnode_T *np; int cnt = 0; @@ -4018,6 +4019,7 @@ static int deref_wordnode(spellinfo_T *spin, wordnode_T *node) // Free a wordnode_T for re-use later. // Only the "wn_child" field becomes invalid. static void free_wordnode(spellinfo_T *spin, wordnode_T *n) + FUNC_ATTR_NONNULL_ALL { n->wn_child = spin->si_first_free; spin->si_first_free = n; @@ -4025,18 +4027,19 @@ static void free_wordnode(spellinfo_T *spin, wordnode_T *n) } // Compress a tree: find tails that are identical and can be shared. -static void wordtree_compress(spellinfo_T *spin, wordnode_T *root) +static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, + const char *name) + FUNC_ATTR_NONNULL_ALL { hashtab_T ht; - int n; - int tot = 0; - int perc; + long tot = 0; + long perc; // Skip the root itself, it's not actually used. The first sibling is the // start of the tree. if (root->wn_sibling != NULL) { hash_init(&ht); - n = node_compress(spin, root->wn_sibling, &ht, &tot); + const long n = node_compress(spin, root->wn_sibling, &ht, &tot); #ifndef SPELL_PRINTTREE if (spin->si_verbose || p_verbose > 2) @@ -4049,8 +4052,8 @@ static void wordtree_compress(spellinfo_T *spin, wordnode_T *root) else perc = (tot - n) * 100 / tot; vim_snprintf((char *)IObuff, IOSIZE, - _("Compressed %d of %d nodes; %d (%d%%) remaining"), - n, tot, tot - n, perc); + _("Compressed %s of %ld nodes; %ld (%ld%%) remaining"), + name, tot, tot - n, perc); spell_message(spin, IObuff); } #ifdef SPELL_PRINTTREE @@ -4062,23 +4065,23 @@ static void wordtree_compress(spellinfo_T *spin, wordnode_T *root) // Compress a node, its siblings and its children, depth first. // Returns the number of compressed nodes. -static int -node_compress ( +static long node_compress( spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, - int *tot // total count of nodes before compressing, + long *tot // total count of nodes before compressing, // incremented while going through the tree ) + FUNC_ATTR_NONNULL_ALL { wordnode_T *np; wordnode_T *tp; wordnode_T *child; hash_T hash; hashitem_T *hi; - int len = 0; + long len = 0; unsigned nr, n; - int compressed = 0; + long compressed = 0; // Go through the list of siblings. Compress each child and then try // finding an identical child to replace it. @@ -4151,7 +4154,7 @@ node_compress ( node->wn_u1.hashkey[5] = NUL; // Check for CTRL-C pressed now and then. - fast_breakcheck(); + veryfast_breakcheck(); return compressed; } @@ -4719,7 +4722,8 @@ static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname) // of the code for the soundfolding stuff. // It might have been done already by spell_reload_one(). for (slang = first_lang; slang != NULL; slang = slang->sl_next) { - if (path_full_compare(wfname, slang->sl_fname, false) == kEqualFiles) { + if (path_full_compare(wfname, slang->sl_fname, false, true) + == kEqualFiles) { break; } } @@ -4757,7 +4761,7 @@ static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname) // Compress the soundfold trie. spell_message(spin, (char_u *)_(msg_compressing)); - wordtree_compress(spin, spin->si_foldroot); + wordtree_compress(spin, spin->si_foldroot, "case-folded"); // Write the .sug file. // Make the file name by changing ".spl" to ".sug". @@ -5103,7 +5107,8 @@ mkspell ( spin.si_newcompID = 127; // start compound ID at first maximum // default: fnames[0] is output file, following are input files - innames = &fnames[1]; + // When "fcount" is 1 there is only one file. + innames = &fnames[fcount == 1 ? 0 : 1]; incount = fcount - 1; wfname = xmalloc(MAXPATHL); @@ -5113,12 +5118,10 @@ mkspell ( if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) { // For ":mkspell path/en.latin1.add" output file is // "path/en.latin1.add.spl". - innames = &fnames[0]; incount = 1; vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); } else if (fcount == 1) { // For ":mkspell path/vim" output file is "path/vim.latin1.spl". - innames = &fnames[0]; incount = 1; vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); @@ -5228,9 +5231,9 @@ mkspell ( if (!error && !got_int) { // Combine tails in the tree. spell_message(&spin, (char_u *)_(msg_compressing)); - wordtree_compress(&spin, spin.si_foldroot); - wordtree_compress(&spin, spin.si_keeproot); - wordtree_compress(&spin, spin.si_prefroot); + wordtree_compress(&spin, spin.si_foldroot, "case-folded"); + wordtree_compress(&spin, spin.si_keeproot, "keep-case"); + wordtree_compress(&spin, spin.si_prefroot, "prefixes"); } if (!error && !got_int) { @@ -5282,7 +5285,8 @@ theend: // Display a message for spell file processing when 'verbose' is set or using // ":mkspell". "str" can be IObuff. -static void spell_message(spellinfo_T *spin, char_u *str) +static void spell_message(const spellinfo_T *spin, char_u *str) + FUNC_ATTR_NONNULL_ALL { if (spin->si_verbose || p_verbose > 2) { if (!spin->si_verbose) @@ -5524,65 +5528,6 @@ static void init_spellfile(void) } } -// Set the spell character tables from strings in the affix file. -static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp) -{ - // We build the new tables here first, so that we can compare with the - // previous one. - spelltab_T new_st; - char_u *pf = fol, *pl = low, *pu = upp; - int f, l, u; - - clear_spell_chartab(&new_st); - - while (*pf != NUL) { - if (*pl == NUL || *pu == NUL) { - EMSG(_(e_affform)); - return FAIL; - } - f = mb_ptr2char_adv((const char_u **)&pf); - l = mb_ptr2char_adv((const char_u **)&pl); - u = mb_ptr2char_adv((const char_u **)&pu); - // Every character that appears is a word character. - if (f < 256) - new_st.st_isw[f] = true; - if (l < 256) - new_st.st_isw[l] = true; - if (u < 256) - new_st.st_isw[u] = true; - - // if "LOW" and "FOL" are not the same the "LOW" char needs - // case-folding - if (l < 256 && l != f) { - if (f >= 256) { - EMSG(_(e_affrange)); - return FAIL; - } - new_st.st_fold[l] = f; - } - - // if "UPP" and "FOL" are not the same the "UPP" char needs - // case-folding, it's upper case and the "UPP" is the upper case of - // "FOL" . - if (u < 256 && u != f) { - if (f >= 256) { - EMSG(_(e_affrange)); - return FAIL; - } - new_st.st_fold[u] = f; - new_st.st_isu[u] = true; - new_st.st_upper[f] = u; - } - } - - if (*pl != NUL || *pu != NUL) { - EMSG(_(e_affform)); - return FAIL; - } - - return set_spell_finish(&new_st); -} - // Set the spell character tables from strings in the .spl file. static void set_spell_charflags ( |