diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/change.c | 3 | ||||
-rw-r--r-- | src/nvim/edit.c | 87 | ||||
-rw-r--r-- | src/nvim/mbyte.c | 140 | ||||
-rw-r--r-- | src/nvim/ops.c | 113 | ||||
-rw-r--r-- | src/nvim/option.c | 3 | ||||
-rw-r--r-- | src/nvim/option_defs.h | 3 | ||||
-rw-r--r-- | src/nvim/testdir/test_cjk_linebreak.vim | 97 |
7 files changed, 361 insertions, 85 deletions
diff --git a/src/nvim/change.c b/src/nvim/change.c index 71614363d2..be52750c44 100644 --- a/src/nvim/change.c +++ b/src/nvim/change.c @@ -142,7 +142,6 @@ static void changed_common(linenr_T lnum, colnr_T col, linenr_T lnume, long xtra) { int i; - int cols; pos_T *p; int add; @@ -170,7 +169,7 @@ static void changed_common(linenr_T lnum, colnr_T col, linenr_T lnume, if (p->lnum != lnum) { add = true; } else { - cols = comp_textwidth(false); + int cols = comp_textwidth(false); if (cols == 0) { cols = 79; } diff --git a/src/nvim/edit.c b/src/nvim/edit.c index a917e94342..b3261cfce6 100644 --- a/src/nvim/edit.c +++ b/src/nvim/edit.c @@ -5549,13 +5549,11 @@ void insertchar( int second_indent // indent for second line if >= 0 ) { - int textwidth; char_u *p; - int fo_ins_blank; int force_format = flags & INSCHAR_FORMAT; - textwidth = comp_textwidth(force_format); - fo_ins_blank = has_format_option(FO_INS_BLANK); + const int textwidth = comp_textwidth(force_format); + const bool fo_ins_blank = has_format_option(FO_INS_BLANK); /* * Try to break the line in two or more pieces when: @@ -5756,10 +5754,11 @@ internal_format ( int cc; int save_char = NUL; bool haveto_redraw = false; - int fo_ins_blank = has_format_option(FO_INS_BLANK); - int fo_multibyte = has_format_option(FO_MBYTE_BREAK); - int fo_white_par = has_format_option(FO_WHITE_PAR); - int first_line = TRUE; + const bool fo_ins_blank = has_format_option(FO_INS_BLANK); + const bool fo_multibyte = has_format_option(FO_MBYTE_BREAK); + const bool fo_rigor_tw = has_format_option(FO_RIGOROUS_TW); + const bool fo_white_par = has_format_option(FO_WHITE_PAR); + bool first_line = true; colnr_T leader_len; bool no_leader = false; int do_comments = (flags & INSCHAR_DO_COM); @@ -5838,6 +5837,7 @@ internal_format ( curwin->w_cursor.col = startcol; foundcol = 0; + int skip_pos = 0; /* * Find position to break at. 
@@ -5907,7 +5907,11 @@ internal_format ( foundcol = curwin->w_cursor.col; if (curwin->w_cursor.col <= (colnr_T)wantcol) break; - } else if (cc >= 0x100 && fo_multibyte) { + } else if ((cc >= 0x100 || !utf_allow_break_before(cc)) + && fo_multibyte) { + int ncc; + bool allow_break; + // Break after or before a multi-byte character. if (curwin->w_cursor.col != startcol) { // Don't break until after the comment leader @@ -5916,8 +5920,11 @@ internal_format ( } col = curwin->w_cursor.col; inc_cursor(); - // Don't change end_foundcol if already set. - if (foundcol != curwin->w_cursor.col) { + ncc = gchar_cursor(); + allow_break = utf_allow_break(cc, ncc); + + // If we have already checked this position, skip! + if (curwin->w_cursor.col != skip_pos && allow_break) { foundcol = curwin->w_cursor.col; end_foundcol = foundcol; if (curwin->w_cursor.col <= (colnr_T)wantcol) @@ -5929,6 +5936,7 @@ internal_format ( if (curwin->w_cursor.col == 0) break; + ncc = cc; col = curwin->w_cursor.col; dec_cursor(); @@ -5937,17 +5945,56 @@ internal_format ( if (WHITECHAR(cc)) { continue; // break with space } - // Don't break until after the comment leader + // Don't break until after the comment leader. if (curwin->w_cursor.col < leader_len) { break; } curwin->w_cursor.col = col; + skip_pos = curwin->w_cursor.col; - foundcol = curwin->w_cursor.col; - end_foundcol = foundcol; - if (curwin->w_cursor.col <= (colnr_T)wantcol) - break; + allow_break = utf_allow_break(cc, ncc); + + // Must handle this to respect line break prohibition. + if (allow_break) { + foundcol = curwin->w_cursor.col; + end_foundcol = foundcol; + } + if (curwin->w_cursor.col <= (colnr_T)wantcol) { + const bool ncc_allow_break = utf_allow_break_before(ncc); + + if (allow_break) { + break; + } + if (!ncc_allow_break && !fo_rigor_tw) { + // Enable at most 1 punct hang outside of textwidth. + if (curwin->w_cursor.col == startcol) { + // We are inserting a non-breakable char, postpone + // line break check to next insert. 
+ end_foundcol = foundcol = 0; + break; + } + + // Neither cc nor ncc is NUL if we are here, so + // it's safe to inc_cursor. + col = curwin->w_cursor.col; + + inc_cursor(); + cc = ncc; + ncc = gchar_cursor(); + // handle insert + ncc = (ncc != NUL) ? ncc : c; + + allow_break = utf_allow_break(cc, ncc); + + if (allow_break) { + // Break only when we are not at end of line. + end_foundcol = foundcol = ncc == NUL? 0 : curwin->w_cursor.col; + break; + } + curwin->w_cursor.col = col; + } + } } if (curwin->w_cursor.col == 0) break; @@ -6049,7 +6096,7 @@ internal_format ( } } } - first_line = FALSE; + first_line = false; } if (State & VREPLACE_FLAG) { @@ -6236,12 +6283,10 @@ static void check_auto_format( * Set default to window width (maximum 79) for "gq" operator. */ int comp_textwidth( - int ff // force formatting (for "gq" command) + bool ff // force formatting (for "gq" command) ) { - int textwidth; - - textwidth = curbuf->b_p_tw; + int textwidth = curbuf->b_p_tw; if (textwidth == 0 && curbuf->b_p_wm) { // The width is the window width minus 'wrapmargin' minus all the // things that add to the margin. diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 6d188c6cd0..ec4f4cbc21 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1624,6 +1624,146 @@ int utf_head_off(const char_u *base, const char_u *p) return (int)(p - q); } +// Whether space is NOT allowed before/after "cc". +bool utf_eat_space(int cc) + FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (cc >= 0x2000 && cc <= 0x206F) // General punctuations + || (cc >= 0x2e00 && cc <= 0x2e7f) // Supplemental punctuations + || (cc >= 0x3000 && cc <= 0x303f) // CJK symbols and punctuations + || (cc >= 0xff01 && cc <= 0xff0f) // Full width ASCII punctuations + || (cc >= 0xff1a && cc <= 0xff20) // .. + || (cc >= 0xff3b && cc <= 0xff40) // .. + || (cc >= 0xff5b && cc <= 0xff65); // .. +} + +// Whether line break is allowed before "cc". 
+bool utf_allow_break_before(int cc) + FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + static const int BOL_prohibition_punct[] = { + '!', + '%', + ')', + ',', + ':', + ';', + '>', + '?', + ']', + '}', + 0x2019, // ’ right single quotation mark + 0x201d, // ” right double quotation mark + 0x2020, // † dagger + 0x2021, // ‡ double dagger + 0x2026, // … horizontal ellipsis + 0x2030, // ‰ per mille sign + 0x2031, // ‱ per ten thousand sign + 0x203c, // ‼ double exclamation mark + 0x2047, // ⁇ double question mark + 0x2048, // ⁈ question exclamation mark + 0x2049, // ⁉ exclamation question mark + 0x2103, // ℃ degree celsius + 0x2109, // ℉ degree fahrenheit + 0x3001, // 、 ideographic comma + 0x3002, // 。 ideographic full stop + 0x3009, // 〉 right angle bracket + 0x300b, // 》 right double angle bracket + 0x300d, // 」 right corner bracket + 0x300f, // 』 right white corner bracket + 0x3011, // 】 right black lenticular bracket + 0x3015, // 〕 right tortoise shell bracket + 0x3017, // 〗 right white lenticular bracket + 0x3019, // 〙 right white tortoise shell bracket + 0x301b, // 〛 right white square bracket + 0xff01, // ！ fullwidth exclamation mark + 0xff09, // ） fullwidth right parenthesis + 0xff0c, // ， fullwidth comma + 0xff0e, // ． fullwidth full stop + 0xff1a, // ： fullwidth colon + 0xff1b, // ； fullwidth semicolon + 0xff1f, // ？ fullwidth question mark + 0xff3d, // ］ fullwidth right square bracket + 0xff5d, // ｝ fullwidth right curly bracket + }; + + int first = 0; + int last = ARRAY_SIZE(BOL_prohibition_punct) - 1; + + while (first < last) { + const int mid = (first + last) / 2; + + if (cc == BOL_prohibition_punct[mid]) { + return false; + } else if (cc > BOL_prohibition_punct[mid]) { + first = mid + 1; + } else { + last = mid - 1; + } + } + + return cc != BOL_prohibition_punct[first]; +} + +// Whether line break is allowed after "cc". 
+bool utf_allow_break_after(int cc) + FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + static const int EOL_prohibition_punct[] = { + '(', + '<', + '[', + '`', + '{', + // 0x2014, // — em dash + 0x2018, // ‘ left single quotation mark + 0x201c, // “ left double quotation mark + // 0x2053, // ~ swung dash + 0x3008, // 〈 left angle bracket + 0x300a, // 《 left double angle bracket + 0x300c, // 「 left corner bracket + 0x300e, // 『 left white corner bracket + 0x3010, // 【 left black lenticular bracket + 0x3014, // 〔 left tortoise shell bracket + 0x3016, // 〖 left white lenticular bracket + 0x3018, // 〘 left white tortoise shell bracket + 0x301a, // 〚 left white square bracket + 0xff08, // （ fullwidth left parenthesis + 0xff3b, // ［ fullwidth left square bracket + 0xff5b, // ｛ fullwidth left curly bracket + }; + + int first = 0; + int last = ARRAY_SIZE(EOL_prohibition_punct) - 1; + + while (first < last) { + const int mid = (first + last)/2; + + if (cc == EOL_prohibition_punct[mid]) { + return false; + } else if (cc > EOL_prohibition_punct[mid]) { + first = mid + 1; + } else { + last = mid - 1; + } + } + + return cc != EOL_prohibition_punct[first]; +} + +// Whether line break is allowed between "cc" and "ncc". +bool utf_allow_break(int cc, int ncc) + FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + // Don't break inside a doubled punctuation mark (two em dashes or two ellipses) + if (cc == ncc + && (cc == 0x2014 // em dash + || cc == 0x2026)) { // horizontal ellipsis + return false; + } + return utf_allow_break_after(cc) && utf_allow_break_before(ncc); +} + /// Copy a character, advancing the pointers /// /// @param[in,out] fp Source of the character to copy. 
diff --git a/src/nvim/ops.c b/src/nvim/ops.c index 8329daf5f1..939cde0ba1 100644 --- a/src/nvim/ops.c +++ b/src/nvim/ops.c @@ -3833,7 +3833,8 @@ int do_join(size_t count, && (!has_format_option(FO_MBYTE_JOIN) || (utf_ptr2char(curr) < 0x100 && endcurr1 < 0x100)) && (!has_format_option(FO_MBYTE_JOIN2) - || utf_ptr2char(curr) < 0x100 || endcurr1 < 0x100) + || (utf_ptr2char(curr) < 0x100 && !utf_eat_space(endcurr1)) + || (endcurr1 < 0x100 && !utf_eat_space(utf_ptr2char(curr)))) ) { /* don't add a space if the line is ending in a space */ if (endcurr1 == ' ') @@ -4158,49 +4159,41 @@ format_lines( int avoid_fex /* don't use 'formatexpr' */ ) { - int max_len; - int is_not_par; /* current line not part of parag. */ - int next_is_not_par; /* next line not part of paragraph */ - int is_end_par; /* at end of paragraph */ - int prev_is_end_par = FALSE; /* prev. line not part of parag. */ - int next_is_start_par = FALSE; - int leader_len = 0; /* leader len of current line */ - int next_leader_len; /* leader len of next line */ - char_u *leader_flags = NULL; /* flags for leader of current line */ - char_u *next_leader_flags; /* flags for leader of next line */ - int do_comments; /* format comments */ - int do_comments_list = 0; /* format comments with 'n' or '2' */ - int advance = TRUE; - int second_indent = -1; /* indent for second line (comment - * aware) */ - int do_second_indent; - int do_number_indent; - int do_trail_white; - int first_par_line = TRUE; + bool is_not_par; // current line not part of parag. + bool next_is_not_par; // next line not part of paragraph + bool is_end_par; // at end of paragraph + bool prev_is_end_par = false; // prev. line not part of parag. 
+ bool next_is_start_par = false; + int leader_len = 0; // leader len of current line + int next_leader_len; // leader len of next line + char_u *leader_flags = NULL; // flags for leader of current line + char_u *next_leader_flags; // flags for leader of next line + bool advance = true; + int second_indent = -1; // indent for second line (comment aware) + bool first_par_line = true; int smd_save; long count; - int need_set_indent = TRUE; /* set indent of next paragraph */ - int force_format = FALSE; - int old_State = State; - - /* length of a line to force formatting: 3 * 'tw' */ - max_len = comp_textwidth(TRUE) * 3; - - /* check for 'q', '2' and '1' in 'formatoptions' */ - do_comments = has_format_option(FO_Q_COMS); - do_second_indent = has_format_option(FO_Q_SECOND); - do_number_indent = has_format_option(FO_Q_NUMBER); - do_trail_white = has_format_option(FO_WHITE_PAR); - - /* - * Get info about the previous and current line. - */ - if (curwin->w_cursor.lnum > 1) - is_not_par = fmt_check_par(curwin->w_cursor.lnum - 1 - , &leader_len, &leader_flags, do_comments - ); - else - is_not_par = TRUE; + bool need_set_indent = true; // set indent of next paragraph + bool force_format = false; + const int old_State = State; + + // length of a line to force formatting: 3 * 'tw' + const int max_len = comp_textwidth(true) * 3; + + // check for 'q', '2' and '1' in 'formatoptions' + const bool do_comments = has_format_option(FO_Q_COMS); // format comments + int do_comments_list = 0; // format comments with 'n' or '2' + const bool do_second_indent = has_format_option(FO_Q_SECOND); + const bool do_number_indent = has_format_option(FO_Q_NUMBER); + const bool do_trail_white = has_format_option(FO_WHITE_PAR); + + // Get info about the previous and current line. 
+ if (curwin->w_cursor.lnum > 1) { + is_not_par = fmt_check_par(curwin->w_cursor.lnum - 1, + &leader_len, &leader_flags, do_comments); + } else { + is_not_par = true; + } next_is_not_par = fmt_check_par(curwin->w_cursor.lnum , &next_leader_len, &next_leader_flags, do_comments ); @@ -4225,7 +4218,7 @@ format_lines( * The last line to be formatted. */ if (count == 1 || curwin->w_cursor.lnum == curbuf->b_ml.ml_line_count) { - next_is_not_par = TRUE; + next_is_not_par = true; next_leader_len = 0; next_leader_flags = NULL; } else { @@ -4236,7 +4229,7 @@ format_lines( next_is_start_par = (get_number_indent(curwin->w_cursor.lnum + 1) > 0); } - advance = TRUE; + advance = true; is_end_par = (is_not_par || next_is_not_par || next_is_start_par); if (!is_end_par && do_trail_white) is_end_par = !ends_in_white(curwin->w_cursor.lnum); @@ -4287,7 +4280,7 @@ format_lines( leader_len, leader_flags, next_leader_len, next_leader_flags) ) - is_end_par = TRUE; + is_end_par = true; /* * If we have got to the end of a paragraph, or the line is @@ -4324,9 +4317,9 @@ format_lines( * end of the paragraph. */ if (line_count < 0) break; - first_par_line = TRUE; + first_par_line = true; } - force_format = FALSE; + force_format = false; } /* @@ -4334,7 +4327,7 @@ format_lines( * first delete the leader from the second line. 
*/ if (!is_end_par) { - advance = FALSE; + advance = false; curwin->w_cursor.lnum++; curwin->w_cursor.col = 0; if (line_count < 0 && u_save_cursor() == FAIL) @@ -4357,12 +4350,13 @@ format_lines( beep_flush(); break; } - first_par_line = FALSE; - /* If the line is getting long, format it next time */ - if (STRLEN(get_cursor_line_ptr()) > (size_t)max_len) - force_format = TRUE; - else - force_format = FALSE; + first_par_line = false; + // If the line is getting long, format it next time + if (STRLEN(get_cursor_line_ptr()) > (size_t)max_len) { + force_format = true; + } else { + force_format = false; + } } } line_breakcheck(); @@ -4423,11 +4417,10 @@ static int fmt_check_par(linenr_T lnum, int *leader_len, char_u **leader_flags, int paragraph_start(linenr_T lnum) { char_u *p; - int leader_len = 0; /* leader len of current line */ - char_u *leader_flags = NULL; /* flags for leader of current line */ - int next_leader_len = 0; /* leader len of next line */ - char_u *next_leader_flags = NULL; /* flags for leader of next line */ - int do_comments; /* format comments */ + int leader_len = 0; // leader len of current line + char_u *leader_flags = NULL; // flags for leader of current line + int next_leader_len = 0; // leader len of next line + char_u *next_leader_flags = NULL; // flags for leader of next line if (lnum <= 1) return TRUE; /* start of the file */ @@ -4436,7 +4429,7 @@ int paragraph_start(linenr_T lnum) if (*p == NUL) return TRUE; /* after empty line */ - do_comments = has_format_option(FO_Q_COMS); + const bool do_comments = has_format_option(FO_Q_COMS); // format comments if (fmt_check_par(lnum - 1, &leader_len, &leader_flags, do_comments)) { return true; // after non-paragraph line } diff --git a/src/nvim/option.c b/src/nvim/option.c index 484d9da3a1..4569eb1dda 100644 --- a/src/nvim/option.c +++ b/src/nvim/option.c @@ -6803,7 +6803,8 @@ static void langmap_set(void) /// Return true if format option 'x' is in effect. 
/// Take care of no formatting when 'paste' is set. -int has_format_option(int x) +bool has_format_option(int x) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT { if (p_paste) { return false; diff --git a/src/nvim/option_defs.h b/src/nvim/option_defs.h index a09811c8fb..6630bda710 100644 --- a/src/nvim/option_defs.h +++ b/src/nvim/option_defs.h @@ -77,12 +77,13 @@ #define FO_ONE_LETTER '1' #define FO_WHITE_PAR 'w' // trailing white space continues paragr. #define FO_AUTO 'a' // automatic formatting +#define FO_RIGOROUS_TW ']' // respect textwidth rigorously #define FO_REMOVE_COMS 'j' // remove comment leaders when joining lines #define FO_PERIOD_ABBR 'p' // don't break a single space after a period #define DFLT_FO_VI "vt" #define DFLT_FO_VIM "tcqj" -#define FO_ALL "tcroq2vlb1mMBn,awjp" // for do_set() +#define FO_ALL "tcroq2vlb1mMBn,aw]jp" // for do_set() // characters for the p_cpo option: #define CPO_ALTREAD 'a' // ":read" sets alternate file name diff --git a/src/nvim/testdir/test_cjk_linebreak.vim b/src/nvim/testdir/test_cjk_linebreak.vim new file mode 100644 index 0000000000..dfaa8fa1af --- /dev/null +++ b/src/nvim/testdir/test_cjk_linebreak.vim @@ -0,0 +1,97 @@ +scriptencoding utf-8 + +func Run_cjk_linebreak_after(rigorous) + set textwidth=12 + for punct in [ + \ '!', '%', ')', ',', ':', ';', '>', '?', ']', '}', '’', '”', '†', '‡', + \ '…', '‰', '‱', '‼', '⁇', '⁈', '⁉', '℃', '℉', '、', '。', '〉', '》', + \ '」', '』', '】', '〕', '〗', '〙', '〛', '!', ')', ',', '.', ':', + \ ';', '?', ']', '}'] + call setline('.', '这是一个测试' .. punct.'试试 CJK 行禁则补丁。') + normal gqq + if a:rigorous + call assert_equal('这是一个测', getline(1)) + else + call assert_equal('这是一个测试' .. 
punct, getline(1)) + endif + %d_ + endfor +endfunc + +func Test_cjk_linebreak_after() + set formatoptions=croqn2mB1j + call Run_cjk_linebreak_after(0) +endfunc + +func Test_cjk_linebreak_after_rigorous() + set formatoptions=croqn2mB1j] + call Run_cjk_linebreak_after(1) +endfunc + +func Run_cjk_linebreak_before() + set textwidth=12 + for punct in [ + \ '(', '<', '[', '`', '{', '‘', '“', '〈', '《', '「', '『', '【', '〔', + \ '〖', '〘', '〚', '(', '[', '{'] + call setline('.', '这是个测试' .. punct.'试试 CJK 行禁则补丁。') + normal gqq + call assert_equal('这是个测试', getline(1)) + %d_ + endfor +endfunc + +func Test_cjk_linebreak_before() + set formatoptions=croqn2mB1j + call Run_cjk_linebreak_before() +endfunc + +func Test_cjk_linebreak_before_rigorous() + set formatoptions=croqn2mB1j] + call Run_cjk_linebreak_before() +endfunc + +func Run_cjk_linebreak_nobetween(rigorous) + " …… must not start a line + call setline('.', '这是个测试……试试 CJK 行禁则补丁。') + set textwidth=12 ambiwidth=double + normal gqq + if a:rigorous + call assert_equal('这是个测', getline(1)) + else + call assert_equal('这是个测试……', getline(1)) + endif + %d_ + + call setline('.', '这是一个测试……试试 CJK 行禁则补丁。') + set textwidth=12 ambiwidth=double + normal gqq + call assert_equal('这是一个测', getline(1)) + %d_ + + " but —— can + call setline('.', '这是个测试——试试 CJK 行禁则补丁。') + set textwidth=12 ambiwidth=double + normal gqq + call assert_equal('这是个测试', getline(1)) +endfunc + +func Test_cjk_linebreak_nobetween() + set formatoptions=croqn2mB1j + call Run_cjk_linebreak_nobetween(0) +endfunc + +func Test_cjk_linebreak_nobetween_rigorous() + set formatoptions=croqn2mB1j] + call Run_cjk_linebreak_nobetween(1) +endfunc + +func Test_cjk_linebreak_join_punct() + for punct in ['——', '〗', ',', '。', '……'] + call setline(1, '文本文本' .. punct) + call setline(2, 'English') + set formatoptions=croqn2mB1j + normal ggJ + call assert_equal('文本文本' .. punct.'English', getline(1)) + %d_ + endfor +endfunc |