From fb1edb2f5728d74ae811c6ab32395598cea5609b Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index b874f0dc94..f592b54f67 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1618,21 +1618,24 @@ void show_utf8(void) /// "base" must be the start of the string, which must be NUL terminated. /// If "p" points to the NUL at the end of the string return 0. /// Returns 0 when already at the first byte of a character. -int utf_head_off(const char_u *base, const char_u *p) +int utf_head_off(const char *base_in, const char *p_in) { int c; int len; - if (*p < 0x80) { // be quick for ASCII + if ((uint8_t)(*p_in) < 0x80) { // be quick for ASCII return 0; } + const uint8_t *base = (uint8_t *)base_in; + const uint8_t *p = (uint8_t *)p_in; + // Skip backwards over trailing bytes: 10xx.xxxx // Skip backwards again if on a composing char. - const char_u *q; + const uint8_t *q; for (q = p;; q--) { // Move s to the last byte of this char. - const char_u *s; + const uint8_t *s; for (s = q; (s[1] & 0xc0) == 0x80; s++) {} // Move q to the first byte of this char. @@ -1657,7 +1660,7 @@ int utf_head_off(const char_u *base, const char_u *p) if (arabic_maycombine(c)) { // Advance to get a sneak-peak at the next char - const char_u *j = q; + const uint8_t *j = q; j--; // Move j to the first byte of this char. while (j > base && (*j & 0xc0) == 0x80) { @@ -2042,7 +2045,7 @@ void mb_check_adjust_col(void *win_) win->w_cursor.col = len - 1; } // Move the cursor to the head byte. - win->w_cursor.col -= utf_head_off((char_u *)p, (char_u *)p + win->w_cursor.col); + win->w_cursor.col -= utf_head_off(p, p + win->w_cursor.col); } // Reset `coladd` when the cursor would be on the right half of a -- cgit From bd51ac2a347c0a3efb64e4b09400b7314286844c Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index f592b54f67..947594be4f 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1541,17 +1541,16 @@ ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len, size_t index, bool us return -1; } -/* - * Version of strnicmp() that handles multi-byte characters. - * Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can - * probably use strnicmp(), because there are no ASCII characters in the - * second byte. - * Returns zero if s1 and s2 are equal (ignoring case), the difference between - * two characters otherwise. - */ -int mb_strnicmp(const char_u *s1, const char_u *s2, const size_t nn) +/// Version of strnicmp() that handles multi-byte characters. +/// Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can +/// probably use strnicmp(), because there are no ASCII characters in the +/// second byte. +/// +/// @return zero if s1 and s2 are equal (ignoring case), the difference between +/// two characters otherwise. +int mb_strnicmp(const char *s1, const char *s2, const size_t nn) { - return utf_strnicmp(s1, s2, nn, nn); + return utf_strnicmp((char_u *)s1, (char_u *)s2, nn, nn); } /// Compare strings case-insensitively @@ -1568,7 +1567,7 @@ int mb_strnicmp(const char_u *s1, const char_u *s2, const size_t nn) /// @return 0 if strings are equal, <0 if s1 < s2, >0 if s1 > s2. int mb_stricmp(const char *s1, const char *s2) { - return mb_strnicmp((const char_u *)s1, (const char_u *)s2, MAXCOL); + return mb_strnicmp(s1, s2, MAXCOL); } /* -- cgit From 49e893f296bca9eef5ff45a3d746c261d055bf10 Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 947594be4f..0a4182a892 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -587,7 +587,7 @@ size_t mb_string2cells_len(const char *str, size_t size) size_t clen = 0; for (const char_u *p = (char_u *)str; *p != NUL && p < (char_u *)str + size; - p += utfc_ptr2len_len(p, (int)size + (int)(p - (char_u *)str))) { + p += utfc_ptr2len_len((char *)p, (int)size + (int)(p - (char_u *)str))) { clen += (size_t)utf_ptr2cells((char *)p); } @@ -727,12 +727,10 @@ int mb_cptr2char_adv(const char_u **pp) return c; } -/* - * Check if the character pointed to by "p2" is a composing character when it - * comes after "p1". For Arabic sometimes "ab" is replaced with "c", which - * behaves like a composing character. - */ -bool utf_composinglike(const char_u *p1, const char_u *p2) +/// Check if the character pointed to by "p2" is a composing character when it +/// comes after "p1". For Arabic sometimes "ab" is replaced with "c", which +/// behaves like a composing character. +bool utf_composinglike(const char *p1, const char *p2) { int c2; @@ -765,7 +763,7 @@ int utfc_ptr2char(const char *p_in, int *pcc) // Only accept a composing char when the first char isn't illegal. if ((len > 1 || *p < 0x80) && p[len] >= 0x80 - && utf_composinglike(p, p + len)) { + && utf_composinglike((char *)p, (char *)p + len)) { int cc = utf_ptr2char((char *)p + len); for (;;) { pcc[i++] = cc; @@ -809,7 +807,7 @@ int utfc_ptr2char_len(const char_u *p, int *pcc, int maxlen) int len_cc = utf_ptr2len_len(p + len, maxlen - len); safe = len_cc > 1 && len_cc <= maxlen - len; if (!safe || (pcc[i] = utf_ptr2char((char *)p + len)) < 0x80 - || !(i == 0 ? utf_composinglike(p, p + len) : utf_iscomposing(pcc[i]))) { + || !(i == 0 ? utf_composinglike((char *)p, (char *)p + len) : utf_iscomposing(pcc[i]))) { break; } len += len_cc; @@ -916,7 +914,7 @@ int utfc_ptr2len(const char *const p_in) // skip all of them (otherwise the cursor would get stuck). int prevlen = 0; for (;;) { - if (p[len] < 0x80 || !utf_composinglike(p + prevlen, p + len)) { + if (p[len] < 0x80 || !utf_composinglike((char *)p + prevlen, (char *)p + len)) { return len; } @@ -926,13 +924,11 @@ int utfc_ptr2len(const char *const p_in) } } -/* - * Return the number of bytes the UTF-8 encoding of the character at "p[size]" - * takes. This includes following composing characters. - * Returns 0 for an empty string. - * Returns 1 for an illegal char or an incomplete byte sequence. - */ -int utfc_ptr2len_len(const char_u *p, int size) +/// Return the number of bytes the UTF-8 encoding of the character at "p[size]" +/// takes. This includes following composing characters. +/// Returns 0 for an empty string. +/// Returns 1 for an illegal char or an incomplete byte sequence. +int utfc_ptr2len_len(const char *p, int size) { int len; int prevlen; @@ -940,15 +936,15 @@ int utfc_ptr2len_len(const char_u *p, int size) if (size < 1 || *p == NUL) { return 0; } - if (p[0] < 0x80 && (size == 1 || p[1] < 0x80)) { // be quick for ASCII + if ((uint8_t)p[0] < 0x80 && (size == 1 || (uint8_t)p[1] < 0x80)) { // be quick for ASCII return 1; } // Skip over first UTF-8 char, stopping at a NUL byte. - len = utf_ptr2len_len(p, size); + len = utf_ptr2len_len((char_u *)p, size); // Check for illegal byte and incomplete byte sequence. - if ((len == 1 && p[0] >= 0x80) || len > size) { + if ((len == 1 && (uint8_t)p[0] >= 0x80) || len > size) { return 1; } @@ -960,7 +956,7 @@ int utfc_ptr2len_len(const char_u *p, int size) while (len < size) { int len_next_char; - if (p[len] < 0x80) { + if ((uint8_t)p[len] < 0x80) { break; } @@ -968,7 +964,7 @@ int utfc_ptr2len_len(const char_u *p, int size) * Next character length should not go beyond size to ensure that * utf_composinglike(...) does not read beyond size. */ - len_next_char = utf_ptr2len_len(p + len, size - len); + len_next_char = utf_ptr2len_len((char_u *)p + len, size - len); if (len_next_char > size - len) { break; } @@ -2420,7 +2416,7 @@ static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen if (utf_ptr2cells(from) > 1) { *to++ = '?'; } - l = utfc_ptr2len_len((const char_u *)from, (int)fromlen); + l = utfc_ptr2len_len(from, (int)fromlen); from += l; fromlen -= (size_t)l; } else if (ICONV_ERRNO != ICONV_E2BIG) { -- cgit From 1ffd527c837fb2465c9659273bbe5447a1352db2 Mon Sep 17 00:00:00 2001 From: Lewis Russell Date: Fri, 2 Sep 2022 17:39:49 +0100 Subject: refactor: migrate comment style (#20012) Done automatically using the following perl command: perl -pi -0777pe 's#\n\K */\*\n(.+?)\s*\*/\n#join("\n", map { $_ =~ s:^\s*\K \*://:; $_ } split("\n", $1)) . "\n"#sge' src/nvim/**/*.c Co-authored-by: zeertzjq Co-authored-by: zeertzjq --- src/nvim/mbyte.c | 242 +++++++++++++++++++++---------------------------------- 1 file changed, 92 insertions(+), 150 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 0a4182a892..116a66e773 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -132,10 +132,8 @@ const uint8_t utf8len_tab_zero[] = { 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0, // F? }; -/* - * Canonical encoding names and their properties. - * "iso-8859-n" is handled by enc_canonize() directly. - */ +// Canonical encoding names and their properties. +// "iso-8859-n" is handled by enc_canonize() directly. static struct { const char *name; int prop; int codepage; } enc_canon_table[] = @@ -269,9 +267,7 @@ enc_canon_table[] = #define IDX_COUNT 59 }; -/* - * Aliases for encoding names. - */ +// Aliases for encoding names. static struct { const char *name; int canon; } enc_alias_table[] = @@ -342,10 +338,8 @@ enc_alias_table[] = { NULL, 0 } }; -/* - * Find encoding "name" in the list of canonical encoding names. - * Returns -1 if not found. - */ +// Find encoding "name" in the list of canonical encoding names. +// Returns -1 if not found. static int enc_canon_search(const char_u *name) FUNC_ATTR_PURE { @@ -357,10 +351,8 @@ static int enc_canon_search(const char_u *name) return -1; } -/* - * Find canonical encoding "name" in the list and return its properties. - * Returns 0 if not found. - */ +// Find canonical encoding "name" in the list and return its properties. +// Returns 0 if not found. int enc_canon_props(const char_u *name) FUNC_ATTR_PURE { @@ -375,13 +367,11 @@ int enc_canon_props(const char_u *name) return 0; } -/* - * Return the size of the BOM for the current buffer: - * 0 - no BOM - * 2 - UCS-2 or UTF-16 BOM - * 4 - UCS-4 BOM - * 3 - UTF-8 BOM - */ +// Return the size of the BOM for the current buffer: +// 0 - no BOM +// 2 - UCS-2 or UTF-16 BOM +// 4 - UCS-4 BOM +// 3 - UTF-8 BOM int bomb_size(void) FUNC_ATTR_PURE { @@ -401,9 +391,7 @@ int bomb_size(void) return n; } -/* - * Remove all BOM from "s" by moving remaining text. - */ +// Remove all BOM from "s" by moving remaining text. void remove_bom(char_u *s) { char *p = (char *)s; @@ -417,13 +405,11 @@ void remove_bom(char_u *s) } } -/* - * Get class of pointer: - * 0 for blank or NUL - * 1 for punctuation - * 2 for an (ASCII) word character - * >2 for other word characters - */ +// Get class of pointer: +// 0 for blank or NUL +// 1 for punctuation +// 2 for an (ASCII) word character +// >2 for other word characters int mb_get_class(const char_u *p) FUNC_ATTR_PURE { @@ -445,9 +431,7 @@ int mb_get_class_tab(const char_u *p, const uint64_t *const chartab) return utf_class_tab(utf_ptr2char((char *)p), chartab); } -/* - * Return true if "c" is in "table". - */ +// Return true if "c" is in "table". static bool intable(const struct interval *table, size_t n_items, int c) FUNC_ATTR_PURE { @@ -646,22 +630,20 @@ int utf_ptr2char(const char *const p_in) return p[0]; } -/* - * Convert a UTF-8 byte sequence to a wide character. - * String is assumed to be terminated by NUL or after "n" bytes, whichever - * comes first. - * The function is safe in the sense that it never accesses memory beyond the - * first "n" bytes of "s". - * - * On success, returns decoded codepoint, advances "s" to the beginning of - * next character and decreases "n" accordingly. - * - * If end of string was reached, returns 0 and, if "n" > 0, advances "s" past - * NUL byte. - * - * If byte sequence is illegal or incomplete, returns -1 and does not advance - * "s". - */ +// Convert a UTF-8 byte sequence to a wide character. +// String is assumed to be terminated by NUL or after "n" bytes, whichever +// comes first. +// The function is safe in the sense that it never accesses memory beyond the +// first "n" bytes of "s". +// +// On success, returns decoded codepoint, advances "s" to the beginning of +// next character and decreases "n" accordingly. +// +// If end of string was reached, returns 0 and, if "n" > 0, advances "s" past +// NUL byte. +// +// If byte sequence is illegal or incomplete, returns -1 and does not advance +// "s". static int utf_safe_read_char_adv(const char_u **s, size_t *n) { int c; @@ -701,10 +683,8 @@ static int utf_safe_read_char_adv(const char_u **s, size_t *n) return -1; } -/* - * Get character at **pp and advance *pp to the next character. - * Note: composing characters are skipped! - */ +// Get character at **pp and advance *pp to the next character. +// Note: composing characters are skipped! int mb_ptr2char_adv(const char_u **const pp) { int c; @@ -714,10 +694,8 @@ int mb_ptr2char_adv(const char_u **const pp) return c; } -/* - * Get character at **pp and advance *pp to the next character. - * Note: composing characters are returned as separate characters. - */ +// Get character at **pp and advance *pp to the next character. +// Note: composing characters are returned as separate characters. int mb_cptr2char_adv(const char_u **pp) { int c; @@ -784,12 +762,10 @@ int utfc_ptr2char(const char *p_in, int *pcc) return c; } -/* - * Convert a UTF-8 byte string to a wide character. Also get up to MAX_MCO - * composing characters. Use no more than p[maxlen]. - * - * @param [out] pcc: composing chars, last one is 0 - */ +// Convert a UTF-8 byte string to a wide character. Also get up to MAX_MCO +// composing characters. Use no more than p[maxlen]. +// +// @param [out] pcc: composing chars, last one is 0 int utfc_ptr2char_len(const char_u *p, int *pcc, int maxlen) { assert(maxlen > 0); @@ -845,24 +821,20 @@ int utf_ptr2len(const char *const p_in) return len; } -/* - * Return length of UTF-8 character, obtained from the first byte. - * "b" must be between 0 and 255! - * Returns 1 for an invalid first byte value. - */ +// Return length of UTF-8 character, obtained from the first byte. +// "b" must be between 0 and 255! +// Returns 1 for an invalid first byte value. int utf_byte2len(int b) { return utf8len_tab[b]; } -/* - * Get the length of UTF-8 byte sequence "p[size]". Does not include any - * following composing characters. - * Returns 1 for "". - * Returns 1 for an illegal byte sequence (also in incomplete byte seq.). - * Returns number > "size" for an incomplete byte sequence. - * Never returns zero. - */ +// Get the length of UTF-8 byte sequence "p[size]". Does not include any +// following composing characters. +// Returns 1 for "". +// Returns 1 for an illegal byte sequence (also in incomplete byte seq.). +// Returns number > "size" for an incomplete byte sequence. +// Never returns zero. int utf_ptr2len_len(const char_u *p, int size) { int len; @@ -948,10 +920,8 @@ int utfc_ptr2len_len(const char *p, int size) return 1; } - /* - * Check for composing characters. We can handle only the first six, but - * skip all of them (otherwise the cursor would get stuck). - */ + // Check for composing characters. We can handle only the first six, but + // skip all of them (otherwise the cursor would get stuck). prevlen = 0; while (len < size) { int len_next_char; @@ -960,10 +930,8 @@ int utfc_ptr2len_len(const char *p, int size) break; } - /* - * Next character length should not go beyond size to ensure that - * utf_composinglike(...) does not read beyond size. - */ + // Next character length should not go beyond size to ensure that + // utf_composinglike(...) does not read beyond size. len_next_char = utf_ptr2len_len((char_u *)p + len, size - len); if (len_next_char > size - len) { break; @@ -1042,20 +1010,16 @@ int utf_char2bytes(const int c, char *const buf) } } -/* - * Return true if "c" is a composing UTF-8 character. This means it will be - * drawn on top of the preceding character. - * Based on code from Markus Kuhn. - */ +// Return true if "c" is a composing UTF-8 character. This means it will be +// drawn on top of the preceding character. +// Based on code from Markus Kuhn. bool utf_iscomposing(int c) { return intable(combining, ARRAY_SIZE(combining), c); } -/* - * Return true for characters that can be displayed in a normal way. - * Only for characters of 0x100 and above! - */ +// Return true for characters that can be displayed in a normal way. +// Only for characters of 0x100 and above! bool utf_printable(int c) { // Sorted list of non-overlapping intervals. @@ -1070,12 +1034,10 @@ bool utf_printable(int c) return !intable(nonprint, ARRAY_SIZE(nonprint), c); } -/* - * Get class of a Unicode character. - * 0: white space - * 1: punctuation - * 2 or bigger: some class of word character. - */ +// Get class of a Unicode character. +// 0: white space +// 1: punctuation +// 2 or bigger: some class of word character. int utf_class(const int c) { return utf_class_tab(c, curbuf->b_chartab); @@ -1204,11 +1166,9 @@ bool utf_ambiguous_width(int c) || intable(emoji_all, ARRAY_SIZE(emoji_all), c)); } -/* - * Generic conversion function for case operations. - * Return the converted equivalent of "a", which is a UCS-4 character. Use - * the given conversion "table". Uses binary search on "table". - */ +// Generic conversion function for case operations. +// Return the converted equivalent of "a", which is a UCS-4 character. Use +// the given conversion "table". Uses binary search on "table". static int utf_convert(int a, const convertStruct *const table, size_t n_items) { size_t start, mid, end; // indices into table @@ -1234,10 +1194,8 @@ static int utf_convert(int a, const convertStruct *const table, size_t n_items) } } -/* - * Return the folded-case equivalent of "a", which is a UCS-4 character. Uses - * simple case folding. - */ +// Return the folded-case equivalent of "a", which is a UCS-4 character. Uses +// simple case folding. int utf_fold(int a) { if (a < 0x80) { @@ -1566,10 +1524,8 @@ int mb_stricmp(const char *s1, const char *s2) return mb_strnicmp(s1, s2, MAXCOL); } -/* - * "g8": show bytes of the UTF-8 char under the cursor. Doesn't matter what - * 'encoding' has been set to. - */ +// "g8": show bytes of the UTF-8 char under the cursor. Doesn't matter what +// 'encoding' has been set to. void show_utf8(void) { int len; @@ -1916,9 +1872,7 @@ int utf_cp_head_off(const char_u *base, const char_u *p) return i; } -/* - * Find the next illegal byte sequence. - */ +// Find the next illegal byte sequence. void utf_find_illegal(void) { pos_T pos = curwin->w_cursor; @@ -2008,10 +1962,8 @@ bool utf_valid_string(const char_u *s, const char_u *end) return true; } -/* - * If the cursor moves on an trail byte, set the cursor on the lead byte. - * Thus it moves left if necessary. - */ +// If the cursor moves on an trail byte, set the cursor on the lead byte. +// Thus it moves left if necessary. void mb_adjust_cursor(void) { mark_mb_adjustpos(curbuf, &curwin->w_cursor); @@ -2238,10 +2190,8 @@ static int enc_alias_search(const char_u *name) # include #endif -/* - * Get the canonicalized encoding of the current locale. - * Returns an allocated string when successful, NULL when not. - */ +// Get the canonicalized encoding of the current locale. +// Returns an allocated string when successful, NULL when not. char_u *enc_locale(void) { int i; @@ -2307,12 +2257,10 @@ enc_locale_copy_enc: #if defined(HAVE_ICONV) -/* - * Call iconv_open() with a check if iconv() works properly (there are broken - * versions). - * Returns (void *)-1 if failed. - * (should return iconv_t, but that causes problems with prototypes). - */ +// Call iconv_open() with a check if iconv() works properly (there are broken +// versions). +// Returns (void *)-1 if failed. +// (should return iconv_t, but that causes problems with prototypes). void *my_iconv_open(char_u *to, char_u *from) { iconv_t fd; @@ -2328,13 +2276,11 @@ void *my_iconv_open(char_u *to, char_u *from) fd = iconv_open(enc_skip((char *)to), enc_skip((char *)from)); if (fd != (iconv_t)-1 && iconv_working == kUnknown) { - /* - * Do a dummy iconv() call to check if it actually works. There is a - * version of iconv() on Linux that is broken. We can't ignore it, - * because it's wide-spread. The symptoms are that after outputting - * the initial shift state the "to" pointer is NULL and conversion - * stops for no apparent reason after about 8160 characters. - */ + // Do a dummy iconv() call to check if it actually works. There is a + // version of iconv() on Linux that is broken. We can't ignore it, + // because it's wide-spread. The symptoms are that after outputting + // the initial shift state the "to" pointer is NULL and conversion + // stops for no apparent reason after about 8160 characters. p = (char *)tobuf; tolen = ICONV_TESTLEN; (void)iconv(fd, NULL, NULL, &p, &tolen); @@ -2350,13 +2296,11 @@ void *my_iconv_open(char_u *to, char_u *from) return (void *)fd; } -/* - * Convert the string "str[slen]" with iconv(). - * If "unconvlenp" is not NULL handle the string ending in an incomplete - * sequence and set "*unconvlenp" to the length of it. - * Returns the converted string in allocated memory. NULL for an error. - * If resultlenp is not NULL, sets it to the result length in bytes. - */ +// Convert the string "str[slen]" with iconv(). +// If "unconvlenp" is not NULL handle the string ending in an incomplete +// sequence and set "*unconvlenp" to the length of it. +// Returns the converted string in allocated memory. NULL for an error. +// If resultlenp is not NULL, sets it to the result length in bytes. static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen, size_t *unconvlenp, size_t *resultlenp) { @@ -2529,11 +2473,9 @@ char *string_convert(const vimconv_T *const vcp, char *ptr, size_t *lenp) return (char *)string_convert_ext(vcp, (char_u *)ptr, lenp, NULL); } -/* - * Like string_convert(), but when "unconvlenp" is not NULL and there are is - * an incomplete sequence at the end it is not converted and "*unconvlenp" is - * set to the number of remaining bytes. - */ +// Like string_convert(), but when "unconvlenp" is not NULL and there are is +// an incomplete sequence at the end it is not converted and "*unconvlenp" is +// set to the number of remaining bytes. char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp, size_t *unconvlenp) { -- cgit From 73207cae611a1efb8cd17139e8228772daeb9866 Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 116a66e773..83b0609052 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1536,7 +1536,7 @@ void show_utf8(void) // Get the byte length of the char under the cursor, including composing // characters. - line = get_cursor_pos_ptr(); + line = (char_u *)get_cursor_pos_ptr(); len = utfc_ptr2len((char *)line); if (len == 0) { msg("NUL"); @@ -1891,7 +1891,7 @@ void utf_find_illegal(void) curwin->w_cursor.coladd = 0; for (;;) { - p = get_cursor_pos_ptr(); + p = (char_u *)get_cursor_pos_ptr(); if (vimconv.vc_type != CONV_NONE) { xfree(tofree); tofree = (char_u *)string_convert(&vimconv, (char *)p, NULL); @@ -1908,12 +1908,12 @@ void utf_find_illegal(void) if (*p >= 0x80 && (len == 1 || utf_char2len(utf_ptr2char((char *)p)) != len)) { if (vimconv.vc_type == CONV_NONE) { - curwin->w_cursor.col += (colnr_T)(p - get_cursor_pos_ptr()); + curwin->w_cursor.col += (colnr_T)(p - (char_u *)get_cursor_pos_ptr()); } else { int l; len = (int)(p - tofree); - for (p = get_cursor_pos_ptr(); *p != NUL && len-- > 0; p += l) { + for (p = (char_u *)get_cursor_pos_ptr(); *p != NUL && len-- > 0; p += l) { l = utf_ptr2len((char *)p); curwin->w_cursor.col += l; } @@ -1980,7 +1980,7 @@ void mb_check_adjust_col(void *win_) // Column 0 is always valid. if (oldcol != 0) { - char *p = (char *)ml_get_buf(win->w_buffer, win->w_cursor.lnum, false); + char *p = ml_get_buf(win->w_buffer, win->w_cursor.lnum, false); colnr_T len = (colnr_T)STRLEN(p); // Empty line or invalid column? @@ -2114,27 +2114,27 @@ char *enc_skip(char *p) char *enc_canonize(char *enc) FUNC_ATTR_NONNULL_RET { - char_u *p, *s; + char *p, *s; if (STRCMP(enc, "default") == 0) { // Use the default encoding as found by set_init_1(). - return (char *)vim_strsave(fenc_default); + return xstrdup(fenc_default); } // copy "enc" to allocated memory, with room for two '-' - char_u *r = xmalloc(STRLEN(enc) + 3); + char *r = xmalloc(STRLEN(enc) + 3); // Make it all lower case and replace '_' with '-'. p = r; - for (s = (char_u *)enc; *s != NUL; s++) { + for (s = enc; *s != NUL; s++) { if (*s == '_') { *p++ = '-'; } else { - *p++ = (char_u)TOLOWER_ASC(*s); + *p++ = (char)TOLOWER_ASC(*s); } } *p = NUL; // Skip "2byte-" and "8bit-". - p = (char_u *)enc_skip((char *)r); + p = enc_skip(r); // Change "microsoft-cp" to "cp". Used in some spell files. if (STRNCMP(p, "microsoft-cp", 12) == 0) { @@ -2159,17 +2159,17 @@ char *enc_canonize(char *enc) } int i; - if (enc_canon_search(p) >= 0) { + if (enc_canon_search((char_u *)p) >= 0) { // canonical name can be used unmodified if (p != r) { STRMOVE(r, p); } - } else if ((i = enc_alias_search(p)) >= 0) { + } else if ((i = enc_alias_search((char_u *)p)) >= 0) { // alias recognized, get canonical name xfree(r); - r = vim_strsave((char_u *)enc_canon_table[i].name); + r = xstrdup(enc_canon_table[i].name); } - return (char *)r; + return r; } /// Search for an encoding alias of "name". @@ -2491,7 +2491,7 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp len = *lenp; } if (len == 0) { - return vim_strsave((char_u *)""); + return (char_u *)xstrdup(""); } switch (vcp->vc_type) { -- cgit From c5322e752e9e568de907f7a1ef733bbfe342140c Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 83b0609052..dc74e23874 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -338,13 +338,13 @@ enc_alias_table[] = { NULL, 0 } }; -// Find encoding "name" in the list of canonical encoding names. -// Returns -1 if not found. -static int enc_canon_search(const char_u *name) +/// Find encoding "name" in the list of canonical encoding names. +/// Returns -1 if not found. +static int enc_canon_search(const char *name) FUNC_ATTR_PURE { for (int i = 0; i < IDX_COUNT; i++) { - if (STRCMP(name, enc_canon_table[i].name) == 0) { + if (strcmp(name, enc_canon_table[i].name) == 0) { return i; } } @@ -356,7 +356,7 @@ static int enc_canon_search(const char_u *name) int enc_canon_props(const char_u *name) FUNC_ATTR_PURE { - int i = enc_canon_search(name); + int i = enc_canon_search((char *)name); if (i >= 0) { return enc_canon_table[i].prop; } else if (STRNCMP(name, "2byte-", 6) == 0) { @@ -379,7 +379,7 @@ int bomb_size(void) if (curbuf->b_p_bomb && !curbuf->b_p_bin) { if (*curbuf->b_p_fenc == NUL - || STRCMP(curbuf->b_p_fenc, "utf-8") == 0) { + || strcmp(curbuf->b_p_fenc, "utf-8") == 0) { n = 3; } else if (STRNCMP(curbuf->b_p_fenc, "ucs-2", 5) == 0 || STRNCMP(curbuf->b_p_fenc, "utf-16", 6) == 0) { @@ -2115,7 +2115,7 @@ char *enc_canonize(char *enc) FUNC_ATTR_NONNULL_RET { char *p, *s; - if (STRCMP(enc, "default") == 0) { + if (strcmp(enc, "default") == 0) { // Use the default encoding as found by set_init_1(). return xstrdup(fenc_default); } @@ -2159,12 +2159,12 @@ char *enc_canonize(char *enc) } int i; - if (enc_canon_search((char_u *)p) >= 0) { + if (enc_canon_search(p) >= 0) { // canonical name can be used unmodified if (p != r) { STRMOVE(r, p); } - } else if ((i = enc_alias_search((char_u *)p)) >= 0) { + } else if ((i = enc_alias_search(p)) >= 0) { // alias recognized, get canonical name xfree(r); r = xstrdup(enc_canon_table[i].name); @@ -2174,12 +2174,12 @@ char *enc_canonize(char *enc) /// Search for an encoding alias of "name". /// Returns -1 when not found. -static int enc_alias_search(const char_u *name) +static int enc_alias_search(const char *name) { int i; for (i = 0; enc_alias_table[i].name != NULL; i++) { - if (STRCMP(name, enc_alias_table[i].name) == 0) { + if (strcmp(name, enc_alias_table[i].name) == 0) { return enc_alias_table[i].canon; } } @@ -2390,12 +2390,12 @@ static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen /// @return FAIL when conversion is not supported, OK otherwise. int convert_setup(vimconv_T *vcp, char *from, char *to) { - return convert_setup_ext(vcp, (char_u *)from, true, (char_u *)to, true); + return convert_setup_ext(vcp, from, true, to, true); } /// As convert_setup(), but only when from_unicode_is_utf8 is true will all /// "from" unicode charsets be considered utf-8. Same for "to". -int convert_setup_ext(vimconv_T *vcp, char_u *from, bool from_unicode_is_utf8, char_u *to, +int convert_setup_ext(vimconv_T *vcp, char *from, bool from_unicode_is_utf8, char *to, bool to_unicode_is_utf8) { int from_prop; @@ -2413,12 +2413,12 @@ int convert_setup_ext(vimconv_T *vcp, char_u *from, bool from_unicode_is_utf8, c // No conversion when one of the names is empty or they are equal. if (from == NULL || *from == NUL || to == NULL || *to == NUL - || STRCMP(from, to) == 0) { + || strcmp(from, to) == 0) { return OK; } - from_prop = enc_canon_props(from); - to_prop = enc_canon_props(to); + from_prop = enc_canon_props((char_u *)from); + to_prop = enc_canon_props((char_u *)to); if (from_unicode_is_utf8) { from_is_utf8 = from_prop & ENC_UNICODE; } else { @@ -2448,8 +2448,8 @@ int convert_setup_ext(vimconv_T *vcp, char_u *from, bool from_unicode_is_utf8, c #ifdef HAVE_ICONV else { // NOLINT(readability/braces) // Use iconv() for conversion. - vcp->vc_fd = (iconv_t)my_iconv_open(to_is_utf8 ? (char_u *)"utf-8" : to, - from_is_utf8 ? (char_u *)"utf-8" : from); + vcp->vc_fd = (iconv_t)my_iconv_open(to_is_utf8 ? (char_u *)"utf-8" : (char_u *)to, + from_is_utf8 ? (char_u *)"utf-8" : (char_u *)from); if (vcp->vc_fd != (iconv_t)-1) { vcp->vc_type = CONV_ICONV; vcp->vc_factor = 4; // could be longer too... -- cgit From 3ff46544c9872b4161fd098569c30b55fe3abd36 Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index dc74e23874..1c2fd6c7db 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1556,7 +1556,7 @@ void show_utf8(void) sprintf((char *)IObuff + rlen, "%02x ", (line[i] == NL) ? NUL : line[i]); // NUL is stored as NL clen--; - rlen += (int)STRLEN(IObuff + rlen); + rlen += (int)strlen(IObuff + rlen); if (rlen > IOSIZE - 20) { break; } -- cgit From 38059b4f31d8c9374002e209bc9ee2df28ac17fa Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Mon, 12 Sep 2022 14:03:32 +0800 Subject: vim-patch:8.2.2646: Vim9: error for not using string doesn't mention argument Problem: Vim9: error for not using string doesn't mention argument. Solution: Add argument number. https://github.com/vim/vim/commit/f28f2ac425600b88da0bdcc12a82cd620f575681 --- src/nvim/mbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index dc74e23874..ebb5e2317b 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -2793,7 +2793,7 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) void f_charclass(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) { - if (tv_check_for_string(&argvars[0]) == FAIL + if (tv_check_for_string(&argvars[0], 1) == FAIL || argvars[0].vval.v_string == NULL) { return; } -- cgit From 49aa9e17fa6f9b22550bff8f468c375ddf03fece Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Mon, 12 Sep 2022 14:08:51 +0800 Subject: vim-patch:8.2.2664: Vim9: not enough function arguments checked for string Problem: Vim9: not enough function arguments checked for string. Solution: Check in balloon functions. Refactor function arguments. https://github.com/vim/vim/commit/32105ae88f3aa6a6af30336f0bc9f8eb81292cd7 Cherry-pick removal of useless check from patch 8.2.3840. vim-patch:8.2.3083: crash when passing null string to charclass() Problem: Crash when passing null string to charclass(). Solution: Bail out when string pointer is NULL. (Christian Brabandt, closes vim/vim#8498, closes vim/vim#8260) https://github.com/vim/vim/commit/72463f883cdfd08e29ab0018ef3889284848d5f1 --- src/nvim/mbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index ebb5e2317b..310ad05196 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -2793,7 +2793,7 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) void f_charclass(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) { - if (tv_check_for_string(&argvars[0], 1) == FAIL + if (tv_check_for_string_arg(argvars, 0) == FAIL || argvars[0].vval.v_string == NULL) { return; } -- cgit From 25e4af439f3b5620406776ca77417d897097a3e9 Mon Sep 17 00:00:00 2001 From: bfredl Date: Mon, 12 Sep 2022 11:26:27 +0200 Subject: fix(lua): make vim.str_utfindex and vim.str_byteindex handle NUL bytes fixes #16290 --- src/nvim/mbyte.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 310ad05196..ac3fbe9acd 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1457,7 +1457,7 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints, size_t *codeunit { size_t count = 0, extra = 0; size_t clen; - for (size_t i = 0; i < len && s[i] != NUL; i += clen) { + for (size_t i = 0; i < len; i += clen) { clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i)); // NB: gets the byte value of invalid sequence bytes. // we only care whether the char fits in the BMP or not @@ -1479,7 +1479,7 @@ ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len, size_t index, bool us if (index == 0) { return 0; } - for (i = 0; i < len && s[i] != NUL; i += clen) { + for (i = 0; i < len; i += clen) { clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i)); // NB: gets the byte value of invalid sequence bytes. // we only care whether the char fits in the BMP or not -- cgit From 6d557e324fd4223fff3279a0112f40431c540163 Mon Sep 17 00:00:00 2001 From: dundargoc <33953936+dundargoc@users.noreply.github.com> Date: Sun, 18 Sep 2022 03:17:15 +0200 Subject: vim-patch:8.1.0941: macros for MS-Windows are inconsistent (#20215) Problem: Macros for MS-Windows are inconsistent, using "32", "3264 and others. Solution: Use MSWIN for all MS-Windows builds. Use FEAT_GUI_MSWIN for the GUI build. (Hirohito Higashi, closes vim/vim#3932) https://github.com/vim/vim/commit/4f97475d326c2773a78561fb874e4f23c25cbcd9 --- src/nvim/mbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 72f0cec235..33d652a51f 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1350,7 +1350,7 @@ static int utf_strnicmp(const char_u *s1, const char_u *s2, size_t n1, size_t n2 return n1 == 0 ? -1 : 1; } -#ifdef WIN32 +#ifdef MSWIN # ifndef CP_UTF8 # define CP_UTF8 65001 // magic number from winnls.h # endif -- cgit From 784e498c4a9c1f03266ced5ec3f55c3a6c94b80d Mon Sep 17 00:00:00 2001 From: dundargoc <33953936+dundargoc@users.noreply.github.com> Date: Fri, 21 Oct 2022 14:47:44 +0200 Subject: refactor: clang-tidy fixes to silence clangd warning (#20683) * refactor: readability-uppercase-literal-suffix * refactor: readability-named-parameter * refactor: bugprone-suspicious-string-compare * refactor: google-readability-casting * refactor: readability-redundant-control-flow * refactor: bugprone-too-small-loop-variable * refactor: readability-non-const-parameter * refactor: readability-avoid-const-params-in-decls * refactor: google-readability-todo * refactor: readability-inconsistent-declaration-parameter-name * refactor: bugprone-suspicious-missing-comma * refactor: remove noisy or slow warnings --- src/nvim/mbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 33d652a51f..ddcab37e34 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -2301,7 +2301,7 @@ void *my_iconv_open(char_u *to, char_u *from) // sequence and set "*unconvlenp" to the length of it. // Returns the converted string in allocated memory. NULL for an error. // If resultlenp is not NULL, sets it to the result length in bytes. -static char_u *iconv_string(const vimconv_T *const vcp, char_u *str, size_t slen, +static char_u *iconv_string(const vimconv_T *const vcp, const char_u *str, size_t slen, size_t *unconvlenp, size_t *resultlenp) { const char *from; -- cgit From b05d1943f063c382ea96b76d250877bc58297314 Mon Sep 17 00:00:00 2001 From: dundargoc <33953936+dundargoc@users.noreply.github.com> Date: Tue, 1 Nov 2022 15:39:49 +0100 Subject: build(lint): remove clint.py rules for braces #20880 Uncrustify is the source of truth where possible. Remove any redundant checks from clint.py. See also https://github.com/neovim/neovim/pull/18563 --- src/nvim/mbyte.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index ddcab37e34..9e34c7e413 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -136,8 +136,7 @@ const uint8_t utf8len_tab_zero[] = { // "iso-8859-n" is handled by enc_canonize() directly. static struct { const char *name; int prop; int codepage; } -enc_canon_table[] = -{ +enc_canon_table[] = { #define IDX_LATIN_1 0 { "latin1", ENC_8BIT + ENC_LATIN1, 1252 }, #define IDX_ISO_2 1 @@ -270,8 +269,7 @@ enc_canon_table[] = // Aliases for encoding names. static struct { const char *name; int canon; } -enc_alias_table[] = -{ +enc_alias_table[] = { { "ansi", IDX_LATIN_1 }, { "iso-8859-1", IDX_LATIN_1 }, { "latin2", IDX_ISO_2 }, @@ -1024,8 +1022,7 @@ bool utf_printable(int c) { // Sorted list of non-overlapping intervals. // 0xd800-0xdfff is reserved for UTF-16, actually illegal. - static struct interval nonprint[] = - { + static struct interval nonprint[] = { { 0x070f, 0x070f }, { 0x180b, 0x180e }, { 0x200b, 0x200f }, { 0x202a, 0x202e }, { 0x2060, 0x206f }, { 0xd800, 0xdfff }, { 0xfeff, 0xfeff }, { 0xfff9, 0xfffb }, { 0xfffe, 0xffff } -- cgit From 731cdde28ea8d48cc23ba2752a08c261c87eee92 Mon Sep 17 00:00:00 2001 From: dundargoc Date: Sat, 22 Oct 2022 12:36:38 +0200 Subject: refactor: fix clang-tidy warnings Enable and fix bugprone-misplaced-widening-cast warning. Fix some modernize-macro-to-enum and readability-else-after-return warnings, but don't enable them. While the warnings can be useful, they are in general too noisy to enable. --- src/nvim/mbyte.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 9e34c7e413..b4b2c4c7eb 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1186,9 +1186,8 @@ static int utf_convert(int a, const convertStruct *const table, size_t n_items) && a <= table[start].rangeEnd && (a - table[start].rangeStart) % table[start].step == 0) { return a + table[start].offset; - } else { - return a; } + return a; } // Return the folded-case equivalent of "a", which is a UCS-4 character. Uses -- cgit From bdb98de2d16ce7185a0f53740e06511904fdd814 Mon Sep 17 00:00:00 2001 From: Lewis Russell Date: Mon, 7 Nov 2022 10:21:44 +0000 Subject: refactor: more clint (#20910) --- src/nvim/mbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 9e34c7e413..14691741d8 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1550,7 +1550,7 @@ void show_utf8(void) } clen = utf_ptr2len((char *)line + i); } - sprintf((char *)IObuff + rlen, "%02x ", + sprintf((char *)IObuff + rlen, "%02x ", // NOLINT(runtime/printf) (line[i] == NL) ? NUL : line[i]); // NUL is stored as NL clen--; rlen += (int)strlen(IObuff + rlen); -- cgit From 66360675cf4d091b7460e4a8e1435c13216c1929 Mon Sep 17 00:00:00 2001 From: dundargoc Date: Sun, 11 Sep 2022 17:12:44 +0200 Subject: build: allow IWYU to fix includes for all .c files Allow Include What You Use to remove unnecessary includes and only include what is necessary. This helps with reducing compilation times and makes it easier to visualise which dependencies are actually required. Work on https://github.com/neovim/neovim/issues/549, but doesn't close it since this only works fully for .c files and not headers. --- src/nvim/mbyte.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 848b0f29d0..42b3ec0202 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -25,36 +25,55 @@ /// Vim scripts may contain an ":scriptencoding" command. This has an effect /// for some commands, like ":menutrans". -#include +#include +#include +#include +#include #include +#include +#include #include #include #include -#include "nvim/ascii.h" -#include "nvim/vim.h" -#ifdef HAVE_LOCALE_H -# include -#endif +#include "auto/config.h" #include "nvim/arabic.h" +#include "nvim/ascii.h" +#include "nvim/buffer_defs.h" #include "nvim/charset.h" #include "nvim/cursor.h" #include "nvim/drawscreen.h" -#include "nvim/eval.h" -#include "nvim/fileio.h" -#include "nvim/func_attr.h" +#include "nvim/eval/typval.h" +#include "nvim/eval/typval_defs.h" #include "nvim/getchar.h" +#include "nvim/gettext.h" +#include "nvim/globals.h" +#include "nvim/grid_defs.h" #include "nvim/iconv.h" +#include "nvim/keycodes.h" +#include "nvim/macros.h" #include "nvim/mark.h" #include "nvim/mbyte.h" +#include "nvim/mbyte_defs.h" #include "nvim/memline.h" #include "nvim/memory.h" #include "nvim/message.h" +#include "nvim/option_defs.h" #include "nvim/os/os.h" -#include "nvim/path.h" +#include "nvim/os/os_defs.h" +#include "nvim/pos.h" #include "nvim/screen.h" -#include "nvim/spell.h" #include "nvim/strings.h" +#include "nvim/types.h" +#include "nvim/vim.h" + +#ifdef HAVE_LOCALE_H +# include +#endif + +#ifdef __STDC_ISO_10646__ +# include +#endif typedef struct { int rangeStart; @@ -68,11 +87,12 @@ struct interval { long last; }; +// uncrustify:off #ifdef INCLUDE_GENERATED_DECLARATIONS # include "mbyte.c.generated.h" - # include "unicode_tables.generated.h" #endif +// uncrustify:on static char e_list_item_nr_is_not_list[] = N_("E1109: List item %d is not a List"); -- cgit From 51a48d482ea533415b4caa88e03adcd82f654f3a Mon Sep 17 00:00:00 2001 From: dundargoc Date: Thu, 17 Nov 2022 15:43:29 +0100 Subject: refactor: remove __STDC_ISO_10646__ check We can always assume wchar_t values are unicode codepoints for the systems we support, so this check isn't necessary. --- src/nvim/mbyte.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 42b3ec0202..1a30852e72 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -71,10 +71,6 @@ # include #endif -#ifdef __STDC_ISO_10646__ -# include -#endif - typedef struct { int rangeStart; int rangeEnd; @@ -1235,12 +1231,9 @@ int mb_toupper(int a) return TOUPPER_ASC(a); } -#if defined(__STDC_ISO_10646__) - // If towupper() is available and handles Unicode, use it. if (!(cmp_flags & CMP_INTERNAL)) { return (int)towupper((wint_t)a); } -#endif // For characters below 128 use locale sensitive toupper(). if (a < 128) { @@ -1266,12 +1259,9 @@ int mb_tolower(int a) return TOLOWER_ASC(a); } -#if defined(__STDC_ISO_10646__) - // If towlower() is available and handles Unicode, use it. if (!(cmp_flags & CMP_INTERNAL)) { return (int)towlower((wint_t)a); } -#endif // For characters below 128 use locale sensitive tolower(). if (a < 128) { -- cgit From bd22585061b66d7f71d4832b4a81e950b3c9d19d Mon Sep 17 00:00:00 2001 From: Dundar Göc Date: Fri, 26 Aug 2022 23:11:25 +0200 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 1a30852e72..c7734e45e7 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1987,7 +1987,7 @@ void mb_check_adjust_col(void *win_) // Column 0 is always valid. if (oldcol != 0) { char *p = ml_get_buf(win->w_buffer, win->w_cursor.lnum, false); - colnr_T len = (colnr_T)STRLEN(p); + colnr_T len = (colnr_T)strlen(p); // Empty line or invalid column? if (len == 0 || oldcol < 0) { @@ -2127,7 +2127,7 @@ char *enc_canonize(char *enc) } // copy "enc" to allocated memory, with room for two '-' - char *r = xmalloc(STRLEN(enc) + 3); + char *r = xmalloc(strlen(enc) + 3); // Make it all lower case and replace '_' with '-'. p = r; for (s = enc; *s != NUL; s++) { @@ -2492,7 +2492,7 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp size_t len; if (lenp == NULL) { - len = STRLEN(ptr); + len = strlen((char *)ptr); } else { len = *lenp; } -- cgit From 3b96ccf7d35be90e49029dec76344d3d92ad91dc Mon Sep 17 00:00:00 2001 From: dundargoc Date: Sat, 26 Nov 2022 18:57:46 +0100 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index c7734e45e7..f48955c904 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -367,15 +367,15 @@ static int enc_canon_search(const char *name) // Find canonical encoding "name" in the list and return its properties. // Returns 0 if not found. -int enc_canon_props(const char_u *name) +int enc_canon_props(const char *name) FUNC_ATTR_PURE { int i = enc_canon_search((char *)name); if (i >= 0) { return enc_canon_table[i].prop; - } else if (STRNCMP(name, "2byte-", 6) == 0) { + } else if (strncmp(name, "2byte-", 6) == 0) { return ENC_DBCS; - } else if (STRNCMP(name, "8bit-", 5) == 0 || STRNCMP(name, "iso-8859-", 9) == 0) { + } else if (strncmp(name, "8bit-", 5) == 0 || strncmp(name, "iso-8859-", 9) == 0) { return ENC_8BIT; } return 0; @@ -395,10 +395,10 @@ int bomb_size(void) if (*curbuf->b_p_fenc == NUL || strcmp(curbuf->b_p_fenc, "utf-8") == 0) { n = 3; - } else if (STRNCMP(curbuf->b_p_fenc, "ucs-2", 5) == 0 - || STRNCMP(curbuf->b_p_fenc, "utf-16", 6) == 0) { + } else if (strncmp(curbuf->b_p_fenc, "ucs-2", 5) == 0 + || strncmp(curbuf->b_p_fenc, "utf-16", 6) == 0) { n = 2; - } else if (STRNCMP(curbuf->b_p_fenc, "ucs-4", 5) == 0) { + } else if (strncmp(curbuf->b_p_fenc, "ucs-4", 5) == 0) { n = 4; } } @@ -1888,7 +1888,7 @@ void utf_find_illegal(void) char_u *tofree = NULL; vimconv.vc_type = CONV_NONE; - if (enc_canon_props((char_u *)curbuf->b_p_fenc) & ENC_8BIT) { + if (enc_canon_props(curbuf->b_p_fenc) & ENC_8BIT) { // 'encoding' is "utf-8" but we are editing a 8-bit encoded file, // possibly a utf-8 file with illegal bytes. Setup for conversion // from utf-8 to 'fileencoding'. @@ -2103,10 +2103,10 @@ const char *mb_unescape(const char **const pp) /// Skip the Vim specific head of a 'encoding' name. char *enc_skip(char *p) { - if (STRNCMP(p, "2byte-", 6) == 0) { + if (strncmp(p, "2byte-", 6) == 0) { return p + 6; } - if (STRNCMP(p, "8bit-", 5) == 0) { + if (strncmp(p, "8bit-", 5) == 0) { return p + 5; } return p; @@ -2143,24 +2143,24 @@ char *enc_canonize(char *enc) p = enc_skip(r); // Change "microsoft-cp" to "cp". Used in some spell files. - if (STRNCMP(p, "microsoft-cp", 12) == 0) { + if (strncmp(p, "microsoft-cp", 12) == 0) { STRMOVE(p, p + 10); } // "iso8859" -> "iso-8859" - if (STRNCMP(p, "iso8859", 7) == 0) { + if (strncmp(p, "iso8859", 7) == 0) { STRMOVE(p + 4, p + 3); p[3] = '-'; } // "iso-8859n" -> "iso-8859-n" - if (STRNCMP(p, "iso-8859", 8) == 0 && p[8] != '-') { + if (strncmp(p, "iso-8859", 8) == 0 && p[8] != '-') { STRMOVE(p + 9, p + 8); p[8] = '-'; } // "latin-N" -> "latinN" - if (STRNCMP(p, "latin-", 6) == 0) { + if (strncmp(p, "latin-", 6) == 0) { STRMOVE(p + 5, p + 6); } @@ -2423,8 +2423,8 @@ int convert_setup_ext(vimconv_T *vcp, char *from, bool from_unicode_is_utf8, cha return OK; } - from_prop = enc_canon_props((char_u *)from); - to_prop = enc_canon_props((char_u *)to); + from_prop = enc_canon_props(from); + to_prop = enc_canon_props(to); if (from_unicode_is_utf8) { from_is_utf8 = from_prop & ENC_UNICODE; } else { -- cgit From 08c2c7480619ccdf0c92fe6ce76da5b73b0e395b Mon Sep 17 00:00:00 2001 From: dundargoc Date: Sat, 26 Nov 2022 18:57:46 +0100 Subject: refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 90 +++++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 46 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index f48955c904..450b84dced 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -427,22 +427,22 @@ void remove_bom(char_u *s) int mb_get_class(const char_u *p) FUNC_ATTR_PURE { - return mb_get_class_tab(p, curbuf->b_chartab); + return mb_get_class_tab((char *)p, curbuf->b_chartab); } -int mb_get_class_tab(const char_u *p, const uint64_t *const chartab) +int mb_get_class_tab(const char *p, const uint64_t *const chartab) FUNC_ATTR_PURE { - if (MB_BYTE2LEN(p[0]) == 1) { + if (MB_BYTE2LEN((uint8_t)p[0]) == 1) { if (p[0] == NUL || ascii_iswhite(p[0])) { return 0; } - if (vim_iswordc_tab(p[0], chartab)) { + if (vim_iswordc_tab((uint8_t)p[0], chartab)) { return 2; } return 1; } - return utf_class_tab(utf_ptr2char((char *)p), chartab); + return utf_class_tab(utf_ptr2char(p), chartab); } // Return true if "c" is in "table". @@ -534,13 +534,13 @@ int utf_ptr2cells(const char *p) /// Like utf_ptr2cells(), but limit string length to "size". /// For an empty string or truncated character returns 1. -int utf_ptr2cells_len(const char_u *p, int size) +int utf_ptr2cells_len(const char *p, int size) { int c; // Need to convert to a wide character. - if (size > 0 && *p >= 0x80) { - if (utf_ptr2len_len(p, size) < utf8len_tab[*p]) { + if (size > 0 && (uint8_t)(*p) >= 0x80) { + if (utf_ptr2len_len((char_u *)p, size) < utf8len_tab[(uint8_t)(*p)]) { return 1; // truncated } c = utf_ptr2char((char *)p); @@ -599,7 +599,7 @@ size_t mb_string2cells_len(const char *str, size_t size) /// For an overlong sequence this may return zero. /// Does not include composing characters for obvious reasons. /// -/// @param[in] p String to convert. +/// @param[in] p_in String to convert. /// /// @return Unicode codepoint or byte value. int utf_ptr2char(const char *const p_in) @@ -744,26 +744,25 @@ bool utf_composinglike(const char *p1, const char *p2) /// space at least for #MAX_MCO + 1 elements. /// /// @return leading character. -int utfc_ptr2char(const char *p_in, int *pcc) +int utfc_ptr2char(const char *p, int *pcc) { - uint8_t *p = (uint8_t *)p_in; int i = 0; - int c = utf_ptr2char((char *)p); - int len = utf_ptr2len((char *)p); + int c = utf_ptr2char(p); + int len = utf_ptr2len(p); // Only accept a composing char when the first char isn't illegal. - if ((len > 1 || *p < 0x80) - && p[len] >= 0x80 - && utf_composinglike((char *)p, (char *)p + len)) { - int cc = utf_ptr2char((char *)p + len); + if ((len > 1 || (uint8_t)(*p) < 0x80) + && (uint8_t)p[len] >= 0x80 + && utf_composinglike(p, p + len)) { + int cc = utf_ptr2char(p + len); for (;;) { pcc[i++] = cc; if (i == MAX_MCO) { break; } - len += utf_ptr2len((char *)p + len); - if (p[len] < 0x80 || !utf_iscomposing(cc = utf_ptr2char((char *)p + len))) { + len += utf_ptr2len(p + len); + if ((uint8_t)p[len] < 0x80 || !utf_iscomposing(cc = utf_ptr2char(p + len))) { break; } } @@ -780,24 +779,24 @@ int utfc_ptr2char(const char *p_in, int *pcc) // composing characters. Use no more than p[maxlen]. // // @param [out] pcc: composing chars, last one is 0 -int utfc_ptr2char_len(const char_u *p, int *pcc, int maxlen) +int utfc_ptr2char_len(const char *p, int *pcc, int maxlen) { assert(maxlen > 0); int i = 0; - int len = utf_ptr2len_len(p, maxlen); + int len = utf_ptr2len_len((char_u *)p, maxlen); // Is it safe to use utf_ptr2char()? bool safe = len > 1 && len <= maxlen; - int c = safe ? utf_ptr2char((char *)p) : *p; + int c = safe ? utf_ptr2char(p) : (uint8_t)(*p); // Only accept a composing char when the first char isn't illegal. - if ((safe || c < 0x80) && len < maxlen && p[len] >= 0x80) { + if ((safe || c < 0x80) && len < maxlen && (uint8_t)p[len] >= 0x80) { for (; i < MAX_MCO; i++) { - int len_cc = utf_ptr2len_len(p + len, maxlen - len); + int len_cc = utf_ptr2len_len((char_u *)p + len, maxlen - len); safe = len_cc > 1 && len_cc <= maxlen - len; - if (!safe || (pcc[i] = utf_ptr2char((char *)p + len)) < 0x80 - || !(i == 0 ? utf_composinglike((char *)p, (char *)p + len) : utf_iscomposing(pcc[i]))) { + if (!safe || (pcc[i] = utf_ptr2char(p + len)) < 0x80 + || !(i == 0 ? utf_composinglike(p, p + len) : utf_iscomposing(pcc[i]))) { break; } len += len_cc; @@ -875,21 +874,20 @@ int utf_ptr2len_len(const char_u *p, int size) /// Return the number of bytes occupied by a UTF-8 character in a string. /// This includes following composing characters. /// Returns zero for NUL. -int utfc_ptr2len(const char *const p_in) +int utfc_ptr2len(const char *const p) FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { - uint8_t *p = (uint8_t *)p_in; - uint8_t b0 = *p; + uint8_t b0 = (uint8_t)(*p); if (b0 == NUL) { return 0; } - if (b0 < 0x80 && p[1] < 0x80) { // be quick for ASCII + if (b0 < 0x80 && (uint8_t)p[1] < 0x80) { // be quick for ASCII return 1; } // Skip over first UTF-8 char, stopping at a NUL byte. - int len = utf_ptr2len((char *)p); + int len = utf_ptr2len(p); // Check for illegal byte. if (len == 1 && b0 >= 0x80) { @@ -900,13 +898,13 @@ int utfc_ptr2len(const char *const p_in) // skip all of them (otherwise the cursor would get stuck). int prevlen = 0; for (;;) { - if (p[len] < 0x80 || !utf_composinglike((char *)p + prevlen, (char *)p + len)) { + if ((uint8_t)p[len] < 0x80 || !utf_composinglike(p + prevlen, p + len)) { return len; } // Skip over composing char. prevlen = len; - len += utf_ptr2len((char *)p + len); + len += utf_ptr2len(p + len); } } @@ -1786,11 +1784,12 @@ void mb_copy_char(const char **const fp, char **const tp) *fp += l; } -/// Return the offset from "p" to the first byte of a character. When "p" is +/// Return the offset from "p_in" to the first byte of a character. When "p_in" is /// at the start of a character 0 is returned, otherwise the offset to the next /// character. Can start anywhere in a stream of bytes. -int mb_off_next(const char_u *base, const char_u *p) +int mb_off_next(const char_u *base, const char *p_in) { + const uint8_t *p = (uint8_t *)p_in; int i; int j; @@ -1882,7 +1881,7 @@ int utf_cp_head_off(const char_u *base, const char_u *p) void utf_find_illegal(void) { pos_T pos = curwin->w_cursor; - char_u *p; + char *p; int len; vimconv_T vimconv; char_u *tofree = NULL; @@ -1897,30 +1896,29 @@ void utf_find_illegal(void) curwin->w_cursor.coladd = 0; for (;;) { - p = (char_u *)get_cursor_pos_ptr(); + p = get_cursor_pos_ptr(); if (vimconv.vc_type != CONV_NONE) { xfree(tofree); - tofree = (char_u *)string_convert(&vimconv, (char *)p, NULL); + tofree = (char_u *)string_convert(&vimconv, p, NULL); if (tofree == NULL) { break; } - p = tofree; + p = (char *)tofree; } while (*p != NUL) { // Illegal means that there are not enough trail bytes (checked by // utf_ptr2len()) or too many of them (overlong sequence). - len = utf_ptr2len((char *)p); - if (*p >= 0x80 && (len == 1 - || utf_char2len(utf_ptr2char((char *)p)) != len)) { + len = utf_ptr2len(p); + if ((uint8_t)(*p) >= 0x80 && (len == 1 || utf_char2len(utf_ptr2char(p)) != len)) { if (vimconv.vc_type == CONV_NONE) { - curwin->w_cursor.col += (colnr_T)(p - (char_u *)get_cursor_pos_ptr()); + curwin->w_cursor.col += (colnr_T)(p - get_cursor_pos_ptr()); } else { int l; - len = (int)(p - tofree); - for (p = (char_u *)get_cursor_pos_ptr(); *p != NUL && len-- > 0; p += l) { - l = utf_ptr2len((char *)p); + len = (int)(p - (char *)tofree); + for (p = get_cursor_pos_ptr(); *p != NUL && len-- > 0; p += l) { + l = utf_ptr2len(p); curwin->w_cursor.col += l; } } -- cgit From 50f03773f4b9f4638489ccfd0503dc9e39e5de78 Mon Sep 17 00:00:00 2001 From: dundargoc <33953936+dundargoc@users.noreply.github.com> Date: Mon, 9 Jan 2023 15:37:34 +0100 Subject: refactor: replace char_u with char 18 (#21237) refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 450b84dced..e1a870071c 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1557,7 +1557,7 @@ void show_utf8(void) } clen = utf_ptr2len((char *)line + i); } - sprintf((char *)IObuff + rlen, "%02x ", // NOLINT(runtime/printf) + sprintf(IObuff + rlen, "%02x ", // NOLINT(runtime/printf) (line[i] == NL) ? NUL : line[i]); // NUL is stored as NL clen--; rlen += (int)strlen(IObuff + rlen); @@ -1566,7 +1566,7 @@ void show_utf8(void) } } - msg((char *)IObuff); + msg(IObuff); } /// Return offset from "p" to the start of a character, including composing characters. -- cgit From e89c39d6f016a4140293755250e968e839009617 Mon Sep 17 00:00:00 2001 From: dundargoc <33953936+dundargoc@users.noreply.github.com> Date: Sat, 14 Jan 2023 08:58:28 +0100 Subject: refactor: replace char_u with char 21 (#21779) refactor: replace char_u with char Work on https://github.com/neovim/neovim/issues/459 --- src/nvim/mbyte.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/nvim/mbyte.c') diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index e1a870071c..93ac0fccfa 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -424,10 +424,10 @@ void remove_bom(char_u *s) // 1 for punctuation // 2 for an (ASCII) word character // >2 for other word characters -int mb_get_class(const char_u *p) +int mb_get_class(const char *p) FUNC_ATTR_PURE { - return mb_get_class_tab((char *)p, curbuf->b_chartab); + return mb_get_class_tab(p, curbuf->b_chartab); } int mb_get_class_tab(const char *p, const uint64_t *const chartab) @@ -1456,16 +1456,16 @@ int utf16_to_utf8(const wchar_t *utf16, int utf16len, char **utf8) /// @param len maximum length (an earlier NUL terminates) /// @param[out] codepoints incremented with UTF-32 code point size /// @param[out] codeunits incremented with UTF-16 code unit size -void mb_utflen(const char_u *s, size_t len, size_t *codepoints, size_t *codeunits) +void mb_utflen(const char *s, size_t len, size_t *codepoints, size_t *codeunits) FUNC_ATTR_NONNULL_ALL { size_t count = 0, extra = 0; size_t clen; for (size_t i = 0; i < len; i += clen) { - clen = (size_t)utf_ptr2len_len(s + i, (int)(len - i)); + clen = (size_t)utf_ptr2len_len((char_u *)s + i, (int)(len - i)); // NB: gets the byte value of invalid sequence bytes. // we only care whether the char fits in the BMP or not - int c = (clen > 1) ? utf_ptr2char((char *)s + i) : s[i]; + int c = (clen > 1) ? utf_ptr2char(s + i) : (uint8_t)s[i]; count++; if (c > 0xFFFF) { extra++; @@ -2012,7 +2012,7 @@ void mb_check_adjust_col(void *win_) /// @param line start of the string /// /// @return a pointer to the character before "*p", if there is one. -char_u *mb_prevptr(char_u *line, char_u *p) +char *mb_prevptr(char *line, char *p) { if (p > line) { MB_PTR_BACK(line, p); @@ -2022,9 +2022,9 @@ char_u *mb_prevptr(char_u *line, char_u *p) /// Return the character length of "str". Each multi-byte character (with /// following composing characters) counts as one. -int mb_charlen(const char_u *str) +int mb_charlen(const char *str) { - const char_u *p = str; + const char_u *p = (char_u *)str; int count; if (p == NULL) { @@ -2801,5 +2801,5 @@ void f_charclass(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) || argvars[0].vval.v_string == NULL) { return; } - rettv->vval.v_number = mb_get_class((const char_u *)argvars[0].vval.v_string); + rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string); } -- cgit