diff options
Diffstat (limited to 'src/nvim/mbyte.h')
-rw-r--r-- | src/nvim/mbyte.h | 49 |
1 files changed, 17 insertions, 32 deletions
diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h index ddac040aae..2da051fca2 100644 --- a/src/nvim/mbyte.h +++ b/src/nvim/mbyte.h @@ -3,17 +3,21 @@ #include <stdbool.h> #include <stdint.h> #include <sys/types.h> // IWYU pragma: keep +#include <utf8proc.h> #include <uv.h> // IWYU pragma: keep #include "nvim/cmdexpand_defs.h" // IWYU pragma: keep #include "nvim/eval/typval_defs.h" // IWYU pragma: keep -#include "nvim/func_attr.h" #include "nvim/macros_defs.h" #include "nvim/mbyte_defs.h" // IWYU pragma: keep #include "nvim/types_defs.h" // IWYU pragma: keep +typedef utf8proc_int32_t GraphemeState; +#define GRAPHEME_STATE_INIT 0 + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "mbyte.h.generated.h" +# include "mbyte.h.inline.generated.h" #endif enum { @@ -53,18 +57,14 @@ extern const uint8_t utf8len_tab[256]; (p -= utf_head_off((char *)(s), (char *)(p) - 1) + 1) /// Check whether a given UTF-8 byte is a trailing byte (10xx.xxxx). -static inline bool utf_is_trail_byte(uint8_t byte) - REAL_FATTR_CONST REAL_FATTR_ALWAYS_INLINE; static inline bool utf_is_trail_byte(uint8_t const byte) + FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE { // uint8_t is for clang to use smaller cmp return (uint8_t)(byte & 0xC0U) == 0x80U; } -static inline CharInfo utf_ptr2CharInfo(char const *p_in) - REAL_FATTR_NONNULL_ALL REAL_FATTR_PURE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_ALWAYS_INLINE; - /// Convert a UTF-8 byte sequence to a Unicode code point. /// Handles ascii, multibyte sequences and illegal sequences. /// @@ -73,6 +73,7 @@ static inline CharInfo utf_ptr2CharInfo(char const *p_in) /// @return information about the character. When the sequence is illegal, /// "value" is negative, "len" is 1. 
static inline CharInfo utf_ptr2CharInfo(char const *const p_in) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE { uint8_t const *const p = (uint8_t const *)p_in; uint8_t const first = *p; @@ -88,43 +89,27 @@ static inline CharInfo utf_ptr2CharInfo(char const *const p_in) } } -static inline StrCharInfo utfc_next(StrCharInfo cur) - REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE REAL_FATTR_PURE; - /// Return information about the next character. /// Composing and combining characters are considered a part of the current character. /// /// @param[in] cur Information about the current character in the string. static inline StrCharInfo utfc_next(StrCharInfo cur) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_PURE { - int32_t prev_code = cur.chr.value; + // handle ASCII case inline uint8_t *next = (uint8_t *)(cur.ptr + cur.chr.len); - - while (true) { - if (EXPECT(*next < 0x80U, true)) { - return (StrCharInfo){ - .ptr = (char *)next, - .chr = (CharInfo){ .value = *next, .len = 1 }, - }; - } - uint8_t const next_len = utf8len_tab[*next]; - int32_t const next_code = utf_ptr2CharInfo_impl(next, (uintptr_t)next_len); - if (!utf_char_composinglike(prev_code, next_code)) { - return (StrCharInfo){ - .ptr = (char *)next, - .chr = (CharInfo){ .value = next_code, .len = (next_code < 0 ? 1 : next_len) }, - }; - } - - prev_code = next_code; - next += next_len; + if (EXPECT(*next < 0x80U, true)) { + return (StrCharInfo){ + .ptr = (char *)next, + .chr = (CharInfo){ .value = *next, .len = 1 }, + }; } -} -static inline StrCharInfo utf_ptr2StrCharInfo(char *ptr) - REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE REAL_FATTR_PURE; + return utfc_next_impl(cur); +} static inline StrCharInfo utf_ptr2StrCharInfo(char *ptr) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_PURE { return (StrCharInfo){ .ptr = ptr, .chr = utf_ptr2CharInfo(ptr) }; } |