1 files changed, 17 insertions, 32 deletions
diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h
index ddac040aae..2da051fca2 100644
--- a/src/nvim/mbyte.h
+++ b/src/nvim/mbyte.h
@@ -3,17 +3,21 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <sys/types.h>  // IWYU pragma: keep
+#include <utf8proc.h>
 #include <uv.h>  // IWYU pragma: keep
 
 #include "nvim/cmdexpand_defs.h"  // IWYU pragma: keep
 #include "nvim/eval/typval_defs.h"  // IWYU pragma: keep
-#include "nvim/func_attr.h"
 #include "nvim/macros_defs.h"
 #include "nvim/mbyte_defs.h"  // IWYU pragma: keep
 #include "nvim/types_defs.h"  // IWYU pragma: keep
 
+typedef utf8proc_int32_t GraphemeState;
+#define GRAPHEME_STATE_INIT 0
+
 #ifdef INCLUDE_GENERATED_DECLARATIONS
 # include "mbyte.h.generated.h"
+# include "mbyte.h.inline.generated.h"
 #endif
 
 enum {
@@ -53,18 +57,14 @@ extern const uint8_t utf8len_tab[256];
   (p -= utf_head_off((char *)(s), (char *)(p) - 1) + 1)
 
 /// Check whether a given UTF-8 byte is a trailing byte (10xx.xxxx).
-static inline bool utf_is_trail_byte(uint8_t byte)
-  REAL_FATTR_CONST REAL_FATTR_ALWAYS_INLINE;
 
 static inline bool utf_is_trail_byte(uint8_t const byte)
+  FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE
 {
   // uint8_t is for clang to use smaller cmp
   return (uint8_t)(byte & 0xC0U) == 0x80U;
 }
 
-static inline CharInfo utf_ptr2CharInfo(char const *p_in)
-  REAL_FATTR_NONNULL_ALL REAL_FATTR_PURE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_ALWAYS_INLINE;
-
 /// Convert a UTF-8 byte sequence to a Unicode code point.
-/// Handles ascii, multibyte sequiences and illegal sequences.
+/// Handles ascii, multibyte sequences and illegal sequences.
 ///
@@ -73,6 +73,7 @@ static inline CharInfo utf_ptr2CharInfo(char const *p_in)
-/// @return information abouth the character. When the sequence is illegal,
+/// @return information about the character. When the sequence is illegal,
 /// "value" is negative, "len" is 1.
 static inline CharInfo utf_ptr2CharInfo(char const *const p_in)
+  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
 {
   uint8_t const *const p = (uint8_t const *)p_in;
   uint8_t const first = *p;
@@ -88,43 +89,27 @@ static inline CharInfo utf_ptr2CharInfo(char const *const p_in)
   }
 }
 
-static inline StrCharInfo utfc_next(StrCharInfo cur)
-  REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE REAL_FATTR_PURE;
-
 /// Return information about the next character.
 /// Composing and combining characters are considered a part of the current character.
 ///
 /// @param[in] cur  Information about the current character in the string.
 static inline StrCharInfo utfc_next(StrCharInfo cur)
+  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_PURE
 {
-  int32_t prev_code = cur.chr.value;
+  // handle ASCII case inline
   uint8_t *next = (uint8_t *)(cur.ptr + cur.chr.len);
-
-  while (true) {
-    if (EXPECT(*next < 0x80U, true)) {
-      return (StrCharInfo){
-        .ptr = (char *)next,
-        .chr = (CharInfo){ .value = *next, .len = 1 },
-      };
-    }
-    uint8_t const next_len = utf8len_tab[*next];
-    int32_t const next_code = utf_ptr2CharInfo_impl(next, (uintptr_t)next_len);
-    if (!utf_char_composinglike(prev_code, next_code)) {
-      return (StrCharInfo){
-        .ptr = (char *)next,
-        .chr = (CharInfo){ .value = next_code, .len = (next_code < 0 ? 1 : next_len) },
-      };
-    }
-
-    prev_code = next_code;
-    next += next_len;
+  if (EXPECT(*next < 0x80U, true)) {
+    return (StrCharInfo){
+      .ptr = (char *)next,
+      .chr = (CharInfo){ .value = *next, .len = 1 },
+    };
   }
-}
 
-static inline StrCharInfo utf_ptr2StrCharInfo(char *ptr)
-  REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE REAL_FATTR_PURE;
+  return utfc_next_impl(cur);
+}
 
 static inline StrCharInfo utf_ptr2StrCharInfo(char *ptr)
+  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_PURE
 {
   return (StrCharInfo){ .ptr = ptr, .chr = utf_ptr2CharInfo(ptr) };
 }