diff options
Diffstat (limited to 'src/tree_sitter/unicode.h')
-rw-r--r-- | src/tree_sitter/unicode.h | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/src/tree_sitter/unicode.h b/src/tree_sitter/unicode.h new file mode 100644 index 0000000000..0fba56a612 --- /dev/null +++ b/src/tree_sitter/unicode.h @@ -0,0 +1,50 @@ +#ifndef TREE_SITTER_UNICODE_H_ +#define TREE_SITTER_UNICODE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <limits.h> +#include <stdint.h> + +#define U_EXPORT +#define U_EXPORT2 +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +static const int32_t TS_DECODE_ERROR = U_SENTINEL; + +// These functions read one unicode code point from the given string, +// returning the number of bytes consumed. +typedef uint32_t (*UnicodeDecodeFunction)( + const uint8_t *string, + uint32_t length, + int32_t *code_point +); + +static inline uint32_t ts_decode_utf8( + const uint8_t *string, + uint32_t length, + int32_t *code_point +) { + uint32_t i = 0; + U8_NEXT(string, i, length, *code_point); + return i; +} + +static inline uint32_t ts_decode_utf16( + const uint8_t *string, + uint32_t length, + int32_t *code_point +) { + uint32_t i = 0; + U16_NEXT(((uint16_t *)string), i, length, *code_point); + return i * 2; +} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_UNICODE_H_ |