aboutsummaryrefslogtreecommitdiff
path: root/src/tree_sitter/unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/tree_sitter/unicode.h')
-rw-r--r--src/tree_sitter/unicode.h50
1 files changed, 50 insertions, 0 deletions
diff --git a/src/tree_sitter/unicode.h b/src/tree_sitter/unicode.h
new file mode 100644
index 0000000000..0fba56a612
--- /dev/null
+++ b/src/tree_sitter/unicode.h
@@ -0,0 +1,50 @@
+#ifndef TREE_SITTER_UNICODE_H_
+#define TREE_SITTER_UNICODE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+#include <stdint.h>
+
+#define U_EXPORT
+#define U_EXPORT2
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+static const int32_t TS_DECODE_ERROR = U_SENTINEL;
+
+// These functions read one unicode code point from the given string,
+// returning the number of bytes consumed.
+typedef uint32_t (*UnicodeDecodeFunction)(
+ const uint8_t *string,
+ uint32_t length,
+ int32_t *code_point
+);
+
+static inline uint32_t ts_decode_utf8(
+ const uint8_t *string,
+ uint32_t length,
+ int32_t *code_point
+) {
+ uint32_t i = 0;
+ U8_NEXT(string, i, length, *code_point);
+ return i;
+}
+
+static inline uint32_t ts_decode_utf16(
+ const uint8_t *string,
+ uint32_t length,
+ int32_t *code_point
+) {
+ uint32_t i = 0;
+ U16_NEXT(((uint16_t *)string), i, length, *code_point);
+ return i * 2;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_UNICODE_H_