aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/nvim/CMakeLists.txt15
-rw-r--r--src/nvim/lua/executor.c29
-rw-r--r--src/nvim/lua/treesitter.c652
-rw-r--r--src/nvim/lua/treesitter.h14
-rw-r--r--src/nvim/lua/vim.lua3
-rw-r--r--src/tree_sitter/LICENSE21
-rw-r--r--src/tree_sitter/alloc.h94
-rw-r--r--src/tree_sitter/api.h660
-rw-r--r--src/tree_sitter/array.h142
-rw-r--r--src/tree_sitter/atomic.h42
-rw-r--r--src/tree_sitter/clock.h141
-rw-r--r--src/tree_sitter/error_costs.h11
-rw-r--r--src/tree_sitter/get_changed_ranges.c482
-rw-r--r--src/tree_sitter/get_changed_ranges.h36
-rw-r--r--src/tree_sitter/language.c107
-rw-r--r--src/tree_sitter/language.h138
-rw-r--r--src/tree_sitter/length.h44
-rw-r--r--src/tree_sitter/lexer.c322
-rw-r--r--src/tree_sitter/lexer.h48
-rw-r--r--src/tree_sitter/lib.c20
-rw-r--r--src/tree_sitter/node.c673
-rw-r--r--src/tree_sitter/parser.c1887
-rw-r--r--src/tree_sitter/parser.h220
-rw-r--r--src/tree_sitter/point.h53
-rw-r--r--src/tree_sitter/reduce_action.h34
-rw-r--r--src/tree_sitter/reusable_node.h88
-rw-r--r--src/tree_sitter/stack.c846
-rw-r--r--src/tree_sitter/stack.h135
-rw-r--r--src/tree_sitter/subtree.c996
-rw-r--r--src/tree_sitter/subtree.h281
-rw-r--r--src/tree_sitter/tree.c149
-rw-r--r--src/tree_sitter/tree.h34
-rw-r--r--src/tree_sitter/tree_cursor.c302
-rw-r--r--src/tree_sitter/tree_cursor.h20
-rw-r--r--src/tree_sitter/utf16.c33
-rw-r--r--src/tree_sitter/utf16.h21
36 files changed, 8790 insertions, 3 deletions
diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt
index aa8100873b..27977e3a40 100644
--- a/src/nvim/CMakeLists.txt
+++ b/src/nvim/CMakeLists.txt
@@ -85,6 +85,10 @@ file(GLOB NVIM_HEADERS *.h)
file(GLOB XDIFF_SOURCES xdiff/*.c)
file(GLOB XDIFF_HEADERS xdiff/*.h)
+# Vendored tree-sitter runtime: collect every C file in ../tree_sitter, then
+# drop lib.c from the list so it is not compiled next to the other sources.
+# NOTE(review): presumably lib.c is an amalgamation that #includes the other
+# .c files (hence the variable name) -- confirm.
+file(GLOB TREESITTER_SOURCES ../tree_sitter/*.c)
+file(GLOB TS_SOURCE_AMALGAM ../tree_sitter/lib.c)
+list(REMOVE_ITEM TREESITTER_SOURCES ${TS_SOURCE_AMALGAM})
+
foreach(subdir
os
api
@@ -141,6 +145,7 @@ set(CONV_SOURCES
ex_cmds.c
ex_docmd.c
fileio.c
+ lua/treesitter.c
mbyte.c
memline.c
message.c
@@ -172,6 +177,9 @@ if(NOT MSVC)
set_source_files_properties(
eval.c PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -Wno-conversion")
endif()
+
+ # tree-sitter: inlined external project, we don't maintain it. #10124
+ set_source_files_properties(${TREESITTER_SOURCES} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -Wno-conversion -Wno-pedantic -Wno-shadow -Wno-missing-prototypes -Wno-unused-variable")
endif()
if(NOT "${MIN_LOG_LEVEL}" MATCHES "^$")
@@ -395,6 +403,7 @@ list(APPEND NVIM_LINK_LIBRARIES
${LIBVTERM_LIBRARIES}
${LIBTERMKEY_LIBRARIES}
${UNIBILIUM_LIBRARIES}
+ ${UTF8PROC_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT}
)
@@ -414,7 +423,7 @@ endif()
add_executable(nvim ${NVIM_GENERATED_FOR_SOURCES} ${NVIM_GENERATED_FOR_HEADERS}
${NVIM_GENERATED_SOURCES} ${NVIM_SOURCES} ${NVIM_HEADERS}
- ${XDIFF_SOURCES} ${XDIFF_HEADERS})
+ ${XDIFF_SOURCES} ${XDIFF_HEADERS} ${TREESITTER_SOURCES})
target_link_libraries(nvim ${NVIM_EXEC_LINK_LIBRARIES})
install_helper(TARGETS nvim)
@@ -500,7 +509,7 @@ add_library(
EXCLUDE_FROM_ALL
${NVIM_SOURCES} ${NVIM_GENERATED_SOURCES}
${NVIM_HEADERS} ${NVIM_GENERATED_FOR_SOURCES} ${NVIM_GENERATED_FOR_HEADERS}
- ${XDIFF_SOURCES} ${XDIFF_HEADERS}
+ ${XDIFF_SOURCES} ${XDIFF_HEADERS} ${TREESITTER_SOURCES}
)
set_property(TARGET libnvim APPEND PROPERTY
INCLUDE_DIRECTORIES ${LUA_PREFERRED_INCLUDE_DIRS})
@@ -525,7 +534,7 @@ else()
EXCLUDE_FROM_ALL
${NVIM_SOURCES} ${NVIM_GENERATED_SOURCES}
${NVIM_HEADERS} ${NVIM_GENERATED_FOR_SOURCES} ${NVIM_GENERATED_FOR_HEADERS}
- ${XDIFF_SOURCES} ${XDIFF_HEADERS}
+ ${XDIFF_SOURCES} ${XDIFF_HEADERS} ${TREESITTER_SOURCES}
${UNIT_TEST_FIXTURES}
)
target_link_libraries(nvim-test ${NVIM_TEST_LINK_LIBRARIES})
diff --git a/src/nvim/lua/executor.c b/src/nvim/lua/executor.c
index f51aa3c6d4..127458fe39 100644
--- a/src/nvim/lua/executor.c
+++ b/src/nvim/lua/executor.c
@@ -31,6 +31,7 @@
#include "nvim/lua/executor.h"
#include "nvim/lua/converter.h"
+#include "nvim/lua/treesitter.h"
#include "luv/luv.h"
@@ -310,7 +311,11 @@ static int nlua_state_init(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
lua_setfield(lstate, -2, "luv");
lua_pop(lstate, 3);
+ // internal vim._treesitter... API
+ nlua_add_treesitter(lstate);
+
lua_setglobal(lstate, "vim");
+
return 0;
}
@@ -816,3 +821,27 @@ void ex_luafile(exarg_T *const eap)
return;
}
}
+
+/// Lua entry point: create a parser userdata for a registered language.
+///
+/// @param L  Lua state; argument 1 must be a string naming the language.
+/// @return the result of tslua_push_parser(); raises the Lua error
+///         "string expected" when argument 1 is missing or not a string.
+static int create_tslua_parser(lua_State *L)
+{
+  const bool have_name = lua_gettop(L) >= 1 && lua_isstring(L, 1);
+  if (!have_name) {
+    return luaL_error(L, "string expected");
+  }
+
+  return tslua_push_parser(L, lua_tostring(L, 1));
+}
+
+/// Install the internal tree-sitter entry points on the table that is at the
+/// top of the Lua stack.
+///
+/// Initializes the tslua library first, then sets the fields
+/// `_create_ts_parser`, `_ts_add_language` and `_ts_inspect_language`.
+static void nlua_add_treesitter(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
+{
+  static const luaL_Reg entry_points[] = {
+    { "_create_ts_parser", create_tslua_parser },
+    { "_ts_add_language", tslua_register_lang },
+    { "_ts_inspect_language", tslua_inspect_lang },
+  };
+  const size_t count = sizeof(entry_points) / sizeof(entry_points[0]);
+
+  tslua_init(lstate);
+
+  for (size_t idx = 0; idx < count; idx++) {
+    lua_pushcfunction(lstate, entry_points[idx].func);
+    lua_setfield(lstate, -2, entry_points[idx].name);
+  }
+}
diff --git a/src/nvim/lua/treesitter.c b/src/nvim/lua/treesitter.c
new file mode 100644
index 0000000000..d2072402bb
--- /dev/null
+++ b/src/nvim/lua/treesitter.c
@@ -0,0 +1,652 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check
+// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
+// lua bindings for tree-sitter.
+// NB: this file mostly contains a generic lua interface for tree-sitter
+// trees and nodes, and could be broken out as a reusable lua package
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include <lua.h>
+#include <lualib.h>
+#include <lauxlib.h>
+
+#include "tree_sitter/api.h"
+
+#include "nvim/lua/treesitter.h"
+#include "nvim/api/private/handle.h"
+#include "nvim/memline.h"
+
+/// Payload of the "treesitter_parser" userdata.
+///
+/// The `__gc` metamethod deletes `parser` and, when it is set, `tree`.
+/// Trees handed out to Lua are copies of `tree`, never `tree` itself.
+typedef struct {
+ TSParser *parser;
+ TSTree *tree; // internal tree, used for editing/reparsing
+} TSLua_parser;
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "lua/treesitter.c.generated.h"
+#endif
+
+/// Metamethods and methods of the "treesitter_parser" userdata.
+/// Terminated by a { NULL, NULL } sentinel; installed by tslua_init().
+static struct luaL_Reg parser_meta[] = {
+ { "__gc", parser_gc },
+ { "__tostring", parser_tostring },
+ { "parse_buf", parser_parse_buf },
+ { "edit", parser_edit },
+ { "tree", parser_tree },
+ { NULL, NULL }
+};
+
+/// Metamethods and methods of the "treesitter_tree" userdata.
+/// Terminated by a { NULL, NULL } sentinel; installed by tslua_init().
+static struct luaL_Reg tree_meta[] = {
+ { "__gc", tree_gc },
+ { "__tostring", tree_tostring },
+ { "root", tree_root },
+ { NULL, NULL }
+};
+
+/// Metamethods and methods of the "treesitter_node" userdata.
+/// Terminated by a { NULL, NULL } sentinel; installed by tslua_init().
+/// Note that `__len` and `child_count` share one implementation, and that the
+/// end position is exposed as `end_` (`end` is a reserved word in Lua).
+static struct luaL_Reg node_meta[] = {
+ { "__tostring", node_tostring },
+ { "__eq", node_eq },
+ { "__len", node_child_count },
+ { "range", node_range },
+ { "start", node_start },
+ { "end_", node_end },
+ { "type", node_type },
+ { "symbol", node_symbol },
+ { "named", node_named },
+ { "missing", node_missing },
+ { "has_error", node_has_error },
+ { "sexpr", node_sexpr },
+ { "child_count", node_child_count },
+ { "named_child_count", node_named_child_count },
+ { "child", node_child },
+ { "named_child", node_named_child },
+ { "descendant_for_range", node_descendant_for_range },
+ { "named_descendant_for_range", node_named_descendant_for_range },
+ { "parent", node_parent },
+ { NULL, NULL }
+};
+
+/// Registered languages: maps a (heap-copied) language name to its TSLanguage.
+/// Created by tslua_init(), filled by tslua_register_lang().
+static PMap(cstr_t) *langs;
+
+/// Create the registry metatable `tname` and fill it from `meta`.
+///
+/// Every entry of the NULL-terminated `meta` array becomes a C function field
+/// and the metatable is made its own `__index`. Nothing is added when a
+/// metatable named `tname` already exists. The stack is balanced on return.
+static void build_meta(lua_State *L, const char *tname, const luaL_Reg *meta)
+{
+  const int created = luaL_newmetatable(L, tname);  // [meta]
+  if (created) {
+    for (const luaL_Reg *entry = meta; entry->name != NULL; entry++) {
+      lua_pushcfunction(L, entry->func);  // [meta, func]
+      lua_setfield(L, -2, entry->name);  // [meta]
+    }
+
+    // methods are looked up on the metatable itself
+    lua_pushvalue(L, -1);  // [meta, meta]
+    lua_setfield(L, -2, "__index");  // [meta]
+  }
+  lua_pop(L, 1);  // [] (the metatable is fetched from the registry later)
+}
+
+/// init the tslua library
+///
+/// Allocates the map of registered languages (a file-level static) and
+/// creates the metatables of the parser, tree and node userdata types, which
+/// are stored in the registry of the lua_State.
+void tslua_init(lua_State *L)
+{
+ langs = pmap_new(cstr_t)();
+
+ // type metatables
+ build_meta(L, "treesitter_parser", parser_meta);
+ build_meta(L, "treesitter_tree", tree_meta);
+ build_meta(L, "treesitter_node", node_meta);
+}
+
+/// Load a tree-sitter language from a shared library and register it.
+///
+/// Lua arguments: (path, lang_name), both strings. The library at `path` must
+/// export a function named `tree_sitter_<lang_name>` returning the language.
+///
+/// @return no values when `lang_name` is already registered, otherwise pushes
+///         `true`. Raises a Lua error when the arguments are not strings, when
+///         the library or the symbol cannot be loaded, or when the language
+///         function returns NULL.
+int tslua_register_lang(lua_State *L)
+{
+  if (lua_gettop(L) < 2 || !lua_isstring(L, 1) || !lua_isstring(L, 2)) {
+    return luaL_error(L, "string expected");
+  }
+
+  const char *path = lua_tostring(L, 1);
+  const char *lang_name = lua_tostring(L, 2);
+
+  if (pmap_has(cstr_t)(langs, lang_name)) {
+    return 0;
+  }
+
+#define BUFSIZE 128
+  char symbol_buf[BUFSIZE];
+  snprintf(symbol_buf, BUFSIZE, "tree_sitter_%s", lang_name);
+#undef BUFSIZE
+
+  uv_lib_t lib;
+  if (uv_dlopen(path, &lib)) {
+    snprintf((char *)IObuff, IOSIZE, "Failed to load parser: uv_dlopen: %s",
+             uv_dlerror(&lib));
+    uv_dlclose(&lib);
+    lua_pushstring(L, (char *)IObuff);
+    return lua_error(L);
+  }
+
+  TSLanguage *(*lang_parser)(void);
+  if (uv_dlsym(&lib, symbol_buf, (void **)&lang_parser)) {
+    snprintf((char *)IObuff, IOSIZE, "Failed to load parser: uv_dlsym: %s",
+             uv_dlerror(&lib));
+    uv_dlclose(&lib);
+    lua_pushstring(L, (char *)IObuff);
+    return lua_error(L);
+  }
+
+  TSLanguage *lang = lang_parser();
+  if (lang == NULL) {
+    // Nothing references the library in this case: release the handle before
+    // luaL_error() unwinds, like the other failure paths do.
+    uv_dlclose(&lib);
+    return luaL_error(L, "Failed to load parser: internal error");
+  }
+
+  // On success the library is deliberately left open: `lang` points into it.
+  pmap_put(cstr_t)(langs, xstrdup(lang_name), lang);
+
+  lua_pushboolean(L, true);
+  return 1;
+}
+
+/// Describe a registered language.
+///
+/// Lua argument: the language name (string). Pushes a table with two fields:
+///  - `symbols`: symbol id -> { name, is_regular } for every symbol that is
+///    not of the auxiliary type;
+///  - `fields`: field id -> field name.
+///
+/// Raises a Lua error when the argument is not a string or the language has
+/// not been registered.
+int tslua_inspect_lang(lua_State *L)
+{
+  if (lua_gettop(L) < 1 || !lua_isstring(L, 1)) {
+    return luaL_error(L, "string expected");
+  }
+  const char *lang_name = lua_tostring(L, 1);
+
+  TSLanguage *lang = pmap_get(cstr_t)(langs, lang_name);
+  if (!lang) {
+    return luaL_error(L, "no such language: %s", lang_name);
+  }
+
+  lua_createtable(L, 0, 2);  // [retval]
+
+  size_t nsymbols = (size_t)ts_language_symbol_count(lang);
+
+  // Symbol ids start at 0, so id 0 lands in the hash part (nrec = 1). The
+  // array size hint must not wrap around when the language has no symbols.
+  int symbols_narr = nsymbols > 0 ? (int)(nsymbols - 1) : 0;
+  lua_createtable(L, symbols_narr, 1);  // [retval, symbols]
+  for (size_t i = 0; i < nsymbols; i++) {
+    TSSymbolType t = ts_language_symbol_type(lang, (TSSymbol)i);
+    if (t == TSSymbolTypeAuxiliary) {
+      // not used by the API
+      continue;
+    }
+    lua_createtable(L, 2, 0);  // [retval, symbols, elem]
+    lua_pushstring(L, ts_language_symbol_name(lang, (TSSymbol)i));
+    lua_rawseti(L, -2, 1);
+    lua_pushboolean(L, t == TSSymbolTypeRegular);
+    lua_rawseti(L, -2, 2);  // [retval, symbols, elem]
+    lua_rawseti(L, -2, (int)i);  // [retval, symbols]
+  }
+
+  lua_setfield(L, -2, "symbols");  // [retval]
+
+  size_t nfields = (size_t)ts_language_field_count(lang);
+  lua_createtable(L, (int)nfields, 0);  // [retval, fields]
+  // Field ids are 1-based: the valid ids are 1..nfields inclusive. Iterating
+  // over 0..nfields-1 would store nothing for id 0 and miss the last field.
+  for (size_t i = 1; i <= nfields; i++) {
+    lua_pushstring(L, ts_language_field_name_for_id(lang, (TSFieldId)i));
+    lua_rawseti(L, -2, (int)i);  // [retval, fields]
+  }
+
+  lua_setfield(L, -2, "fields");  // [retval]
+  return 1;
+}
+
+/// Push a new parser userdata for the registered language `lang_name`.
+///
+/// @param L          Lua state
+/// @param lang_name  name previously passed to tslua_register_lang()
+/// @return 1 (the parser userdata is on top of the stack). Raises a Lua error
+///         when the language is unknown or cannot be assigned to a parser.
+int tslua_push_parser(lua_State *L, const char *lang_name)
+{
+  TSLanguage *lang = pmap_get(cstr_t)(langs, lang_name);
+  if (!lang) {
+    return luaL_error(L, "no such language: %s", lang_name);
+  }
+
+  TSParser *parser = ts_parser_new();
+  if (!ts_parser_set_language(parser, lang)) {
+    // false means the language was generated with an incompatible ABI
+    // version; a parser without a language can only fail, so refuse it here.
+    // Free the parser first: luaL_error() does not return.
+    ts_parser_delete(parser);
+    return luaL_error(L, "incompatible language version: %s", lang_name);
+  }
+
+  TSLua_parser *p = lua_newuserdata(L, sizeof(TSLua_parser));  // [udata]
+  p->parser = parser;
+  p->tree = NULL;
+
+  lua_getfield(L, LUA_REGISTRYINDEX, "treesitter_parser");  // [udata, meta]
+  lua_setmetatable(L, -2);  // [udata]
+  return 1;
+}
+
+/// Fetch the parser userdata passed as argument 1.
+///
+/// luaL_checkudata() raises a Lua error when argument 1 is not a
+/// "treesitter_parser" userdata.
+static TSLua_parser *parser_check(lua_State *L)
+{
+  TSLua_parser *self = luaL_checkudata(L, 1, "treesitter_parser");
+  return self;
+}
+
+/// `__gc` metamethod: release the parser and its internal tree, if any.
+static int parser_gc(lua_State *L)
+{
+  TSLua_parser *self = parser_check(L);
+  if (self != NULL) {
+    ts_parser_delete(self->parser);
+    if (self->tree != NULL) {
+      ts_tree_delete(self->tree);
+    }
+  }
+
+  return 0;
+}
+
+/// `__tostring` metamethod: every parser prints as "<parser>".
+static int parser_tostring(lua_State *L)
+{
+  lua_pushliteral(L, "<parser>");
+  return 1;
+}
+
+/// TSInput read callback: hand the parser the text of one buffer line.
+///
+/// @param payload      the buf_T to read from
+/// @param byte_index   unused; the (row, column) position is used instead
+/// @param position     row selects the line, column is a byte offset into it
+/// @param[out] bytes_read  number of valid bytes at the returned pointer;
+///                         0 signals the end of the document
+/// @return pointer into a static chunk buffer (overwritten by the next call),
+///         or "" when there is nothing to read at `position`.
+static const char *input_cb(void *payload, uint32_t byte_index,
+                            TSPoint position, uint32_t *bytes_read)
+{
+  buf_T *bp = payload;
+#define BUFSIZE 256
+  static char buf[BUFSIZE];
+
+  if ((linenr_T)position.row >= bp->b_ml.ml_line_count) {
+    *bytes_read = 0;
+    return "";
+  }
+  char_u *line = ml_get_buf(bp, position.row+1, false);
+  size_t len = STRLEN(line);
+  if (position.column > len) {
+    // A column beyond the end of the line would make `len - column` wrap
+    // around and copy BUFSIZE bytes from past the end of the line.
+    *bytes_read = 0;
+    return "";
+  }
+  size_t tocopy = MIN(len-position.column, BUFSIZE);
+
+  memcpy(buf, line+position.column, tocopy);
+  // Translate embedded \n to NUL
+  memchrsub(buf, '\n', '\0', tocopy);
+  *bytes_read = (uint32_t)tocopy;
+  if (tocopy < BUFSIZE) {
+    // now add the final \n. If it didn't fit, input_cb will be called again
+    // on the same line with advanced column.
+    buf[tocopy] = '\n';
+    (*bytes_read)++;
+  }
+  return buf;
+#undef BUFSIZE
+}
+
+/// Lua method `parser:parse_buf(bufnr)`: parse the text of a buffer.
+///
+/// The previous internal tree (if any) is passed to ts_parser_parse() for
+/// reuse and is then replaced by the result. Pushes a copy of the new tree,
+/// or nil when ts_parser_parse() returned NULL. Raises a Lua error for an
+/// invalid buffer handle.
+static int parser_parse_buf(lua_State *L)
+{
+  TSLua_parser *p = parser_check(L);
+  if (!p) {
+    return 0;
+  }
+
+  long bufnr = lua_tointeger(L, 2);
+  void *payload = handle_get_buffer(bufnr);
+  if (!payload) {
+    // "%d" in luaL_error() consumes an int; passing the long unconverted is
+    // undefined behavior for a variadic argument.
+    return luaL_error(L, "invalid buffer handle: %d", (int)bufnr);
+  }
+  TSInput input = { payload, input_cb, TSInputEncodingUTF8 };
+  TSTree *new_tree = ts_parser_parse(p->parser, p->tree, input);
+  if (p->tree) {
+    ts_tree_delete(p->tree);
+  }
+  p->tree = new_tree;
+
+  tslua_push_tree(L, p->tree);
+  return 1;
+}
+
+/// Lua method `parser:tree()`: push a copy of the parser's current tree, or
+/// nil when nothing has been parsed yet.
+static int parser_tree(lua_State *L)
+{
+  TSLua_parser *self = parser_check(L);
+  if (self == NULL) {
+    return 0;
+  }
+
+  tslua_push_tree(L, self->tree);
+  return 1;
+}
+
+/// Lua method `parser:edit(...)`: apply an edit to the internal tree.
+///
+/// Takes nine integers after the parser: start_byte, old_end_byte,
+/// new_end_byte, then (row, column) pairs for the start point, the old end
+/// point and the new end point. Does nothing when there is no tree yet.
+/// Raises a Lua error when fewer than 10 arguments are given.
+static int parser_edit(lua_State *L)
+{
+  if (lua_gettop(L) < 10) {
+    lua_pushstring(L, "not enough args to parser:edit()");
+    return lua_error(L);
+  }
+
+  TSLua_parser *self = parser_check(L);
+  if (self == NULL || self->tree == NULL) {
+    return 0;
+  }
+
+  long start_byte = lua_tointeger(L, 2);
+  long old_end_byte = lua_tointeger(L, 3);
+  long new_end_byte = lua_tointeger(L, 4);
+
+  TSInputEdit edit = {
+    .start_byte = start_byte,
+    .old_end_byte = old_end_byte,
+    .new_end_byte = new_end_byte,
+    .start_point = { lua_tointeger(L, 5), lua_tointeger(L, 6) },
+    .old_end_point = { lua_tointeger(L, 7), lua_tointeger(L, 8) },
+    .new_end_point = { lua_tointeger(L, 9), lua_tointeger(L, 10) },
+  };
+
+  ts_tree_edit(self->tree, &edit);
+
+  return 0;
+}
+
+
+// Tree methods
+
+/// Push a Lua wrapper for `tree`, or nil when `tree` is NULL.
+///
+/// The wrapper holds its own copy of the tree, so the caller keeps ownership
+/// of the argument.
+void tslua_push_tree(lua_State *L, TSTree *tree)
+{
+  if (!tree) {
+    lua_pushnil(L);
+    return;
+  }
+
+  TSTree **slot = lua_newuserdata(L, sizeof(*slot));  // [udata]
+  *slot = ts_tree_copy(tree);
+  luaL_getmetatable(L, "treesitter_tree");  // [udata, meta]
+  lua_setmetatable(L, -2);  // [udata]
+
+  // Node wrappers keep the tree wrapper alive through this table. In lua 5.3
+  // the uservalue of a node could be the tree itself, but in lua 5.1 the
+  // uservalue (fenv) has to be a table.
+  lua_createtable(L, 1, 0);  // [udata, reftable]
+  lua_pushvalue(L, -2);  // [udata, reftable, udata]
+  lua_rawseti(L, -2, 1);  // [udata, reftable]
+  lua_setfenv(L, -2);  // [udata]
+}
+
+/// Fetch the tree held by the "treesitter_tree" userdata at argument 1.
+///
+/// luaL_checkudata() raises a Lua error for any other kind of argument.
+static TSTree *tree_check(lua_State *L)
+{
+  TSTree **slot = luaL_checkudata(L, 1, "treesitter_tree");
+  TSTree *tree = *slot;
+  return tree;
+}
+
+/// `__gc` metamethod: delete the tree copy owned by the wrapper.
+static int tree_gc(lua_State *L)
+{
+  TSTree *owned = tree_check(L);
+  if (owned != NULL) {
+    ts_tree_delete(owned);
+  }
+  return 0;
+}
+
+/// `__tostring` metamethod: every tree prints as "<tree>".
+static int tree_tostring(lua_State *L)
+{
+  lua_pushliteral(L, "<tree>");
+  return 1;
+}
+
+/// Lua method `tree:root()`: push the root node of the tree.
+static int tree_root(lua_State *L)
+{
+  TSTree *tree = tree_check(L);
+  if (!tree) {
+    return 0;
+  }
+  TSNode root = ts_tree_root_node(tree);
+  // push_node() takes the reference table from the value on top of the
+  // stack, which must be the tree. Push it explicitly so that extra
+  // arguments (e.g. `tree.root(tree, x)`) cannot end up in that position.
+  lua_pushvalue(L, 1);
+  push_node(L, root);
+  return 1;
+}
+
+// Node methods
+
+/// Push a Lua wrapper for `node`, or nil for a null node.
+///
+/// The value on top of the stack must be the tree this node belongs to, or
+/// another node of the same tree; it is left in place. Can only be called
+/// inside a cfunction with the tslua environment.
+static void push_node(lua_State *L, TSNode node)
+{
+  if (!ts_node_is_null(node)) {
+    TSNode *slot = lua_newuserdata(L, sizeof(*slot));  // [src, udata]
+    *slot = node;
+    luaL_getmetatable(L, "treesitter_node");  // [src, udata, meta]
+    lua_setmetatable(L, -2);  // [src, udata]
+    // share the reference table of the source value with the new node
+    lua_getfenv(L, -2);  // [src, udata, reftable]
+    lua_setfenv(L, -2);  // [src, udata]
+    return;
+  }
+
+  lua_pushnil(L);  // [src, nil]
+}
+
+/// Copy the node stored in the "treesitter_node" userdata at argument 1.
+///
+/// @param[out] res  receives the node
+/// @return true when `res` was filled in. luaL_checkudata() raises a Lua
+///         error when argument 1 is not a node userdata.
+static bool node_check(lua_State *L, TSNode *res)
+{
+  TSNode *stored = luaL_checkudata(L, 1, "treesitter_node");
+  if (stored == NULL) {
+    return false;
+  }
+
+  *res = *stored;
+  return true;
+}
+
+
+/// `__tostring` metamethod: a node prints as "<node TYPE>".
+static int node_tostring(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    lua_pushliteral(L, "<node ");
+    lua_pushstring(L, ts_node_type(self));
+    lua_pushliteral(L, ">");
+    lua_concat(L, 3);
+    return 1;
+  }
+  return 0;
+}
+
+/// `__eq` metamethod: compare two nodes with ts_node_eq().
+static int node_eq(lua_State *L)
+{
+  TSNode lhs;
+  if (!node_check(L, &lhs)) {
+    return 0;
+  }
+  // Lua only calls this when both operands of "x == y" have the
+  // treesitter_node metatable, so raising an error otherwise is fine.
+  TSNode *rhs = luaL_checkudata(L, 2, "treesitter_node");
+  if (rhs == NULL) {
+    return 0;
+  }
+  lua_pushboolean(L, ts_node_eq(lhs, *rhs));
+  return 1;
+}
+
+/// Lua method `node:range()`: return start row, start column, end row and
+/// end column of the node.
+static int node_range(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  const TSPoint from = ts_node_start_point(self);
+  const TSPoint to = ts_node_end_point(self);
+
+  lua_pushnumber(L, from.row);
+  lua_pushnumber(L, from.column);
+  lua_pushnumber(L, to.row);
+  lua_pushnumber(L, to.column);
+  return 4;
+}
+
+/// Lua method `node:start()`: return start row, start column and start byte.
+static int node_start(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  const TSPoint point = ts_node_start_point(self);
+  const uint32_t byte = ts_node_start_byte(self);
+
+  lua_pushnumber(L, point.row);
+  lua_pushnumber(L, point.column);
+  lua_pushnumber(L, byte);
+  return 3;
+}
+
+/// Lua method `node:end_()`: return end row, end column and end byte.
+static int node_end(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  const TSPoint point = ts_node_end_point(self);
+  const uint32_t byte = ts_node_end_byte(self);
+
+  lua_pushnumber(L, point.row);
+  lua_pushnumber(L, point.column);
+  lua_pushnumber(L, byte);
+  return 3;
+}
+
+/// Lua method `node:child_count()` and `__len`: number of children.
+static int node_child_count(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    lua_pushnumber(L, ts_node_child_count(self));
+    return 1;
+  }
+  return 0;
+}
+
+/// Lua method `node:named_child_count()`: number of named children.
+static int node_named_child_count(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    lua_pushnumber(L, ts_node_named_child_count(self));
+    return 1;
+  }
+  return 0;
+}
+
+/// Lua method `node:type()`: the type name reported by ts_node_type().
+static int node_type(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    lua_pushstring(L, ts_node_type(self));
+    return 1;
+  }
+  return 0;
+}
+
+/// Lua method `node:symbol()`: the numeric symbol id of the node.
+static int node_symbol(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    const TSSymbol id = ts_node_symbol(self);
+    lua_pushnumber(L, id);
+    return 1;
+  }
+  return 0;
+}
+
+/// Lua method `node:named()`: result of ts_node_is_named() as a boolean.
+static int node_named(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    lua_pushboolean(L, ts_node_is_named(self));
+    return 1;
+  }
+  return 0;
+}
+
+/// Lua method `node:sexpr()`: the string produced by ts_node_string().
+static int node_sexpr(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  // ts_node_string() hands over an allocated string: Lua copies it, then it
+  // is freed here.
+  char *text = ts_node_string(self);
+  lua_pushstring(L, text);
+  xfree(text);
+  return 1;
+}
+
+/// Lua method `node:missing()`: result of ts_node_is_missing() as a boolean.
+static int node_missing(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    lua_pushboolean(L, ts_node_is_missing(self));
+    return 1;
+  }
+  return 0;
+}
+
+/// Lua method `node:has_error()`: result of ts_node_has_error() as a boolean.
+static int node_has_error(lua_State *L)
+{
+  TSNode self;
+  if (node_check(L, &self)) {
+    lua_pushboolean(L, ts_node_has_error(self));
+    return 1;
+  }
+  return 0;
+}
+
+/// Lua method `node:child(index)`: the child at `index` as passed to
+/// ts_node_child(), or nil when that returns a null node.
+static int node_child(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  long index = lua_tointeger(L, 2);
+  TSNode result = ts_node_child(self, (uint32_t)index);
+
+  // push_node() needs a node of the same tree on top of the stack
+  lua_pushvalue(L, 1);
+  push_node(L, result);
+  return 1;
+}
+
+/// Lua method `node:named_child(index)`: the named child at `index` as passed
+/// to ts_node_named_child(), or nil when that returns a null node.
+static int node_named_child(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  long index = lua_tointeger(L, 2);
+  TSNode result = ts_node_named_child(self, (uint32_t)index);
+
+  // push_node() needs a node of the same tree on top of the stack
+  lua_pushvalue(L, 1);
+  push_node(L, result);
+  return 1;
+}
+
+/// Lua method `node:descendant_for_range(start_row, start_col, end_row,
+/// end_col)`: result of ts_node_descendant_for_point_range(), or nil when
+/// that is a null node.
+static int node_descendant_for_range(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  TSPoint from;
+  from.row = (uint32_t)lua_tointeger(L, 2);
+  from.column = (uint32_t)lua_tointeger(L, 3);
+  TSPoint to;
+  to.row = (uint32_t)lua_tointeger(L, 4);
+  to.column = (uint32_t)lua_tointeger(L, 5);
+
+  TSNode found = ts_node_descendant_for_point_range(self, from, to);
+
+  // push_node() needs a node of the same tree on top of the stack
+  lua_pushvalue(L, 1);
+  push_node(L, found);
+  return 1;
+}
+
+/// Lua method `node:named_descendant_for_range(start_row, start_col, end_row,
+/// end_col)`: result of ts_node_named_descendant_for_point_range(), or nil
+/// when that is a null node.
+static int node_named_descendant_for_range(lua_State *L)
+{
+  TSNode self;
+  if (!node_check(L, &self)) {
+    return 0;
+  }
+
+  TSPoint from;
+  from.row = (uint32_t)lua_tointeger(L, 2);
+  from.column = (uint32_t)lua_tointeger(L, 3);
+  TSPoint to;
+  to.row = (uint32_t)lua_tointeger(L, 4);
+  to.column = (uint32_t)lua_tointeger(L, 5);
+
+  TSNode found = ts_node_named_descendant_for_point_range(self, from, to);
+
+  // push_node() needs a node of the same tree on top of the stack
+  lua_pushvalue(L, 1);
+  push_node(L, found);
+  return 1;
+}
+
+/// Lua method `node:parent()`: the parent node, or nil when ts_node_parent()
+/// returns a null node.
+static int node_parent(lua_State *L)
+{
+  TSNode node;
+  if (!node_check(L, &node)) {
+    return 0;
+  }
+  TSNode parent = ts_node_parent(node);
+  // push_node() takes the reference table from the value on top of the
+  // stack. Push the node itself, as node_child() and friends do, so that
+  // extra arguments cannot end up in that position.
+  lua_pushvalue(L, 1);
+  push_node(L, parent);
+  return 1;
+}
+
diff --git a/src/nvim/lua/treesitter.h b/src/nvim/lua/treesitter.h
new file mode 100644
index 0000000000..812166f67b
--- /dev/null
+++ b/src/nvim/lua/treesitter.h
@@ -0,0 +1,14 @@
+#ifndef NVIM_LUA_TREESITTER_H
+#define NVIM_LUA_TREESITTER_H
+
+#include <lua.h>
+#include <lualib.h>
+#include <lauxlib.h>
+
+#include "tree_sitter/api.h"
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "lua/treesitter.h.generated.h"
+#endif
+
+#endif // NVIM_LUA_TREESITTER_H
diff --git a/src/nvim/lua/vim.lua b/src/nvim/lua/vim.lua
index b1a684b977..b67762e48e 100644
--- a/src/nvim/lua/vim.lua
+++ b/src/nvim/lua/vim.lua
@@ -232,6 +232,9 @@ local function __index(t, key)
if key == 'inspect' then
t.inspect = require('vim.inspect')
return t.inspect
+ elseif key == 'treesitter' then
+ t.treesitter = require('vim.treesitter')
+ return t.treesitter
elseif require('vim.shared')[key] ~= nil then
-- Expose all `vim.shared` functions on the `vim` module.
t[key] = require('vim.shared')[key]
diff --git a/src/tree_sitter/LICENSE b/src/tree_sitter/LICENSE
new file mode 100644
index 0000000000..971b81f9a8
--- /dev/null
+++ b/src/tree_sitter/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2018 Max Brunsfeld
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h
new file mode 100644
index 0000000000..2229995bd1
--- /dev/null
+++ b/src/tree_sitter/alloc.h
@@ -0,0 +1,94 @@
+#ifndef TREE_SITTER_ALLOC_H_
+#define TREE_SITTER_ALLOC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "nvim/memory.h"
+
+#if 1
+
+// Allocation recording is not available in this configuration: the request
+// is ignored and false is always returned.
+static inline bool ts_toggle_allocation_recording(bool value) {
+ return false;
+}
+
+// Route every tree-sitter allocation through the x* allocator functions
+// (presumably the ones declared in nvim/memory.h -- confirm).
+#define ts_malloc xmalloc
+#define ts_calloc xcalloc
+#define ts_realloc xrealloc
+#define ts_free xfree
+
+#elif defined(TREE_SITTER_TEST)
+
+// Recording allocator hooks used when TREE_SITTER_TEST is defined; only
+// declared here, the definitions are not part of this header.
+void *ts_record_malloc(size_t);
+void *ts_record_calloc(size_t, size_t);
+void *ts_record_realloc(void *, size_t);
+void ts_record_free(void *);
+bool ts_toggle_allocation_recording(bool);
+
+// Test build: forward to the recording allocator.
+static inline void *ts_malloc(size_t size) {
+ return ts_record_malloc(size);
+}
+
+// Test build: forward to the recording allocator.
+static inline void *ts_calloc(size_t count, size_t size) {
+ return ts_record_calloc(count, size);
+}
+
+// Test build: forward to the recording allocator.
+static inline void *ts_realloc(void *buffer, size_t size) {
+ return ts_record_realloc(buffer, size);
+}
+
+// Test build: forward to the recording allocator.
+static inline void ts_free(void *buffer) {
+ ts_record_free(buffer);
+}
+
+#else
+
+#include <stdlib.h>
+
+// Plain libc build: there is no allocation recording, so the request is
+// ignored and false is always returned.
+static inline bool ts_toggle_allocation_recording(bool value) {
+ return false;
+}
+
+// malloc() wrapper that never returns NULL for a non-empty request: on
+// failure it reports the size on stderr and exits with status 1.
+static inline void *ts_malloc(size_t size) {
+  void *result = malloc(size);
+  if (size > 0 && !result) {
+    // %zu is the conversion for size_t; %lu is wrong wherever long is
+    // narrower than size_t.
+    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
+    exit(1);
+  }
+  return result;
+}
+
+// calloc() wrapper that never returns NULL for a non-empty request: on
+// failure it reports the size on stderr and exits with status 1.
+static inline void *ts_calloc(size_t count, size_t size) {
+  void *result = calloc(count, size);
+  // calloc() may legitimately return NULL when either factor is zero, so
+  // only a non-empty request counts as a failure.
+  if (count > 0 && size > 0 && !result) {
+    // %zu is the conversion for size_t; %lu is wrong wherever long is
+    // narrower than size_t.
+    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
+    exit(1);
+  }
+  return result;
+}
+
+// realloc() wrapper that never returns NULL for a non-empty request: on
+// failure it reports the size on stderr and exits with status 1.
+static inline void *ts_realloc(void *buffer, size_t size) {
+  void *result = realloc(buffer, size);
+  if (size > 0 && !result) {
+    // %zu is the conversion for size_t; %lu is wrong wherever long is
+    // narrower than size_t.
+    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
+    exit(1);
+  }
+  return result;
+}
+
+// Plain libc build: release memory obtained from ts_malloc/ts_calloc/
+// ts_realloc.
+static inline void ts_free(void *buffer) {
+ free(buffer);
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_ALLOC_H_
diff --git a/src/tree_sitter/api.h b/src/tree_sitter/api.h
new file mode 100644
index 0000000000..d39d0521ee
--- /dev/null
+++ b/src/tree_sitter/api.h
@@ -0,0 +1,660 @@
+#ifndef TREE_SITTER_API_H_
+#define TREE_SITTER_API_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+/****************************/
+/* Section - ABI Versioning */
+/****************************/
+
+#define TREE_SITTER_LANGUAGE_VERSION 11
+#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9
+
+/*******************/
+/* Section - Types */
+/*******************/
+
+/* Numeric ids of grammar symbols and of node fields. */
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+/* Opaque handles: the struct definitions are not part of this header. */
+typedef struct TSLanguage TSLanguage;
+typedef struct TSParser TSParser;
+typedef struct TSTree TSTree;
+
+/* How the text returned by a `TSInput` read function is encoded. */
+typedef enum {
+ TSInputEncodingUTF8,
+ TSInputEncodingUTF16,
+} TSInputEncoding;
+
+/* Category of a grammar symbol.
+ * NOTE(review): presumably regular = named node types, anonymous = literal
+ * tokens, auxiliary = generator-internal symbols -- confirm against the
+ * tree-sitter documentation. */
+typedef enum {
+ TSSymbolTypeRegular,
+ TSSymbolTypeAnonymous,
+ TSSymbolTypeAuxiliary,
+} TSSymbolType;
+
+/* A position in the text given as row and column.
+ * NOTE(review): presumably both are zero-based and the column counts bytes
+ * -- confirm against the tree-sitter documentation. */
+typedef struct {
+ uint32_t row;
+ uint32_t column;
+} TSPoint;
+
+/* A span of text, described both by its start/end points and by its
+ * start/end byte offsets. */
+typedef struct {
+ TSPoint start_point;
+ TSPoint end_point;
+ uint32_t start_byte;
+ uint32_t end_byte;
+} TSRange;
+
+/* Text source for `ts_parser_parse`: `read` returns a chunk of text for a
+ * byte offset / position and stores its length in `bytes_read` (0 marks the
+ * end of the document), `payload` is passed through to every `read` call,
+ * and `encoding` tells how the returned text is encoded. */
+typedef struct {
+ void *payload;
+ const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
+ TSInputEncoding encoding;
+} TSInput;
+
+/* Kind of message handed to a `TSLogger` callback. */
+typedef enum {
+ TSLogTypeParse,
+ TSLogTypeLex,
+} TSLogType;
+
+/* Logging callback plus the `payload` pointer that is passed back to it.
+ * The parser does not take ownership of the payload. */
+typedef struct {
+ void *payload;
+ void (*log)(void *payload, TSLogType, const char *);
+} TSLogger;
+
+/* Description of one text edit, given both as byte offsets and as points:
+ * where the edit starts, where the replaced text ended before the edit and
+ * where the new text ends after it. Passed to `ts_tree_edit`. */
+typedef struct {
+ uint32_t start_byte;
+ uint32_t old_end_byte;
+ uint32_t new_end_byte;
+ TSPoint start_point;
+ TSPoint old_end_point;
+ TSPoint new_end_point;
+} TSInputEdit;
+
+/* A syntax node, passed around by value. `tree` points at the tree the node
+ * belongs to; `context` and `id` are internal to the library. */
+typedef struct {
+ uint32_t context[4];
+ const void *id;
+ const TSTree *tree;
+} TSNode;
+
+/* State of a tree cursor; all fields are internal to the library. */
+typedef struct {
+ const void *tree;
+ const void *id;
+ uint32_t context[2];
+} TSTreeCursor;
+
+/********************/
+/* Section - Parser */
+/********************/
+
+/**
+ * Create a new parser.
+ */
+TSParser *ts_parser_new(void);
+
+/**
+ * Delete the parser, freeing all of the memory that it used.
+ */
+void ts_parser_delete(TSParser *parser);
+
+/**
+ * Set the language that the parser should use for parsing.
+ *
+ * Returns a boolean indicating whether or not the language was successfully
+ * assigned. True means assignment succeeded. False means there was a version
+ * mismatch: the language was generated with an incompatible version of the
+ * Tree-sitter CLI. Check the language's version using `ts_language_version`
+ * and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
+ * `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
+ */
+bool ts_parser_set_language(TSParser *self, const TSLanguage *language);
+
+/**
+ * Get the parser's current language.
+ */
+const TSLanguage *ts_parser_language(const TSParser *self);
+
+/**
+ * Set the spans of text that the parser should include when parsing.
+ *
+ * By default, the parser will always include entire documents. This function
+ * allows you to parse only a *portion* of a document but still return a syntax
+ * tree whose ranges match up with the document as a whole. You can also pass
+ * multiple disjoint ranges.
+ *
+ * The second and third parameters specify the location and length of an array
+ * of ranges. The parser does *not* take ownership of these ranges; it copies
+ * the data, so it doesn't matter how these ranges are allocated.
+ */
+void ts_parser_set_included_ranges(
+ TSParser *self,
+ const TSRange *ranges,
+ uint32_t length
+);
+
+/**
+ * Get the ranges of text that the parser will include when parsing.
+ *
+ * The returned pointer is owned by the parser. The caller should not free it
+ * or write to it. The length of the array will be written to the given
+ * `length` pointer.
+ */
+const TSRange *ts_parser_included_ranges(
+ const TSParser *self,
+ uint32_t *length
+);
+
+/**
+ * Use the parser to parse some source code and create a syntax tree.
+ *
+ * If you are parsing this document for the first time, pass `NULL` for the
+ * `old_tree` parameter. Otherwise, if you have already parsed an earlier
+ * version of this document and the document has since been edited, pass the
+ * previous syntax tree so that the unchanged parts of it can be reused.
+ * This will save time and memory. For this to work correctly, you must have
+ * already edited the old syntax tree using the `ts_tree_edit` function in a
+ * way that exactly matches the source code changes.
+ *
+ * The `TSInput` parameter lets you specify how to read the text. It has the
+ * following three fields:
+ * 1. `read`: A function to retrieve a chunk of text at a given byte offset
+ * and (row, column) position. The function should return a pointer to the
+ * text and write its length to the `bytes_read` pointer. The parser
+ * does not take ownership of this buffer; it just borrows it until it has
+ * finished reading it. The function should write a zero value to the
+ * `bytes_read` pointer to indicate the end of the document.
+ * 2. `payload`: An arbitrary pointer that will be passed to each invocation
+ * of the `read` function.
+ * 3. `encoding`: An indication of how the text is encoded. Either
+ * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.
+ *
+ * This function returns a syntax tree on success, and `NULL` on failure. There
+ * are three possible reasons for failure:
+ * 1. The parser does not have a language assigned. Check for this using the
+ * `ts_parser_language` function.
+ * 2. Parsing was cancelled due to a timeout that was set by an earlier call to
+ * the `ts_parser_set_timeout_micros` function. You can resume parsing from
+ * where the parser left off by calling `ts_parser_parse` again with the
+ * same arguments. Or you can start parsing from scratch by first calling
+ * `ts_parser_reset`.
+ * 3. Parsing was cancelled using a cancellation flag that was set by an
+ * earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing
+ * from where the parser left off by calling `ts_parser_parse` again with
+ * the same arguments.
+ */
+TSTree *ts_parser_parse(
+ TSParser *self,
+ const TSTree *old_tree,
+ TSInput input
+);
+
+/**
+ * Use the parser to parse some source code stored in one contiguous buffer.
+ * The first two parameters are the same as in the `ts_parser_parse` function
+ * above. The last two parameters indicate the location of the buffer and its
+ * length in bytes.
+ */
+TSTree *ts_parser_parse_string(
+ TSParser *self,
+ const TSTree *old_tree,
+ const char *string,
+ uint32_t length
+);
+
+/**
+ * Use the parser to parse some source code stored in one contiguous buffer with
+ * a given encoding. The first four parameters work the same as in the
+ * `ts_parser_parse_string` method above. The final parameter indicates whether
+ * the text is encoded as UTF8 or UTF16.
+ */
+TSTree *ts_parser_parse_string_encoding(
+ TSParser *self,
+ const TSTree *old_tree,
+ const char *string,
+ uint32_t length,
+ TSInputEncoding encoding
+);
+
+/**
+ * Instruct the parser to start the next parse from the beginning.
+ *
+ * If the parser previously failed because of a timeout or a cancellation, then
+ * by default, it will resume where it left off on the next call to
+ * `ts_parser_parse` or other parsing functions. If you don't want to resume,
+ * and instead intend to use this parser to parse some other document, you must
+ * call this `ts_parser_reset` first.
+ */
+void ts_parser_reset(TSParser *self);
+
+/**
+ * Set the maximum duration in microseconds that parsing should be allowed to
+ * take before halting. If parsing takes longer than this, it will halt early,
+ * returning NULL. See `ts_parser_parse` for more information.
+ */
+void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout);
+
+/**
+ * Get the duration in microseconds that parsing is allowed to take.
+ */
+uint64_t ts_parser_timeout_micros(const TSParser *self);
+
+/**
+ * Set the parser's current cancellation flag pointer. If a non-null pointer is
+ * assigned, then the parser will periodically read from this pointer during
+ * parsing. If it reads a non-zero value, it will halt early, returning NULL.
+ * See `ts_parser_parse` for more information.
+ */
+void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);
+
+/**
+ * Get the parser's current cancellation flag pointer.
+ */
+const size_t *ts_parser_cancellation_flag(const TSParser *self);
+
+/**
+ * Set the logger that a parser should use during parsing.
+ *
+ * The parser does not take ownership over the logger payload. If a logger was
+ * previously assigned, the caller is responsible for releasing any memory
+ * owned by the previous logger.
+ */
+void ts_parser_set_logger(TSParser *self, TSLogger logger);
+
+/**
+ * Get the parser's current logger.
+ */
+TSLogger ts_parser_logger(const TSParser *self);
+
+/**
+ * Set the file descriptor to which the parser should write debugging graphs
+ * during parsing. The graphs are formatted in the DOT language. You may want
+ * to pipe these graphs directly to a `dot(1)` process in order to generate
+ * SVG output. You can turn off this logging by passing a negative number.
+ */
+void ts_parser_print_dot_graphs(TSParser *self, int file);
+
+/**
+ * Set whether or not the parser should halt immediately upon detecting an
+ * error. This will generally result in a syntax tree with an error at the
+ * root, and one or more partial syntax trees within the error. This behavior
+ * may not be supported long-term.
+ */
+void ts_parser_halt_on_error(TSParser *self, bool halt);
+
+/******************/
+/* Section - Tree */
+/******************/
+
+/**
+ * Create a shallow copy of the syntax tree. This is very fast.
+ *
+ * You need to copy a syntax tree in order to use it on more than one thread at
+ * a time, as syntax trees are not thread safe.
+ */
+TSTree *ts_tree_copy(const TSTree *self);
+
+/**
+ * Delete the syntax tree, freeing all of the memory that it used.
+ */
+void ts_tree_delete(TSTree *self);
+
+/**
+ * Get the root node of the syntax tree.
+ */
+TSNode ts_tree_root_node(const TSTree *self);
+
+/**
+ * Get the language that was used to parse the syntax tree.
+ */
+const TSLanguage *ts_tree_language(const TSTree *);
+
+/**
+ * Edit the syntax tree to keep it in sync with source code that has been
+ * edited.
+ *
+ * You must describe the edit both in terms of byte offsets and in terms of
+ * (row, column) coordinates.
+ */
+void ts_tree_edit(TSTree *self, const TSInputEdit *edit);
+
+/**
+ * Compare a new syntax tree to a previous syntax tree representing the same
+ * document, returning an array of ranges whose syntactic structure has changed.
+ *
+ * For this to work correctly, the old syntax tree must have been edited such
+ * that its ranges match up to the new tree. Generally, you'll want to call
+ * this function right after calling one of the `ts_parser_parse` functions,
+ * passing in the new tree that was returned from `ts_parser_parse` and the old
+ * tree that was passed as a parameter.
+ *
+ * The returned array is allocated using `malloc` and the caller is responsible
+ * for freeing it using `free`. The length of the array will be written to the
+ * given `length` pointer.
+ */
+TSRange *ts_tree_get_changed_ranges(
+ const TSTree *self,
+ const TSTree *old_tree,
+ uint32_t *length
+);
+
+/**
+ * Write a DOT graph describing the syntax tree to the given file.
+ */
+void ts_tree_print_dot_graph(const TSTree *, FILE *);
+
+/******************/
+/* Section - Node */
+/******************/
+
+/**
+ * Get the node's type as a null-terminated string.
+ */
+const char *ts_node_type(TSNode);
+
+/**
+ * Get the node's type as a numerical id.
+ */
+TSSymbol ts_node_symbol(TSNode);
+
+/**
+ * Get the node's start byte.
+ */
+uint32_t ts_node_start_byte(TSNode);
+
+/**
+ * Get the node's start position in terms of rows and columns.
+ */
+TSPoint ts_node_start_point(TSNode);
+
+/**
+ * Get the node's end byte.
+ */
+uint32_t ts_node_end_byte(TSNode);
+
+/**
+ * Get the node's end position in terms of rows and columns.
+ */
+TSPoint ts_node_end_point(TSNode);
+
+/**
+ * Get an S-expression representing the node as a string.
+ *
+ * This string is allocated with `malloc` and the caller is responsible for
+ * freeing it using `free`.
+ */
+char *ts_node_string(TSNode);
+
+/**
+ * Check if the node is null. Functions like `ts_node_child` and
+ * `ts_node_next_sibling` will return a null node to indicate that no such node
+ * was found.
+ */
+bool ts_node_is_null(TSNode);
+
+/**
+ * Check if the node is *named*. Named nodes correspond to named rules in the
+ * grammar, whereas *anonymous* nodes correspond to string literals in the
+ * grammar.
+ */
+bool ts_node_is_named(TSNode);
+
+/**
+ * Check if the node is *missing*. Missing nodes are inserted by the parser in
+ * order to recover from certain kinds of syntax errors.
+ */
+bool ts_node_is_missing(TSNode);
+
+/**
+ * Check if the node is *extra*. Extra nodes represent things like comments,
+ * which are not required by the grammar, but can appear anywhere.
+ */
+bool ts_node_is_extra(TSNode);
+
+/**
+ * Check if a syntax node has been edited.
+ */
+bool ts_node_has_changes(TSNode);
+
+/**
+ * Check if the node is a syntax error or contains any syntax errors.
+ */
+bool ts_node_has_error(TSNode);
+
+/**
+ * Get the node's immediate parent.
+ */
+TSNode ts_node_parent(TSNode);
+
+/**
+ * Get the node's child at the given index, where zero represents the first
+ * child.
+ */
+TSNode ts_node_child(TSNode, uint32_t);
+
+/**
+ * Get the node's number of children.
+ */
+uint32_t ts_node_child_count(TSNode);
+
+/**
+ * Get the node's *named* child at the given index.
+ *
+ * See also `ts_node_is_named`.
+ */
+TSNode ts_node_named_child(TSNode, uint32_t);
+
+/**
+ * Get the node's number of *named* children.
+ *
+ * See also `ts_node_is_named`.
+ */
+uint32_t ts_node_named_child_count(TSNode);
+
+/**
+ * Get the node's child with the given field name.
+ */
+TSNode ts_node_child_by_field_name(
+ TSNode self,
+ const char *field_name,
+ uint32_t field_name_length
+);
+
+/**
+ * Get the node's child with the given numerical field id.
+ *
+ * You can convert a field name to an id using the
+ * `ts_language_field_id_for_name` function.
+ */
+TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
+
+/**
+ * Get the node's next / previous sibling.
+ */
+TSNode ts_node_next_sibling(TSNode);
+TSNode ts_node_prev_sibling(TSNode);
+
+/**
+ * Get the node's next / previous *named* sibling.
+ */
+TSNode ts_node_next_named_sibling(TSNode);
+TSNode ts_node_prev_named_sibling(TSNode);
+
+/**
+ * Get the node's first child that extends beyond the given byte offset.
+ */
+TSNode ts_node_first_child_for_byte(TSNode, uint32_t);
+
+/**
+ * Get the node's first named child that extends beyond the given byte offset.
+ */
+TSNode ts_node_first_named_child_for_byte(TSNode, uint32_t);
+
+/**
+ * Get the smallest node within this node that spans the given range of bytes
+ * or (row, column) positions.
+ */
+TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
+TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
+
+/**
+ * Get the smallest named node within this node that spans the given range of
+ * bytes or (row, column) positions.
+ */
+TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
+TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
+
+/**
+ * Edit the node to keep it in-sync with source code that has been edited.
+ *
+ * This function is only rarely needed. When you edit a syntax tree with the
+ * `ts_tree_edit` function, all of the nodes that you retrieve from the tree
+ * afterward will already reflect the edit. You only need to use `ts_node_edit`
+ * when you have a `TSNode` instance that you want to keep and continue to use
+ * after an edit.
+ */
+void ts_node_edit(TSNode *, const TSInputEdit *);
+
+/**
+ * Check if two nodes are identical.
+ */
+bool ts_node_eq(TSNode, TSNode);
+
+/************************/
+/* Section - TreeCursor */
+/************************/
+
+/**
+ * Create a new tree cursor starting from the given node.
+ *
+ * A tree cursor allows you to walk a syntax tree more efficiently than is
+ * possible using the `TSNode` functions. It is a mutable object that is always
+ * on a certain syntax node, and can be moved imperatively to different nodes.
+ */
+TSTreeCursor ts_tree_cursor_new(TSNode);
+
+/**
+ * Delete a tree cursor, freeing all of the memory that it used.
+ */
+void ts_tree_cursor_delete(TSTreeCursor *);
+
+/**
+ * Re-initialize a tree cursor to start at a different node.
+ */
+void ts_tree_cursor_reset(TSTreeCursor *, TSNode);
+
+/**
+ * Get the tree cursor's current node.
+ */
+TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
+
+/**
+ * Get the field name of the tree cursor's current node.
+ *
+ * This returns `NULL` if the current node doesn't have a field.
+ * See also `ts_node_child_by_field_name`.
+ */
+const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
+
+/**
+ * Get the field id of the tree cursor's current node.
+ *
+ * This returns zero if the current node doesn't have a field.
+ * See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
+ */
+TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
+
+/**
+ * Move the cursor to the parent of its current node.
+ *
+ * This returns `true` if the cursor successfully moved, and returns `false`
+ * if there was no parent node (the cursor was already on the root node).
+ */
+bool ts_tree_cursor_goto_parent(TSTreeCursor *);
+
+/**
+ * Move the cursor to the next sibling of its current node.
+ *
+ * This returns `true` if the cursor successfully moved, and returns `false`
+ * if there was no next sibling node.
+ */
+bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
+
+/**
+ * Move the cursor to the first child of its current node.
+ *
+ * This returns `true` if the cursor successfully moved, and returns `false`
+ * if there were no children.
+ */
+bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
+
+/**
+ * Move the cursor to the first child of its current node that extends beyond
+ * the given byte offset.
+ *
+ * This returns the index of the child node if one was found, and returns -1
+ * if no such child was found.
+ */
+int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
+
+TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *);
+
+/**********************/
+/* Section - Language */
+/**********************/
+
+/**
+ * Get the number of distinct node types in the language.
+ */
+uint32_t ts_language_symbol_count(const TSLanguage *);
+
+/**
+ * Get a node type string for the given numerical id.
+ */
+const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
+
+/**
+ * Get the numerical id for the given node type string.
+ */
+TSSymbol ts_language_symbol_for_name(const TSLanguage *, const char *);
+
+/**
+ * Get the number of distinct field names in the language.
+ */
+uint32_t ts_language_field_count(const TSLanguage *);
+
+/**
+ * Get the field name string for the given numerical id.
+ */
+const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
+
+/**
+ * Get the numerical id for the given field name string.
+ */
+TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
+
+/**
+ * Check whether the given node type id belongs to named nodes, anonymous nodes,
+ * or hidden nodes.
+ *
+ * See also `ts_node_is_named`. Hidden nodes are never returned from the API.
+ */
+TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol);
+
+/**
+ * Get the ABI version number for this language. This version number is used
+ * to ensure that languages were generated by a compatible version of
+ * Tree-sitter.
+ *
+ * See also `ts_parser_set_language`.
+ */
+uint32_t ts_language_version(const TSLanguage *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_API_H_
diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h
new file mode 100644
index 0000000000..bc77e687bf
--- /dev/null
+++ b/src/tree_sitter/array.h
@@ -0,0 +1,142 @@
+#ifndef TREE_SITTER_ARRAY_H_
+#define TREE_SITTER_ARRAY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <stdbool.h>
+#include "./alloc.h"
+
+// Declare an anonymous struct type holding a growable array of `T`:
+// a `contents` pointer, the number of elements in use (`size`), and the
+// number of elements allocated (`capacity`).
+#define Array(T) \
+ struct { \
+ T *contents; \
+ uint32_t size; \
+ uint32_t capacity; \
+ }
+
+// Reset all three fields to the empty state. This only overwrites the
+// fields; it does not free whatever `contents` pointed to before.
+#define array_init(self) \
+ ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
+
+// Brace initializer for an empty array, in field order
+// { contents, size, capacity }.
+#define array_new() \
+ { NULL, 0, 0 }
+
+// Get a pointer to the element at `index`, asserting that the index is in
+// bounds. `index` is parenthesized before the cast so that a compound
+// argument (for example `cond ? a : b`) is converted as a whole instead of
+// having the cast bind to its first operand only. Note that `index` is
+// evaluated twice when assertions are enabled.
+#define array_get(self, index) \
+  (assert((uint32_t)(index) < (self)->size), &(self)->contents[index])
+
+// Pointer to the first element (bounds-checked through `array_get`).
+#define array_front(self) array_get(self, 0)
+
+// Pointer to the last element (bounds-checked through `array_get`; the
+// index wraps and fails the assertion when the array is empty).
+#define array_back(self) array_get(self, (self)->size - 1)
+
+// Drop all elements by resetting `size`; the allocation and `capacity` are
+// left untouched.
+#define array_clear(self) ((self)->size = 0)
+
+// Make sure storage for at least `new_capacity` elements is allocated.
+#define array_reserve(self, new_capacity) \
+ array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity)
+
+// Remove the element at `index`, shifting the following elements down.
+#define array_erase(self, index) \
+ array__erase((VoidArray *)(self), array__elem_size(self), index)
+
+// Free the array's storage and reset it to the empty state. `self` is
+// parenthesized so the cast applies to the whole argument expression, in
+// line with every other macro in this header.
+#define array_delete(self) array__delete((VoidArray *)(self))
+
+// Append `element`, first growing the storage by one slot if needed.
+#define array_push(self, element) \
+ (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \
+ (self)->contents[(self)->size++] = (element))
+
+// Append `count` zero-filled elements: grow the storage, memset the new
+// slots to zero, then bump `size`.
+#define array_grow_by(self, count) \
+ (array__grow((VoidArray *)(self), count, array__elem_size(self)), \
+ memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \
+ (self)->size += (count))
+
+// Append every element of `other` to the end of `self`.
+#define array_push_all(self, other) \
+ array_splice((self), (self)->size, 0, (other)->size, (other)->contents)
+
+// Replace `old_count` elements starting at `index` with `new_count`
+// elements copied from `new_contents`.
+#define array_splice(self, index, old_count, new_count, new_contents) \
+ array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
+ new_count, new_contents)
+
+// Insert a copy of `element` at `index`, shifting later elements up by one.
+// `element` must be an lvalue because its address is taken; it is
+// parenthesized so that `&` applies to the whole argument expression.
+#define array_insert(self, index, element) \
+  array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &(element))
+
+// Remove the last element and yield its value. There is no bounds check:
+// `size` is decremented unconditionally.
+#define array_pop(self) ((self)->contents[--(self)->size])
+
+// Make `self` hold a copy of the elements of `other`.
+#define array_assign(self, other) \
+ array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))
+
+// Private
+
+// Type-erased view of any `Array(T)`; the public macros cast to this type
+// and pass the element size separately to the helpers below.
+typedef Array(void) VoidArray;
+
+// Size in bytes of one element of the array that `self` points to.
+#define array__elem_size(self) sizeof(*(self)->contents)
+
+// Free the backing storage and leave the array in the empty state
+// (no contents, zero size, zero capacity).
+static inline void array__delete(VoidArray *self) {
+  ts_free(self->contents);
+  self->capacity = 0;
+  self->size = 0;
+  self->contents = NULL;
+}
+
+// Remove the element at `index` by sliding every later element down one
+// slot, then shrink the array by one. `index` must be in bounds.
+static inline void array__erase(VoidArray *self, size_t element_size,
+                                uint32_t index) {
+  assert(index < self->size);
+  char *slot = (char *)self->contents + index * element_size;
+  uint32_t trailing = self->size - index - 1;
+  memmove(slot, slot + element_size, trailing * element_size);
+  self->size--;
+}
+
+// Ensure the array can hold at least `new_capacity` elements. Existing
+// storage is reallocated; a first allocation goes through `ts_calloc`.
+// Requests that do not exceed the current capacity are ignored.
+static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) {
+  if (new_capacity <= self->capacity) return;
+
+  self->contents = self->contents
+    ? ts_realloc(self->contents, new_capacity * element_size)
+    : ts_calloc(new_capacity, element_size);
+  self->capacity = new_capacity;
+}
+
+// Overwrite `self` with a copy of the elements of `other`, growing the
+// storage of `self` if it is too small. The capacity of `self` never
+// shrinks.
+static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) {
+  array__reserve(self, element_size, other->size);
+  self->size = other->size;
+  // When both arrays are empty and unallocated, `contents` is NULL on both
+  // sides. Passing NULL to memcpy is undefined behaviour even for a zero
+  // length, so only copy when there is something to copy.
+  if (self->size > 0) {
+    memcpy(self->contents, other->contents, self->size * element_size);
+  }
+}
+
+// Make room for `count` more elements. When the current capacity is too
+// small, the new capacity is the largest of: double the old capacity, 8,
+// and the exact size required.
+static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
+  size_t needed = self->size + count;
+  if (needed <= self->capacity) return;
+
+  size_t target = self->capacity * 2;
+  if (target < 8) target = 8;
+  if (target < needed) target = needed;
+  array__reserve(self, element_size, target);
+}
+
+// Replace the `old_count` elements starting at `index` with `new_count`
+// elements copied from `elements`. The tail of the array is moved to its
+// new position first, then the new elements are copied into the gap.
+static inline void array__splice(VoidArray *self, size_t element_size,
+                                 uint32_t index, uint32_t old_count,
+                                 uint32_t new_count, const void *elements) {
+  uint32_t removed_end = index + old_count;
+  uint32_t inserted_end = index + new_count;
+  uint32_t resulting_size = self->size + new_count - old_count;
+  assert(removed_end <= self->size);
+
+  array__reserve(self, element_size, resulting_size);
+
+  // Read the pointer only after reserving: the storage may have moved.
+  char *base = (char *)self->contents;
+  if (self->size > removed_end) {
+    memmove(
+      base + inserted_end * element_size,
+      base + removed_end * element_size,
+      (self->size - removed_end) * element_size
+    );
+  }
+  if (new_count > 0) {
+    memcpy(base + index * element_size, elements, new_count * element_size);
+  }
+  self->size = resulting_size;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_ARRAY_H_
diff --git a/src/tree_sitter/atomic.h b/src/tree_sitter/atomic.h
new file mode 100644
index 0000000000..301ee36700
--- /dev/null
+++ b/src/tree_sitter/atomic.h
@@ -0,0 +1,42 @@
+#ifndef TREE_SITTER_ATOMIC_H_
+#define TREE_SITTER_ATOMIC_H_
+
+#include <stdint.h>
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+// Windows variant: read the value through its volatile pointer.
+static inline size_t atomic_load(const volatile size_t *p) {
+  size_t value = *p;
+  return value;
+}
+
+// Atomically increment `*p` and return the incremented value.
+// `InterlockedIncrement` is declared to take a `LONG volatile *`, so the
+// pointer is converted explicitly instead of relying on an implicit
+// conversion between incompatible pointer types.
+static inline uint32_t atomic_inc(volatile uint32_t *p) {
+  return (uint32_t)InterlockedIncrement((volatile LONG *)p);
+}
+
+// Atomically decrement `*p` and return the decremented value.
+// `InterlockedDecrement` is declared to take a `LONG volatile *`, so the
+// pointer is converted explicitly instead of relying on an implicit
+// conversion between incompatible pointer types.
+static inline uint32_t atomic_dec(volatile uint32_t *p) {
+  return (uint32_t)InterlockedDecrement((volatile LONG *)p);
+}
+
+#else
+
+// Non-Windows variant: atomically read `*p`.
+static inline size_t atomic_load(const volatile size_t *p) {
+#ifdef __ATOMIC_RELAXED
+ // The compiler provides the __atomic builtins: do a relaxed atomic load.
+ return __atomic_load_n(p, __ATOMIC_RELAXED);
+#else
+ // Fallback: fetch-and-add of zero returns the current value. The builtin
+ // needs a non-const pointer, hence the cast.
+ return __sync_fetch_and_add((volatile size_t *)p, 0);
+#endif
+}
+
+// Atomically add one to `*p` and return the new value.
+static inline uint32_t atomic_inc(volatile uint32_t *p) {
+  uint32_t updated = __sync_add_and_fetch(p, 1u);
+  return updated;
+}
+
+// Atomically subtract one from `*p` and return the new value.
+static inline uint32_t atomic_dec(volatile uint32_t *p) {
+  uint32_t updated = __sync_sub_and_fetch(p, 1u);
+  return updated;
+}
+
+#endif
+
+#endif // TREE_SITTER_ATOMIC_H_
diff --git a/src/tree_sitter/clock.h b/src/tree_sitter/clock.h
new file mode 100644
index 0000000000..94545f3566
--- /dev/null
+++ b/src/tree_sitter/clock.h
@@ -0,0 +1,141 @@
+#ifndef TREE_SITTER_CLOCK_H_
+#define TREE_SITTER_CLOCK_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+typedef uint64_t TSDuration;
+
+#ifdef _WIN32
+
+// Windows:
+// * Represent a time as a performance counter value.
+// * Represent a duration as a number of performance counter ticks.
+
+#include <windows.h>
+typedef uint64_t TSClock;
+
+// Convert microseconds to performance-counter ticks.
+static inline TSDuration duration_from_micros(uint64_t micros) {
+  LARGE_INTEGER ticks_per_second;
+  QueryPerformanceFrequency(&ticks_per_second);
+  uint64_t frequency = (uint64_t)ticks_per_second.QuadPart;
+  return micros * frequency / 1000000;
+}
+
+// Convert performance-counter ticks to microseconds.
+static inline uint64_t duration_to_micros(TSDuration self) {
+  LARGE_INTEGER ticks_per_second;
+  QueryPerformanceFrequency(&ticks_per_second);
+  uint64_t frequency = (uint64_t)ticks_per_second.QuadPart;
+  return self * 1000000 / frequency;
+}
+
+// The "no time set" value: a counter value of zero.
+static inline TSClock clock_null(void) {
+  TSClock unset = 0;
+  return unset;
+}
+
+// Read the current performance-counter value.
+static inline TSClock clock_now(void) {
+  LARGE_INTEGER counter;
+  QueryPerformanceCounter(&counter);
+  return (uint64_t)counter.QuadPart;
+}
+
+// The time `duration` ticks after `base`.
+static inline TSClock clock_after(TSClock base, TSDuration duration) {
+  TSClock deadline = base;
+  deadline += duration;
+  return deadline;
+}
+
+// True when `self` is the value returned by `clock_null`.
+static inline bool clock_is_null(TSClock self) {
+  return self == 0;
+}
+
+// True when `self` is strictly later than `other`.
+static inline bool clock_is_gt(TSClock self, TSClock other) {
+  return other < self;
+}
+
+#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
+
+// POSIX with monotonic clock support (Linux)
+// * Represent a time as a monotonic (seconds, nanoseconds) pair.
+// * Represent a duration as a number of microseconds.
+//
+// On these platforms, parse timeouts will correspond accurately to
+// real time, regardless of what other processes are running.
+
+#include <time.h>
+typedef struct timespec TSClock;
+
+// Durations are already stored in microseconds on this platform.
+static inline TSDuration duration_from_micros(uint64_t micros) {
+  TSDuration duration = micros;
+  return duration;
+}
+
+// Durations are already stored in microseconds on this platform.
+static inline uint64_t duration_to_micros(TSDuration self) {
+  uint64_t micros = self;
+  return micros;
+}
+
+// Read the current value of the monotonic clock.
+static inline TSClock clock_now(void) {
+  TSClock now;
+  clock_gettime(CLOCK_MONOTONIC, &now);
+  return now;
+}
+
+// The "no time set" value: both fields zero.
+static inline TSClock clock_null(void) {
+  TSClock unset = {0, 0};
+  return unset;
+}
+
+// Compute the time `duration` microseconds after `base`.
+//
+// The result is kept normalized (tv_nsec below one second). Without the
+// carry, tv_nsec could reach almost two seconds' worth of nanoseconds, and
+// `clock_is_gt`, which compares tv_sec before tv_nsec, would then treat the
+// deadline as up to a second earlier than it really is.
+static inline TSClock clock_after(TSClock base, TSDuration duration) {
+  TSClock result = base;
+  result.tv_sec += duration / 1000000;
+  result.tv_nsec += (duration % 1000000) * 1000;
+  // `base.tv_nsec` and the added amount are each below one second, so a
+  // single carry is always enough.
+  if (result.tv_nsec >= 1000000000) {
+    result.tv_nsec -= 1000000000;
+    ++result.tv_sec;
+  }
+  return result;
+}
+
+// True when `self` equals the value returned by `clock_null`, i.e. both
+// fields are zero. Checking tv_sec alone would also report a genuine clock
+// reading taken during the first second (tv_sec == 0, tv_nsec != 0) as
+// null.
+static inline bool clock_is_null(TSClock self) {
+  return !self.tv_sec && !self.tv_nsec;
+}
+
+// True when `self` is strictly later than `other`: seconds decide unless
+// they are equal, in which case nanoseconds decide.
+static inline bool clock_is_gt(TSClock self, TSClock other) {
+  if (self.tv_sec != other.tv_sec) {
+    return self.tv_sec > other.tv_sec;
+  }
+  return self.tv_nsec > other.tv_nsec;
+}
+
+#else
+
+// macOS or POSIX without monotonic clock support
+// * Represent a time as a process clock value.
+// * Represent a duration as a number of process clock ticks.
+//
+// On these platforms, parse timeouts may be affected by other processes,
+// which is not ideal, but is better than using a non-monotonic time API
+// like `gettimeofday`.
+
+#include <time.h>
+typedef uint64_t TSClock;
+
+// Convert microseconds to process-clock ticks.
+static inline TSDuration duration_from_micros(uint64_t micros) {
+  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
+  return micros * ticks_per_second / 1000000;
+}
+
+// Convert process-clock ticks to microseconds.
+static inline uint64_t duration_to_micros(TSDuration self) {
+  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
+  return self * 1000000 / ticks_per_second;
+}
+
+// The "no time set" value: a tick count of zero.
+static inline TSClock clock_null(void) {
+  TSClock unset = 0;
+  return unset;
+}
+
+// Read the process clock as a tick count.
+static inline TSClock clock_now(void) {
+  uint64_t ticks = (uint64_t)clock();
+  return ticks;
+}
+
+// The time `duration` ticks after `base`.
+static inline TSClock clock_after(TSClock base, TSDuration duration) {
+  TSClock deadline = base;
+  deadline += duration;
+  return deadline;
+}
+
+// True when `self` is the value returned by `clock_null`.
+static inline bool clock_is_null(TSClock self) {
+  return self == 0;
+}
+
+// True when `self` is strictly later than `other`.
+static inline bool clock_is_gt(TSClock self, TSClock other) {
+  return other < self;
+}
+
+#endif
+
+#endif // TREE_SITTER_CLOCK_H_
diff --git a/src/tree_sitter/error_costs.h b/src/tree_sitter/error_costs.h
new file mode 100644
index 0000000000..32d3666a66
--- /dev/null
+++ b/src/tree_sitter/error_costs.h
@@ -0,0 +1,11 @@
+#ifndef TREE_SITTER_ERROR_COSTS_H_
+#define TREE_SITTER_ERROR_COSTS_H_
+
+// Parse-state value that denotes the error state; subtree parse states are
+// compared against it elsewhere in the library.
+#define ERROR_STATE 0
+// Cost constants. NOTE(review): presumably the weights used to score error
+// recoveries (per recovery, per missing/skipped tree, per skipped line or
+// character) -- confirm against the parser sources.
+#define ERROR_COST_PER_RECOVERY 500
+#define ERROR_COST_PER_MISSING_TREE 110
+#define ERROR_COST_PER_SKIPPED_TREE 100
+#define ERROR_COST_PER_SKIPPED_LINE 30
+#define ERROR_COST_PER_SKIPPED_CHAR 1
+
+#endif
diff --git a/src/tree_sitter/get_changed_ranges.c b/src/tree_sitter/get_changed_ranges.c
new file mode 100644
index 0000000000..5bd1d814bd
--- /dev/null
+++ b/src/tree_sitter/get_changed_ranges.c
@@ -0,0 +1,482 @@
+#include "./get_changed_ranges.h"
+#include "./subtree.h"
+#include "./language.h"
+#include "./error_costs.h"
+#include "./tree_cursor.h"
+#include <assert.h>
+
+// #define DEBUG_GET_CHANGED_RANGES
+
+// Record the range [start, end) in `self`. If it begins at or before the
+// end of the most recently added range, that range's end is moved to `end`
+// instead of adding a new entry. Empty ranges are not added.
+static void ts_range_array_add(TSRangeArray *self, Length start, Length end) {
+  TSRange *previous = self->size > 0 ? array_back(self) : NULL;
+  if (previous && start.bytes <= previous->end_byte) {
+    previous->end_byte = end.bytes;
+    previous->end_point = end.extent;
+    return;
+  }
+
+  if (start.bytes >= end.bytes) return;
+
+  TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
+  array_push(self, range);
+}
+
+// Report whether any range in `self`, scanning from `start_index`, overlaps
+// the byte interval [start_byte, end_byte). Only the first range that ends
+// after `start_byte` is examined: the interval intersects exactly when that
+// range starts before `end_byte`.
+bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index,
+                               uint32_t start_byte, uint32_t end_byte) {
+  for (unsigned i = start_index; i < self->size; i++) {
+    const TSRange *candidate = &self->contents[i];
+    if (candidate->end_byte <= start_byte) continue;
+    return candidate->start_byte < end_byte;
+  }
+  return false;
+}
+
+// Compute the byte regions that are covered by exactly one of two range
+// lists (their symmetric difference) and append them to `differences`.
+//
+// Both lists are walked in parallel, stopping at every range boundary.
+// `in_old_range` / `in_new_range` track whether the current position lies
+// inside a range of the respective list; whenever the two flags disagree,
+// the stretch up to the next boundary is recorded.
+void ts_range_array_get_changed_ranges(
+ const TSRange *old_ranges, unsigned old_range_count,
+ const TSRange *new_ranges, unsigned new_range_count,
+ TSRangeArray *differences
+) {
+ unsigned new_index = 0;
+ unsigned old_index = 0;
+ Length current_position = length_zero();
+ bool in_old_range = false;
+ bool in_new_range = false;
+
+ while (old_index < old_range_count || new_index < new_range_count) {
+ // These may point one past the end of an exhausted list; they are only
+ // dereferenced in the branches below that are guarded by the in-range
+ // flag or by the index check.
+ const TSRange *old_range = &old_ranges[old_index];
+ const TSRange *new_range = &new_ranges[new_index];
+
+ // Next boundary in the old list: the end of the current range, the start
+ // of the next one, or LENGTH_MAX once the list is exhausted.
+ Length next_old_position;
+ if (in_old_range) {
+ next_old_position = (Length) {old_range->end_byte, old_range->end_point};
+ } else if (old_index < old_range_count) {
+ next_old_position = (Length) {old_range->start_byte, old_range->start_point};
+ } else {
+ next_old_position = LENGTH_MAX;
+ }
+
+ // Same for the new list.
+ Length next_new_position;
+ if (in_new_range) {
+ next_new_position = (Length) {new_range->end_byte, new_range->end_point};
+ } else if (new_index < new_range_count) {
+ next_new_position = (Length) {new_range->start_byte, new_range->start_point};
+ } else {
+ next_new_position = LENGTH_MAX;
+ }
+
+ if (next_old_position.bytes < next_new_position.bytes) {
+ // The old list's boundary comes first: cross it.
+ if (in_old_range != in_new_range) {
+ ts_range_array_add(differences, current_position, next_old_position);
+ }
+ if (in_old_range) old_index++;
+ current_position = next_old_position;
+ in_old_range = !in_old_range;
+ } else if (next_new_position.bytes < next_old_position.bytes) {
+ // The new list's boundary comes first: cross it.
+ if (in_old_range != in_new_range) {
+ ts_range_array_add(differences, current_position, next_new_position);
+ }
+ if (in_new_range) new_index++;
+ current_position = next_new_position;
+ in_new_range = !in_new_range;
+ } else {
+ // Both lists have a boundary at the same byte: cross both at once.
+ if (in_old_range != in_new_range) {
+ ts_range_array_add(differences, current_position, next_new_position);
+ }
+ if (in_old_range) old_index++;
+ if (in_new_range) new_index++;
+ in_old_range = !in_old_range;
+ in_new_range = !in_new_range;
+ current_position = next_new_position;
+ }
+ }
+}
+
+// Traversal state used to walk one tree while two trees are compared.
+typedef struct {
+ // Stack of entries from the root down to the current subtree.
+ TreeCursor cursor;
+ // Language consulted for alias sequences when deciding visibility.
+ const TSLanguage *language;
+ // Counter adjusted as the iterator enters and leaves visible nodes;
+ // starts at 1 for the root.
+ unsigned visible_depth;
+ // True while the iterator is positioned in the padding that precedes the
+ // current node rather than on the node's own content.
+ bool in_padding;
+} Iterator;
+
+// Build an iterator positioned on the root `tree`. The caller's cursor
+// stack is reused: it is emptied, seeded with a single root entry, and then
+// copied by value into the iterator.
+static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language) {
+  TreeCursorEntry root_entry = {
+    .subtree = tree,
+    .position = length_zero(),
+    .child_index = 0,
+    .structural_child_index = 0,
+  };
+  array_clear(&cursor->stack);
+  array_push(&cursor->stack, root_entry);
+
+  // Copy the cursor only after the push so the copy sees the final storage.
+  Iterator result = {
+    .cursor = *cursor,
+    .language = language,
+    .visible_depth = 1,
+    .in_padding = false,
+  };
+  return result;
+}
+
+// The traversal is finished once the cursor stack has been emptied.
+static bool iterator_done(Iterator *self) {
+  uint32_t remaining = self->cursor.stack.size;
+  return remaining == 0;
+}
+
+// Start of the iterator's current span: the entry's position while in
+// padding, otherwise the position just past the subtree's padding.
+static Length iterator_start_position(Iterator *self) {
+  const TreeCursorEntry *top = array_back(&self->cursor.stack);
+  if (self->in_padding) return top->position;
+  return length_add(top->position, ts_subtree_padding(*top->subtree));
+}
+
+// End of the iterator's current span: the end of the padding while in
+// padding, otherwise the end of the subtree's own content.
+static Length iterator_end_position(Iterator *self) {
+  const TreeCursorEntry *top = array_back(&self->cursor.stack);
+  Length content_start = length_add(top->position, ts_subtree_padding(*top->subtree));
+  return self->in_padding
+    ? content_start
+    : length_add(content_start, ts_subtree_size(*top->subtree));
+}
+
+// A subtree counts as visible if it is itself visible, or if its parent's
+// production gives it a non-zero alias at its structural child index.
+static bool iterator_tree_is_visible(const Iterator *self) {
+  TreeCursorEntry entry = *array_back(&self->cursor.stack);
+  if (ts_subtree_visible(*entry.subtree)) return true;
+
+  // The root has no parent and therefore cannot be aliased.
+  if (self->cursor.stack.size <= 1) return false;
+
+  const Subtree *parent = self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
+  const TSSymbol *aliases = ts_language_alias_sequence(
+    self->language,
+    parent->ptr->production_id
+  );
+  if (!aliases) return false;
+  return aliases[entry.structural_child_index] != 0;
+}
+
+// Find the nearest visible (or aliased) subtree at or above the iterator's
+// current entry and report it through the out-parameters: the subtree, its
+// alias symbol (if the parent's production supplies one), and the byte
+// offset of its entry. The out-parameters are left untouched when no such
+// subtree is found.
+static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
+ TSSymbol *alias_symbol, uint32_t *start_byte) {
+ uint32_t i = self->cursor.stack.size - 1;
+
+ // While in padding, the current entry has not been entered yet, so start
+ // the search from its parent (or give up if there is none).
+ if (self->in_padding) {
+ if (i == 0) return;
+ i--;
+ }
+
+ // `i` is unsigned, so `i + 1 > 0` is the loop-until-wraparound form of
+ // "i >= 0": the loop ends after index 0 has been processed.
+ for (; i + 1 > 0; i--) {
+ TreeCursorEntry entry = self->cursor.stack.contents[i];
+
+ if (i > 0) {
+ const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
+ const TSSymbol *alias_sequence = ts_language_alias_sequence(
+ self->language,
+ parent->ptr->production_id
+ );
+ if (alias_sequence) {
+ *alias_symbol = alias_sequence[entry.structural_child_index];
+ }
+ }
+
+ if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
+ *tree = *entry.subtree;
+ *start_byte = entry.position.bytes;
+ break;
+ }
+ }
+}
+
+// Move the iterator up to the parent of its current entry. Does nothing if
+// the traversal is already finished.
+static void iterator_ascend(Iterator *self) {
+ if (iterator_done(self)) return;
+ // Leaving a visible node that had actually been entered.
+ if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
+ // The padding flag is cleared unless the entry being left is its parent's
+ // first child.
+ if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
+ self->cursor.stack.size--;
+}
+
+// Descend from the current entry toward `goal_position` (a byte offset).
+// At each level the first child whose content ends after the goal is pushed
+// onto the stack; this repeats through invisible nodes until a visible
+// child is reached.
+//
+// Returns true if a visible child was reached, false if the iterator is in
+// padding or no child extends past the goal. Invisible entries pushed along
+// the way remain on the stack even when false is returned.
+static bool iterator_descend(Iterator *self, uint32_t goal_position) {
+ if (self->in_padding) return false;
+
+ bool did_descend;
+ do {
+ did_descend = false;
+ TreeCursorEntry entry = *array_back(&self->cursor.stack);
+ Length position = entry.position;
+ uint32_t structural_child_index = 0;
+ for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
+ const Subtree *child = &entry.subtree->ptr->children[i];
+ // child_left: where the child's content starts (after its padding);
+ // child_right: where the child's content ends.
+ Length child_left = length_add(position, ts_subtree_padding(*child));
+ Length child_right = length_add(child_left, ts_subtree_size(*child));
+
+ if (child_right.bytes > goal_position) {
+ array_push(&self->cursor.stack, ((TreeCursorEntry){
+ .subtree = child,
+ .position = position,
+ .child_index = i,
+ .structural_child_index = structural_child_index,
+ }));
+
+ if (iterator_tree_is_visible(self)) {
+ // The goal falls in the child's padding if its content starts
+ // after the goal; otherwise the child itself has been entered.
+ if (child_left.bytes > goal_position) {
+ self->in_padding = true;
+ } else {
+ self->visible_depth++;
+ }
+ return true;
+ }
+
+ // Invisible child: keep descending from it.
+ did_descend = true;
+ break;
+ }
+
+ position = child_right;
+ // Extra children do not count toward the structural index.
+ if (!ts_subtree_extra(*child)) structural_child_index++;
+ }
+ } while (did_descend);
+
+ return false;
+}
+
+// Advance the iterator to its next position.
+//
+// If it is in the padding before a node, it steps onto that node (or, if
+// the node is invisible, descends into it). Otherwise it pops entries until
+// it finds a parent with a following child, moves to that child, and then
+// either stops in the child's padding, enters it, or descends into it.
+// When the stack empties, the traversal is finished.
+static void iterator_advance(Iterator *self) {
+ if (self->in_padding) {
+ self->in_padding = false;
+ if (iterator_tree_is_visible(self)) {
+ self->visible_depth++;
+ } else {
+ iterator_descend(self, 0);
+ }
+ return;
+ }
+
+ for (;;) {
+ // Leave the current entry.
+ if (iterator_tree_is_visible(self)) self->visible_depth--;
+ TreeCursorEntry entry = array_pop(&self->cursor.stack);
+ if (iterator_done(self)) return;
+
+ const Subtree *parent = array_back(&self->cursor.stack)->subtree;
+ uint32_t child_index = entry.child_index + 1;
+ if (ts_subtree_child_count(*parent) > child_index) {
+ // The next sibling starts where the popped entry (padding included)
+ // ended.
+ Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
+ uint32_t structural_child_index = entry.structural_child_index;
+ if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
+ const Subtree *next_child = &parent->ptr->children[child_index];
+
+ array_push(&self->cursor.stack, ((TreeCursorEntry){
+ .subtree = next_child,
+ .position = position,
+ .child_index = child_index,
+ .structural_child_index = structural_child_index,
+ }));
+
+ if (iterator_tree_is_visible(self)) {
+ // Stop in the sibling's padding first if it has any.
+ if (ts_subtree_padding(*next_child).bytes > 0) {
+ self->in_padding = true;
+ } else {
+ self->visible_depth++;
+ }
+ } else {
+ iterator_descend(self, 0);
+ }
+ break;
+ }
+ // No following sibling at this level: loop to pop the parent as well.
+ }
+}
+
+// Result of comparing the current positions of two iterators.
+typedef enum {
+ // The visible nodes differ (different symbol or alias, or only one side
+ // has a visible node).
+ IteratorDiffers,
+ // Same symbol and alias, but not all of the equality checks passed.
+ IteratorMayDiffer,
+ // Same symbol and alias and every equality check passed, or neither side
+ // has a visible node.
+ IteratorMatches,
+} IteratorComparison;
+
+// Compare the visible nodes that two iterators are currently on.
+//
+// Returns IteratorMatches when neither side has a visible node, and
+// IteratorDiffers when only one side has one or when their symbols or alias
+// symbols differ. When symbol and alias agree, the result is
+// IteratorMatches only if all the checks in the inner condition hold;
+// otherwise it is IteratorMayDiffer.
+static IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) {
+ Subtree old_tree = NULL_SUBTREE;
+ Subtree new_tree = NULL_SUBTREE;
+ uint32_t old_start = 0;
+ uint32_t new_start = 0;
+ TSSymbol old_alias_symbol = 0;
+ TSSymbol new_alias_symbol = 0;
+ iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
+ iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
+
+ if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
+ if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
+
+ if (
+ old_alias_symbol == new_alias_symbol &&
+ ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
+ ) {
+ // Checks: same start byte, old tree unedited, not an error node, same
+ // byte size, both trees carry a parse state, and both or neither were
+ // parsed in the error state.
+ if (old_start == new_start &&
+ !ts_subtree_has_changes(old_tree) &&
+ ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
+ ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
+ ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
+ ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
+ (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
+ (ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
+ return IteratorMatches;
+ } else {
+ return IteratorMayDiffer;
+ }
+ }
+
+ return IteratorDiffers;
+}
+
+#ifdef DEBUG_GET_CHANGED_RANGES
+// Debug aid: prints the symbol name, padding marker, visible depth and
+// start/end points (rows shown 1-based) of the iterator's current entry.
+static inline void iterator_print_state(Iterator *self) {
+  const TreeCursorEntry *top = array_back(&self->cursor.stack);
+  TSPoint start = iterator_start_position(self).extent;
+  TSPoint end = iterator_end_position(self).extent;
+  TSSymbol symbol = ts_subtree_symbol(*top->subtree);
+  const char *name = ts_language_symbol_name(self->language, symbol);
+  const char *padding_marker = self->in_padding ? "(p)" : " ";
+  printf(
+    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
+    name, padding_marker,
+    self->visible_depth,
+    start.row + 1, start.column,
+    end.row + 1, end.column
+  );
+}
+#endif
+/*
+ * Walks the old and the new tree in lockstep and collects the ranges over
+ * which the two trees differ.
+ *
+ * old_tree, new_tree: roots of the two trees being compared.
+ * cursor1, cursor2: cursors handed to the old/new iterators; the iterators'
+ *   final cursor state is written back through these pointers on return.
+ * language: passed to both iterators.
+ * included_range_differences: ranges whose inclusion in the parse changed;
+ *   subtrees that otherwise match are re-examined when they intersect one.
+ * ranges: out-parameter, set to the contents of the result array.
+ *   NOTE(review): ownership presumably passes to the caller - confirm.
+ *
+ * Returns the number of ranges stored in *ranges.
+ */
+unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree,
+ TreeCursor *cursor1, TreeCursor *cursor2,
+ const TSLanguage *language,
+ const TSRangeArray *included_range_differences,
+ TSRange **ranges) {
+ TSRangeArray results = array_new();
+
+ Iterator old_iter = iterator_new(cursor1, old_tree, language);
+ Iterator new_iter = iterator_new(cursor2, new_tree, language);
+
+ unsigned included_range_difference_index = 0;
+ // If the two iterators start at different byte offsets, the gap is a change.
+ Length position = iterator_start_position(&old_iter);
+ Length next_position = iterator_start_position(&new_iter);
+ if (position.bytes < next_position.bytes) {
+ ts_range_array_add(&results, position, next_position);
+ position = next_position;
+ } else if (position.bytes > next_position.bytes) {
+ ts_range_array_add(&results, next_position, position);
+ next_position = position;
+ }
+
+ do {
+ #ifdef DEBUG_GET_CHANGED_RANGES
+ printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
+ iterator_print_state(&old_iter);
+ printf("\tvs\t");
+ iterator_print_state(&new_iter);
+ puts("");
+ #endif
+
+ // Compare the old and new subtrees.
+ IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
+
+ // Even if the two subtrees appear to be identical, they could differ
+ // internally if they contain a range of text that was previously
+ // excluded from the parse, and is now included, or vice-versa.
+ if (comparison == IteratorMatches && ts_range_array_intersects(
+ included_range_differences,
+ included_range_difference_index,
+ position.bytes,
+ iterator_end_position(&old_iter).bytes
+ )) {
+ comparison = IteratorMayDiffer;
+ }
+
+ bool is_changed = false;
+ switch (comparison) {
+ // If the subtrees are definitely identical, move to the end
+ // of both subtrees.
+ case IteratorMatches:
+ next_position = iterator_end_position(&old_iter);
+ break;
+
+ // If the subtrees might differ internally, descend into both
+ // subtrees, finding the first child that spans the current position.
+ case IteratorMayDiffer:
+ if (iterator_descend(&old_iter, position.bytes)) {
+ if (!iterator_descend(&new_iter, position.bytes)) {
+ is_changed = true;
+ next_position = iterator_end_position(&old_iter);
+ }
+ } else if (iterator_descend(&new_iter, position.bytes)) {
+ is_changed = true;
+ next_position = iterator_end_position(&new_iter);
+ } else {
+ next_position = length_min(
+ iterator_end_position(&old_iter),
+ iterator_end_position(&new_iter)
+ );
+ }
+ break;
+
+ // If the subtrees are different, record a change and then move
+ // to the end of both subtrees.
+ case IteratorDiffers:
+ is_changed = true;
+ next_position = length_min(
+ iterator_end_position(&old_iter),
+ iterator_end_position(&new_iter)
+ );
+ break;
+ }
+
+ // Ensure that both iterators are caught up to the current position.
+ while (
+ !iterator_done(&old_iter) &&
+ iterator_end_position(&old_iter).bytes <= next_position.bytes
+ ) iterator_advance(&old_iter);
+ while (
+ !iterator_done(&new_iter) &&
+ iterator_end_position(&new_iter).bytes <= next_position.bytes
+ ) iterator_advance(&new_iter);
+
+ // Ensure that both iterators are at the same depth in the tree.
+ while (old_iter.visible_depth > new_iter.visible_depth) {
+ iterator_ascend(&old_iter);
+ }
+ while (new_iter.visible_depth > old_iter.visible_depth) {
+ iterator_ascend(&new_iter);
+ }
+
+ if (is_changed) {
+ #ifdef DEBUG_GET_CHANGED_RANGES
+ printf(
+ " change: [[%u, %u] - [%u, %u]]\n",
+ position.extent.row + 1, position.extent.column,
+ next_position.extent.row + 1, next_position.extent.column
+ );
+ #endif
+
+ ts_range_array_add(&results, position, next_position);
+ }
+
+ position = next_position;
+
+ // Keep track of the current position in the included range differences
+ // array in order to avoid scanning the entire array on each iteration.
+ while (included_range_difference_index < included_range_differences->size) {
+ const TSRange *range = &included_range_differences->contents[
+ included_range_difference_index
+ ];
+ if (range->end_byte <= position.bytes) {
+ included_range_difference_index++;
+ } else {
+ break;
+ }
+ }
+ } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
+ // A difference in total size is reported as one final range between the two end positions.
+ Length old_size = ts_subtree_total_size(*old_tree);
+ Length new_size = ts_subtree_total_size(*new_tree);
+ if (old_size.bytes < new_size.bytes) {
+ ts_range_array_add(&results, old_size, new_size);
+ } else if (new_size.bytes < old_size.bytes) {
+ ts_range_array_add(&results, new_size, old_size);
+ }
+ // Copy the iterators' cursor state back into the caller's cursors.
+ *cursor1 = old_iter.cursor;
+ *cursor2 = new_iter.cursor;
+ *ranges = results.contents;
+ return results.size;
+}
diff --git a/src/tree_sitter/get_changed_ranges.h b/src/tree_sitter/get_changed_ranges.h
new file mode 100644
index 0000000000..a1f1dbb430
--- /dev/null
+++ b/src/tree_sitter/get_changed_ranges.h
@@ -0,0 +1,36 @@
+#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
+#define TREE_SITTER_GET_CHANGED_RANGES_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./tree_cursor.h"
+#include "./subtree.h"
+
+typedef Array(TSRange) TSRangeArray;  // Array of TSRange values declared through the Array() macro.
+ // NOTE(review): definition not visible here; presumably stores in `differences` the ranges that differ between the old and new range lists - confirm.
+void ts_range_array_get_changed_ranges(
+ const TSRange *old_ranges, unsigned old_range_count,
+ const TSRange *new_ranges, unsigned new_range_count,
+ TSRangeArray *differences
+);
+ // NOTE(review): definition not visible here; presumably reports whether any range at or after `start_index` overlaps [start_byte, end_byte) - confirm.
+bool ts_range_array_intersects(
+ const TSRangeArray *self, unsigned start_index,
+ uint32_t start_byte, uint32_t end_byte
+);
+ // Compares two trees, stores the differing ranges in *ranges and returns how many there are.
+unsigned ts_subtree_get_changed_ranges(
+ const Subtree *old_tree, const Subtree *new_tree,
+ TreeCursor *cursor1, TreeCursor *cursor2,
+ const TSLanguage *language,
+ const TSRangeArray *included_range_differences,
+ TSRange **ranges
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_GET_CHANGED_RANGES_H_
diff --git a/src/tree_sitter/language.c b/src/tree_sitter/language.c
new file mode 100644
index 0000000000..1bfb1a8d03
--- /dev/null
+++ b/src/tree_sitter/language.c
@@ -0,0 +1,107 @@
+#include "./language.h"
+#include "./subtree.h"
+#include "./error_costs.h"
+#include <string.h>
+
+// Fills `result` with the parse actions registered for (`state`, `symbol`).
+// The two built-in error symbols produce an empty, non-reusable entry without
+// consulting the parse table.
+void ts_language_table_entry(const TSLanguage *self, TSStateId state,
+                             TSSymbol symbol, TableEntry *result) {
+  bool is_error_symbol =
+    symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
+  if (is_error_symbol) {
+    result->actions = NULL;
+    result->action_count = 0;
+    result->is_reusable = false;
+    return;
+  }
+
+  assert(symbol < self->token_count);
+  uint32_t action_index = ts_language_lookup(self, state, symbol);
+  const TSParseActionEntry *header = &self->parse_actions[action_index];
+  // The actions themselves are stored immediately after the header entry.
+  result->actions = (const TSParseAction *)(header + 1);
+  result->action_count = header->count;
+  result->is_reusable = header->reusable;
+}
+
+// Returns the language's symbol count plus its alias count.
+uint32_t ts_language_symbol_count(const TSLanguage *language) {
+  uint32_t total = language->symbol_count;
+  total += language->alias_count;
+  return total;
+}
+
+// Returns the `version` field stored in the language.
+uint32_t ts_language_version(const TSLanguage *language) {
+  const uint32_t version = language->version;
+  return version;
+}
+
+TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) {
+ if (symbol == ts_builtin_sym_error) {
+ return (TSSymbolMetadata){.visible = true, .named = true};
+ } else if (symbol == ts_builtin_sym_error_repeat) {
+ return (TSSymbolMetadata){.visible = false, .named = false};
+ } else {
+ return language->symbol_metadata[symbol];
+ }
+}
+
+const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) {
+ if (symbol == ts_builtin_sym_error) {
+ return "ERROR";
+ } else if (symbol == ts_builtin_sym_error_repeat) {
+ return "_ERROR";
+ } else {
+ return language->symbol_names[symbol];
+ }
+}
+
+TSSymbol ts_language_symbol_for_name(const TSLanguage *self, const char *name) {
+ if (!strcmp(name, "ERROR")) return ts_builtin_sym_error;
+
+ uint32_t count = ts_language_symbol_count(self);
+ for (TSSymbol i = 0; i < count; i++) {
+ if (!strcmp(self->symbol_names[i], name)) {
+ return i;
+ }
+ }
+ return 0;
+}
+
+// Maps a symbol's metadata to a TSSymbolType: named symbols are regular,
+// unnamed but visible symbols are anonymous, everything else is auxiliary.
+TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol) {
+  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
+  if (metadata.named) return TSSymbolTypeRegular;
+  return metadata.visible ? TSSymbolTypeAnonymous : TSSymbolTypeAuxiliary;
+}
+
+// Returns the number of fields, or 0 for languages whose version is older
+// than TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS.
+uint32_t ts_language_field_count(const TSLanguage *self) {
+  bool has_fields = self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS;
+  return has_fields ? self->field_count : 0;
+}
+
+// Returns the name of the field with the given id, or NULL when the language
+// has no fields or `id` is larger than the field count.
+//
+// The upper-bound check keeps an out-of-range id from indexing past the end
+// of `field_names`. Index `count` itself stays allowed, matching the
+// 1..count range that ts_language_field_id_for_name iterates over.
+const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) {
+  uint32_t count = ts_language_field_count(self);
+  if (count == 0 || id > count) return NULL;
+  return self->field_names[id];
+}
+
+// Returns the id of the field whose name is exactly the first `name_length`
+// bytes of `name`, or 0 when there is no such field.
+//
+// Two problems of a bare strncmp() switch are avoided here:
+//  - strncmp() returning 0 only proves that the first `name_length` bytes
+//    agree, so "foo" would match a field called "foobar". The field name
+//    must also have exactly `name_length` characters.
+//  - The C standard only promises a negative/zero/positive result, not the
+//    literal value -1, so the early exit tests for `< 0`.
+TSFieldId ts_language_field_id_for_name(
+  const TSLanguage *self,
+  const char *name,
+  uint32_t name_length
+) {
+  uint32_t count = ts_language_field_count(self);
+  // Ids 1 through `count` are scanned; 0 is the "not found" result.
+  for (TSSymbol i = 1; i < count + 1; i++) {
+    const char *field_name = self->field_names[i];
+    int comparison = strncmp(name, field_name, name_length);
+    if (comparison == 0) {
+      // Using strlen() rather than indexing field_name[name_length] avoids
+      // reading past the terminator when `name` contains an embedded NUL.
+      if (strlen(field_name) == name_length) return i;
+    } else if (comparison < 0) {
+      // `name` sorts before this field name. As in the original code, the
+      // early exit assumes field_names is sorted in ascending order.
+      return 0;
+    }
+  }
+  return 0;
+}
diff --git a/src/tree_sitter/language.h b/src/tree_sitter/language.h
new file mode 100644
index 0000000000..0741486a1b
--- /dev/null
+++ b/src/tree_sitter/language.h
@@ -0,0 +1,138 @@
+#ifndef TREE_SITTER_LANGUAGE_H_
+#define TREE_SITTER_LANGUAGE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./subtree.h"
+#include "tree_sitter/parser.h"
+
+#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
+#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
+#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11
+
+typedef struct {  // Result of a parse-table lookup, filled in by ts_language_table_entry().
+ const TSParseAction *actions;  // First action of the entry, or NULL for the error symbols.
+ uint32_t action_count;  // Number of actions available at `actions`.
+ bool is_reusable;  // Copied from the table entry's `reusable` flag.
+} TableEntry;
+
+void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
+
+TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
+
+// True when `symbol` lies in the range 1..external_token_count.
+static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
+  if (symbol == 0) return false;
+  return symbol < self->external_token_count + 1;
+}
+
+// Returns the actions for (`state`, `symbol`) and stores how many there are
+// in `*count`.
+static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
+                                                       TSStateId state,
+                                                       TSSymbol symbol,
+                                                       uint32_t *count) {
+  TableEntry table_entry;
+  ts_language_table_entry(self, state, symbol, &table_entry);
+  const TSParseAction *actions = table_entry.actions;
+  *count = table_entry.action_count;
+  return actions;
+}
+
+// True when the parse table holds at least one action for (`state`, `symbol`).
+static inline bool ts_language_has_actions(const TSLanguage *self,
+                                           TSStateId state,
+                                           TSSymbol symbol) {
+  TableEntry table_entry;
+  ts_language_table_entry(self, state, symbol, &table_entry);
+  return table_entry.action_count != 0;
+}
+
+// True when the first action registered for (`state`, `symbol`) is a reduce.
+static inline bool ts_language_has_reduce_action(const TSLanguage *self,
+                                                 TSStateId state,
+                                                 TSSymbol symbol) {
+  TableEntry table_entry;
+  ts_language_table_entry(self, state, symbol, &table_entry);
+  if (table_entry.action_count == 0) return false;
+  return table_entry.actions[0].type == TSParseActionTypeReduce;
+}
+
+// Looks up the parse-table value for (`state`, `symbol`).
+//
+// States below `large_state_count` (and every state of a language older than
+// TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES) index the dense table
+// directly. Other states use the small table, read here as a section count
+// followed by sections of (value, symbol count, symbols...). 0 is returned
+// when no section lists `symbol`.
+static inline uint16_t ts_language_lookup(
+  const TSLanguage *self,
+  TSStateId state,
+  TSSymbol symbol
+) {
+  bool uses_small_table =
+    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES &&
+    state >= self->large_state_count;
+  if (!uses_small_table) {
+    return self->parse_table[state * self->symbol_count + symbol];
+  }
+
+  uint32_t index = self->small_parse_table_map[state - self->large_state_count];
+  const uint16_t *cursor = &self->small_parse_table[index];
+  uint16_t section_count = *cursor++;
+  for (unsigned section = 0; section < section_count; section++) {
+    uint16_t section_value = *cursor++;
+    uint16_t symbol_count = *cursor++;
+    for (unsigned entry = 0; entry < symbol_count; entry++) {
+      if (*cursor++ == symbol) return section_value;
+    }
+  }
+  return 0;
+}
+
+// Returns the state reached from `state` on `symbol`, or 0 when there is none.
+// Symbols at or above `token_count` read the parse table directly; symbols
+// below it use the target state of their last action when that action is a
+// shift or a recover.
+static inline TSStateId ts_language_next_state(const TSLanguage *self,
+                                               TSStateId state,
+                                               TSSymbol symbol) {
+  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
+    return 0;
+  }
+  if (!(symbol < self->token_count)) {
+    return ts_language_lookup(self, state, symbol);
+  }
+
+  uint32_t count;
+  const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
+  if (count == 0) return 0;
+
+  TSParseAction last_action = actions[count - 1];
+  bool moves_to_state =
+    last_action.type == TSParseActionTypeShift ||
+    last_action.type == TSParseActionTypeRecover;
+  if (!moves_to_state) return 0;
+  return last_action.params.state;
+}
+
+// Returns the row of the external scanner's `states` table that belongs to
+// `external_scanner_state`, or NULL for state 0.
+static inline const bool *
+ts_language_enabled_external_tokens(const TSLanguage *self,
+                                    unsigned external_scanner_state) {
+  if (external_scanner_state == 0) return NULL;
+  return self->external_scanner.states + self->external_token_count * external_scanner_state;
+}
+
+// Returns the alias sequence for `production_id`, or NULL for production 0.
+static inline const TSSymbol *
+ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) {
+  if (production_id > 0) {
+    return self->alias_sequences + production_id * self->max_alias_sequence_length;
+  }
+  return NULL;
+}
+
+// Stores in `*start` / `*end` the bounds of the field map entries that belong
+// to `production_id`. Both are set to NULL when the language predates fields
+// or declares none.
+static inline void ts_language_field_map(
+  const TSLanguage *self,
+  uint32_t production_id,
+  const TSFieldMapEntry **start,
+  const TSFieldMapEntry **end
+) {
+  bool has_fields =
+    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS &&
+    self->field_count != 0;
+  if (!has_fields) {
+    *start = NULL;
+    *end = NULL;
+    return;
+  }
+
+  TSFieldMapSlice slice = self->field_map_slices[production_id];
+  const TSFieldMapEntry *first = &self->field_map_entries[slice.index];
+  *start = first;
+  *end = first + slice.length;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_LANGUAGE_H_
diff --git a/src/tree_sitter/length.h b/src/tree_sitter/length.h
new file mode 100644
index 0000000000..61de9fc1d5
--- /dev/null
+++ b/src/tree_sitter/length.h
@@ -0,0 +1,44 @@
+#ifndef TREE_SITTER_LENGTH_H_
+#define TREE_SITTER_LENGTH_H_
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include "./point.h"
+#include "tree_sitter/api.h"
+
+typedef struct {  // A position or size expressed both as a byte count and as a TSPoint.
+ uint32_t bytes;  // Byte count.
+ TSPoint extent;  // The same quantity as a row/column point.
+} Length;
+
+static const Length LENGTH_UNDEFINED = {0, {0, 1}};  // Sentinel: zero bytes with a non-zero column, the pattern length_is_undefined() tests for.
+static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};  // Every component at its maximum value.
+
+static inline bool length_is_undefined(Length length) {
+ return length.bytes == 0 && length.extent.column != 0;
+}
+
+// Returns whichever length has fewer bytes; `len2` wins a tie.
+static inline Length length_min(Length len1, Length len2) {
+  if (len1.bytes < len2.bytes) return len1;
+  return len2;
+}
+
+// Adds two lengths: bytes are summed and the extents combined with point_add().
+static inline Length length_add(Length len1, Length len2) {
+  Length sum = {
+    len1.bytes + len2.bytes,
+    point_add(len1.extent, len2.extent),
+  };
+  return sum;
+}
+
+// Subtracts `len2` from `len1`: bytes are subtracted and the extents combined
+// with point_sub().
+static inline Length length_sub(Length len1, Length len2) {
+  Length difference = {
+    len1.bytes - len2.bytes,
+    point_sub(len1.extent, len2.extent),
+  };
+  return difference;
+}
+
+// Returns a length of zero bytes at row 0, column 0.
+static inline Length length_zero(void) {
+  return (Length) {0, {0, 0}};
+}
+
+#endif
diff --git a/src/tree_sitter/lexer.c b/src/tree_sitter/lexer.c
new file mode 100644
index 0000000000..fdc127466f
--- /dev/null
+++ b/src/tree_sitter/lexer.c
@@ -0,0 +1,322 @@
+#include <stdio.h>
+#include "./lexer.h"
+#include "./subtree.h"
+#include "./length.h"
+#include "./utf16.h"
+#include "utf8proc.h"
+/*
+ * LOG(fmt, ...): when a logger callback is installed on `self`, formats the
+ * message into self->debug_buffer (bounded by
+ * TREE_SITTER_SERIALIZATION_BUFFER_SIZE) and passes it to the callback with
+ * type TSLogTypeLex. Requires a variable named `self` in scope.
+ * NOTE(review): expands to a bare `if` statement, so using it directly in
+ * front of an `else` would bind that `else` to the macro's `if`.
+ */
+#define LOG(...) \
+ if (self->logger.log) { \
+ snprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
+ self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); \
+ }
+/*
+ * LOG_CHARACTER(message, character): logs `message` followed by the
+ * character, printed with %c when it is in the range 32..126 and with %d
+ * otherwise. `message` must be a string literal because it is concatenated
+ * with the format suffix.
+ */
+#define LOG_CHARACTER(message, character) \
+ LOG( \
+ 32 <= character && character < 127 ? \
+ message " character:'%c'" : \
+ message " character:%d", character \
+ )
+
+static const char empty_chunk[3] = { 0, 0 };  // All-zero buffer used as the chunk when the input yields no bytes; its address also acts as an "at end" marker in ts_lexer__advance().
+ // Compared against the lookahead at byte 0 in ts_lexer_start(), where a match is skipped.
+static const int32_t BYTE_ORDER_MARK = 0xFEFF;
+
+// Asks the input callback for the chunk that starts at the current position
+// and records where it begins. A zero-sized read is replaced by `empty_chunk`.
+static void ts_lexer__get_chunk(Lexer *self) {
+  Length at = self->current_position;
+  self->chunk_start = at.bytes;
+  self->chunk = self->input.read(self->input.payload, at.bytes, at.extent, &self->chunk_size);
+  if (self->chunk_size == 0) {
+    self->chunk = empty_chunk;
+  }
+}
+
+typedef utf8proc_ssize_t (*DecodeFunction)(  // Signature shared by the decoders chosen in ts_lexer__get_lookahead(): (bytes, byte count, out code point) -> size.
+ const utf8proc_uint8_t *,
+ utf8proc_ssize_t,
+ utf8proc_int32_t *
+);
+
+// Decodes the character at the current position into `data.lookahead` and
+// records its size in `lookahead_size`. At the end of the chunk the lookahead
+// becomes '\0' with size 1; an undecodable sequence keeps lookahead -1 and is
+// given size 1.
+static void ts_lexer__get_lookahead(Lexer *self) {
+  uint32_t offset = self->current_position.bytes - self->chunk_start;
+  uint32_t remaining = self->chunk_size - offset;
+  if (remaining == 0) {
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+    return;
+  }
+
+  const uint8_t *bytes = (const uint8_t *)self->chunk + offset;
+  DecodeFunction decode =
+    self->input.encoding == TSInputEncodingUTF8 ? utf8proc_iterate : utf16_iterate;
+  self->lookahead_size = decode(bytes, remaining, &self->data.lookahead);
+
+  // A failure with fewer than four bytes left may just mean that the chunk
+  // ended inside a multi-byte character, so retry on a freshly read chunk.
+  bool decode_failed = self->data.lookahead == -1;
+  if (decode_failed && remaining < 4) {
+    ts_lexer__get_chunk(self);
+    self->lookahead_size = decode(
+      (const uint8_t *)self->chunk,
+      self->chunk_size,
+      &self->data.lookahead
+    );
+  }
+
+  if (self->data.lookahead == -1) {
+    self->lookahead_size = 1;
+  }
+}
+
+// Moves past the current lookahead character and decodes the next one.
+// With `skip` set, the token start is moved to the new position as well.
+// Does nothing once the lexer is parked on `empty_chunk`.
+static void ts_lexer__advance(TSLexer *payload, bool skip) {
+  Lexer *self = (Lexer *)payload;
+  if (self->chunk == empty_chunk) return;
+
+  uint32_t consumed = self->lookahead_size;
+  if (consumed != 0) {
+    self->current_position.bytes += consumed;
+    if (self->data.lookahead != '\n') {
+      self->current_position.extent.column += consumed;
+    } else {
+      self->current_position.extent.row++;
+      self->current_position.extent.column = 0;
+    }
+  }
+
+  // On reaching the end of the current included range, either stop (it was
+  // the last one) or jump to the start of the next range.
+  const TSRange *range = &self->included_ranges[self->current_included_range_index];
+  if (self->current_position.bytes == range->end_byte) {
+    self->current_included_range_index++;
+    if (self->current_included_range_index == self->included_range_count) {
+      self->data.lookahead = '\0';
+      self->lookahead_size = 1;
+      return;
+    }
+    const TSRange *next_range = range + 1;
+    self->current_position = (Length) {
+      next_range->start_byte,
+      next_range->start_point,
+    };
+  }
+
+  if (!skip) {
+    LOG_CHARACTER("consume", self->data.lookahead);
+  } else {
+    LOG_CHARACTER("skip", self->data.lookahead);
+    self->token_start_position = self->current_position;
+  }
+
+  uint32_t chunk_end = self->chunk_start + self->chunk_size;
+  if (self->current_position.bytes >= chunk_end) {
+    ts_lexer__get_chunk(self);
+  }
+
+  ts_lexer__get_lookahead(self);
+}
+
+// Records the end of the current token. When the lexer sits exactly at the
+// start of an included range other than the first, the token is ended at the
+// end of the previous range instead of at the current position.
+static void ts_lexer__mark_end(TSLexer *payload) {
+  Lexer *self = (Lexer *)payload;
+  const TSRange *range = &self->included_ranges[self->current_included_range_index];
+  bool at_later_range_start =
+    self->current_included_range_index > 0 &&
+    self->current_position.bytes == range->start_byte;
+
+  if (!at_later_range_start) {
+    self->token_end_position = self->current_position;
+    return;
+  }
+
+  const TSRange *previous_range = range - 1;
+  self->token_end_position = (Length) {
+    previous_range->end_byte,
+    previous_range->end_point,
+  };
+}
+
+// Counts how many ts_lexer__advance() steps separate the start of the current
+// line from the current position, by rewinding to column 0 and advancing
+// until the original byte offset is reached again.
+static uint32_t ts_lexer__get_column(TSLexer *payload) {
+  Lexer *self = (Lexer *)payload;
+  const uint32_t target_byte = self->current_position.bytes;
+
+  self->current_position.bytes -= self->current_position.extent.column;
+  self->current_position.extent.column = 0;
+
+  if (self->current_position.bytes < self->chunk_start) {
+    ts_lexer__get_chunk(self);
+  }
+
+  uint32_t column = 0;
+  for (; self->current_position.bytes < target_byte; column++) {
+    ts_lexer__advance(payload, false);
+  }
+  return column;
+}
+
+// True when the current position is the first byte of the current included range.
+static bool ts_lexer__is_at_included_range_start(TSLexer *payload) {
+  const Lexer *self = (const Lexer *)payload;
+  uint32_t range_start =
+    self->included_ranges[self->current_included_range_index].start_byte;
+  return self->current_position.bytes == range_start;
+}
+
+// The lexer's methods are stored as a struct field so that generated
+// parsers can call them without needing to be linked against this library.
+
+// Puts `self` into its initial state: method table installed, no chunk, no
+// logger, default included ranges, positioned at offset zero.
+void ts_lexer_init(Lexer *self) {
+  TSLexer methods = {
+    .lookahead = 0,
+    .result_symbol = 0,
+    .advance = ts_lexer__advance,
+    .mark_end = ts_lexer__mark_end,
+    .get_column = ts_lexer__get_column,
+    .is_at_included_range_start = ts_lexer__is_at_included_range_start,
+  };
+
+  *self = (Lexer) {
+    .data = methods,
+    // UINT32_MAX differs from every real offset, so the ts_lexer_reset()
+    // call below always performs the goto.
+    .current_position = {UINT32_MAX, {0, 0}},
+    .current_included_range_index = 0,
+    .chunk = NULL,
+    .chunk_start = 0,
+    .logger = {
+      .payload = NULL,
+      .log = NULL
+    },
+  };
+
+  self->included_ranges = NULL;
+  ts_lexer_set_included_ranges(self, NULL, 0);
+  ts_lexer_reset(self, length_zero());
+}
+
+// Releases the included-ranges array held by the lexer.
+void ts_lexer_delete(Lexer *self) {
+  TSRange *owned_ranges = self->included_ranges;
+  ts_free(owned_ranges);
+}
+
+// Installs a new input source and forgets the cached chunk and lookahead.
+void ts_lexer_set_input(Lexer *self, TSInput input) {
+  self->input = input;
+
+  // Nothing read from the previous input remains valid.
+  self->chunk = 0;
+  self->chunk_start = 0;
+  self->chunk_size = 0;
+  self->lookahead_size = 0;
+  self->data.lookahead = 0;
+}
+
+// Moves the lexer to `position`, clamped into the included ranges: a position
+// in a gap snaps forward to the start of the next range, and a position past
+// every range parks the lexer at the end of the last range on `empty_chunk`.
+// The token bounds and the lookahead are reset.
+static void ts_lexer_goto(Lexer *self, Length position) {
+  // Find the first included range that ends after `position`.
+  unsigned index = 0;
+  while (index < self->included_range_count &&
+         self->included_ranges[index].end_byte <= position.bytes) {
+    index++;
+  }
+
+  if (index < self->included_range_count) {
+    const TSRange *range = &self->included_ranges[index];
+    if (range->start_byte > position.bytes) {
+      position = (Length) {
+        .bytes = range->start_byte,
+        .extent = range->start_point,
+      };
+    }
+    self->current_included_range_index = index;
+  } else {
+    const TSRange *last_range = &self->included_ranges[self->included_range_count - 1];
+    position = (Length) {
+      .bytes = last_range->end_byte,
+      .extent = last_range->end_point,
+    };
+    self->chunk = empty_chunk;
+    self->chunk_start = position.bytes;
+    self->chunk_size = 2;
+  }
+
+  self->token_start_position = position;
+  self->token_end_position = LENGTH_UNDEFINED;
+  self->current_position = position;
+
+  // Drop the cached chunk when it does not cover the new position.
+  if (self->chunk) {
+    bool before_chunk = position.bytes < self->chunk_start;
+    bool past_chunk = position.bytes >= self->chunk_start + self->chunk_size;
+    if (before_chunk || past_chunk) {
+      self->chunk = 0;
+      self->chunk_start = 0;
+      self->chunk_size = 0;
+    }
+  }
+
+  self->lookahead_size = 0;
+  self->data.lookahead = 0;
+}
+
+// Repositions the lexer at `position`; a no-op when it is already at that byte.
+void ts_lexer_reset(Lexer *self, Length position) {
+  if (position.bytes == self->current_position.bytes) return;
+  ts_lexer_goto(self, position);
+}
+
+// Prepares for lexing one token at the current position: resets the token
+// bounds and result symbol, loads a chunk and lookahead when none is cached,
+// and skips a byte order mark found at byte 0.
+void ts_lexer_start(Lexer *self) {
+  self->token_start_position = self->current_position;
+  self->token_end_position = LENGTH_UNDEFINED;
+  self->data.result_symbol = 0;
+
+  if (!self->chunk) {
+    ts_lexer__get_chunk(self);
+  }
+  if (!self->lookahead_size) {
+    ts_lexer__get_lookahead(self);
+  }
+
+  bool at_byte_zero = self->current_position.bytes == 0;
+  if (at_byte_zero && self->data.lookahead == BYTE_ORDER_MARK) {
+    ts_lexer__advance((TSLexer *)self, true);
+  }
+}
+
+// Finishes the current token: marks its end if that has not happened yet and
+// raises `*lookahead_end_byte` to cover the furthest byte the lexer examined.
+void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
+  if (length_is_undefined(self->token_end_position)) {
+    ts_lexer__mark_end(&self->data);
+  }
+
+  // In order to determine that a byte sequence is invalid UTF8 or UTF16,
+  // the character decoding algorithm may have looked at the following byte.
+  // Therefore, the next byte *after* the current (invalid) character
+  // affects the interpretation of the current character.
+  uint32_t bytes_examined = self->data.lookahead == -1 ? 2 : 1;
+  uint32_t current_lookahead_end_byte = self->current_position.bytes + bytes_examined;
+
+  if (*lookahead_end_byte < current_lookahead_end_byte) {
+    *lookahead_end_byte = current_lookahead_end_byte;
+  }
+}
+
+// Consumes characters until the lookahead becomes 0.
+void ts_lexer_advance_to_end(Lexer *self) {
+  TSLexer *lexer = (TSLexer *)self;
+  for (;;) {
+    if (self->data.lookahead == 0) break;
+    ts_lexer__advance(lexer, false);
+  }
+}
+
+// Public wrapper around ts_lexer__mark_end() for callers that hold a Lexer.
+void ts_lexer_mark_end(Lexer *self) {
+  TSLexer *lexer = &self->data;
+  ts_lexer__mark_end(lexer);
+}
+
+static const TSRange DEFAULT_RANGES[] = {  // Single range spanning byte 0 to UINT32_MAX; used by ts_lexer_set_included_ranges() when no ranges are supplied.
+ {
+ .start_point = {
+ .row = 0,
+ .column = 0,
+ },
+ .end_point = {
+ .row = UINT32_MAX,
+ .column = UINT32_MAX,
+ },
+ .start_byte = 0,
+ .end_byte = UINT32_MAX
+ }
+};
+
+// Replaces the lexer's included ranges with a private copy of `ranges` and
+// re-clamps the current position to them.
+//
+// ranges: array of `count` ranges, or NULL to include the whole document.
+// count:  number of entries in `ranges`.
+//
+// An empty list (`count == 0`) is treated like NULL. Without that, a non-NULL
+// pointer with a zero count would leave `included_range_count` at 0, and
+// ts_lexer_goto() would then index `included_ranges[count - 1]`, which
+// underflows.
+void ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count) {
+  if (!ranges || count == 0) {
+    ranges = DEFAULT_RANGES;
+    count = 1;
+  }
+
+  size_t sz = count * sizeof(TSRange);
+  self->included_ranges = ts_realloc(self->included_ranges, sz);
+  memcpy(self->included_ranges, ranges, sz);
+  self->included_range_count = count;
+  // The current position may now lie outside every range; snap it back in.
+  ts_lexer_goto(self, self->current_position);
+}
+
+// Returns the lexer's included-ranges array and stores its length in `*count`.
+TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
+  TSRange *ranges = self->included_ranges;
+  *count = self->included_range_count;
+  return ranges;
+}
+
+#undef LOG
diff --git a/src/tree_sitter/lexer.h b/src/tree_sitter/lexer.h
new file mode 100644
index 0000000000..f523d88f65
--- /dev/null
+++ b/src/tree_sitter/lexer.h
@@ -0,0 +1,48 @@
+#ifndef TREE_SITTER_LEXER_H_
+#define TREE_SITTER_LEXER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./length.h"
+#include "./subtree.h"
+#include "tree_sitter/api.h"
+#include "tree_sitter/parser.h"
+
+typedef struct {  // Lexer state wrapped around the TSLexer interface used by generated parsers.
+ TSLexer data;  // Kept first: lexer.c casts between TSLexer * and Lexer *.
+ Length current_position;  // Position of the lookahead character.
+ Length token_start_position;  // Set when lexing starts and moved forward on skipped characters.
+ Length token_end_position;  // LENGTH_UNDEFINED until ts_lexer__mark_end() records it.
+
+ TSRange * included_ranges;  // Array managed with ts_realloc()/ts_free() by the lexer.
+ size_t included_range_count;  // Number of entries in included_ranges.
+ size_t current_included_range_index;  // Index of the range containing current_position.
+
+ const char *chunk;  // Most recent buffer returned by input.read, or NULL/empty_chunk.
+ uint32_t chunk_start;  // Byte offset at which `chunk` begins.
+ uint32_t chunk_size;  // Size reported by input.read for `chunk`.
+ uint32_t lookahead_size;  // Byte size of the decoded lookahead; 0 means not decoded yet.
+
+ TSInput input;  // Source of text chunks.
+ TSLogger logger;  // Optional callback used by the LOG macro.
+ char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];  // Scratch buffer for formatted log messages.
+} Lexer;
+
+void ts_lexer_init(Lexer *);  // Puts a Lexer into its initial state.
+void ts_lexer_delete(Lexer *);  // Frees the included-ranges array.
+void ts_lexer_set_input(Lexer *, TSInput);  // Installs a new input and clears the cached chunk and lookahead.
+void ts_lexer_reset(Lexer *, Length);  // Moves to the given position unless already at that byte.
+void ts_lexer_start(Lexer *);  // Prepares for lexing one token at the current position.
+void ts_lexer_finish(Lexer *, uint32_t *);  // Finalizes the token end and raises the lookahead end byte.
+void ts_lexer_advance_to_end(Lexer *);  // Advances until the lookahead is 0.
+void ts_lexer_mark_end(Lexer *);  // Records the token end at the current position.
+void ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);  // Copies `ranges`; NULL selects the default whole-document range.
+TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);  // Returns the stored ranges and writes their count.
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_LEXER_H_
diff --git a/src/tree_sitter/lib.c b/src/tree_sitter/lib.c
new file mode 100644
index 0000000000..fc5fbc9210
--- /dev/null
+++ b/src/tree_sitter/lib.c
@@ -0,0 +1,20 @@
+// The Tree-sitter library can be built by compiling this one source file.
+//
+// The following directories must be added to the include path:
+// - include
+// - utf8proc
+
+#define _POSIX_C_SOURCE 200112L
+#define UTF8PROC_STATIC
+
+#include "./get_changed_ranges.c"
+#include "./language.c"
+#include "./lexer.c"
+#include "./node.c"
+#include "./parser.c"
+#include "./stack.c"
+#include "./subtree.c"
+#include "./tree_cursor.c"
+#include "./tree.c"
+#include "./utf16.c"
+#include "utf8proc.c"
diff --git a/src/tree_sitter/node.c b/src/tree_sitter/node.c
new file mode 100644
index 0000000000..6b2be36ee5
--- /dev/null
+++ b/src/tree_sitter/node.c
@@ -0,0 +1,673 @@
+#include <stdbool.h>
+#include "./subtree.h"
+#include "./tree.h"
+#include "./language.h"
+
+// Cursor over the direct children of one subtree. It tracks the running
+// position so that a TSNode can be built for each child it yields.
+typedef struct {
+ // Subtree whose children are walked; NULL_SUBTREE when there are none.
+ Subtree parent;
+ const TSTree *tree;
+ // Running position: once a child has been yielded this is that child's end.
+ Length position;
+ // Index of the next child to yield.
+ uint32_t child_index;
+ // Count of non-extra children yielded so far; indexes `alias_sequence`.
+ uint32_t structural_child_index;
+ // Alias symbols for the parent's production, or NULL.
+ const TSSymbol *alias_sequence;
+} NodeChildIterator;
+
+// TSNode - constructors
+
+// Builds the TSNode value that refers to `subtree` inside `tree`.
+//
+// The four context words store, in order: start byte, start row, start
+// column and the alias symbol (callers pass 0 when there is no alias).
+TSNode ts_node_new(
+  const TSTree *tree,
+  const Subtree *subtree,
+  Length position,
+  TSSymbol alias
+) {
+  TSNode node = {
+    {position.bytes, position.extent.row, position.extent.column, alias},
+    subtree,
+    tree,
+  };
+  return node;
+}
+
+// Returns the null node: no tree, no subtree, zero position, no alias.
+static inline TSNode ts_node__null(void) {
+  Length origin = length_zero();
+  return ts_node_new(NULL, NULL, origin, 0);
+}
+
+// TSNode - accessors
+
+// Start byte of the node (context word 0).
+uint32_t ts_node_start_byte(TSNode self) {
+  uint32_t start_byte = self.context[0];
+  return start_byte;
+}
+
+// Start row/column of the node (context words 1 and 2).
+TSPoint ts_node_start_point(TSNode self) {
+  TSPoint start = {self.context[1], self.context[2]};
+  return start;
+}
+
+// Alias symbol stored in the node (context word 3); 0 means "no alias".
+static inline uint32_t ts_node__alias(const TSNode *self) {
+  uint32_t alias = self->context[3];
+  return alias;
+}
+
+// Loads the Subtree that the node's `id` pointer refers to.
+static inline Subtree ts_node__subtree(TSNode self) {
+  const Subtree *subtree = (const Subtree *)self.id;
+  return *subtree;
+}
+
+// NodeChildIterator
+
+// Creates an iterator over the direct children of `node`.
+//
+// For a node without children the iterator has a null parent and therefore
+// yields nothing. Otherwise it starts at the node's own start position and
+// carries the alias sequence of the node's production.
+static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
+  Subtree subtree = ts_node__subtree(*node);
+
+  NodeChildIterator iterator = {
+    .parent = NULL_SUBTREE,
+    .tree = node->tree,
+    .position = length_zero(),
+    .child_index = 0,
+    .structural_child_index = 0,
+    .alias_sequence = NULL,
+  };
+
+  if (ts_subtree_child_count(subtree) > 0) {
+    iterator.parent = subtree;
+    iterator.position.bytes = ts_node_start_byte(*node);
+    iterator.position.extent = ts_node_start_point(*node);
+    iterator.alias_sequence = ts_language_alias_sequence(
+      node->tree->language,
+      subtree.ptr->production_id
+    );
+  }
+
+  return iterator;
+}
+
+// True once every child of the parent has been yielded. The parent pointer is
+// dereferenced unconditionally, so callers must rule out a null parent first.
+static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
+  if (self->child_index == self->parent.ptr->child_count) return true;
+  return false;
+}
+
+// Yields the next child into `*result` and advances the iterator.
+// Returns false (leaving `*result` untouched) when there is no parent or all
+// children have been visited.
+static inline bool ts_node_child_iterator_next(
+  NodeChildIterator *self,
+  TSNode *result
+) {
+  if (!self->parent.ptr) return false;
+  if (ts_node_child_iterator_done(self)) return false;
+
+  const Subtree *child = self->parent.ptr->children + self->child_index;
+
+  // Only non-extra children consume a slot in the alias sequence.
+  TSSymbol alias_symbol = 0;
+  bool is_structural = !ts_subtree_extra(*child);
+  if (is_structural) {
+    if (self->alias_sequence) {
+      alias_symbol = self->alias_sequence[self->structural_child_index];
+    }
+    self->structural_child_index++;
+  }
+
+  // The first child starts at the iterator's initial position; every later
+  // child is preceded by its own padding.
+  if (self->child_index != 0) {
+    self->position = length_add(self->position, ts_subtree_padding(*child));
+  }
+
+  *result = ts_node_new(self->tree, child, self->position, alias_symbol);
+
+  // Leave the position at the end of the child that was just yielded.
+  self->position = length_add(self->position, ts_subtree_size(*child));
+  self->child_index++;
+  return true;
+}
+
+// TSNode - private
+
+// Decides whether a node counts for the public child/sibling APIs.
+//
+// With `include_anonymous`, a node is relevant if its subtree is visible or
+// it carries an alias. Otherwise it must be named: via the alias' metadata
+// when aliased, or via its own visible+named flags when not.
+static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
+  Subtree tree = ts_node__subtree(self);
+
+  if (include_anonymous) {
+    if (ts_subtree_visible(tree)) return true;
+    return ts_node__alias(&self) != 0;
+  }
+
+  TSSymbol alias = ts_node__alias(&self);
+  if (alias == 0) {
+    return ts_subtree_visible(tree) && ts_subtree_named(tree);
+  }
+  return ts_language_symbol_metadata(self.tree->language, alias).named;
+}
+
+// Number of relevant children recorded on the node's subtree: the visible
+// count when anonymous nodes are included, the named count otherwise.
+// Subtrees without children report 0.
+static inline uint32_t ts_node__relevant_child_count(
+  TSNode self,
+  bool include_anonymous
+) {
+  Subtree tree = ts_node__subtree(self);
+  if (ts_subtree_child_count(tree) == 0) return 0;
+
+  if (!include_anonymous) return tree.ptr->named_child_count;
+  return tree.ptr->visible_child_count;
+}
+
+// Returns the relevant child of `self` at `child_index`, or the null node if
+// the index is out of range.
+//
+// Children that are not relevant themselves are looked through: if the
+// wanted index falls inside such a child, the search restarts inside it with
+// the index rebased. The found child is cached with `self` as its parent.
+static inline TSNode ts_node__child(
+  TSNode self,
+  uint32_t child_index,
+  bool include_anonymous
+) {
+  TSNode container = self;
+
+  for (;;) {
+    bool did_descend = false;
+
+    TSNode child;
+    uint32_t index = 0;
+    NodeChildIterator iterator = ts_node_iterate_children(&container);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      if (ts_node__is_relevant(child, include_anonymous)) {
+        if (index == child_index) {
+          ts_tree_set_cached_parent(self.tree, &child, &self);
+          return child;
+        }
+        index++;
+        continue;
+      }
+
+      // Not relevant: its relevant descendants occupy the next
+      // `grandchild_count` slots of the index space.
+      uint32_t grandchild_index = child_index - index;
+      uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
+      if (grandchild_index < grandchild_count) {
+        did_descend = true;
+        container = child;
+        child_index = grandchild_index;
+        break;
+      }
+      index += grandchild_count;
+    }
+
+    if (!did_descend) break;
+  }
+
+  return ts_node__null();
+}
+
+// Checks whether `other` is reachable from `self` by following only trailing
+// children whose total byte length is zero. The scan walks children from last
+// to first and stops at the first one that occupies any bytes.
+static bool ts_subtree_has_trailing_empty_descendant(
+  Subtree self,
+  Subtree other
+) {
+  for (unsigned remaining = ts_subtree_child_count(self); remaining > 0; remaining--) {
+    Subtree child = self.ptr->children[remaining - 1];
+    if (ts_subtree_total_bytes(child) > 0) break;
+    if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Finds the closest relevant node that precedes `self`, or the null node.
+//
+// The search starts at `ts_node_parent(self)` and scans that node's children
+// in order, remembering the last candidate seen before the child that holds
+// `self`. Depending on what was found it returns a relevant candidate,
+// descends into a non-relevant candidate that has relevant children, or
+// descends into the child that contains `self`.
+static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
+ Subtree self_subtree = ts_node__subtree(self);
+ bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
+ uint32_t target_end_byte = ts_node_end_byte(self);
+
+ TSNode node = ts_node_parent(self);
+ // Best candidate remembered from an outer level of the descent.
+ TSNode earlier_node = ts_node__null();
+ bool earlier_node_is_relevant = false;
+
+ while (!ts_node_is_null(node)) {
+ // Best candidate among the children of the current `node`.
+ TSNode earlier_child = ts_node__null();
+ bool earlier_child_is_relevant = false;
+ bool found_child_containing_target = false;
+
+ TSNode child;
+ NodeChildIterator iterator = ts_node_iterate_children(&node);
+ while (ts_node_child_iterator_next(&iterator, &child)) {
+ if (child.id == self.id) break;
+ // `iterator.position` is the end of `child` at this point.
+ if (iterator.position.bytes > target_end_byte) {
+ found_child_containing_target = true;
+ break;
+ }
+
+ // A child ending exactly at the target's end contains the target unless
+ // the target is empty; an empty target must be found among the child's
+ // trailing empty descendants.
+ if (iterator.position.bytes == target_end_byte &&
+ (!self_is_empty ||
+ ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
+ found_child_containing_target = true;
+ break;
+ }
+
+ if (ts_node__is_relevant(child, include_anonymous)) {
+ earlier_child = child;
+ earlier_child_is_relevant = true;
+ } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
+ earlier_child = child;
+ earlier_child_is_relevant = false;
+ }
+ }
+
+ if (found_child_containing_target) {
+ // Keep the candidate from this level and continue inside the child that
+ // contains the target.
+ if (!ts_node_is_null(earlier_child)) {
+ earlier_node = earlier_child;
+ earlier_node_is_relevant = earlier_child_is_relevant;
+ }
+ node = child;
+ } else if (earlier_child_is_relevant) {
+ return earlier_child;
+ } else if (!ts_node_is_null(earlier_child)) {
+ node = earlier_child;
+ } else if (earlier_node_is_relevant) {
+ return earlier_node;
+ } else {
+ node = earlier_node;
+ }
+ }
+
+ return ts_node__null();
+}
+
+// Finds the closest relevant node that follows `self`, or the null node.
+//
+// The search starts at `ts_node_parent(self)`. At each level it skips children
+// that end before the target's end, notes a child that contains the target,
+// and stops at the first later child that is relevant or has relevant
+// children.
+static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
+ uint32_t target_end_byte = ts_node_end_byte(self);
+
+ TSNode node = ts_node_parent(self);
+ // Best candidate remembered from an outer level of the descent.
+ TSNode later_node = ts_node__null();
+ bool later_node_is_relevant = false;
+
+ while (!ts_node_is_null(node)) {
+ // Best candidate among the children of the current `node`.
+ TSNode later_child = ts_node__null();
+ bool later_child_is_relevant = false;
+ TSNode child_containing_target = ts_node__null();
+
+ TSNode child;
+ NodeChildIterator iterator = ts_node_iterate_children(&node);
+ while (ts_node_child_iterator_next(&iterator, &child)) {
+ // `iterator.position` is the end of `child` at this point.
+ if (iterator.position.bytes < target_end_byte) continue;
+ if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
+ // A child that spans the target but is not the target itself.
+ if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
+ child_containing_target = child;
+ }
+ } else if (ts_node__is_relevant(child, include_anonymous)) {
+ later_child = child;
+ later_child_is_relevant = true;
+ break;
+ } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
+ later_child = child;
+ later_child_is_relevant = false;
+ break;
+ }
+ }
+
+ if (!ts_node_is_null(child_containing_target)) {
+ // Keep the candidate from this level and continue inside the child that
+ // contains the target.
+ if (!ts_node_is_null(later_child)) {
+ later_node = later_child;
+ later_node_is_relevant = later_child_is_relevant;
+ }
+ node = child_containing_target;
+ } else if (later_child_is_relevant) {
+ return later_child;
+ } else if (!ts_node_is_null(later_child)) {
+ node = later_child;
+ } else if (later_node_is_relevant) {
+ return later_node;
+ } else {
+ node = later_node;
+ }
+ }
+
+ return ts_node__null();
+}
+
+// Returns the first relevant child of `self` whose end byte lies beyond
+// `goal`, looking through non-relevant children that have children of their
+// own. Returns the null node if there is none.
+static inline TSNode ts_node__first_child_for_byte(
+  TSNode self,
+  uint32_t goal,
+  bool include_anonymous
+) {
+  TSNode node = self;
+
+  for (;;) {
+    bool did_descend = false;
+
+    TSNode child;
+    NodeChildIterator iterator = ts_node_iterate_children(&node);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      // Children that end at or before the goal are skipped.
+      if (ts_node_end_byte(child) <= goal) continue;
+
+      if (ts_node__is_relevant(child, include_anonymous)) return child;
+
+      if (ts_node_child_count(child) > 0) {
+        node = child;
+        did_descend = true;
+        break;
+      }
+    }
+
+    if (!did_descend) return ts_node__null();
+  }
+}
+
+// Returns the deepest relevant descendant of `self` that spans the byte range
+// [range_start, range_end]; `self` itself is returned when no relevant
+// descendant qualifies. Each relevant node found on the way down is cached
+// with the previous relevant node as its parent.
+static inline TSNode ts_node__descendant_for_byte_range(
+  TSNode self,
+  uint32_t range_start,
+  uint32_t range_end,
+  bool include_anonymous
+) {
+  TSNode node = self;
+  TSNode last_visible_node = self;
+
+  for (;;) {
+    bool did_descend = false;
+
+    TSNode child;
+    NodeChildIterator iterator = ts_node_iterate_children(&node);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      uint32_t node_end = iterator.position.bytes;
+
+      // The child must end at or after the end of the range, and strictly
+      // after the start of the range.
+      if (node_end < range_end || node_end <= range_start) continue;
+
+      // The child must start at or before the start of the range; later
+      // children start even further right, so stop scanning.
+      if (range_start < ts_node_start_byte(child)) break;
+
+      node = child;
+      if (ts_node__is_relevant(node, include_anonymous)) {
+        ts_tree_set_cached_parent(self.tree, &child, &last_visible_node);
+        last_visible_node = node;
+      }
+      did_descend = true;
+      break;
+    }
+
+    if (!did_descend) break;
+  }
+
+  return last_visible_node;
+}
+
+// Point-based counterpart of the byte-range descendant lookup: returns the
+// deepest relevant descendant of `self` that spans [range_start, range_end],
+// or `self` when no relevant descendant qualifies. Each relevant node found
+// on the way down is cached with the previous relevant node as its parent.
+static inline TSNode ts_node__descendant_for_point_range(
+  TSNode self,
+  TSPoint range_start,
+  TSPoint range_end,
+  bool include_anonymous
+) {
+  TSNode node = self;
+  TSNode last_visible_node = self;
+
+  for (;;) {
+    bool did_descend = false;
+
+    TSNode child;
+    NodeChildIterator iterator = ts_node_iterate_children(&node);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      TSPoint node_end = iterator.position.extent;
+
+      // The child must end at or after the end of the range, and strictly
+      // after the start of the range.
+      if (point_lt(node_end, range_end) || point_lte(node_end, range_start)) continue;
+
+      // The child must start at or before the start of the range; later
+      // children start even further right, so stop scanning.
+      if (point_lt(range_start, ts_node_start_point(child))) break;
+
+      node = child;
+      if (ts_node__is_relevant(node, include_anonymous)) {
+        ts_tree_set_cached_parent(self.tree, &child, &last_visible_node);
+        last_visible_node = node;
+      }
+      did_descend = true;
+      break;
+    }
+
+    if (!did_descend) break;
+  }
+
+  return last_visible_node;
+}
+
+// TSNode - public
+
+// End byte of the node: its start byte plus the byte size of its subtree.
+uint32_t ts_node_end_byte(TSNode self) {
+  Length size = ts_subtree_size(ts_node__subtree(self));
+  return ts_node_start_byte(self) + size.bytes;
+}
+
+// End point of the node: its start point advanced by the subtree's extent.
+TSPoint ts_node_end_point(TSNode self) {
+  Length size = ts_subtree_size(ts_node__subtree(self));
+  return point_add(ts_node_start_point(self), size.extent);
+}
+
+// Symbol of the node: the alias when one is set, else the subtree's symbol.
+TSSymbol ts_node_symbol(TSNode self) {
+  uint32_t alias = ts_node__alias(&self);
+  if (alias) return alias;
+  return ts_subtree_symbol(ts_node__subtree(self));
+}
+
+// Name of the node's symbol as reported by the tree's language.
+const char *ts_node_type(TSNode self) {
+  TSSymbol symbol = ts_node_symbol(self);
+  return ts_language_symbol_name(self.tree->language, symbol);
+}
+
+// String form of the node's subtree, produced by ts_subtree_string.
+char *ts_node_string(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_string(subtree, self.tree->language, false);
+}
+
+// Two nodes are equal when they share both the tree and the subtree pointer.
+bool ts_node_eq(TSNode self, TSNode other) {
+  if (self.tree != other.tree) return false;
+  return self.id == other.id;
+}
+
+// The null node is the one whose `id` is null.
+bool ts_node_is_null(TSNode self) {
+  return self.id == 0;
+}
+
+// Whether the node's subtree is flagged as an extra.
+bool ts_node_is_extra(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_extra(subtree);
+}
+
+// Whether the node is named: decided by the alias' metadata when the node is
+// aliased, otherwise by the subtree's own flag.
+bool ts_node_is_named(TSNode self) {
+  TSSymbol alias = ts_node__alias(&self);
+  if (alias) {
+    return ts_language_symbol_metadata(self.tree->language, alias).named;
+  }
+  return ts_subtree_named(ts_node__subtree(self));
+}
+
+// Whether the node's subtree is flagged as missing.
+bool ts_node_is_missing(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_missing(subtree);
+}
+
+// Whether the node's subtree is flagged as having changes.
+bool ts_node_has_changes(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_has_changes(subtree);
+}
+
+// Whether the node's subtree carries a non-zero error cost.
+bool ts_node_has_error(TSNode self) {
+  Subtree subtree = ts_node__subtree(self);
+  return ts_subtree_error_cost(subtree) > 0;
+}
+
+// Returns the closest relevant ancestor of `self`, or the null node when
+// `self` is the root.
+//
+// A cached parent is returned directly when available. Otherwise the tree is
+// walked down from the root towards `self`; every relevant node passed on the
+// way is cached with the previous relevant node as its parent.
+TSNode ts_node_parent(TSNode self) {
+  TSNode cached = ts_tree_get_cached_parent(self.tree, &self);
+  if (cached.id) return cached;
+
+  TSNode node = ts_tree_root_node(self.tree);
+  uint32_t end_byte = ts_node_end_byte(self);
+  if (node.id == self.id) return ts_node__null();
+
+  TSNode last_visible_node = node;
+  for (;;) {
+    bool did_descend = false;
+
+    TSNode child;
+    NodeChildIterator iterator = ts_node_iterate_children(&node);
+    while (ts_node_child_iterator_next(&iterator, &child)) {
+      // Stop once we pass the target's start or reach the target itself.
+      if (ts_node_start_byte(child) > ts_node_start_byte(self)) break;
+      if (child.id == self.id) break;
+
+      // Skip children that end before the target ends.
+      if (iterator.position.bytes < end_byte) continue;
+
+      node = child;
+      if (ts_node__is_relevant(child, true)) {
+        ts_tree_set_cached_parent(self.tree, &node, &last_visible_node);
+        last_visible_node = node;
+      }
+      did_descend = true;
+      break;
+    }
+
+    if (!did_descend) break;
+  }
+
+  return last_visible_node;
+}
+
+// Child at `child_index`, counting named and anonymous children alike.
+TSNode ts_node_child(TSNode self, uint32_t child_index) {
+  const bool include_anonymous = true;
+  return ts_node__child(self, child_index, include_anonymous);
+}
+
+// Child at `child_index`, counting named children only.
+TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
+  const bool include_anonymous = false;
+  return ts_node__child(self, child_index, include_anonymous);
+}
+
+// Returns the child of `self` associated with `field_id`, or the null node
+// when `field_id` is 0, the node has no children, or no mapping matches.
+//
+// The production's field map is narrowed to the entries for `field_id`, then
+// the children are scanned for the structural index named by the current
+// entry. Inherited entries are resolved by searching inside the child.
+TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
+recur:
+ if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
+
+ const TSFieldMapEntry *field_map, *field_map_end;
+ ts_language_field_map(
+ self.tree->language,
+ ts_node__subtree(self).ptr->production_id,
+ &field_map,
+ &field_map_end
+ );
+ if (field_map == field_map_end) return ts_node__null();
+
+ // The field mappings are sorted by their field id. Scan all
+ // the mappings to find the ones for the given field id.
+ while (field_map->field_id < field_id) {
+ field_map++;
+ if (field_map == field_map_end) return ts_node__null();
+ }
+ while (field_map_end[-1].field_id > field_id) {
+ field_map_end--;
+ if (field_map == field_map_end) return ts_node__null();
+ }
+
+ TSNode child;
+ NodeChildIterator iterator = ts_node_iterate_children(&self);
+ while (ts_node_child_iterator_next(&iterator, &child)) {
+ if (!ts_subtree_extra(ts_node__subtree(child))) {
+ // The iterator has already advanced past `child`, hence the -1.
+ uint32_t index = iterator.structural_child_index - 1;
+ if (index < field_map->child_index) continue;
+
+ // Hidden nodes' fields are "inherited" by their visible parent.
+ if (field_map->inherited) {
+
+ // If this is the *last* possible child node for this field,
+ // then perform a tail call to avoid recursion.
+ if (field_map + 1 == field_map_end) {
+ self = child;
+ goto recur;
+ }
+
+ // Otherwise, descend into this child, but if it doesn't contain
+ // the field, continue searching subsequent children.
+ else {
+ TSNode result = ts_node_child_by_field_id(child, field_id);
+ if (result.id) return result;
+ field_map++;
+ if (field_map == field_map_end) return ts_node__null();
+ }
+ }
+
+ else if (ts_node__is_relevant(child, true)) {
+ return child;
+ }
+
+ // If the field refers to a hidden node, return its first visible
+ // child.
+ else {
+ return ts_node_child(child, 0);
+ }
+ }
+ }
+
+ return ts_node__null();
+}
+
+// Looks up the field id for `name` (of `name_length` bytes) in the tree's
+// language and returns the child associated with that field.
+TSNode ts_node_child_by_field_name(
+  TSNode self,
+  const char *name,
+  uint32_t name_length
+) {
+  const TSLanguage *language = self.tree->language;
+  TSFieldId field_id = ts_language_field_id_for_name(language, name, name_length);
+  return ts_node_child_by_field_id(self, field_id);
+}
+
+// Number of visible children recorded on the node's subtree (0 for subtrees
+// without children).
+uint32_t ts_node_child_count(TSNode self) {
+  Subtree tree = ts_node__subtree(self);
+  if (ts_subtree_child_count(tree) == 0) return 0;
+  return tree.ptr->visible_child_count;
+}
+
+// Number of named children recorded on the node's subtree (0 for subtrees
+// without children).
+uint32_t ts_node_named_child_count(TSNode self) {
+  Subtree tree = ts_node__subtree(self);
+  if (ts_subtree_child_count(tree) == 0) return 0;
+  return tree.ptr->named_child_count;
+}
+
+// Public wrappers. Each one forwards to its private counterpart and fixes
+// the `include_anonymous` flag: true for the plain variants, false for the
+// `named` variants.
+
+TSNode ts_node_next_sibling(TSNode self) {
+  const bool include_anonymous = true;
+  return ts_node__next_sibling(self, include_anonymous);
+}
+
+TSNode ts_node_next_named_sibling(TSNode self) {
+  const bool include_anonymous = false;
+  return ts_node__next_sibling(self, include_anonymous);
+}
+
+TSNode ts_node_prev_sibling(TSNode self) {
+  const bool include_anonymous = true;
+  return ts_node__prev_sibling(self, include_anonymous);
+}
+
+TSNode ts_node_prev_named_sibling(TSNode self) {
+  const bool include_anonymous = false;
+  return ts_node__prev_sibling(self, include_anonymous);
+}
+
+TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
+  const bool include_anonymous = true;
+  return ts_node__first_child_for_byte(self, byte, include_anonymous);
+}
+
+TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
+  const bool include_anonymous = false;
+  return ts_node__first_child_for_byte(self, byte, include_anonymous);
+}
+
+TSNode ts_node_descendant_for_byte_range(
+  TSNode self,
+  uint32_t start,
+  uint32_t end
+) {
+  const bool include_anonymous = true;
+  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
+}
+
+TSNode ts_node_named_descendant_for_byte_range(
+  TSNode self,
+  uint32_t start,
+  uint32_t end
+) {
+  const bool include_anonymous = false;
+  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
+}
+
+TSNode ts_node_descendant_for_point_range(
+  TSNode self,
+  TSPoint start,
+  TSPoint end
+) {
+  const bool include_anonymous = true;
+  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
+}
+
+TSNode ts_node_named_descendant_for_point_range(
+  TSNode self,
+  TSPoint start,
+  TSPoint end
+) {
+  const bool include_anonymous = false;
+  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
+}
+
+// Adjusts the start position stored in `self` to account for `edit`.
+//
+// - A node starting at or after the old end of the edit is shifted by the
+//   difference between the new and old end.
+// - A node starting strictly inside the edited span is moved to the new end.
+// - A node starting at or before the edit's start keeps its position.
+void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
+  uint32_t start_byte = ts_node_start_byte(*self);
+  TSPoint start_point = ts_node_start_point(*self);
+
+  const bool starts_after_edit = start_byte >= edit->old_end_byte;
+  const bool starts_inside_edit = !starts_after_edit && start_byte > edit->start_byte;
+
+  if (starts_after_edit) {
+    uint32_t byte_offset = start_byte - edit->old_end_byte;
+    TSPoint point_offset = point_sub(start_point, edit->old_end_point);
+    start_byte = edit->new_end_byte + byte_offset;
+    start_point = point_add(edit->new_end_point, point_offset);
+  } else if (starts_inside_edit) {
+    start_byte = edit->new_end_byte;
+    start_point = edit->new_end_point;
+  }
+
+  self->context[0] = start_byte;
+  self->context[1] = start_point.row;
+  self->context[2] = start_point.column;
+}
diff --git a/src/tree_sitter/parser.c b/src/tree_sitter/parser.c
new file mode 100644
index 0000000000..88b20845fd
--- /dev/null
+++ b/src/tree_sitter/parser.c
@@ -0,0 +1,1887 @@
+#include <time.h>
+#include <assert.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdbool.h>
+#include "tree_sitter/api.h"
+#include "./alloc.h"
+#include "./array.h"
+#include "./atomic.h"
+#include "./clock.h"
+#include "./error_costs.h"
+#include "./get_changed_ranges.h"
+#include "./language.h"
+#include "./length.h"
+#include "./lexer.h"
+#include "./reduce_action.h"
+#include "./reusable_node.h"
+#include "./stack.h"
+#include "./subtree.h"
+#include "./tree.h"
+
+// Logging helpers. All of them expect a `TSParser *self` in scope.
+//
+// Each macro expands to a bare `if (...) { ... }` statement rather than a
+// do/while(0) wrapper, so use them only as complete statements and not as the
+// unbraced body of an if/else.
+
+// Formats a message into the lexer's debug buffer and forwards it through
+// ts_parser__log, but only when a logger or a dot graph file is set.
+#define LOG(...) \
+ if (self->lexer.logger.log || self->dot_graph_file) { \
+ snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
+ ts_parser__log(self); \
+ }
+
+// Writes the parse stack as a dot graph when a dot graph file is set.
+#define LOG_STACK() \
+ if (self->dot_graph_file) { \
+ ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
+ fputs("\n\n", self->dot_graph_file); \
+ }
+
+// Writes `tree` as a dot graph when a dot graph file is set.
+#define LOG_TREE(tree) \
+ if (self->dot_graph_file) { \
+ ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
+ fputs("\n", self->dot_graph_file); \
+ }
+
+// Name of `symbol` in the parser's current language.
+#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
+
+// Name of the symbol of `tree`.
+#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
+
+// Tuning constants. NOTE(review): apart from MAX_COST_DIFFERENCE, which bounds
+// the weighted cost gap in ts_parser__compare_versions, their uses are outside
+// this part of the file.
+static const unsigned MAX_VERSION_COUNT = 6;
+static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
+static const unsigned MAX_SUMMARY_DEPTH = 16;
+static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
+static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
+
+// A previously lexed token, remembered together with the byte index it was
+// lexed at and the external token that preceded it; the cache is consulted
+// only when both of those match the current lookup.
+typedef struct {
+ Subtree token;
+ Subtree last_external_token;
+ uint32_t byte_index;
+} TokenCache;
+
+// Parser state: the lexer, the parse stack, the language in use, and the
+// bookkeeping used for logging, token reuse, timeouts and cancellation.
+struct TSParser {
+ Lexer lexer;
+ Stack *stack;
+ SubtreePool tree_pool;
+ const TSLanguage *language;
+ ReduceActionSet reduce_actions;
+ Subtree finished_tree;
+ SubtreeHeapData scratch_tree_data;
+ MutableSubtree scratch_tree;
+ TokenCache token_cache;
+ ReusableNode reusable_node;
+ // Opaque payload passed to the language's external scanner callbacks.
+ void *external_scanner_payload;
+ // When non-NULL, log messages and stack/tree graphs are written here.
+ FILE *dot_graph_file;
+ TSClock end_clock;
+ TSDuration timeout_duration;
+ unsigned accept_count;
+ unsigned operation_count;
+ const volatile size_t *cancellation_flag;
+ bool halt_on_error;
+ Subtree old_tree;
+ TSRangeArray included_range_differences;
+ unsigned included_range_difference_index;
+};
+
+// Summary of one stack version, used when comparing versions.
+typedef struct {
+ unsigned cost;
+ unsigned node_count;
+ int dynamic_precedence;
+ bool is_in_error;
+} ErrorStatus;
+
+// Outcome of comparing two ErrorStatus values (left vs. right). The `Take`
+// results are the decisive ones, the `Prefer` results the weaker ones.
+typedef enum {
+ ErrorComparisonTakeLeft,
+ ErrorComparisonPreferLeft,
+ ErrorComparisonNone,
+ ErrorComparisonPreferRight,
+ ErrorComparisonTakeRight,
+} ErrorComparison;
+
+// Payload for ts_string_input_read: a string and its length in bytes.
+typedef struct {
+ const char *string;
+ uint32_t length;
+} TSStringInput;
+
+// StringInput
+
+// TSInput read callback for an in-memory string.
+//
+// Returns a pointer to the text starting at `byte` and stores the number of
+// remaining bytes in `*length`. At or past the end of the string it returns
+// an empty string and sets `*length` to 0. The point argument is ignored.
+static const char *ts_string_input_read(
+  void *_self,
+  uint32_t byte,
+  TSPoint _,
+  uint32_t *length
+) {
+  TSStringInput *input = (TSStringInput *)_self;
+
+  if (byte < input->length) {
+    *length = input->length - byte;
+    return input->string + byte;
+  }
+
+  *length = 0;
+  return "";
+}
+
+// Parser - Private
+
+// Emits the message currently held in the lexer's debug buffer.
+//
+// The message goes to the user-supplied logger callback (if any) and, when a
+// dot graph file is set, is also written there as the label of an otherwise
+// empty graph.
+static void ts_parser__log(TSParser *self) {
+  if (self->lexer.logger.log) {
+    self->lexer.logger.log(
+      self->lexer.logger.payload,
+      TSLogTypeParse,
+      self->lexer.debug_buffer
+    );
+  }
+
+  if (self->dot_graph_file) {
+    fprintf(self->dot_graph_file, "graph {\nlabel=\"");
+    for (const char *c = &self->lexer.debug_buffer[0]; *c != 0; c++) {
+      // Both the quote and the backslash are special inside a quoted DOT
+      // label, so both are escaped; a raw backslash would otherwise combine
+      // with the following character into an escape sequence.
+      if (*c == '"' || *c == '\\') fputc('\\', self->dot_graph_file);
+      fputc(*c, self->dot_graph_file);
+    }
+    fprintf(self->dot_graph_file, "\"\n}\n\n");
+  }
+}
+
+// Replaces pending subtrees on top of the given stack version with their
+// children, repeating while the last pushed child is itself pending (i.e.
+// has children). Returns true if anything was broken down.
+static bool ts_parser__breakdown_top_of_stack(
+  TSParser *self,
+  StackVersion version
+) {
+  bool did_break_down = false;
+  bool pending = false;
+
+  do {
+    StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
+    if (!pop.size) break;
+
+    did_break_down = true;
+    pending = false;
+    for (uint32_t i = 0; i < pop.size; i++) {
+      StackSlice slice = pop.contents[i];
+      TSStateId state = ts_stack_state(self->stack, slice.version);
+      Subtree parent = *array_front(&slice.subtrees);
+
+      // Push the parent's children in its place, recomputing the parse state
+      // after each one.
+      for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
+        Subtree child = parent.ptr->children[j];
+        pending = ts_subtree_child_count(child) > 0;
+
+        if (ts_subtree_is_error(child)) {
+          state = ERROR_STATE;
+        } else if (!ts_subtree_extra(child)) {
+          state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
+        }
+
+        ts_subtree_retain(child);
+        ts_stack_push(self->stack, slice.version, child, pending, state);
+      }
+
+      // Re-push the remaining subtrees of the slice unchanged.
+      for (uint32_t j = 1; j < slice.subtrees.size; j++) {
+        Subtree tree = slice.subtrees.contents[j];
+        ts_stack_push(self->stack, slice.version, tree, false, state);
+      }
+
+      // Log before dropping our reference: releasing `parent` may free it,
+      // and TREE_NAME reads the subtree's symbol.
+      LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
+
+      ts_subtree_release(&self->tree_pool, parent);
+      array_delete(&slice.subtrees);
+
+      LOG_STACK();
+    }
+  } while (pending);
+
+  return did_break_down;
+}
+
+// Descends the reusable node while its tree has children and was created in
+// a parse state other than `state`. If any descent happened, `*lookahead` is
+// released and replaced by (a retained reference to) the tree reached.
+static void ts_parser__breakdown_lookahead(
+  TSParser *self,
+  Subtree *lookahead,
+  TSStateId state,
+  ReusableNode *reusable_node
+) {
+  Subtree tree = reusable_node_tree(reusable_node);
+  bool did_descend = false;
+
+  for (;;) {
+    if (ts_subtree_child_count(tree) == 0) break;
+    if (ts_subtree_parse_state(tree) == state) break;
+
+    LOG("state_mismatch sym:%s", TREE_NAME(tree));
+    reusable_node_descend(reusable_node);
+    tree = reusable_node_tree(reusable_node);
+    did_descend = true;
+  }
+
+  if (!did_descend) return;
+
+  ts_subtree_release(&self->tree_pool, *lookahead);
+  *lookahead = tree;
+  ts_subtree_retain(*lookahead);
+}
+
+// Compares the error status of two stack versions (`a` = left, `b` = right).
+//
+// Order of the checks:
+// 1. If exactly one side is in an error, the other side wins: decisively
+//    ("take") when it is also cheaper, weakly ("prefer") otherwise.
+// 2. Otherwise the cheaper side wins: decisively when the cost gap, scaled
+//    by one plus the cheaper side's node count, exceeds MAX_COST_DIFFERENCE.
+// 3. With equal costs, higher dynamic precedence is weakly preferred.
+static ErrorComparison ts_parser__compare_versions(
+  TSParser *self,
+  ErrorStatus a,
+  ErrorStatus b
+) {
+  if (!a.is_in_error && b.is_in_error) {
+    return a.cost < b.cost ? ErrorComparisonTakeLeft : ErrorComparisonPreferLeft;
+  }
+
+  if (a.is_in_error && !b.is_in_error) {
+    return b.cost < a.cost ? ErrorComparisonTakeRight : ErrorComparisonPreferRight;
+  }
+
+  if (a.cost < b.cost) {
+    unsigned weighted_gap = (b.cost - a.cost) * (1 + a.node_count);
+    return weighted_gap > MAX_COST_DIFFERENCE
+      ? ErrorComparisonTakeLeft
+      : ErrorComparisonPreferLeft;
+  }
+
+  if (b.cost < a.cost) {
+    unsigned weighted_gap = (a.cost - b.cost) * (1 + b.node_count);
+    return weighted_gap > MAX_COST_DIFFERENCE
+      ? ErrorComparisonTakeRight
+      : ErrorComparisonPreferRight;
+  }
+
+  if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
+  if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
+  return ErrorComparisonNone;
+}
+
+// Collects the ErrorStatus of one stack version. A paused version is charged
+// an extra ERROR_COST_PER_SKIPPED_TREE and always counts as being in an
+// error; otherwise it is in an error only when its state is ERROR_STATE.
+static ErrorStatus ts_parser__version_status(
+  TSParser *self,
+  StackVersion version
+) {
+  unsigned cost = ts_stack_error_cost(self->stack, version);
+  bool is_paused = ts_stack_is_paused(self->stack, version);
+  if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
+
+  ErrorStatus status = {
+    .cost = cost,
+    .node_count = ts_stack_node_count_since_error(self->stack, version),
+    .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
+    .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
+  };
+  return status;
+}
+
+// Reports whether `version`, given the hypothetical `is_in_error` and `cost`,
+// is already beaten.
+//
+// That is the case when a finished tree exists whose error cost is no higher
+// than `cost`, or when another active version that is at least as far along
+// in the input either wins decisively or is weakly preferred and can be
+// merged with `version`.
+static bool ts_parser__better_version_exists(
+  TSParser *self,
+  StackVersion version,
+  bool is_in_error,
+  unsigned cost
+) {
+  if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
+    return true;
+  }
+
+  Length position = ts_stack_position(self->stack, version);
+  ErrorStatus status = {
+    .cost = cost,
+    .is_in_error = is_in_error,
+    .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
+    .node_count = ts_stack_node_count_since_error(self->stack, version),
+  };
+
+  for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
+    // Only other, active versions that have not fallen behind are rivals.
+    if (i == version) continue;
+    if (!ts_stack_is_active(self->stack, i)) continue;
+    if (ts_stack_position(self->stack, i).bytes < position.bytes) continue;
+
+    ErrorStatus status_i = ts_parser__version_status(self, i);
+    ErrorComparison comparison = ts_parser__compare_versions(self, status, status_i);
+
+    if (comparison == ErrorComparisonTakeRight) return true;
+    if (comparison == ErrorComparisonPreferRight &&
+        ts_stack_can_merge(self->stack, i, version)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Restores the external scanner from the state stored on `external_token`.
+// When there is no such token the scanner is deserialized from an empty
+// (NULL, 0) buffer.
+static void ts_parser__restore_external_scanner(
+  TSParser *self,
+  Subtree external_token
+) {
+  if (!external_token.ptr) {
+    self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
+    return;
+  }
+
+  self->language->external_scanner.deserialize(
+    self->external_scanner_payload,
+    ts_external_scanner_state_data(&external_token.ptr->external_scanner_state),
+    external_token.ptr->external_scanner_state.length
+  );
+}
+
+// Decides whether the first leaf of `tree` may be reused as the lookahead
+// token in parse state `state`, given the table entry for its symbol.
+static bool ts_parser__can_reuse_first_leaf(
+  TSParser *self,
+  TSStateId state,
+  Subtree tree,
+  TableEntry *table_entry
+) {
+  TSLexMode current_lex_mode = self->language->lex_modes[state];
+  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
+  TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
+  TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
+
+  // A token created under an identical lex mode is reusable, as long as the
+  // state has actions for it. Tokens with the keyword-capture symbol must in
+  // addition not be keywords and must come from this very parse state.
+  bool has_actions = table_entry->action_count > 0;
+  bool same_lex_mode =
+    memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0;
+  bool keyword_capture_ok =
+    leaf_symbol != self->language->keyword_capture_token ||
+    (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state);
+  if (has_actions && same_lex_mode && keyword_capture_ok) return true;
+
+  // Empty tokens (other than the end token) are never reused across
+  // differing lex modes.
+  bool is_empty = ts_subtree_size(tree).bytes == 0;
+  if (is_empty && leaf_symbol != ts_builtin_sym_end) return false;
+
+  // Otherwise reuse requires that the current state enables no external
+  // tokens and that the table entry marks the token as reusable.
+  return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
+}
+
+// Lexes the next token for the given stack version in `parse_state` and
+// returns it as a new leaf (or error) subtree.
+//
+// Each round first tries the external scanner (when the lex mode enables
+// external tokens) and then the language's internal lex function. If nothing
+// matches, lexing restarts once in the ERROR_STATE lex mode; after that,
+// unrecognized characters are skipped one at a time and the skipped span is
+// returned as an error subtree.
+static Subtree ts_parser__lex(
+ TSParser *self,
+ StackVersion version,
+ TSStateId parse_state
+) {
+ Length start_position = ts_stack_position(self->stack, version);
+ Subtree external_token = ts_stack_last_external_token(self->stack, version);
+ TSLexMode lex_mode = self->language->lex_modes[parse_state];
+ const bool *valid_external_tokens = ts_language_enabled_external_tokens(
+ self->language,
+ lex_mode.external_lex_state
+ );
+
+ bool found_external_token = false;
+ bool error_mode = parse_state == ERROR_STATE;
+ bool skipped_error = false;
+ int32_t first_error_character = 0;
+ Length error_start_position = length_zero();
+ Length error_end_position = length_zero();
+ uint32_t lookahead_end_byte = 0;
+ ts_lexer_reset(&self->lexer, start_position);
+
+ for (;;) {
+ Length current_position = self->lexer.current_position;
+
+ if (valid_external_tokens) {
+ LOG(
+ "lex_external state:%d, row:%u, column:%u",
+ lex_mode.external_lex_state,
+ current_position.extent.row + 1,
+ current_position.extent.column
+ );
+ ts_lexer_start(&self->lexer);
+ ts_parser__restore_external_scanner(self, external_token);
+ bool found_token = self->language->external_scanner.scan(
+ self->external_scanner_payload,
+ &self->lexer.data,
+ valid_external_tokens
+ );
+ ts_lexer_finish(&self->lexer, &lookahead_end_byte);
+
+ // Zero-length external tokens are generally allowed, but they're not
+ // allowed right after a syntax error. This is for two reasons:
+ // 1. After a syntax error, the lexer is looking for any possible token,
+ // as opposed to the specific set of tokens that are valid in some
+ // parse state. In this situation, it's very easy for an external
+ // scanner to produce unwanted zero-length tokens.
+ // 2. The parser sometimes inserts *missing* tokens to recover from
+ // errors. These tokens are also zero-length. If we allow more
+ // zero-length tokens to be created after missing tokens, it
+ // can lead to infinite loops. Forbidding zero-length tokens
+ // right at the point of error recovery is a conservative strategy
+ // for preventing this kind of infinite loop.
+ if (found_token && (
+ self->lexer.token_end_position.bytes > current_position.bytes ||
+ (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
+ )) {
+ found_external_token = true;
+ break;
+ }
+
+ // The external scanner produced nothing usable; rewind before trying the
+ // internal lexer.
+ ts_lexer_reset(&self->lexer, current_position);
+ }
+
+ LOG(
+ "lex_internal state:%d, row:%u, column:%u",
+ lex_mode.lex_state,
+ current_position.extent.row + 1,
+ current_position.extent.column
+ );
+ ts_lexer_start(&self->lexer);
+ bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
+ ts_lexer_finish(&self->lexer, &lookahead_end_byte);
+ if (found_token) break;
+
+ // First failure outside error mode: retry from the start position using
+ // the ERROR_STATE lex mode.
+ if (!error_mode) {
+ error_mode = true;
+ lex_mode = self->language->lex_modes[ERROR_STATE];
+ valid_external_tokens = ts_language_enabled_external_tokens(
+ self->language,
+ lex_mode.external_lex_state
+ );
+ ts_lexer_reset(&self->lexer, start_position);
+ continue;
+ }
+
+ // Record where the skipped span begins and which character started it.
+ if (!skipped_error) {
+ LOG("skip_unrecognized_character");
+ skipped_error = true;
+ error_start_position = self->lexer.token_start_position;
+ error_end_position = self->lexer.token_start_position;
+ first_error_character = self->lexer.data.lookahead;
+ }
+
+ // If the failed attempt consumed nothing, advance by one character; a
+ // lookahead of 0 ends the loop with the error symbol instead.
+ if (self->lexer.current_position.bytes == error_end_position.bytes) {
+ if (self->lexer.data.lookahead == 0) {
+ self->lexer.data.result_symbol = ts_builtin_sym_error;
+ break;
+ }
+ self->lexer.data.advance(&self->lexer.data, false);
+ }
+
+ error_end_position = self->lexer.current_position;
+ }
+
+ Subtree result;
+ if (skipped_error) {
+ // Characters were skipped: return them as a single error subtree.
+ Length padding = length_sub(error_start_position, start_position);
+ Length size = length_sub(error_end_position, error_start_position);
+ uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
+ result = ts_subtree_new_error(
+ &self->tree_pool,
+ first_error_character,
+ padding,
+ size,
+ lookahead_bytes,
+ parse_state,
+ self->language
+ );
+
+ LOG(
+ "lexed_lookahead sym:%s, size:%u, character:'%c'",
+ SYM_NAME(ts_subtree_symbol(result)),
+ ts_subtree_total_size(result).bytes,
+ first_error_character
+ );
+ } else {
+ // Clamp the token start so that it never lies after the token end.
+ if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
+ self->lexer.token_start_position = self->lexer.token_end_position;
+ }
+
+ bool is_keyword = false;
+ TSSymbol symbol = self->lexer.data.result_symbol;
+ Length padding = length_sub(self->lexer.token_start_position, start_position);
+ Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
+ uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
+
+ if (found_external_token) {
+ // Translate the scanner's symbol through the language's symbol map.
+ symbol = self->language->external_scanner.symbol_map[symbol];
+ } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
+ // Re-lex the same span with the keyword lexer; accept its result only
+ // if it ends at the same byte and the parse state has actions for it.
+ uint32_t end_byte = self->lexer.token_end_position.bytes;
+ ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
+ ts_lexer_start(&self->lexer);
+ if (
+ self->language->keyword_lex_fn(&self->lexer.data, 0) &&
+ self->lexer.token_end_position.bytes == end_byte &&
+ ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
+ ) {
+ is_keyword = true;
+ symbol = self->lexer.data.result_symbol;
+ }
+ }
+
+ result = ts_subtree_new_leaf(
+ &self->tree_pool,
+ symbol,
+ padding,
+ size,
+ lookahead_bytes,
+ parse_state,
+ found_external_token,
+ is_keyword,
+ self->language
+ );
+
+ if (found_external_token) {
+ // Serialize the scanner state (into the debug buffer, used here as
+ // scratch space) and store it on the new token.
+ unsigned length = self->language->external_scanner.serialize(
+ self->external_scanner_payload,
+ self->lexer.debug_buffer
+ );
+ ts_external_scanner_state_init(
+ &((SubtreeHeapData *)result.ptr)->external_scanner_state,
+ self->lexer.debug_buffer,
+ length
+ );
+ }
+
+ LOG(
+ "lexed_lookahead sym:%s, size:%u",
+ SYM_NAME(ts_subtree_symbol(result)),
+ ts_subtree_total_size(result).bytes
+ );
+ }
+
+ return result;
+}
+
+ // Try to satisfy a lookahead request from the parser's single-entry token cache.
+ //
+ // The cached token is handed back (after taking an extra reference with
+ // `ts_subtree_retain`) only when all of the following hold:
+ //   - a token is cached and it was recorded at byte offset `position`,
+ //   - the external scanner state stored alongside it equals that of
+ //     `last_external_token`,
+ //   - `ts_parser__can_reuse_first_leaf` accepts it for parse state `state`.
+ //
+ // `table_entry` is filled in for the cached token's symbol once the first two
+ // checks pass, even if the last check then rejects the token. Returns
+ // NULL_SUBTREE whenever the cache cannot be used.
+ static Subtree ts_parser__get_cached_token(
+   TSParser *self,
+   TSStateId state,
+   size_t position,
+   Subtree last_external_token,
+   TableEntry *table_entry
+ ) {
+   TokenCache *cached = &self->token_cache;
+
+   if (!cached->token.ptr) return NULL_SUBTREE;
+   if (cached->byte_index != position) return NULL_SUBTREE;
+   if (!ts_subtree_external_scanner_state_eq(cached->last_external_token, last_external_token)) {
+     return NULL_SUBTREE;
+   }
+
+   ts_language_table_entry(self->language, state, ts_subtree_symbol(cached->token), table_entry);
+   if (!ts_parser__can_reuse_first_leaf(self, state, cached->token, table_entry)) {
+     return NULL_SUBTREE;
+   }
+
+   ts_subtree_retain(cached->token);
+   return cached->token;
+ }
+
+ // Replace the contents of the parser's token cache with `token`, remembering
+ // the byte offset it was lexed at and the external token that preceded it.
+ //
+ // Either subtree may be null (`.ptr == NULL`); null subtrees are stored as-is
+ // and are neither retained nor released.
+ static void ts_parser__set_cached_token(
+   TSParser *self,
+   size_t byte_index,
+   Subtree last_external_token,
+   Subtree token
+ ) {
+   TokenCache *slot = &self->token_cache;
+
+   // Take the new references before dropping the old ones -- presumably so a
+   // subtree that is both incoming and already cached is never released to
+   // zero in between (NOTE(review): confirm against the refcount semantics).
+   if (token.ptr) {
+     ts_subtree_retain(token);
+   }
+   if (last_external_token.ptr) {
+     ts_subtree_retain(last_external_token);
+   }
+
+   if (slot->token.ptr) {
+     ts_subtree_release(&self->tree_pool, slot->token);
+   }
+   if (slot->last_external_token.ptr) {
+     ts_subtree_release(&self->tree_pool, slot->last_external_token);
+   }
+
+   slot->token = token;
+   slot->byte_index = byte_index;
+   slot->last_external_token = last_external_token;
+ }
+
+ // Ask `ts_range_array_intersects` whether the byte span from `start_position`
+ // to `end_position` intersects the parser's `included_range_differences`
+ // array, starting the search at `included_range_difference_index`.
+ static bool ts_parser__has_included_range_difference(
+   const TSParser *self,
+   uint32_t start_position,
+   uint32_t end_position
+ ) {
+   bool touches_changed_range = ts_range_array_intersects(
+     &self->included_range_differences, self->included_range_difference_index,
+     start_position, end_position
+   );
+   return touches_changed_range;
+ }
+
+ // Walk the reusable-node cursor looking for a subtree from the previous tree
+ // that starts exactly at byte `position` and can be used as the next lookahead.
+ //
+ // On success the subtree is retained and returned, and `table_entry` holds the
+ // parse-table entry for its leaf symbol in `*state`. NULL_SUBTREE is returned
+ // when the cursor is exhausted, when the next candidate starts after
+ // `position`, or when the candidate's first leaf cannot be reused in `*state`.
+ //
+ // `*state` may be rewritten: when a rejected candidate cannot be descended
+ // into, the top of stack `version` is broken down and the state re-read.
+ static Subtree ts_parser__reuse_node(
+   TSParser *self,
+   StackVersion version,
+   TSStateId *state,
+   uint32_t position,
+   Subtree last_external_token,
+   TableEntry *table_entry
+ ) {
+   for (;;) {
+     Subtree candidate = reusable_node_tree(&self->reusable_node);
+     if (!candidate.ptr) break;
+
+     uint32_t start_byte = reusable_node_byte_offset(&self->reusable_node);
+     uint32_t end_byte = start_byte + ts_subtree_total_bytes(candidate);
+
+     // The candidate lies ahead of the parser: nothing to reuse yet.
+     if (start_byte > position) {
+       LOG("before_reusable_node symbol:%s", TREE_NAME(candidate));
+       break;
+     }
+
+     // The candidate starts behind the parser. Step into it if it still covers
+     // `position`; otherwise (or if it has nothing to step into) move past it.
+     if (start_byte < position) {
+       LOG("past_reusable_node symbol:%s", TREE_NAME(candidate));
+       if (end_byte <= position || !reusable_node_descend(&self->reusable_node)) {
+         reusable_node_advance(&self->reusable_node);
+       }
+       continue;
+     }
+
+     if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
+       LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(candidate));
+       reusable_node_advance(&self->reusable_node);
+       continue;
+     }
+
+     // Work out whether some property of the candidate rules out reusing it
+     // as a whole. The first matching property supplies the logged reason.
+     const char *rejection = NULL;
+     if (ts_subtree_has_changes(candidate)) {
+       rejection = "has_changes";
+     } else if (ts_subtree_is_error(candidate)) {
+       rejection = "is_error";
+     } else if (ts_subtree_missing(candidate)) {
+       rejection = "is_missing";
+     } else if (ts_subtree_is_fragile(candidate)) {
+       rejection = "is_fragile";
+     } else if (ts_parser__has_included_range_difference(self, start_byte, end_byte)) {
+       rejection = "contains_different_included_range";
+     }
+
+     if (rejection) {
+       LOG("cant_reuse_node_%s tree:%s", rejection, TREE_NAME(candidate));
+       if (!reusable_node_descend(&self->reusable_node)) {
+         reusable_node_advance(&self->reusable_node);
+         ts_parser__breakdown_top_of_stack(self, version);
+         *state = ts_stack_state(self->stack, version);
+       }
+       continue;
+     }
+
+     TSSymbol first_leaf = ts_subtree_leaf_symbol(candidate);
+     ts_language_table_entry(self->language, *state, first_leaf, table_entry);
+     if (!ts_parser__can_reuse_first_leaf(self, *state, candidate, table_entry)) {
+       LOG(
+         "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
+         TREE_NAME(candidate),
+         SYM_NAME(first_leaf)
+       );
+       reusable_node_advance_past_leaf(&self->reusable_node);
+       break;
+     }
+
+     LOG("reuse_node symbol:%s", TREE_NAME(candidate));
+     ts_subtree_retain(candidate);
+     return candidate;
+   }
+
+   return NULL_SUBTREE;
+ }
+
+ // Decide which of two alternative subtrees to keep.
+ //
+ // Returns true when `right` should be chosen and false when `left` should be
+ // kept. The decision is made by the first rule that applies:
+ //   1. a null `left` loses; otherwise a null `right` loses,
+ //   2. the tree with the lower error cost wins,
+ //   3. the tree with the higher dynamic precedence wins,
+ //   4. if both still tie and carry a non-zero error cost, `right` wins,
+ //   5. otherwise `ts_subtree_compare` decides: `right` wins only when the
+ //      comparison yields 1; on -1 or any other value `left` is kept.
+ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
+   if (!left.ptr) return true;
+   if (!right.ptr) return false;
+
+   if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
+     LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
+     return true;
+   }
+
+   if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
+     LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
+     return false;
+   }
+
+   if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) {
+     LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
+         TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
+         ts_subtree_dynamic_precedence(left));
+     return true;
+   }
+
+   if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) {
+     LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
+         TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right),
+         ts_subtree_dynamic_precedence(right));
+     return false;
+   }
+
+   // Equal cost and equal precedence: among erroneous trees, take the new one.
+   if (ts_subtree_error_cost(left) > 0) return true;
+
+   int comparison = ts_subtree_compare(left, right);
+   switch (comparison) {
+     case -1:
+       LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
+       return false;
+     case 1:
+       LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
+       return true;
+     default:
+       LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
+       return false;
+   }
+ }
+
+ // Push `lookahead` onto stack `version`, moving that version to `state`.
+ //
+ // When the requested `extra` flag differs from the lookahead's current one, a
+ // mutable copy is obtained via `ts_subtree_make_mut` and flagged with
+ // `ts_subtree_set_extra` before being pushed. A subtree with children is
+ // pushed as "pending". If the pushed subtree contains external tokens, the
+ // stack version's last external token is updated to match.
+ //
+ // NOTE(review): `ts_subtree_set_extra` receives no value here, so the case
+ // "`extra` is false but the lookahead is already extra" relies on what that
+ // helper does -- confirm it behaves as intended.
+ static void ts_parser__shift(
+   TSParser *self,
+   StackVersion version,
+   TSStateId state,
+   Subtree lookahead,
+   bool extra
+ ) {
+   Subtree subtree_to_push = lookahead;
+   if (extra != ts_subtree_extra(lookahead)) {
+     MutableSubtree adjusted = ts_subtree_make_mut(&self->tree_pool, lookahead);
+     ts_subtree_set_extra(&adjusted);
+     subtree_to_push = ts_subtree_from_mut(adjusted);
+   }
+
+   bool has_children = ts_subtree_child_count(subtree_to_push) > 0;
+   ts_stack_push(self->stack, version, subtree_to_push, has_children, state);
+
+   if (!ts_subtree_has_external_tokens(subtree_to_push)) return;
+   ts_stack_set_last_external_token(
+     self->stack, version, ts_subtree_last_external_token(subtree_to_push)
+   );
+ }
+
+ // Offer `children` as an alternative set of children for `tree`.
+ //
+ // The parser's scratch tree is overwritten with a copy of `*tree`, given the
+ // candidate children, and then compared with the current `tree` using
+ // `ts_parser__select_tree`. If the scratch tree wins, its contents are copied
+ // back into `*tree` and true is returned; otherwise `tree` is left untouched
+ // and false is returned.
+ static bool ts_parser__replace_children(
+   TSParser *self,
+   MutableSubtree *tree,
+   SubtreeArray *children
+ ) {
+   // Build the candidate in the scratch slot: same header, new children.
+   *self->scratch_tree.ptr = *tree->ptr;
+   self->scratch_tree.ptr->child_count = 0;
+   ts_subtree_set_children(self->scratch_tree, children->contents, children->size, self->language);
+
+   Subtree current = ts_subtree_from_mut(*tree);
+   Subtree candidate = ts_subtree_from_mut(self->scratch_tree);
+   if (!ts_parser__select_tree(self, current, candidate)) {
+     return false;
+   }
+
+   *tree->ptr = *self->scratch_tree.ptr;
+   return true;
+ }
+
+ // Perform a REDUCE on stack `version`: pop `count` subtrees, wrap them in a
+ // new parent node for `symbol`, and push that parent (followed by any trailing
+ // extra tokens) back onto the stack in the state reached via `symbol`.
+ //
+ // Parameters:
+ //   dynamic_precedence - added to the new parent's dynamic precedence.
+ //   production_id      - stored on the new parent.
+ //   fragile            - when true (or when the pop yielded several slices or
+ //                        several stack versions existed on entry) the parent is
+ //                        marked fragile on both sides and gets no parse state.
+ //
+ // Returns the index of the first stack version created by this call, or
+ // STACK_VERSION_NONE if the version count did not grow.
+ static StackVersion ts_parser__reduce(
+   TSParser *self,
+   StackVersion version,
+   TSSymbol symbol,
+   uint32_t count,
+   int dynamic_precedence,
+   uint16_t production_id,
+   bool fragile
+ ) {
+   uint32_t initial_version_count = ts_stack_version_count(self->stack);
+   uint32_t removed_version_count = 0;
+   StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
+
+   for (uint32_t i = 0; i < pop.size; i++) {
+     StackSlice slice = pop.contents[i];
+     // Versions removed earlier in this loop shift later version indices down.
+     StackVersion slice_version = slice.version - removed_version_count;
+
+     // Error recovery can sometimes cause lots of stack versions to merge,
+     // such that a single pop operation can produce a lot of slices.
+     // Avoid creating too many stack versions in that situation.
+     if (i > 0 && slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
+       ts_stack_remove_version(self->stack, slice_version);
+       ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
+       removed_version_count++;
+       while (i + 1 < pop.size) {
+         StackSlice next_slice = pop.contents[i + 1];
+         if (next_slice.version != slice.version) break;
+         ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
+         i++;
+       }
+       continue;
+     }
+
+     // Extra tokens on top of the stack should not be included in this new parent
+     // node. They will be re-pushed onto the stack after the parent node is
+     // created and pushed.
+     SubtreeArray children = slice.subtrees;
+     while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) {
+       children.size--;
+     }
+
+     MutableSubtree parent = ts_subtree_new_node(&self->tree_pool,
+       symbol, &children, production_id, self->language
+     );
+
+     // This pop operation may have caused multiple stack versions to collapse
+     // into one, because they all diverged from a common state. In that case,
+     // choose one of the arrays of trees to be the parent node's children, and
+     // delete the rest of the tree arrays.
+     while (i + 1 < pop.size) {
+       StackSlice next_slice = pop.contents[i + 1];
+       if (next_slice.version != slice.version) break;
+       i++;
+
+       // Named distinctly from the outer `children` so nothing is shadowed.
+       SubtreeArray next_slice_children = next_slice.subtrees;
+       while (
+         next_slice_children.size > 0 &&
+         ts_subtree_extra(next_slice_children.contents[next_slice_children.size - 1])
+       ) {
+         next_slice_children.size--;
+       }
+
+       if (ts_parser__replace_children(self, &parent, &next_slice_children)) {
+         ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
+         slice = next_slice;
+       } else {
+         ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
+       }
+     }
+
+     parent.ptr->dynamic_precedence += dynamic_precedence;
+     parent.ptr->production_id = production_id;
+
+     TSStateId state = ts_stack_state(self->stack, slice_version);
+     TSStateId next_state = ts_language_next_state(self->language, state, symbol);
+     if (fragile || pop.size > 1 || initial_version_count > 1) {
+       parent.ptr->fragile_left = true;
+       parent.ptr->fragile_right = true;
+       parent.ptr->parse_state = TS_TREE_STATE_NONE;
+     } else {
+       parent.ptr->parse_state = state;
+     }
+
+     // Push the parent node onto the stack, along with any extra tokens that
+     // were previously on top of the stack.
+     ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
+     for (uint32_t j = parent.ptr->child_count; j < slice.subtrees.size; j++) {
+       ts_stack_push(self->stack, slice_version, slice.subtrees.contents[j], false, next_state);
+     }
+
+     // Try to fold the new version into an earlier one (other than the version
+     // being reduced); a successful merge removes `slice_version`.
+     for (StackVersion j = 0; j < slice_version; j++) {
+       if (j == version) continue;
+       if (ts_stack_merge(self->stack, j, slice_version)) {
+         removed_version_count++;
+         break;
+       }
+     }
+   }
+
+   // Return the first new stack version that was created.
+   return ts_stack_version_count(self->stack) > initial_version_count
+     ? initial_version_count
+     : STACK_VERSION_NONE;
+ }
+
+ // Finish a parse on stack `version` with the end-of-input `lookahead`.
+ //
+ // The lookahead is pushed, then the whole stack version is popped. For every
+ // resulting slice, the last non-extra subtree is expanded in place into its
+ // children and a new root node with that subtree's symbol and production id is
+ // built over the entire slice. Each root competes with `self->finished_tree`
+ // through `ts_parser__select_tree`; the loser is released. `accept_count` is
+ // incremented once per slice. Finally the popped version is removed and
+ // `version` is halted.
+ static void ts_parser__accept(
+   TSParser *self,
+   StackVersion version,
+   Subtree lookahead
+ ) {
+   assert(ts_subtree_is_eof(lookahead));
+   ts_stack_push(self->stack, version, lookahead, false, 1);
+
+   StackSliceArray pop = ts_stack_pop_all(self->stack, version);
+   for (uint32_t slice_index = 0; slice_index < pop.size; slice_index++) {
+     SubtreeArray trees = pop.contents[slice_index].subtrees;
+     Subtree root = NULL_SUBTREE;
+
+     // Scan from the top of the slice downwards for the first non-extra tree.
+     for (uint32_t j = trees.size; j-- > 0;) {
+       Subtree child = trees.contents[j];
+       if (ts_subtree_extra(child)) continue;
+
+       assert(!child.data.is_inline);
+       uint32_t grandchild_count = ts_subtree_child_count(child);
+       for (uint32_t k = 0; k < grandchild_count; k++) {
+         ts_subtree_retain(child.ptr->children[k]);
+       }
+
+       // Replace `child` in the slice by its own children, then rebuild a root
+       // node of the same symbol over the flattened array.
+       array_splice(&trees, j, 1, grandchild_count, child.ptr->children);
+       MutableSubtree rebuilt = ts_subtree_new_node(
+         &self->tree_pool,
+         ts_subtree_symbol(child),
+         &trees,
+         child.ptr->production_id,
+         self->language
+       );
+       root = ts_subtree_from_mut(rebuilt);
+       ts_subtree_release(&self->tree_pool, child);
+       break;
+     }
+
+     assert(root.ptr);
+     self->accept_count++;
+
+     if (!self->finished_tree.ptr) {
+       self->finished_tree = root;
+     } else if (ts_parser__select_tree(self, self->finished_tree, root)) {
+       ts_subtree_release(&self->tree_pool, self->finished_tree);
+       self->finished_tree = root;
+     } else {
+       ts_subtree_release(&self->tree_pool, root);
+     }
+   }
+
+   ts_stack_remove_version(self->stack, pop.contents[0].version);
+   ts_stack_halt(self->stack, version);
+ }
+
+ // Starting from `starting_version`, repeatedly apply every REDUCE action (with
+ // at least one child) that the parse table offers, visiting the versions those
+ // reductions create as well.
+ //
+ // When `lookahead_symbol` is non-zero only that symbol's table entry is
+ // consulted; when it is zero, every symbol from 1 up to (not including) the
+ // language's `token_count` is consulted.
+ //
+ // Returns true if any visited version had a SHIFT or RECOVER action that is
+ // neither `extra` nor `repetition` for the consulted symbol(s). With a
+ // non-zero `lookahead_symbol`, a visited version that has no such action and
+ // is not replaced by a reduction result is removed from the stack.
+ static bool ts_parser__do_all_potential_reductions(
+   TSParser *self,
+   StackVersion starting_version,
+   TSSymbol lookahead_symbol
+ ) {
+   uint32_t initial_version_count = ts_stack_version_count(self->stack);
+
+   bool can_shift_lookahead_symbol = false;
+   StackVersion version = starting_version;
+   // `iteration` counts passes of this loop (including passes ended early by
+   // `continue`) and caps how often a version may be replaced by a reduction.
+   for (unsigned iteration = 0; true; iteration++) {
+     uint32_t version_count = ts_stack_version_count(self->stack);
+     if (version >= version_count) break;
+
+     // Fold this version into one of the versions created during this call,
+     // if possible, instead of processing it separately.
+     bool merged = false;
+     for (StackVersion earlier = initial_version_count; earlier < version; earlier++) {
+       if (ts_stack_merge(self->stack, earlier, version)) {
+         merged = true;
+         break;
+       }
+     }
+     if (merged) continue;
+
+     TSStateId state = ts_stack_state(self->stack, version);
+     bool has_shift_action = false;
+     array_clear(&self->reduce_actions);
+
+     TSSymbol first_symbol, end_symbol;
+     if (lookahead_symbol != 0) {
+       first_symbol = lookahead_symbol;
+       end_symbol = lookahead_symbol + 1;
+     } else {
+       first_symbol = 1;
+       end_symbol = self->language->token_count;
+     }
+
+     for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
+       TableEntry entry;
+       ts_language_table_entry(self->language, state, symbol, &entry);
+       for (uint32_t j = 0; j < entry.action_count; j++) {
+         TSParseAction action = entry.actions[j];
+         switch (action.type) {
+           case TSParseActionTypeShift:
+           case TSParseActionTypeRecover:
+             if (!action.params.extra && !action.params.repetition) has_shift_action = true;
+             break;
+           case TSParseActionTypeReduce:
+             if (action.params.child_count > 0) {
+               ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
+                 .symbol = action.params.symbol,
+                 .count = action.params.child_count,
+                 .dynamic_precedence = action.params.dynamic_precedence,
+                 .production_id = action.params.production_id,
+               });
+             }
+             break;
+           default:
+             break;
+         }
+       }
+     }
+
+     // Apply every collected reduction; only the result of the last call is
+     // kept in `reduction_version`.
+     StackVersion reduction_version = STACK_VERSION_NONE;
+     for (uint32_t k = 0; k < self->reduce_actions.size; k++) {
+       ReduceAction action = self->reduce_actions.contents[k];
+
+       reduction_version = ts_parser__reduce(
+         self, version, action.symbol, action.count,
+         action.dynamic_precedence, action.production_id,
+         true
+       );
+     }
+
+     if (has_shift_action) {
+       can_shift_lookahead_symbol = true;
+     } else if (reduction_version != STACK_VERSION_NONE && iteration < MAX_VERSION_COUNT) {
+       ts_stack_renumber_version(self->stack, reduction_version, version);
+       continue;
+     } else if (lookahead_symbol != 0) {
+       ts_stack_remove_version(self->stack, version);
+     }
+
+     // After the starting version, jump to the versions that did not exist
+     // when this pass began; from then on walk them one by one.
+     if (version == starting_version) {
+       version = version_count;
+     } else {
+       version++;
+     }
+   }
+
+   return can_shift_lookahead_symbol;
+ }
+
+static void ts_parser__handle_error(
+ TSParser *self,
+ StackVersion version,
+ TSSymbol lookahead_symbol
+) {
+ uint32_t previous_version_count = ts_stack_version_count(self->stack);
+
+ // Perform any reductions that can happen in this state, regardless of the lookahead. After
+ // skipping one or more invalid tokens, the parser might find a token that would have allowed
+ // a reduction to take place.
+ ts_parser__do_all_potential_reductions(self, version, 0);
+ uint32_t version_count = ts_stack_version_count(self->stack);
+ Length position = ts_stack_position(self->stack, version);
+
+ // Push a discontinuity onto the stack. Merge all of the stack versions that
+ // were created in the previous step.
+ bool did_insert_missing_token = false;
+ for (StackVersion v = version; v < version_count;) {
+ if (!did_insert_missing_token) {
+ TSStateId state = ts_stack_state(self->stack, v);
+ for (TSSymbol missing_symbol = 1;
+ missing_symbol < self->language->token_count;
+ missing_symbol++) {
+ TSStateId state_after_missing_symbol = ts_language_next_state(
+ self->language, state, missing_symbol
+ );
+ if (state_after_missing_symbol == 0) continue;
+
+ if (ts_language_has_reduce_action(
+ self->language,
+ state_after_missing_symbol,
+ lookahead_symbol
+ )) {
+ // In case the parser is currently outside of any included range, the lexer will
+ // snap to the beginning of the next included range. The missing token's padding
+ // must be assigned to position it within the next included range.
+ ts_lexer_reset(&self->lexer, position);
+ ts_lexer_mark_end(&self->lexer);
+ Length padding = length_sub(self->lexer.token_end_position, position);
+
+ StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
+ Subtree missing_tree = ts_subtree_new_missing_leaf(
+ &self->tree_pool, missing_symbol, padding, self->language
+ );
+ ts_stack_push(
+ self->stack, version_with_missing_tree,
+ missing_tree, false,
+ state_after_missing_symbol
+ );
+
+ if (ts_parser__do_all_potential_reductions(
+ self, version_with_missing_tree,
+ lookahead_symbol
+ )) {
+ LOG(
+ "recover_with_missing symbol:%s, state:%u",
+ SYM_NAME(missing_symbol),
+ ts_stack_state(self->stack, version_with_missing_tree)
+ );
+ did_insert_missing_token = true;
+ break;
+ }
+ }
+ }
+ }
+
+ ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
+ v = (v == version) ? previous_version_count : v + 1;
+ }
+
+ for (unsigned i = previous_version_count; i < version_count; i++) {
+ bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
+ assert(did_merge);
+ }
+
+ ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
+ LOG_STACK();
+}
+
+ // Abort parsing and still produce a tree from stack version 0.
+ //
+ // The lexer is advanced to the end of the input; the span between the current
+ // position of stack version 0 and the lexer's final position is covered by an
+ // invisible error leaf. An error node with no children is pushed on top, and
+ // the parse is accepted with a zero-length end-of-input token.
+ static void ts_parser__halt_parse(TSParser *self) {
+   LOG("halting_parse");
+   LOG_STACK();
+
+   ts_lexer_advance_to_end(&self->lexer);
+   Length unparsed_span = length_sub(
+     self->lexer.current_position, ts_stack_position(self->stack, 0)
+   );
+
+   // Filler leaf spanning everything that was never parsed; hidden from view.
+   Subtree filler_node = ts_subtree_new_error(
+     &self->tree_pool, 0, length_zero(), unparsed_span, unparsed_span.bytes, 0, self->language
+   );
+   ts_subtree_to_mut_unsafe(filler_node).ptr->visible = false;
+   ts_stack_push(self->stack, 0, filler_node, false, 0);
+
+   // Error node with no children, pushed on top of the filler.
+   SubtreeArray no_children = array_new();
+   Subtree root_error = ts_subtree_new_error_node(&self->tree_pool, &no_children, false, self->language);
+   ts_stack_push(self->stack, 0, root_error, false, 0);
+
+   // Zero-length end-of-input token used to accept the parse.
+   Subtree eof = ts_subtree_new_leaf(
+     &self->tree_pool, ts_builtin_sym_end, length_zero(), length_zero(),
+     0, 0, false, false, self->language
+   );
+   ts_parser__accept(self, 0, eof);
+ }
+
+ // Pop `depth` entries from stack `version` and, for each resulting version
+ // that lands in `goal_state`, wrap the popped subtrees in an ERROR node and
+ // push it back so that parsing can continue from `goal_state`.
+ //
+ // Slices that repeat the version of the previously accepted slice are
+ // discarded; slices whose version is not in `goal_state` are discarded and
+ // their version halted. The children of an error already sitting on top of
+ // the stack are prepended to the popped subtrees, and trailing extras are
+ // pushed back after the new ERROR node rather than inside it.
+ //
+ // Returns true if at least one version was recovered to `goal_state`.
+ static bool ts_parser__recover_to_state(
+   TSParser *self,
+   StackVersion version,
+   unsigned depth,
+   TSStateId goal_state
+ ) {
+   StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
+   StackVersion previous_version = STACK_VERSION_NONE;
+
+   for (unsigned i = 0; i < pop.size; i++) {
+     StackSlice slice = pop.contents[i];
+
+     // Drop slices that cannot be used. Only a slice with a fresh version
+     // has its state inspected and, on mismatch, its version halted.
+     bool repeats_previous = slice.version == previous_version;
+     if (repeats_previous || ts_stack_state(self->stack, slice.version) != goal_state) {
+       if (!repeats_previous) ts_stack_halt(self->stack, slice.version);
+       ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
+       array_erase(&pop, i--);
+       continue;
+     }
+
+     // If an error is on top of the stack, take over its children by placing
+     // them (retained) in front of the subtrees that were just popped.
+     SubtreeArray popped_errors = ts_stack_pop_error(self->stack, slice.version);
+     if (popped_errors.size > 0) {
+       assert(popped_errors.size == 1);
+       Subtree existing_error = popped_errors.contents[0];
+       uint32_t inherited_count = ts_subtree_child_count(existing_error);
+       if (inherited_count > 0) {
+         array_splice(&slice.subtrees, 0, 0, inherited_count, existing_error.ptr->children);
+         for (unsigned j = 0; j < inherited_count; j++) {
+           ts_subtree_retain(slice.subtrees.contents[j]);
+         }
+       }
+       ts_subtree_array_delete(&self->tree_pool, &popped_errors);
+     }
+
+     SubtreeArray trailing_extras = ts_subtree_array_remove_trailing_extras(&slice.subtrees);
+
+     if (slice.subtrees.size == 0) {
+       array_delete(&slice.subtrees);
+     } else {
+       Subtree error = ts_subtree_new_error_node(&self->tree_pool, &slice.subtrees, true, self->language);
+       ts_stack_push(self->stack, slice.version, error, false, goal_state);
+     }
+
+     for (unsigned j = 0; j < trailing_extras.size; j++) {
+       ts_stack_push(self->stack, slice.version, trailing_extras.contents[j], false, goal_state);
+     }
+
+     previous_version = slice.version;
+     array_delete(&trailing_extras);
+   }
+
+   return previous_version != STACK_VERSION_NONE;
+ }
+
+static void ts_parser__recover(
+ TSParser *self,
+ StackVersion version,
+ Subtree lookahead
+) {
+ bool did_recover = false;
+ unsigned previous_version_count = ts_stack_version_count(self->stack);
+ Length position = ts_stack_position(self->stack, version);
+ StackSummary *summary = ts_stack_get_summary(self->stack, version);
+ unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
+ unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
+
+ // When the parser is in the error state, there are two strategies for recovering with a
+ // given lookahead token:
+ // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
+ // create a new stack version that is in that state again. This entails popping all of the
+ // subtrees that have been pushed onto the stack since that previous state, and wrapping
+ // them in an ERROR node.
+ // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
+ // move on to the next lookahead token, remaining in the error state.
+ //
+ // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
+ // of the previous parse states and their depths. Look at each state in the summary, to see
+ // if the current lookahead token would be valid in that state.
+ if (summary && !ts_subtree_is_error(lookahead)) {
+ for (unsigned i = 0; i < summary->size; i++) {
+ StackSummaryEntry entry = summary->contents[i];
+
+ if (entry.state == ERROR_STATE) continue;
+ if (entry.position.bytes == position.bytes) continue;
+ unsigned depth = entry.depth;
+ if (node_count_since_error > 0) depth++;
+
+ // Do not recover in ways that create redundant stack versions.
+ bool would_merge = false;
+ for (unsigned j = 0; j < previous_version_count; j++) {
+ if (
+ ts_stack_state(self->stack, j) == entry.state &&
+ ts_stack_position(self->stack, j).bytes == position.bytes
+ ) {
+ would_merge = true;
+ break;
+ }
+ }
+ if (would_merge) continue;
+
+ // Do not recover if the result would clearly be worse than some existing stack version.
+ unsigned new_cost =
+ current_error_cost +
+ entry.depth * ERROR_COST_PER_SKIPPED_TREE +
+ (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
+ (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
+ if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
+
+ // If the current lookahead token is valid in some previous state, recover to that state.
+ // Then stop looking for further recoveries.
+ if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
+ if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
+ did_recover = true;
+ LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
+ LOG_STACK();
+ break;
+ }
+ }
+ }
+ }
+
+ // In the process of attemping to recover, some stack versions may have been created
+ // and subsequently halted. Remove those versions.
+ for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
+ if (!ts_stack_is_active(self->stack, i)) {
+ ts_stack_remove_version(self->stack, i--);
+ }
+ }
+
+ // If strategy 1 succeeded, a new stack version will have been created which is able to handle
+ // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
+ // current lookahead token by wrapping it in an ERROR node.
+
+ // Don't pursue this additional strategy if there are already too many stack versions.
+ if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
+ ts_stack_halt(self->stack, version);
+ ts_subtree_release(&self->tree_pool, lookahead);
+ return;
+ }
+
+ // If the parser is still in the error state at the end of the file, just wrap everything
+ // in an ERROR node and terminate.
+ if (ts_subtree_is_eof(lookahead)) {
+ LOG("recover_eof");
+ SubtreeArray children = array_new();
+ Subtree parent = ts_subtree_new_error_node(&self->tree_pool, &children, false, self->language);
+ ts_stack_push(self->stack, version, parent, false, 1);
+ ts_parser__accept(self, version, lookahead);
+ return;
+ }
+
+ // Do not recover if the result would clearly be worse than some existing stack version.
+ unsigned new_cost =
+ current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
+ ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
+ ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
+ if (ts_parser__better_version_exists(self, version, false, new_cost)) {
+ ts_stack_halt(self->stack, version);
+ ts_subtree_release(&self->tree_pool, lookahead);
+ return;
+ }
+
+ // If the current lookahead token is an extra token, mark it as extra. This means it won't
+ // be counted in error cost calculations.
+ unsigned n;
+ const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
+ if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.extra) {
+ MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
+ ts_subtree_set_extra(&mutable_lookahead);
+ lookahead = ts_subtree_from_mut(mutable_lookahead);
+ }
+
+ // Wrap the lookahead token in an ERROR.
+ LOG("skip_token symbol:%s", TREE_NAME(lookahead));
+ SubtreeArray children = array_new();
+ array_reserve(&children, 1);
+ array_push(&children, lookahead);
+ MutableSubtree error_repeat = ts_subtree_new_node(
+ &self->tree_pool,
+ ts_builtin_sym_error_repeat,
+ &children,
+ 0,
+ self->language
+ );
+
+ // If other tokens have already been skipped, so there is already an ERROR at the top of the
+ // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
+ // ERROR.
+ if (node_count_since_error > 0) {
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
+
+ // TODO: Figure out how to make this condition occur.
+ // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
+ // If multiple stack versions have merged at this point, just pick one of the errors
+ // arbitrarily and discard the rest.
+ if (pop.size > 1) {
+ for (unsigned i = 1; i < pop.size; i++) {
+ ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
+ }
+ while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
+ ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
+ }
+ }
+
+ ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
+ array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
+ error_repeat = ts_subtree_new_node(
+ &self->tree_pool,
+ ts_builtin_sym_error_repeat,
+ &pop.contents[0].subtrees,
+ 0,
+ self->language
+ );
+ }
+
+ // Push the new ERROR onto the stack.
+ ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
+ if (ts_subtree_has_external_tokens(lookahead)) {
+ ts_stack_set_last_external_token(
+ self->stack, version, ts_subtree_last_external_token(lookahead)
+ );
+ }
+}
+
+static bool ts_parser__advance(
+ TSParser *self,
+ StackVersion version,
+ bool allow_node_reuse
+) {
+ TSStateId state = ts_stack_state(self->stack, version);
+ uint32_t position = ts_stack_position(self->stack, version).bytes;
+ Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
+
+ bool did_reuse = true;
+ Subtree lookahead = NULL_SUBTREE;
+ TableEntry table_entry = {.action_count = 0};
+
+ // If possible, reuse a node from the previous syntax tree.
+ if (allow_node_reuse) {
+ lookahead = ts_parser__reuse_node(
+ self, version, &state, position, last_external_token, &table_entry
+ );
+ }
+
+ // If no node from the previous syntax tree could be reused, then try to
+ // reuse the token previously returned by the lexer.
+ if (!lookahead.ptr) {
+ did_reuse = false;
+ lookahead = ts_parser__get_cached_token(
+ self, state, position, last_external_token, &table_entry
+ );
+ }
+
+ // Otherwise, re-run the lexer.
+ if (!lookahead.ptr) {
+ lookahead = ts_parser__lex(self, version, state);
+ ts_parser__set_cached_token(self, position, last_external_token, lookahead);
+ ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
+ }
+
+ for (;;) {
+ // If a cancellation flag or a timeout was provided, then check every
+ // time a fixed number of parse actions has been processed.
+ if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
+ self->operation_count = 0;
+ }
+ if (
+ self->operation_count == 0 &&
+ ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
+ (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
+ ) {
+ ts_subtree_release(&self->tree_pool, lookahead);
+ return false;
+ }
+
+ // Process each parse action for the current lookahead token in
+ // the current state. If there are multiple actions, then this is
+ // an ambiguous state. REDUCE actions always create a new stack
+ // version, whereas SHIFT actions update the existing stack version
+ // and terminate this loop.
+ StackVersion last_reduction_version = STACK_VERSION_NONE;
+ for (uint32_t i = 0; i < table_entry.action_count; i++) {
+ TSParseAction action = table_entry.actions[i];
+
+ switch (action.type) {
+ case TSParseActionTypeShift: {
+ if (action.params.repetition) break;
+ TSStateId next_state;
+ if (action.params.extra) {
+
+ // TODO: remove when TREE_SITTER_LANGUAGE_VERSION 9 is out.
+ if (state == ERROR_STATE) continue;
+
+ next_state = state;
+ LOG("shift_extra");
+ } else {
+ next_state = action.params.state;
+ LOG("shift state:%u", next_state);
+ }
+
+ if (ts_subtree_child_count(lookahead) > 0) {
+ ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
+ next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
+ }
+
+ ts_parser__shift(self, version, next_state, lookahead, action.params.extra);
+ if (did_reuse) reusable_node_advance(&self->reusable_node);
+ return true;
+ }
+
+ case TSParseActionTypeReduce: {
+ bool is_fragile = table_entry.action_count > 1;
+ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count);
+ StackVersion reduction_version = ts_parser__reduce(
+ self, version, action.params.symbol, action.params.child_count,
+ action.params.dynamic_precedence, action.params.production_id,
+ is_fragile
+ );
+ if (reduction_version != STACK_VERSION_NONE) {
+ last_reduction_version = reduction_version;
+ }
+ break;
+ }
+
+ case TSParseActionTypeAccept: {
+ LOG("accept");
+ ts_parser__accept(self, version, lookahead);
+ return true;
+ }
+
+ case TSParseActionTypeRecover: {
+ if (ts_subtree_child_count(lookahead) > 0) {
+ ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
+ }
+
+ ts_parser__recover(self, version, lookahead);
+ if (did_reuse) reusable_node_advance(&self->reusable_node);
+ return true;
+ }
+ }
+ }
+
+ // If a reduction was performed, then replace the current stack version
+ // with one of the stack versions created by a reduction, and continue
+ // processing this version of the stack with the same lookahead symbol.
+ if (last_reduction_version != STACK_VERSION_NONE) {
+ ts_stack_renumber_version(self->stack, last_reduction_version, version);
+ LOG_STACK();
+ state = ts_stack_state(self->stack, version);
+ ts_language_table_entry(
+ self->language,
+ state,
+ ts_subtree_leaf_symbol(lookahead),
+ &table_entry
+ );
+ continue;
+ }
+
+ // If there were no parse actions for the current lookahead token, then
+ // it is not valid in this state. If the current lookahead token is a
+ // keyword, then switch to treating it as the normal word token if that
+ // token is valid in this state.
+ if (
+ ts_subtree_is_keyword(lookahead) &&
+ ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
+ ) {
+ ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
+ if (table_entry.action_count > 0) {
+ LOG(
+ "switch from_keyword:%s, to_word_token:%s",
+ TREE_NAME(lookahead),
+ SYM_NAME(self->language->keyword_capture_token)
+ );
+
+ MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
+ ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
+ lookahead = ts_subtree_from_mut(mutable_lookahead);
+ continue;
+ }
+ }
+
+ // If the current lookahead token is not valid and the parser is
+ // already in the error state, restart the error recovery process.
+ // TODO - can this be unified with the other `RECOVER` case above?
+ if (state == ERROR_STATE) {
+ ts_parser__recover(self, version, lookahead);
+ return true;
+ }
+
+ // If the current lookahead token is not valid and the previous
+ // subtree on the stack was reused from an old tree, it isn't actually
+ // valid to reuse it. Remove it from the stack, and in its place,
+ // push each of its children. Then try again to process the current
+ // lookahead.
+ if (ts_parser__breakdown_top_of_stack(self, version)) {
+ continue;
+ }
+
+ // At this point, the current lookahead token is definitely not valid
+ // for this parse stack version. Mark this version as paused and continue
+ // processing any other stack versions that might exist. If some other
+ // version advances successfully, then this version can simply be removed.
+ // But if all versions end up paused, then error recovery is needed.
+ LOG("detect_error");
+ ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead));
+ ts_subtree_release(&self->tree_pool, lookahead);
+ return true;
+ }
+}
+
+static unsigned ts_parser__condense_stack(TSParser *self) { /*
+ * Tidy up the set of stack versions: drop halted versions, compare every
+ * remaining pair so that worse versions are removed, merged or reordered,
+ * cap the total at MAX_VERSION_COUNT, and finally either resume one paused
+ * version (starting error handling on it) or discard the paused versions.
+ *
+ * Returns the lowest error cost among versions whose status is not
+ * `is_in_error`, or the error cost of the paused version that was resumed.
+ * The value stays UINT_MAX if neither case occurred.
+ */
+ bool made_changes = false;
+ unsigned min_error_cost = UINT_MAX;
+ for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
+ // Prune any versions that have been marked for removal.
+ if (ts_stack_is_halted(self->stack, i)) {
+ ts_stack_remove_version(self->stack, i);
+ i--; // the loop's i++ then revisits this same index
+ continue;
+ }
+
+ // Keep track of the minimum error cost of any stack version so
+ // that it can be returned.
+ ErrorStatus status_i = ts_parser__version_status(self, i);
+ if (!status_i.is_in_error && status_i.cost < min_error_cost) {
+ min_error_cost = status_i.cost;
+ }
+
+ // Examine each pair of stack versions, removing any versions that
+ // are clearly worse than another version. Ensure that the versions
+ // are ordered from most promising to least promising.
+ for (StackVersion j = 0; j < i; j++) {
+ ErrorStatus status_j = ts_parser__version_status(self, j);
+
+ switch (ts_parser__compare_versions(self, status_j, status_i)) {
+ case ErrorComparisonTakeLeft:
+ made_changes = true;
+ ts_stack_remove_version(self->stack, i);
+ i--;
+ j = i; // after j++ the test j < i fails, ending the inner loop
+ break;
+
+ case ErrorComparisonPreferLeft:
+ case ErrorComparisonNone:
+ if (ts_stack_merge(self->stack, j, i)) {
+ made_changes = true;
+ i--;
+ j = i;
+ }
+ break;
+
+ case ErrorComparisonPreferRight:
+ made_changes = true;
+ if (ts_stack_merge(self->stack, j, i)) {
+ i--;
+ j = i;
+ } else {
+ ts_stack_swap_versions(self->stack, i, j);
+ }
+ break;
+
+ case ErrorComparisonTakeRight:
+ made_changes = true;
+ ts_stack_remove_version(self->stack, j);
+ i--;
+ j--; // both indices step back, so j++ re-checks the same slot
+ break;
+ }
+ }
+ }
+
+ // Enforce a hard upper bound on the number of stack versions by
+ // discarding the least promising versions.
+ while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
+ ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
+ made_changes = true;
+ }
+
+ // If the best-performing stack version is currently paused, or all
+ // versions are paused, then resume the best paused version and begin
+ // the error recovery process. Otherwise, remove the paused versions.
+ if (ts_stack_version_count(self->stack) > 0) {
+ bool has_unpaused_version = false;
+ for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
+ if (ts_stack_is_paused(self->stack, i)) {
+ if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
+ LOG("resume version:%u", i);
+ min_error_cost = ts_stack_error_cost(self->stack, i);
+ TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i);
+ ts_parser__handle_error(self, i, lookahead_symbol);
+ has_unpaused_version = true;
+ } else {
+ ts_stack_remove_version(self->stack, i);
+ i--;
+ n--; // keep the cached count in step with the removal
+ }
+ } else {
+ has_unpaused_version = true;
+ }
+ }
+ }
+
+ if (made_changes) {
+ LOG("condense");
+ LOG_STACK();
+ }
+
+ return min_error_cost;
+}
+
+// Reports whether an earlier parse was left unfinished: true when the lexer's
+// current position is past byte 0, or when stack version 0 is not in state 1.
+static bool ts_parser_has_outstanding_parse(TSParser *self) {
+  if (self->lexer.current_position.bytes > 0) return true;
+  return ts_stack_state(self->stack, 0) != 1;
+}
+
+// Parser - Public
+
+// Allocate a parser and put its fields into their initial state. Fields that
+// are not assigned here keep the value ts_calloc gave them.
+TSParser *ts_parser_new(void) {
+  TSParser *self = ts_calloc(1, sizeof(TSParser));
+
+  // Members that have their own constructors. The stack borrows the subtree
+  // pool, so the pool is created first.
+  ts_lexer_init(&self->lexer);
+  array_init(&self->reduce_actions);
+  array_reserve(&self->reduce_actions, 4);
+  self->tree_pool = ts_subtree_pool_new(32);
+  self->stack = ts_stack_new(&self->tree_pool);
+  self->reusable_node = reusable_node_new();
+  self->included_range_differences = (TSRangeArray) array_new();
+
+  // Tree handles start out empty; the scratch tree points at in-struct storage.
+  self->finished_tree = NULL_SUBTREE;
+  self->old_tree = NULL_SUBTREE;
+  self->scratch_tree.ptr = &self->scratch_tree_data;
+
+  // Options and bookkeeping.
+  self->dot_graph_file = NULL;
+  self->halt_on_error = false;
+  self->cancellation_flag = NULL;
+  self->timeout_duration = 0;
+  self->end_clock = clock_null();
+  self->operation_count = 0;
+  self->included_range_difference_index = 0;
+
+  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
+  return self;
+}
+
+// Release everything owned by the parser and then the parser itself.
+// Passing NULL is allowed and does nothing.
+void ts_parser_delete(TSParser *self) {
+  if (self == NULL) return;
+
+  ts_stack_delete(self->stack);
+
+  // The arrays are only freed if they ever allocated storage.
+  if (self->reduce_actions.contents != NULL) {
+    array_delete(&self->reduce_actions);
+  }
+  if (self->included_range_differences.contents != NULL) {
+    array_delete(&self->included_range_differences);
+  }
+
+  if (self->old_tree.ptr != NULL) {
+    ts_subtree_release(&self->tree_pool, self->old_tree);
+    self->old_tree = NULL_SUBTREE;
+  }
+
+  ts_lexer_delete(&self->lexer);
+  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
+  ts_subtree_pool_delete(&self->tree_pool);
+  reusable_node_delete(&self->reusable_node);
+
+  // Clearing the language runs the external scanner's destroy hook, if any.
+  ts_parser_set_language(self, NULL);
+  ts_free(self);
+}
+
+// Return the language currently assigned to the parser.
+const TSLanguage *ts_parser_language(const TSParser *self) {
+  const TSLanguage *current = self->language;
+  return current;
+}
+
+// Assign `language` (which may be NULL) to the parser.
+//
+// Returns false, leaving the parser untouched, when the language's version is
+// above TREE_SITTER_LANGUAGE_VERSION or below
+// TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION. Otherwise the previous external
+// scanner payload is destroyed, a new one is created if the language provides
+// a `create` hook, and true is returned.
+bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
+  if (language != NULL) {
+    bool too_new = language->version > TREE_SITTER_LANGUAGE_VERSION;
+    bool too_old = language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION;
+    if (too_new || too_old) return false;
+  }
+
+  // Tear down the payload that belongs to the outgoing language.
+  if (self->external_scanner_payload && self->language->external_scanner.destroy) {
+    self->language->external_scanner.destroy(self->external_scanner_payload);
+  }
+
+  self->external_scanner_payload =
+    (language && language->external_scanner.create)
+      ? language->external_scanner.create()
+      : NULL;
+
+  self->language = language;
+  return true;
+}
+
+// Return a copy of the logger stored on the parser's lexer.
+TSLogger ts_parser_logger(const TSParser *self) {
+  TSLogger current = self->lexer.logger;
+  return current;
+}
+
+// Store `logger` on the parser's lexer, replacing any previous logger.
+void ts_parser_set_logger(TSParser *self, TSLogger logger) {
+  TSLogger *slot = &self->lexer.logger;
+  *slot = logger;
+}
+
+// Choose where dot graphs are written. A non-negative `fd` is wrapped in an
+// append-mode stream; a negative `fd` turns the output off. Any stream opened
+// by an earlier call is closed first.
+void ts_parser_print_dot_graphs(TSParser *self, int fd) {
+  if (self->dot_graph_file != NULL) fclose(self->dot_graph_file);
+  self->dot_graph_file = (fd >= 0) ? fdopen(fd, "a") : NULL;
+}
+
+void ts_parser_halt_on_error(TSParser *self, bool should_halt_on_error) {
+ self->halt_on_error = should_halt_on_error;
+}
+
+// Return the cancellation flag pointer previously stored on the parser, with
+// the `volatile` qualifier cast away.
+const size_t *ts_parser_cancellation_flag(const TSParser *self) {
+  const size_t *flag = (const size_t *)self->cancellation_flag;
+  return flag;
+}
+
+// Store `flag` as the parser's cancellation flag; it is kept as a pointer to
+// volatile so that reads through it are not cached.
+void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
+  const volatile size_t *watched = (const volatile size_t *)flag;
+  self->cancellation_flag = watched;
+}
+
+// Return the configured timeout, converted from the internal duration type
+// to microseconds.
+uint64_t ts_parser_timeout_micros(const TSParser *self) {
+  uint64_t micros = duration_to_micros(self->timeout_duration);
+  return micros;
+}
+
+// Set the timeout from a value in microseconds. ts_parser_parse treats a
+// zero duration as "no timeout".
+void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
+  TSDuration converted = duration_from_micros(timeout_micros);
+  self->timeout_duration = converted;
+}
+
+// Forward the list of included ranges to the parser's lexer.
+void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) {
+  Lexer *lexer = &self->lexer;
+  ts_lexer_set_included_ranges(lexer, ranges, count);
+}
+
+// Return the lexer's included ranges; the number of ranges is written
+// through `count` by ts_lexer_included_ranges.
+const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) {
+  const TSRange *ranges = ts_lexer_included_ranges(&self->lexer, count);
+  return ranges;
+}
+
+// Discard all in-progress parsing state: the external scanner is reset to its
+// empty state, the old tree and any finished tree are released, and the
+// reusable node, lexer position, stack and cached token are cleared.
+//
+// Safe to call on a parser that has no language assigned yet.
+void ts_parser_reset(TSParser *self) {
+  // `language` is NULL until ts_parser_set_language succeeds, so it must be
+  // checked before its external scanner hooks are looked up.
+  if (self->language && self->language->external_scanner.deserialize) {
+    // Deserializing from an empty buffer resets the scanner.
+    self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
+  }
+
+  if (self->old_tree.ptr) {
+    ts_subtree_release(&self->tree_pool, self->old_tree);
+    self->old_tree = NULL_SUBTREE;
+  }
+
+  reusable_node_clear(&self->reusable_node);
+  ts_lexer_reset(&self->lexer, length_zero());
+  ts_stack_clear(self->stack);
+  ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
+  if (self->finished_tree.ptr) {
+    ts_subtree_release(&self->tree_pool, self->finished_tree);
+    self->finished_tree = NULL_SUBTREE;
+  }
+  self->accept_count = 0;
+}
+
+TSTree *ts_parser_parse(
+ TSParser *self,
+ const TSTree *old_tree,
+ TSInput input
+) { /*
+ * Parse the text supplied by `input` and return a newly created tree.
+ *
+ * Returns NULL immediately if the parser has no language or `input.read`
+ * is NULL. Also returns NULL, without resetting the parser, as soon as
+ * ts_parser__advance reports failure; the next call then takes the
+ * "resume_parsing" branch below.
+ *
+ * When no parse is outstanding and `old_tree` is given, its root is
+ * retained and used to seed node reuse, and the differences between its
+ * included ranges and the lexer's current ones are recorded. On success
+ * the parser is reset before the tree is returned.
+ */
+ if (!self->language || !input.read) return NULL;
+
+ ts_lexer_set_input(&self->lexer, input);
+
+ array_clear(&self->included_range_differences);
+ self->included_range_difference_index = 0;
+
+ if (ts_parser_has_outstanding_parse(self)) {
+ LOG("resume_parsing");
+ } else if (old_tree) {
+ ts_subtree_retain(old_tree->root);
+ self->old_tree = old_tree->root;
+ ts_range_array_get_changed_ranges(
+ old_tree->included_ranges, old_tree->included_range_count,
+ self->lexer.included_ranges, self->lexer.included_range_count,
+ &self->included_range_differences
+ );
+ reusable_node_reset(&self->reusable_node, old_tree->root);
+ LOG("parse_after_edit");
+ LOG_TREE(self->old_tree);
+ for (unsigned i = 0; i < self->included_range_differences.size; i++) {
+ TSRange *range = &self->included_range_differences.contents[i];
+ LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
+ }
+ } else {
+ reusable_node_clear(&self->reusable_node);
+ LOG("new_parse");
+ }
+
+ uint32_t position = 0, last_position = 0, version_count = 0;
+ self->operation_count = 0;
+ if (self->timeout_duration) { // a zero duration means no deadline
+ self->end_clock = clock_after(clock_now(), self->timeout_duration);
+ } else {
+ self->end_clock = clock_null();
+ }
+
+ do {
+ for (StackVersion version = 0;
+ version_count = ts_stack_version_count(self->stack), version < version_count;
+ version++) {
+ bool allow_node_reuse = version_count == 1; // reuse old nodes only while a single version exists
+ while (ts_stack_is_active(self->stack, version)) {
+ LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
+ version, ts_stack_version_count(self->stack),
+ ts_stack_state(self->stack, version),
+ ts_stack_position(self->stack, version).extent.row + 1,
+ ts_stack_position(self->stack, version).extent.column);
+
+ if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
+ LOG_STACK();
+
+ position = ts_stack_position(self->stack, version).bytes;
+ if (position > last_position || (version > 0 && position == last_position)) { // stop once this version passes (or, for later versions, reaches) the furthest position so far
+ last_position = position;
+ break;
+ }
+ }
+ }
+
+ unsigned min_error_cost = ts_parser__condense_stack(self);
+ if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) { // the finished tree beats every remaining version
+ break;
+ } else if (self->halt_on_error && min_error_cost > 0) {
+ ts_parser__halt_parse(self);
+ break;
+ }
+
+ while (self->included_range_difference_index < self->included_range_differences.size) { // skip difference ranges that end at or before the current position
+ TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
+ if (range->end_byte <= position) {
+ self->included_range_difference_index++;
+ } else {
+ break;
+ }
+ }
+ } while (version_count != 0);
+
+ ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
+ LOG("done");
+ LOG_TREE(self->finished_tree);
+
+ TSTree *result = ts_tree_new(
+ self->finished_tree,
+ self->language,
+ self->lexer.included_ranges,
+ self->lexer.included_range_count
+ );
+ self->finished_tree = NULL_SUBTREE; // cleared so ts_parser_reset below does not release the tree just passed to ts_tree_new
+ ts_parser_reset(self);
+ return result;
+}
+
+// Parse `length` bytes at `string` as UTF-8 text. This is a convenience
+// wrapper around ts_parser_parse_string_encoding.
+TSTree *ts_parser_parse_string(
+  TSParser *self,
+  const TSTree *old_tree,
+  const char *string,
+  uint32_t length
+) {
+  const TSInputEncoding encoding = TSInputEncodingUTF8;
+  return ts_parser_parse_string_encoding(self, old_tree, string, length, encoding);
+}
+
+// Parse an in-memory buffer in the given encoding by wrapping it in a TSInput
+// whose read callback is ts_string_input_read.
+TSTree *ts_parser_parse_string_encoding(TSParser *self, const TSTree *old_tree,
+                                        const char *string, uint32_t length, TSInputEncoding encoding) {
+  // The payload lives on this stack frame, which outlives the parse call.
+  TSStringInput string_input = {string, length};
+  TSInput input = {
+    &string_input,
+    ts_string_input_read,
+    encoding,
+  };
+  return ts_parser_parse(self, old_tree, input);
+}
+
+#undef LOG
diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h
new file mode 100644
index 0000000000..974a7ca52f
--- /dev/null
+++ b/src/tree_sitter/parser.h
@@ -0,0 +1,220 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#define ts_builtin_sym_error ((TSSymbol)-1) // -1 converted to TSSymbol, i.e. the largest value the type can hold
+#define ts_builtin_sym_end 0 // NOTE(review): presumably the end-of-input symbol; confirm against the generated parsers
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 // NOTE(review): presumably the size of the buffer given to external_scanner.serialize; confirm
+
+#ifndef TREE_SITTER_API_H_
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+#endif
+
+typedef struct { // One entry of TSLanguage.field_map_entries.
+ TSFieldId field_id;
+ uint8_t child_index;
+ bool inherited; // NOTE(review): meaning is not visible here; confirm against the field-map lookup code
+} TSFieldMapEntry;
+
+typedef struct { // One entry of TSLanguage.field_map_slices.
+ uint16_t index; // NOTE(review): presumably the first entry in field_map_entries; confirm
+ uint16_t length; // NOTE(review): presumably the number of entries in the slice; confirm
+} TSFieldMapSlice;
+
+typedef uint16_t TSStateId;
+
+typedef struct { // Per-symbol flags, stored as one-bit fields; element type of TSLanguage.symbol_metadata.
+ bool visible : 1;
+ bool named : 1;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+struct TSLexer { // Interface handed to lex functions; the lexer macros below operate on it.
+ int32_t lookahead; // current character; START_LEXER copies it into a local
+ TSSymbol result_symbol; // written by ACCEPT_TOKEN
+ void (*advance)(TSLexer *, bool); // the bool is the `skip` flag set by SKIP
+ void (*mark_end)(TSLexer *); // called by ACCEPT_TOKEN
+ uint32_t (*get_column)(TSLexer *);
+ bool (*is_at_included_range_start)(TSLexer *);
+};
+
+typedef enum { // Kind of a parse action; stored in the 4-bit TSParseAction.type field.
+ TSParseActionTypeShift,
+ TSParseActionTypeReduce,
+ TSParseActionTypeAccept,
+ TSParseActionTypeRecover,
+} TSParseActionType;
+
+typedef struct { // One parse-table action: `type` selects which member of `params` is meaningful.
+ union {
+ struct { // filled in by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA
+ TSStateId state;
+ bool extra : 1;
+ bool repetition : 1;
+ };
+ struct { // filled in by REDUCE
+ TSSymbol symbol;
+ int16_t dynamic_precedence;
+ uint8_t child_count;
+ uint8_t production_id;
+ };
+ } params;
+ TSParseActionType type : 4;
+} TSParseAction;
+
+typedef struct { // Element type of TSLanguage.lex_modes.
+ uint16_t lex_state;
+ uint16_t external_lex_state;
+} TSLexMode;
+
+typedef union { // Element of TSLanguage.parse_actions: either an action or a { count, reusable } record.
+ TSParseAction action;
+ struct { // NOTE(review): presumably a header that precedes `count` actions; confirm against ts_language_table_entry
+ uint8_t count;
+ bool reusable : 1;
+ };
+} TSParseActionEntry;
+
+struct TSLanguage { // Static description of a grammar: symbol tables, parse tables, lex functions and external scanner hooks.
+ uint32_t version; // ts_parser_set_language rejects values outside the supported version range
+ uint32_t symbol_count;
+ uint32_t alias_count;
+ uint32_t token_count;
+ uint32_t external_token_count;
+ const char **symbol_names;
+ const TSSymbolMetadata *symbol_metadata;
+ const uint16_t *parse_table;
+ const TSParseActionEntry *parse_actions;
+ const TSLexMode *lex_modes;
+ const TSSymbol *alias_sequences;
+ uint16_t max_alias_sequence_length;
+ bool (*lex_fn)(TSLexer *, TSStateId);
+ bool (*keyword_lex_fn)(TSLexer *, TSStateId);
+ TSSymbol keyword_capture_token; // the word token that the parser falls back to when a keyword is not valid in a state
+ struct {
+ const bool *states;
+ const TSSymbol *symbol_map;
+ void *(*create)(void); // result is kept by the parser as its scanner payload
+ void (*destroy)(void *); // called with the payload when the parser's language is replaced
+ bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+ unsigned (*serialize)(void *, char *);
+ void (*deserialize)(void *, const char *, unsigned); // ts_parser_reset calls this with (payload, NULL, 0)
+ } external_scanner;
+ uint32_t field_count;
+ const TSFieldMapSlice *field_map_slices;
+ const TSFieldMapEntry *field_map_entries;
+ const char **field_names;
+ uint32_t large_state_count;
+ const uint16_t *small_parse_table;
+ const uint32_t *small_parse_table_map;
+};
+
+/*
+ * Lexer Macros
+ *
+ * These macros are meant to be expanded inside a function that has a
+ * `TSLexer *lexer` and an assignable `state` in scope.
+ *
+ * START_LEXER()   declares `result`, `skip` and `lookahead`, then jumps to
+ *                 the `start` label. The `next_state` label calls
+ *                 lexer->advance(lexer, skip) and falls through to `start`,
+ *                 which clears `skip` and reloads `lookahead`.
+ * ADVANCE(s)      sets `state` to s and jumps to `next_state`.
+ * SKIP(s)         like ADVANCE, but sets `skip` first so that the advance
+ *                 call receives skip == true.
+ * ACCEPT_TOKEN(t) sets `result`, stores t in lexer->result_symbol and calls
+ *                 lexer->mark_end. It expands to three separate statements
+ *                 with no enclosing braces, so it must not be the sole body
+ *                 of an unbraced `if`.
+ * END_STATE()     returns `result`.
+ */
+
+#define START_LEXER() \
+ bool result = false; \
+ bool skip = false; \
+ int32_t lookahead; \
+ goto start; \
+ next_state: \
+ lexer->advance(lexer, skip); \
+ start: \
+ skip = false; \
+ lookahead = lexer->lookahead;
+
+#define ADVANCE(state_value) \
+ { \
+ state = state_value; \
+ goto next_state; \
+ }
+
+#define SKIP(state_value) \
+ { \
+ skip = true; \
+ state = state_value; \
+ goto next_state; \
+ }
+
+#define ACCEPT_TOKEN(symbol_value) \
+ result = true; \
+ lexer->result_symbol = symbol_value; \
+ lexer->mark_end(lexer);
+
+#define END_STATE() return result;
+
+/*
+ * Parse Table Macros
+ *
+ * SMALL_STATE(id) converts a state id into an index relative to
+ * LARGE_STATE_COUNT, which must be defined where the macro is expanded.
+ * STATE(id) and ACTIONS(id) expand to their argument unchanged.
+ *
+ * The remaining macros expand to brace-enclosed initializers for a
+ * TSParseActionEntry whose `action` member has the matching type.
+ */
+
+// Argument and result are parenthesized so that the subtraction cannot
+// re-associate with operators around the expansion or inside the argument.
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+
+#define STATE(id) id
+
+#define ACTIONS(id) id
+
+// Shift the lookahead and move to `state_value`.
+#define SHIFT(state_value)              \
+  {                                     \
+    {                                   \
+      .type = TSParseActionTypeShift,   \
+      .params = {.state = state_value}, \
+    }                                   \
+  }
+
+// Shift action with the `repetition` flag set.
+#define SHIFT_REPEAT(state_value)     \
+  {                                   \
+    {                                 \
+      .type = TSParseActionTypeShift, \
+      .params = {                     \
+        .state = state_value,         \
+        .repetition = true            \
+      },                              \
+    }                                 \
+  }
+
+// Action that starts error recovery.
+#define RECOVER()                        \
+  {                                      \
+    { .type = TSParseActionTypeRecover } \
+  }
+
+// Shift action with the `extra` flag set and no target state.
+#define SHIFT_EXTRA()                 \
+  {                                   \
+    {                                 \
+      .type = TSParseActionTypeShift, \
+      .params = {.extra = true}       \
+    }                                 \
+  }
+
+// Reduce `child_count_val` children to `symbol_val`. Further designated
+// initializers for `params` may be passed as trailing arguments.
+#define REDUCE(symbol_val, child_count_val, ...) \
+  {                                              \
+    {                                            \
+      .type = TSParseActionTypeReduce,           \
+      .params = {                                \
+        .symbol = symbol_val,                    \
+        .child_count = child_count_val,          \
+        __VA_ARGS__                              \
+      }                                          \
+    }                                            \
+  }
+
+// Action that accepts the input.
+#define ACCEPT_INPUT()                  \
+  {                                     \
+    { .type = TSParseActionTypeAccept } \
+  }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_PARSER_H_
diff --git a/src/tree_sitter/point.h b/src/tree_sitter/point.h
new file mode 100644
index 0000000000..4d0aed18ef
--- /dev/null
+++ b/src/tree_sitter/point.h
@@ -0,0 +1,53 @@
+#ifndef TREE_SITTER_POINT_H_
+#define TREE_SITTER_POINT_H_
+
+#include "tree_sitter/api.h"
+
+#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
+
+// Build a TSPoint from a row and a column.
+static inline TSPoint point__new(unsigned row, unsigned column) {
+  return (TSPoint) {row, column};
+}
+
+// Add the extent `b` to the position `a`. If `b` spans at least one row, the
+// result's column is `b`'s column; otherwise the columns are added.
+static inline TSPoint point_add(TSPoint a, TSPoint b) {
+  return (b.row > 0)
+    ? point__new(a.row + b.row, b.column)
+    : point__new(a.row, a.column + b.column);
+}
+
+// Subtract `b` from `a`. If `a` is on a later row the row difference is
+// returned together with `a`'s column; otherwise the row is 0 and the
+// columns are subtracted.
+static inline TSPoint point_sub(TSPoint a, TSPoint b) {
+  return (a.row > b.row)
+    ? point__new(a.row - b.row, a.column)
+    : point__new(0, a.column - b.column);
+}
+
+// True when `a` is at or before `b`, comparing rows first, then columns.
+static inline bool point_lte(TSPoint a, TSPoint b) {
+  if (a.row != b.row) return a.row < b.row;
+  return a.column <= b.column;
+}
+
+// True when `a` is strictly before `b`, comparing rows first, then columns.
+static inline bool point_lt(TSPoint a, TSPoint b) {
+  if (a.row != b.row) return a.row < b.row;
+  return a.column < b.column;
+}
+
+// True when both points have the same row and the same column.
+static inline bool point_eq(TSPoint a, TSPoint b) {
+  if (a.row != b.row) return false;
+  return a.column == b.column;
+}
+
+// Return the earlier of two points; `b` is returned when they are equal.
+static inline TSPoint point_min(TSPoint a, TSPoint b) {
+  bool a_is_earlier =
+    a.row < b.row || (a.row == b.row && a.column < b.column);
+  return a_is_earlier ? a : b;
+}
+
+// Return the later of two points; `b` is returned when they are equal.
+static inline TSPoint point_max(TSPoint a, TSPoint b) {
+  bool a_is_later =
+    a.row > b.row || (a.row == b.row && a.column > b.column);
+  return a_is_later ? a : b;
+}
+
+#endif
diff --git a/src/tree_sitter/reduce_action.h b/src/tree_sitter/reduce_action.h
new file mode 100644
index 0000000000..72aff08d73
--- /dev/null
+++ b/src/tree_sitter/reduce_action.h
@@ -0,0 +1,34 @@
+#ifndef TREE_SITTER_REDUCE_ACTION_H_
+#define TREE_SITTER_REDUCE_ACTION_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./array.h"
+#include "tree_sitter/api.h"
+
+typedef struct { // A candidate reduction; ts_reduce_action_set_add treats two actions as duplicates when `symbol` and `count` match.
+ uint32_t count;
+ TSSymbol symbol;
+ int dynamic_precedence;
+ unsigned short production_id;
+} ReduceAction;
+
+typedef Array(ReduceAction) ReduceActionSet;
+
+// Append `new_action` to the set unless an action with the same symbol and
+// the same count is already present. Other fields are not compared.
+static inline void ts_reduce_action_set_add(ReduceActionSet *self,
+                                            ReduceAction new_action) {
+  for (uint32_t index = 0; index < self->size; index++) {
+    const ReduceAction *existing = &self->contents[index];
+    bool is_duplicate =
+      existing->symbol == new_action.symbol &&
+      existing->count == new_action.count;
+    if (is_duplicate) return;
+  }
+  array_push(self, new_action);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_REDUCE_ACTION_H_
diff --git a/src/tree_sitter/reusable_node.h b/src/tree_sitter/reusable_node.h
new file mode 100644
index 0000000000..9cba951909
--- /dev/null
+++ b/src/tree_sitter/reusable_node.h
@@ -0,0 +1,88 @@
+#include "./subtree.h"
+
+typedef struct { // One level of the path from the old tree's root down to the current reusable node.
+ Subtree tree;
+ uint32_t child_index; // index of `tree` within the children of the entry below it on the stack
+ uint32_t byte_offset; // byte offset assigned to `tree` when the entry was pushed
+} StackEntry;
+
+typedef struct { // Cursor over an old tree; the top of `stack` is the current node.
+ Array(StackEntry) stack;
+ Subtree last_external_token; // updated by reusable_node_advance when the node it leaves has external tokens
+} ReusableNode;
+
+// Create a cursor with an empty stack and no last external token.
+static inline ReusableNode reusable_node_new(void) {
+  ReusableNode node = {array_new(), NULL_SUBTREE};
+  return node;
+}
+
+// Empty the cursor's stack and forget the last external token.
+static inline void reusable_node_clear(ReusableNode *self) {
+  self->last_external_token = NULL_SUBTREE;
+  array_clear(&self->stack);
+}
+
+// Point the cursor at `tree`: the stack is cleared and `tree` becomes its
+// only entry, at child index 0 and byte offset 0.
+static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
+  reusable_node_clear(self);
+
+  StackEntry root_entry = {
+    .tree = tree,
+    .child_index = 0,
+    .byte_offset = 0,
+  };
+  array_push(&self->stack, root_entry);
+}
+
+// Return the subtree at the top of the stack, or NULL_SUBTREE when the
+// stack is empty.
+static inline Subtree reusable_node_tree(ReusableNode *self) {
+  if (self->stack.size == 0) return NULL_SUBTREE;
+  return self->stack.contents[self->stack.size - 1].tree;
+}
+
+// Return the byte offset of the entry at the top of the stack, or
+// UINT32_MAX when the stack is empty.
+static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
+  if (self->stack.size == 0) return UINT32_MAX;
+  return self->stack.contents[self->stack.size - 1].byte_offset;
+}
+
+// Free the storage used by the cursor's stack.
+static inline void reusable_node_delete(ReusableNode *self) {
+  if (self) array_delete(&self->stack); else array_delete(&self->stack);
+}
+
+static inline void reusable_node_advance(ReusableNode *self) { /*
+ * Move the cursor from the current node to the node that follows it: the
+ * next child of the nearest ancestor that still has one. The new entry's
+ * byte offset is the old entry's offset plus the old tree's total bytes.
+ * If the old tree has external tokens, its last one is remembered in
+ * `last_external_token`. If no ancestor has a further child, the stack
+ * is left empty.
+ */
+ StackEntry last_entry = *array_back(&self->stack);
+ uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);
+ if (ts_subtree_has_external_tokens(last_entry.tree)) {
+ self->last_external_token = ts_subtree_last_external_token(last_entry.tree);
+ }
+
+ Subtree tree;
+ uint32_t next_index;
+ do {
+ StackEntry popped_entry = array_pop(&self->stack);
+ next_index = popped_entry.child_index + 1;
+ if (self->stack.size == 0) return; // popped the root: nothing follows
+ tree = array_back(&self->stack)->tree;
+ } while (ts_subtree_child_count(tree) <= next_index); // keep climbing while this parent has no further child
+
+ array_push(&self->stack, ((StackEntry) {
+ .tree = tree.ptr->children[next_index],
+ .child_index = next_index,
+ .byte_offset = byte_offset,
+ }));
+}
+
+// Step down to the first child of the current node. Returns false, leaving
+// the cursor where it is, when the current node has no children.
+static inline bool reusable_node_descend(ReusableNode *self) {
+  StackEntry top = *array_back(&self->stack);
+  if (ts_subtree_child_count(top.tree) == 0) return false;
+
+  // The first child starts at the same byte offset as its parent.
+  StackEntry first_child = {
+    .tree = top.tree.ptr->children[0],
+    .child_index = 0,
+    .byte_offset = top.byte_offset,
+  };
+  array_push(&self->stack, first_child);
+  return true;
+}
+
+// Descend to the first leaf under the current node, then advance past it.
+static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
+  for (;;) {
+    if (!reusable_node_descend(self)) break;
+  }
+  reusable_node_advance(self);
+}
diff --git a/src/tree_sitter/stack.c b/src/tree_sitter/stack.c
new file mode 100644
index 0000000000..3e842c99c3
--- /dev/null
+++ b/src/tree_sitter/stack.c
@@ -0,0 +1,846 @@
+#include "./alloc.h"
+#include "./language.h"
+#include "./subtree.h"
+#include "./array.h"
+#include "./stack.h"
+#include "./length.h"
+#include <assert.h>
+#include <stdio.h>
+
+#define MAX_LINK_COUNT 8 // capacity of StackNode.links; stack_node_add_link ignores links beyond it
+#define MAX_NODE_POOL_SIZE 50 // stack_node_release frees nodes instead of pooling them once the pool holds this many
+#define MAX_ITERATOR_COUNT 64 // stack__iter does not fork additional iterators once this many exist
+
+#ifdef _WIN32
+#define inline __forceinline // NOTE: redefines the `inline` keyword for the rest of this file
+#else
+#define inline static inline __attribute__((always_inline)) // in this branch `inline` also implies `static`
+#endif
+
+typedef struct StackNode StackNode;
+
+typedef struct { // Edge from a StackNode to one of its predecessors.
+ StackNode *node; // the predecessor node
+ Subtree subtree; // subtree attached to the edge; its ptr may be NULL
+ bool is_pending;
+} StackLink;
+
+struct StackNode { // Reference-counted node of the parse stack graph.
+ TSStateId state;
+ Length position; // predecessor's position plus the total size of the linking subtree (see stack_node_new)
+ StackLink links[MAX_LINK_COUNT];
+ short unsigned int link_count; // number of used entries in `links`
+ uint32_t ref_count; // the node is recycled or freed when this reaches zero
+ unsigned error_cost; // accumulated along links[0] in stack_node_new
+ unsigned node_count; // stack_node_add_link keeps the maximum over all links
+ int dynamic_precedence; // stack_node_add_link keeps the maximum over all links
+};
+
+typedef struct { // State of one path being walked by stack__iter.
+ StackNode *node; // node the path has currently reached
+ SubtreeArray subtrees; // subtrees collected along the path so far
+ uint32_t subtree_count; // incremented for links with no subtree or a non-extra subtree
+ bool is_pending; // cleared once the path crosses a non-extra link that is not pending
+} StackIterator;
+
+typedef struct { // Pairs a StackIterateCallback with the payload pointer to pass to it.
+ void *payload;
+ StackIterateCallback callback;
+} StackIterateSession;
+
+typedef Array(StackNode *) StackNodeArray;
+
+typedef enum { // Status of a stack head (version); new versions start out Active.
+ StackStatusActive,
+ StackStatusPaused,
+ StackStatusHalted,
+} StackStatus;
+
+typedef struct { // One version of the stack: a reference to its top node plus per-version bookkeeping.
+ StackNode *node; // top node; the head holds a reference to it
+ Subtree last_external_token; // retained by the head when non-null
+ StackSummary *summary; // owned by the head; freed in stack_head_delete
+ unsigned node_count_at_last_error;
+ TSSymbol lookahead_when_paused; // 0 for newly added versions
+ StackStatus status;
+} StackHead;
+
+struct Stack { // The parse stack: a set of heads (versions) over a shared graph of StackNodes.
+ Array(StackHead) heads; // indexed by StackVersion
+ StackSliceArray slices; // scratch result buffer, cleared at the start of stack__iter
+ Array(StackIterator) iterators; // scratch buffer, cleared at the start of stack__iter
+ StackNodeArray node_pool; // released nodes kept for reuse, up to MAX_NODE_POOL_SIZE
+ StackNode *base_node;
+ SubtreePool *subtree_pool;
+};
+
+typedef unsigned StackAction; // bit set returned by a StackCallback; stack__iter tests each flag with `&`
+enum {
+ StackActionNone,
+ StackActionStop = 1, // stop following this path
+ StackActionPop = 2, // emit a slice for this path
+};
+
+typedef StackAction (*StackCallback)(void *, const StackIterator *);
+
+// Take an additional reference to `self`. NULL is accepted and ignored.
+static void stack_node_retain(StackNode *self) {
+  if (self == NULL) return;
+
+  assert(self->ref_count > 0);
+  self->ref_count += 1;
+  // Catch the counter wrapping around to zero.
+  assert(self->ref_count != 0);
+}
+
+static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) { /*
+ * Drop one reference to `self`. When the count reaches zero, the subtrees
+ * on all of its links are released, the node is pushed onto `pool` (or
+ * freed once the pool holds MAX_NODE_POOL_SIZE nodes), and its predecessors
+ * are released in turn: links 1..n-1 by recursive calls, link 0 by looping
+ * back to `recur`.
+ */
+recur:
+ assert(self->ref_count != 0);
+ self->ref_count--;
+ if (self->ref_count > 0) return;
+
+ StackNode *first_predecessor = NULL;
+ if (self->link_count > 0) {
+ for (unsigned i = self->link_count - 1; i > 0; i--) { // links[0] is handled after the loop
+ StackLink link = self->links[i];
+ if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
+ stack_node_release(link.node, pool, subtree_pool);
+ }
+ StackLink link = self->links[0];
+ if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
+ first_predecessor = self->links[0].node;
+ }
+
+ if (pool->size < MAX_NODE_POOL_SIZE) {
+ array_push(pool, self);
+ } else {
+ ts_free(self);
+ }
+
+ if (first_predecessor) {
+ self = first_predecessor;
+ goto recur; // release the first predecessor without a nested call
+ }
+}
+
+// Create a node in `state` with a reference count of one, taking storage
+// from `pool` when it has any and allocating otherwise.
+//
+// With a `previous_node`, the new node gets a single link to it carrying
+// `subtree` and `is_pending`, and inherits its position, error cost, node
+// count and dynamic precedence, each increased by the subtree's contribution
+// when a subtree is present. Without one, the node starts at position zero
+// with no error cost. This function does not retain either argument.
+static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree,
+                                 bool is_pending, TSStateId state, StackNodeArray *pool) {
+  StackNode *node;
+  if (pool->size > 0) {
+    node = array_pop(pool);
+  } else {
+    node = ts_malloc(sizeof(StackNode));
+  }
+  *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state};
+
+  if (!previous_node) {
+    node->position = length_zero();
+    node->error_cost = 0;
+    return node;
+  }
+
+  node->link_count = 1;
+  node->links[0] = (StackLink){
+    .node = previous_node,
+    .subtree = subtree,
+    .is_pending = is_pending,
+  };
+
+  // Start from the predecessor's totals.
+  node->position = previous_node->position;
+  node->error_cost = previous_node->error_cost;
+  node->dynamic_precedence = previous_node->dynamic_precedence;
+  node->node_count = previous_node->node_count;
+
+  // Then add what the linking subtree contributes.
+  if (subtree.ptr) {
+    node->error_cost += ts_subtree_error_cost(subtree);
+    node->position = length_add(node->position, ts_subtree_total_size(subtree));
+    node->node_count += ts_subtree_node_count(subtree);
+    node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
+  }
+
+  return node;
+}
+
+// Decide whether two subtrees can be treated as the same link payload.
+//
+// Identical pointers (including two NULLs) are equivalent. Otherwise both
+// must be non-NULL with the same symbol, and either both carry a non-zero
+// error cost, or they agree on padding bytes, size bytes, child count, the
+// `extra` flag and external scanner state.
+static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
+  if (left.ptr == right.ptr) return true;
+  if (!left.ptr || !right.ptr) return false;
+  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;
+
+  if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) {
+    return true;
+  }
+
+  return
+    ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes &&
+    ts_subtree_size(left).bytes == ts_subtree_size(right).bytes &&
+    ts_subtree_child_count(left) == ts_subtree_child_count(right) &&
+    ts_subtree_extra(left) == ts_subtree_extra(right) &&
+    ts_subtree_external_scanner_state_eq(left, right);
+}
+
+static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) { /*
+ * Add `link` as another predecessor edge of `self`, merging where possible.
+ *
+ * - A link back to `self` is ignored.
+ * - If an existing link carries an equivalent subtree and leads to the same
+ *   node, only the subtree with the higher dynamic precedence is kept.
+ * - If it carries an equivalent subtree and leads to a node with the same
+ *   state and byte position, the new node's own links are merged into the
+ *   existing node recursively.
+ * - Otherwise the link is appended, retaining its node and subtree, unless
+ *   `self` already has MAX_LINK_COUNT links, in which case it is dropped.
+ *
+ * `self->node_count` and `self->dynamic_precedence` are raised when the new
+ * path yields a larger value.
+ */
+ if (link.node == self) return;
+
+ for (int i = 0; i < self->link_count; i++) {
+ StackLink *existing_link = &self->links[i];
+ if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
+ // In general, we preserve ambiguities until they are removed from the stack
+ // during a pop operation where multiple paths lead to the same node. But in
+ // the special case where two links directly connect the same pair of nodes,
+ // we can safely remove the ambiguity ahead of time without changing behavior.
+ if (existing_link->node == link.node) {
+ if (
+ ts_subtree_dynamic_precedence(link.subtree) >
+ ts_subtree_dynamic_precedence(existing_link->subtree)
+ ) {
+ ts_subtree_retain(link.subtree);
+ ts_subtree_release(subtree_pool, existing_link->subtree);
+ existing_link->subtree = link.subtree;
+ self->dynamic_precedence =
+ link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
+ }
+ return;
+ }
+
+ // If the previous nodes are mergeable, merge them recursively.
+ if (existing_link->node->state == link.node->state &&
+ existing_link->node->position.bytes == link.node->position.bytes) {
+ for (int j = 0; j < link.node->link_count; j++) {
+ stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
+ }
+ int32_t dynamic_precedence = link.node->dynamic_precedence;
+ if (link.subtree.ptr) {
+ dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
+ }
+ if (dynamic_precedence > self->dynamic_precedence) {
+ self->dynamic_precedence = dynamic_precedence;
+ }
+ return;
+ }
+ }
+ }
+
+ if (self->link_count == MAX_LINK_COUNT) return; // no room left: the link is dropped
+
+ stack_node_retain(link.node);
+ unsigned node_count = link.node->node_count;
+ int dynamic_precedence = link.node->dynamic_precedence;
+ self->links[self->link_count++] = link;
+
+ if (link.subtree.ptr) {
+ ts_subtree_retain(link.subtree);
+ node_count += ts_subtree_node_count(link.subtree);
+ dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
+ }
+
+ if (node_count > self->node_count) self->node_count = node_count;
+ if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
+}
+
+// Release everything a head owns: its last external token, its summary and
+// its reference to the top node. A head without a node is left untouched.
+static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
+  if (!self->node) return;
+
+  if (self->last_external_token.ptr) {
+    ts_subtree_release(subtree_pool, self->last_external_token);
+  }
+
+  if (self->summary) {
+    // Free the array's contents first, then the array object itself.
+    array_delete(self->summary);
+    ts_free(self->summary);
+  }
+
+  stack_node_release(self->node, pool, subtree_pool);
+}
+
+static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
+ StackNode *node) {
+ StackHead head = {
+ .node = node,
+ .node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error,
+ .last_external_token = self->heads.contents[original_version].last_external_token,
+ .status = StackStatusActive,
+ .lookahead_when_paused = 0,
+ };
+ array_push(&self->heads, head);
+ stack_node_retain(node);
+ if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token);
+ return (StackVersion)(self->heads.size - 1);
+}
+
+// Record `subtrees` as a slice ending at `node`.
+//
+// The existing slices are searched from last to first for one whose version
+// has `node` as its head. If one is found, the new slice reuses that version
+// and is inserted directly after it. Otherwise a new version is created for
+// `node` and the slice is appended.
+static void ts_stack__add_slice(Stack *self, StackVersion original_version,
+                                StackNode *node, SubtreeArray *subtrees) {
+  // `after` is one past the slice being examined, which keeps the unsigned
+  // counter from wrapping below zero.
+  for (uint32_t after = self->slices.size; after > 0; after--) {
+    StackVersion candidate = self->slices.contents[after - 1].version;
+    if (self->heads.contents[candidate].node != node) continue;
+
+    StackSlice matching_slice = {*subtrees, candidate};
+    array_insert(&self->slices, after, matching_slice);
+    return;
+  }
+
+  StackVersion new_version = ts_stack__add_version(self, original_version, node);
+  StackSlice new_slice = {*subtrees, new_version};
+  array_push(&self->slices, new_slice);
+}
+
+// Walk the stack graph downward from the head of `version`, following every
+// link of every node, and invoke `callback` once per visited (iterator, node)
+// pair. The callback's return value decides whether the path walked so far is
+// recorded as a slice (StackActionPop) and whether the walk along that path
+// ends (StackActionStop). A path also ends when it reaches a node without links.
+//
+// When `goal_subtree_count` is non-negative, the subtrees along each path are
+// collected (and retained) so that popped slices carry them; a negative value
+// disables collection. The result is the stack's `slices` array, which is
+// cleared at the start of every call.
+inline StackSliceArray stack__iter(Stack *self, StackVersion version,
+ StackCallback callback, void *payload,
+ int goal_subtree_count) {
+ array_clear(&self->slices);
+ array_clear(&self->iterators);
+
+ // Seed the walk with a single iterator positioned at the head's node.
+ StackHead *head = array_get(&self->heads, version);
+ StackIterator iterator = {
+ .node = head->node,
+ .subtrees = array_new(),
+ .subtree_count = 0,
+ .is_pending = true,
+ };
+
+ bool include_subtrees = false;
+ if (goal_subtree_count >= 0) {
+ include_subtrees = true;
+ array_reserve(&iterator.subtrees, goal_subtree_count);
+ }
+
+ array_push(&self->iterators, iterator);
+
+ while (self->iterators.size > 0) {
+ // `size` is captured up front: iterators forked during this pass are
+ // appended past it and are first visited on the next pass.
+ for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
+ StackIterator *iterator = &self->iterators.contents[i];
+ StackNode *node = iterator->node;
+
+ StackAction action = callback(payload, iterator);
+ bool should_pop = action & StackActionPop;
+ bool should_stop = action & StackActionStop || node->link_count == 0;
+
+ if (should_pop) {
+ SubtreeArray subtrees = iterator->subtrees;
+ // If the iterator keeps going it still needs its own array, so the
+ // slice gets a copy; otherwise the slice takes the array over.
+ if (!should_stop)
+ ts_subtree_array_copy(subtrees, &subtrees);
+ ts_subtree_array_reverse(&subtrees);
+ ts_stack__add_slice(
+ self,
+ version,
+ node,
+ &subtrees
+ );
+ }
+
+ if (should_stop) {
+ // The subtrees are only deleted here when they were not handed to a slice.
+ if (!should_pop)
+ ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
+ array_erase(&self->iterators, i);
+ // Compensate for the erased element so the next iterator is not skipped.
+ i--, size--;
+ continue;
+ }
+
+ // Links 1..link_count-1 each fork a copy of the current iterator; the
+ // final pass (j == link_count) advances the current iterator itself
+ // along link 0, after all forks have copied its state.
+ for (uint32_t j = 1; j <= node->link_count; j++) {
+ StackIterator *next_iterator;
+ StackLink link;
+ if (j == node->link_count) {
+ link = node->links[0];
+ next_iterator = &self->iterators.contents[i];
+ } else {
+ // Once the iterator limit is reached, additional links are not followed.
+ if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
+ link = node->links[j];
+ StackIterator current_iterator = self->iterators.contents[i];
+ array_push(&self->iterators, current_iterator);
+ next_iterator = array_back(&self->iterators);
+ ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
+ }
+
+ next_iterator->node = link.node;
+ if (link.subtree.ptr) {
+ if (include_subtrees) {
+ array_push(&next_iterator->subtrees, link.subtree);
+ ts_subtree_retain(link.subtree);
+ }
+
+ // Extra subtrees are collected but do not count toward subtree_count.
+ if (!ts_subtree_extra(link.subtree)) {
+ next_iterator->subtree_count++;
+ if (!link.is_pending) {
+ next_iterator->is_pending = false;
+ }
+ }
+ } else {
+ // A link without a subtree still counts as one entry and clears
+ // the pending flag.
+ next_iterator->subtree_count++;
+ next_iterator->is_pending = false;
+ }
+ }
+ }
+ }
+
+ return self->slices;
+}
+
+// Allocate a stack that uses `subtree_pool`, pre-size its internal arrays,
+// create the base node, and reset the stack to a single head on that node.
+Stack *ts_stack_new(SubtreePool *subtree_pool) {
+  Stack *stack = ts_calloc(1, sizeof(Stack));
+
+  array_init(&stack->heads);
+  array_init(&stack->slices);
+  array_init(&stack->iterators);
+  array_init(&stack->node_pool);
+
+  array_reserve(&stack->heads, 4);
+  array_reserve(&stack->slices, 4);
+  array_reserve(&stack->iterators, 4);
+  array_reserve(&stack->node_pool, MAX_NODE_POOL_SIZE);
+
+  stack->subtree_pool = subtree_pool;
+  stack->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &stack->node_pool);
+  ts_stack_clear(stack);
+  return stack;
+}
+
+// Destroy the stack: free the scratch arrays, drop the base node and every
+// head, free all nodes cached in the node pool, then free the stack itself.
+void ts_stack_delete(Stack *self) {
+  if (self->slices.contents) {
+    array_delete(&self->slices);
+  }
+  if (self->iterators.contents) {
+    array_delete(&self->iterators);
+  }
+
+  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
+  for (uint32_t version = 0; version < self->heads.size; version++) {
+    StackHead *head = &self->heads.contents[version];
+    stack_head_delete(head, &self->node_pool, self->subtree_pool);
+  }
+  array_clear(&self->heads);
+
+  // Nodes are released into the pool above, so the pool is emptied last.
+  if (self->node_pool.contents) {
+    for (uint32_t n = 0; n < self->node_pool.size; n++) {
+      ts_free(self->node_pool.contents[n]);
+    }
+    array_delete(&self->node_pool);
+  }
+
+  array_delete(&self->heads);
+  ts_free(self);
+}
+
+// Number of heads (versions) currently held by the stack.
+uint32_t ts_stack_version_count(const Stack *self) {
+  uint32_t version_count = self->heads.size;
+  return version_count;
+}
+
+// Parse state stored in the top node of the given version.
+TSStateId ts_stack_state(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->node->state;
+}
+
+// Position stored in the top node of the given version.
+Length ts_stack_position(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->node->position;
+}
+
+// Last external token recorded on the given version (not retained for the caller).
+Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->last_external_token;
+}
+
+// Replace the last external token of a version. The new token is retained
+// before the previous one is released, so passing the token that is already
+// stored is safe.
+void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
+  StackHead *head = array_get(&self->heads, version);
+  Subtree previous = head->last_external_token;
+
+  if (token.ptr) {
+    ts_subtree_retain(token);
+  }
+  if (previous.ptr) {
+    ts_subtree_release(self->subtree_pool, previous);
+  }
+  head->last_external_token = token;
+}
+
+unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
+ StackHead *head = array_get(&self->heads, version);
+ unsigned result = head->node->error_cost;
+ if (
+ head->status == StackStatusPaused ||
+ (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) {
+ result += ERROR_COST_PER_RECOVERY;
+ }
+ return result;
+}
+
+unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
+ StackHead *head = array_get(&self->heads, version);
+ if (head->node->node_count < head->node_count_at_last_error) {
+ head->node_count_at_last_error = head->node->node_count;
+ }
+ return head->node->node_count - head->node_count_at_last_error;
+}
+
+// Push a new node holding `subtree` and `state` on top of the given version.
+// Pushing a null subtree also resets the version's error baseline to the new
+// node's node count.
+void ts_stack_push(Stack *self, StackVersion version, Subtree subtree,
+                   bool pending, TSStateId state) {
+  StackHead *head = array_get(&self->heads, version);
+  StackNode *pushed = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
+
+  if (!subtree.ptr) {
+    head->node_count_at_last_error = pushed->node_count;
+  }
+  head->node = pushed;
+}
+
+// Adapter used by ts_stack_iterate: forwards each visited node's state and
+// the iterator's subtree count to the user callback, and never pops or stops.
+inline StackAction iterate_callback(void *payload, const StackIterator *iterator) {
+  StackIterateSession *session = payload;
+  const StackNode *node = iterator->node;
+  session->callback(session->payload, node->state, iterator->subtree_count);
+  return StackActionNone;
+}
+
+// Visit every node reachable from the given version, invoking `callback`
+// with `payload` for each one. Subtrees are not collected during the walk.
+void ts_stack_iterate(Stack *self, StackVersion version,
+                      StackIterateCallback callback, void *payload) {
+  StackIterateSession session = {payload, callback};
+  const int no_subtrees = -1;
+  stack__iter(self, version, iterate_callback, &session, no_subtrees);
+}
+
+// Pop and stop once a path has accumulated exactly the requested number of
+// subtrees; keep walking otherwise. `payload` points at the goal count.
+inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
+  const unsigned *goal_subtree_count = payload;
+  bool reached_goal = iterator->subtree_count == *goal_subtree_count;
+  return reached_goal ? (StackActionPop | StackActionStop) : StackActionNone;
+}
+
+// Pop `count` entries from the given version, returning one slice per path.
+StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
+  StackSliceArray slices = stack__iter(self, version, pop_count_callback, &count, count);
+  return slices;
+}
+
+// Keep walking until a path holds at least one counted subtree; then stop,
+// popping only if the path is still marked pending.
+inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
+  if (iterator->subtree_count < 1) return StackActionNone;
+  if (!iterator->is_pending) return StackActionStop;
+  return StackActionPop | StackActionStop;
+}
+
+// Pop a pending subtree from the top of the given version. When something was
+// popped, the first resulting version is renumbered to take the place of
+// `version`, and the returned slice is updated to reflect that.
+StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
+  StackSliceArray slices = stack__iter(self, version, pop_pending_callback, NULL, 0);
+  if (slices.size == 0) return slices;
+
+  ts_stack_renumber_version(self, slices.contents[0].version, version);
+  slices.contents[0].version = version;
+  return slices;
+}
+
+// Walk until a path has collected at least one subtree, then stop. The path
+// is popped only if no error has been found yet (tracked through the bool that
+// `payload` points to) and its first collected subtree is an error.
+inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
+  if (iterator->subtrees.size == 0) return StackActionNone;
+
+  bool *found_error = payload;
+  if (*found_error || !ts_subtree_is_error(iterator->subtrees.contents[0])) {
+    return StackActionStop;
+  }
+
+  *found_error = true;
+  return StackActionPop | StackActionStop;
+}
+
+SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
+ StackNode *node = array_get(&self->heads, version)->node;
+ for (unsigned i = 0; i < node->link_count; i++) {
+ if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) {
+ bool found_error = false;
+ StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
+ if (pop.size > 0) {
+ assert(pop.size == 1);
+ ts_stack_renumber_version(self, pop.contents[0].version, version);
+ return pop.contents[0].subtrees;
+ }
+ break;
+ }
+ }
+ return (SubtreeArray){.size = 0};
+}
+
+// Pop only when the walk reaches a node that has no links.
+inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
+  if (iterator->node->link_count == 0) {
+    return StackActionPop;
+  }
+  return StackActionNone;
+}
+
+// Pop every entry of the given version, down to nodes that have no links.
+StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
+  StackSliceArray slices = stack__iter(self, version, pop_all_callback, NULL, 0);
+  return slices;
+}
+
+// State shared between ts_stack_record_summary and summarize_stack_callback.
+typedef struct {
+ // Summary being filled in; the callback appends entries to it.
+ StackSummary *summary;
+ // Paths are abandoned once their subtree count exceeds this depth.
+ unsigned max_depth;
+} SummarizeStackSession;
+
+// Record the (position, depth, state) of each visited node in the session's
+// summary, skipping (depth, state) pairs that are already present and
+// stopping a path once it is deeper than the session's max_depth.
+inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
+  SummarizeStackSession *session = payload;
+  StackSummary *summary = session->summary;
+  TSStateId state = iterator->node->state;
+  unsigned depth = iterator->subtree_count;
+
+  if (depth > session->max_depth) return StackActionStop;
+
+  // Scan backwards; the scan ends at the first entry shallower than `depth`.
+  for (unsigned remaining = summary->size; remaining > 0; remaining--) {
+    StackSummaryEntry existing = summary->contents[remaining - 1];
+    if (existing.depth < depth) break;
+    if (existing.depth == depth && existing.state == state) return StackActionNone;
+  }
+
+  StackSummaryEntry new_entry = {
+    .position = iterator->node->position,
+    .depth = depth,
+    .state = state,
+  };
+  array_push(summary, new_entry);
+  return StackActionNone;
+}
+
+// Build a summary of the states reachable within `max_depth` entries of the
+// top of the given version and store it on that version's head, where
+// ts_stack_get_summary can retrieve it.
+//
+// Any summary previously recorded for this version is freed first; without
+// that, recording a summary twice for the same version would leak the old one.
+void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
+  SummarizeStackSession session = {
+    .summary = ts_malloc(sizeof(StackSummary)),
+    .max_depth = max_depth
+  };
+  array_init(session.summary);
+  stack__iter(self, version, summarize_stack_callback, &session, -1);
+
+  StackHead *head = &self->heads.contents[version];
+  if (head->summary) {
+    // Same teardown that stack_head_delete applies to a head's summary.
+    array_delete(head->summary);
+    ts_free(head->summary);
+  }
+  head->summary = session.summary;
+}
+
+// Summary stored on the given version's head (as set by
+// ts_stack_record_summary); the head keeps ownership.
+StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  return head->summary;
+}
+
+// Dynamic precedence stored in the top node of the given version.
+int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  return head->node->dynamic_precedence;
+}
+
+// Report whether the given version has consumed any bytes since its last
+// error. A version whose top node has no error cost trivially qualifies.
+// Otherwise the first link of each node is followed downward: a subtree with
+// non-zero total bytes means progress; an empty, error-free subtree on a node
+// newer than the error baseline is skipped over; anything else ends the search.
+bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  const StackNode *node = head->node;
+  if (node->error_cost == 0) return true;
+
+  for (; node; node = node->links[0].node) {
+    if (node->link_count == 0) break;
+
+    Subtree subtree = node->links[0].subtree;
+    if (!subtree.ptr) break;
+    if (ts_subtree_total_bytes(subtree) > 0) return true;
+
+    // Only keep descending through empty, error-free subtrees that were
+    // pushed after the last error.
+    if (node->node_count <= head->node_count_at_last_error) break;
+    if (ts_subtree_error_cost(subtree) != 0) break;
+  }
+
+  return false;
+}
+
+// Release the resources of the given version's head and remove the head
+// from the stack.
+void ts_stack_remove_version(Stack *self, StackVersion version) {
+  StackHead *doomed = array_get(&self->heads, version);
+  stack_head_delete(doomed, &self->node_pool, self->subtree_pool);
+  array_erase(&self->heads, version);
+}
+
+// Move version `v1` into slot `v2` (which must be the lower index), discarding
+// what `v2` held before. If `v2` has a summary and `v1` does not, the summary
+// is carried over to the moved head instead of being freed.
+void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
+  if (v1 == v2) return;
+  assert(v2 < v1);
+  assert((uint32_t)v1 < self->heads.size);
+
+  StackHead *moving = &self->heads.contents[v1];
+  StackHead *replaced = &self->heads.contents[v2];
+
+  bool inherit_summary = replaced->summary && !moving->summary;
+  if (inherit_summary) {
+    moving->summary = replaced->summary;
+    replaced->summary = NULL;
+  }
+
+  stack_head_delete(replaced, &self->node_pool, self->subtree_pool);
+  *replaced = *moving;
+  array_erase(&self->heads, v1);
+}
+
+// Exchange the heads stored at indices `v1` and `v2`.
+void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
+  StackHead *heads = self->heads.contents;
+  StackHead saved = heads[v1];
+  heads[v1] = heads[v2];
+  heads[v2] = saved;
+}
+
+// Append a duplicate of the given version and return its index. The copy
+// takes its own references to the node and last external token, and starts
+// without a summary.
+StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
+  assert(version < self->heads.size);
+
+  StackHead duplicate = self->heads.contents[version];
+  duplicate.summary = NULL;
+  array_push(&self->heads, duplicate);
+
+  stack_node_retain(duplicate.node);
+  if (duplicate.last_external_token.ptr) {
+    ts_subtree_retain(duplicate.last_external_token);
+  }
+  return self->heads.size - 1;
+}
+
+// Merge `version2` into `version1` when ts_stack_can_merge allows it: every
+// link of version2's top node is added to version1's top node and version2
+// is removed. Returns false, changing nothing, when the versions cannot merge.
+bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
+  if (!ts_stack_can_merge(self, version1, version2)) return false;
+
+  StackHead *kept = &self->heads.contents[version1];
+  StackHead *absorbed = &self->heads.contents[version2];
+
+  uint32_t i = 0;
+  while (i < absorbed->node->link_count) {
+    stack_node_add_link(kept->node, absorbed->node->links[i], self->subtree_pool);
+    i++;
+  }
+
+  if (kept->node->state == ERROR_STATE) {
+    kept->node_count_at_last_error = kept->node->node_count;
+  }
+
+  ts_stack_remove_version(self, version2);
+  return true;
+}
+
+// Two versions can merge when both are active and their top nodes agree on
+// parse state, byte position and error cost, and their last external tokens
+// have equal scanner state.
+bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
+  const StackHead *a = &self->heads.contents[version1];
+  const StackHead *b = &self->heads.contents[version2];
+
+  if (a->status != StackStatusActive) return false;
+  if (b->status != StackStatusActive) return false;
+  if (a->node->state != b->node->state) return false;
+  if (a->node->position.bytes != b->node->position.bytes) return false;
+  if (a->node->error_cost != b->node->error_cost) return false;
+
+  return ts_subtree_external_scanner_state_eq(a->last_external_token, b->last_external_token);
+}
+
+// Mark the given version as halted.
+void ts_stack_halt(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  head->status = StackStatusHalted;
+}
+
+// Pause the given version, remembering the lookahead symbol it was paused on
+// and resetting its error baseline to the current node count.
+void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) {
+  StackHead *head = array_get(&self->heads, version);
+  head->node_count_at_last_error = head->node->node_count;
+  head->lookahead_when_paused = lookahead;
+  head->status = StackStatusPaused;
+}
+
+// True when the given version's status is StackStatusActive.
+bool ts_stack_is_active(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->status == StackStatusActive;
+}
+
+// True when the given version's status is StackStatusHalted.
+bool ts_stack_is_halted(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->status == StackStatusHalted;
+}
+
+// True when the given version's status is StackStatusPaused.
+bool ts_stack_is_paused(const Stack *self, StackVersion version) {
+  const StackHead *head = array_get(&self->heads, version);
+  return head->status == StackStatusPaused;
+}
+
+// Reactivate a paused version and return the lookahead symbol that was stored
+// when it was paused. The stored lookahead is reset to 0.
+TSSymbol ts_stack_resume(Stack *self, StackVersion version) {
+  StackHead *head = array_get(&self->heads, version);
+  assert(head->status == StackStatusPaused);
+
+  TSSymbol saved_lookahead = head->lookahead_when_paused;
+  head->lookahead_when_paused = 0;
+  head->status = StackStatusActive;
+  return saved_lookahead;
+}
+
+// Reset the stack to a single active version whose top node is the base node.
+// The base node is retained before the old heads are released, and that
+// reference is the one held by the new head.
+void ts_stack_clear(Stack *self) {
+  stack_node_retain(self->base_node);
+
+  for (uint32_t version = 0; version < self->heads.size; version++) {
+    StackHead *head = &self->heads.contents[version];
+    stack_head_delete(head, &self->node_pool, self->subtree_pool);
+  }
+  array_clear(&self->heads);
+
+  StackHead initial_head = {
+    .node = self->base_node,
+    .last_external_token = NULL_SUBTREE,
+    .status = StackStatusActive,
+    .lookahead_when_paused = 0,
+  };
+  array_push(&self->heads, initial_head);
+}
+
+// Write a Graphviz DOT description of the whole stack graph to `f` (stderr
+// when `f` is NULL). Every non-halted head is emitted as an invisible node
+// pointing at its top stack node; every reachable stack node is then emitted
+// once, together with its outgoing links. Allocation recording is switched
+// off for the duration of the call and restored afterwards. Always returns
+// true.
+//
+// Format arguments are cast to the exact types their conversion specifiers
+// require: pointers to `void *` for %p, scanner-state bytes to an unsigned
+// value for %X (a plain `char` could be negative and print sign-extended),
+// and the signed dynamic precedence is printed with %d.
+bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
+  array_reserve(&self->iterators, 32);
+  bool was_recording_allocations = ts_toggle_allocation_recording(false);
+  if (!f) f = stderr;
+
+  fprintf(f, "digraph stack {\n");
+  fprintf(f, "rankdir=\"RL\";\n");
+  fprintf(f, "edge [arrowhead=none]\n");
+
+  Array(StackNode *) visited_nodes = array_new();
+
+  // Emit one invisible node per live head and seed an iterator at its top node.
+  array_clear(&self->iterators);
+  for (uint32_t i = 0; i < self->heads.size; i++) {
+    StackHead *head = &self->heads.contents[i];
+    if (head->status == StackStatusHalted) continue;
+
+    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
+    fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);
+
+    if (head->status == StackStatusPaused) {
+      fprintf(f, "color=red ");
+    }
+    fprintf(f,
+      "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
+      i,
+      ts_stack_node_count_since_error(self, i),
+      ts_stack_error_cost(self, i)
+    );
+
+    if (head->last_external_token.ptr) {
+      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
+      const char *data = ts_external_scanner_state_data(state);
+      fprintf(f, "\nexternal_scanner_state:");
+      for (uint32_t j = 0; j < state->length; j++) {
+        fprintf(f, " %2X", (unsigned)(unsigned char)data[j]);
+      }
+    }
+
+    fprintf(f, "\"]\n");
+    array_push(&self->iterators, ((StackIterator){.node = head->node }));
+  }
+
+  // Repeat passes over the iterators until a pass visits no new node.
+  bool all_iterators_done = false;
+  while (!all_iterators_done) {
+    all_iterators_done = true;
+
+    for (uint32_t i = 0; i < self->iterators.size; i++) {
+      // Work on a copy: pushing new iterators below may reallocate the array.
+      StackIterator iterator = self->iterators.contents[i];
+      StackNode *node = iterator.node;
+
+      for (uint32_t j = 0; j < visited_nodes.size; j++) {
+        if (visited_nodes.contents[j] == node) {
+          node = NULL;
+          break;
+        }
+      }
+
+      if (!node) continue;
+      all_iterators_done = false;
+
+      fprintf(f, "node_%p [", (void *)node);
+      if (node->state == ERROR_STATE) {
+        fprintf(f, "label=\"?\"");
+      } else if (
+        node->link_count == 1 &&
+        node->links[0].subtree.ptr &&
+        ts_subtree_extra(node->links[0].subtree)
+      ) {
+        fprintf(f, "shape=point margin=0 label=\"\"");
+      } else {
+        fprintf(f, "label=\"%d\"", node->state);
+      }
+
+      fprintf(
+        f,
+        " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
+        node->position.extent.row + 1,
+        node->position.extent.column,
+        node->node_count,
+        node->error_cost,
+        node->dynamic_precedence
+      );
+
+      for (int j = 0; j < node->link_count; j++) {
+        StackLink link = node->links[j];
+        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
+        if (link.is_pending) fprintf(f, "style=dashed ");
+        if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");
+
+        if (!link.subtree.ptr) {
+          fprintf(f, "color=red");
+        } else {
+          fprintf(f, "label=\"");
+          bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
+          if (quoted) fprintf(f, "'");
+          const char *name = ts_language_symbol_name(language, ts_subtree_symbol(link.subtree));
+          // Escape double quotes and backslashes inside the DOT string.
+          for (const char *c = name; *c; c++) {
+            if (*c == '\"' || *c == '\\') fprintf(f, "\\");
+            fprintf(f, "%c", *c);
+          }
+          if (quoted) fprintf(f, "'");
+          fprintf(f, "\"");
+          fprintf(
+            f,
+            "labeltooltip=\"error_cost: %u\ndynamic_precedence: %d\"",
+            ts_subtree_error_cost(link.subtree),
+            (int)ts_subtree_dynamic_precedence(link.subtree)
+          );
+        }
+
+        fprintf(f, "];\n");
+
+        // The first link reuses the current iterator slot; every further
+        // link gets a new iterator appended to the list.
+        StackIterator *next_iterator;
+        if (j == 0) {
+          next_iterator = &self->iterators.contents[i];
+        } else {
+          array_push(&self->iterators, iterator);
+          next_iterator = array_back(&self->iterators);
+        }
+        next_iterator->node = link.node;
+      }
+
+      array_push(&visited_nodes, node);
+    }
+  }
+
+  fprintf(f, "}\n");
+
+  array_delete(&visited_nodes);
+  ts_toggle_allocation_recording(was_recording_allocations);
+  return true;
+}
+
+#undef inline
diff --git a/src/tree_sitter/stack.h b/src/tree_sitter/stack.h
new file mode 100644
index 0000000000..ec7a69d2b4
--- /dev/null
+++ b/src/tree_sitter/stack.h
@@ -0,0 +1,135 @@
+#ifndef TREE_SITTER_PARSE_STACK_H_
+#define TREE_SITTER_PARSE_STACK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./array.h"
+#include "./subtree.h"
+#include "./error_costs.h"
+#include <stdio.h>
+
+typedef struct Stack Stack;
+
+typedef unsigned StackVersion;
+#define STACK_VERSION_NONE ((StackVersion)-1)
+
+// One result of a pop operation: the subtrees that were removed, and the
+// stack version that remains after removing them.
+typedef struct {
+ SubtreeArray subtrees;
+ StackVersion version;
+} StackSlice;
+typedef Array(StackSlice) StackSliceArray;
+
+// One entry of a stack summary (see ts_stack_record_summary): the position
+// and parse state of a stack node, and its depth below the top of the stack.
+typedef struct {
+ Length position;
+ unsigned depth;
+ TSStateId state;
+} StackSummaryEntry;
+typedef Array(StackSummaryEntry) StackSummary;
+
+// Create a stack.
+Stack *ts_stack_new(SubtreePool *);
+
+// Release the memory reserved for a given stack.
+void ts_stack_delete(Stack *);
+
+// Get the stack's current number of versions.
+uint32_t ts_stack_version_count(const Stack *);
+
+// Get the state at the top of the given version of the stack. If the stack is
+// empty, this returns the initial state, 0.
+TSStateId ts_stack_state(const Stack *, StackVersion);
+
+// Get the last external token associated with a given version of the stack.
+Subtree ts_stack_last_external_token(const Stack *, StackVersion);
+
+// Set the last external token associated with a given version of the stack.
+void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
+
+// Get the position of the given version of the stack within the document.
+Length ts_stack_position(const Stack *, StackVersion);
+
+// Push a tree and state onto the given version of the stack.
+//
+// This transfers ownership of the tree to the Stack. Callers that
+// need to retain ownership of the tree for their own purposes should
+// first retain the tree.
+void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
+
+// Pop the given number of entries from the given version of the stack. This
+// operation can increase the number of stack versions by revealing multiple
+// versions which had previously been merged. It returns an array that
+// specifies the index of each revealed version and the trees that were
+// removed from that version.
+StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
+
+// Remove an error at the top of the given version of the stack.
+SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
+
+// Remove any pending trees from the top of the given version of the stack.
+StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
+
+// Remove all trees from the given version of the stack.
+StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
+
+// Get the maximum number of tree nodes reachable from this version of the stack
+// since the last error was detected.
+unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
+
+int ts_stack_dynamic_precedence(Stack *, StackVersion);
+
+bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
+
+// Compute a summary of all the parse states near the top of the given
+// version of the stack and store the summary for later retrieval.
+void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
+
+// Retrieve a summary of all the parse states near the top of the
+// given version of the stack.
+StackSummary *ts_stack_get_summary(Stack *, StackVersion);
+
+// Get the total cost of all errors on the given version of the stack.
+unsigned ts_stack_error_cost(const Stack *, StackVersion version);
+
+// Merge the given two stack versions if possible, returning true
+// if they were successfully merged and false otherwise.
+bool ts_stack_merge(Stack *, StackVersion, StackVersion);
+
+// Determine whether the given two stack versions can be merged.
+bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
+
+TSSymbol ts_stack_resume(Stack *, StackVersion);
+
+void ts_stack_pause(Stack *, StackVersion, TSSymbol);
+
+void ts_stack_halt(Stack *, StackVersion);
+
+bool ts_stack_is_active(const Stack *, StackVersion);
+
+bool ts_stack_is_paused(const Stack *, StackVersion);
+
+bool ts_stack_is_halted(const Stack *, StackVersion);
+
+void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
+
+void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
+
+StackVersion ts_stack_copy_version(Stack *, StackVersion);
+
+// Remove the given version from the stack.
+void ts_stack_remove_version(Stack *, StackVersion);
+
+void ts_stack_clear(Stack *);
+
+bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
+
+typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
+
+void ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_PARSE_STACK_H_
diff --git a/src/tree_sitter/subtree.c b/src/tree_sitter/subtree.c
new file mode 100644
index 0000000000..e95733eb46
--- /dev/null
+++ b/src/tree_sitter/subtree.c
@@ -0,0 +1,996 @@
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include "./alloc.h"
+#include "./atomic.h"
+#include "./subtree.h"
+#include "./length.h"
+#include "./language.h"
+#include "./error_costs.h"
+#include <stddef.h>
+
+// Three lengths describing an edit.
+// NOTE(review): presumably the start of the edited range plus its end before
+// and after the edit; the uses are outside this view — confirm.
+typedef struct {
+ Length start;
+ Length old_end;
+ Length new_end;
+} Edit;
+
+// Size limits used in this file:
+// - TS_MAX_INLINE_TREE_LENGTH: exclusive upper bound applied by
+// ts_subtree_can_inline to the padding/size fields of an inline leaf.
+// - TS_MAX_TREE_POOL_SIZE: maximum number of freed trees that
+// ts_subtree_pool_free keeps around for reuse.
+// Builds that define TREE_SITTER_TEST use much smaller limits.
+#ifdef TREE_SITTER_TEST
+
+#define TS_MAX_INLINE_TREE_LENGTH 2
+#define TS_MAX_TREE_POOL_SIZE 0
+
+#else
+
+#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
+#define TS_MAX_TREE_POOL_SIZE 32
+
+#endif
+
+static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}};
+
+// ExternalScannerState
+
+// Initialize `self` with a copy of `length` bytes from `data`. Data that fits
+// in `short_data` is stored in place; longer data is copied into a freshly
+// allocated `long_data` buffer.
+void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
+  self->length = length;
+
+  if (length <= sizeof(self->short_data)) {
+    memcpy(self->short_data, data, length);
+    return;
+  }
+
+  self->long_data = ts_malloc(length);
+  memcpy(self->long_data, data, length);
+}
+
+// Return a copy of `self`. State that uses the heap buffer gets its own
+// newly allocated buffer; short state is copied by value.
+ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
+  ExternalScannerState duplicate = *self;
+  if (self->length <= sizeof(self->short_data)) return duplicate;
+
+  duplicate.long_data = ts_malloc(self->length);
+  memcpy(duplicate.long_data, self->long_data, self->length);
+  return duplicate;
+}
+
+// Free the heap buffer of `self`, if its data is long enough to use one.
+void ts_external_scanner_state_delete(ExternalScannerState *self) {
+  bool uses_heap_buffer = self->length > sizeof(self->short_data);
+  if (!uses_heap_buffer) return;
+  ts_free(self->long_data);
+}
+
+// Pointer to the state's bytes: the heap buffer for long state, the in-place
+// buffer otherwise.
+const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
+  if (self->length <= sizeof(self->short_data)) {
+    return self->short_data;
+  }
+  return self->long_data;
+}
+
+// Two scanner states are equal when they are the same object, or when they
+// have the same length and identical bytes.
+bool ts_external_scanner_state_eq(const ExternalScannerState *a, const ExternalScannerState *b) {
+  if (a == b) return true;
+  if (a->length != b->length) return false;
+
+  const char *a_data = ts_external_scanner_state_data(a);
+  const char *b_data = ts_external_scanner_state_data(b);
+  return memcmp(a_data, b_data, a->length) == 0;
+}
+
+// SubtreeArray
+
+// Copy `self` into `dest`. When `self` has capacity, `dest` receives a new
+// buffer of the same capacity and every copied subtree is retained; otherwise
+// `dest` simply mirrors the fields of `self`. `self` is taken by value, so
+// `dest` may point at the array the argument was read from.
+void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
+  dest->size = self.size;
+  dest->capacity = self.capacity;
+
+  if (self.capacity == 0) {
+    dest->contents = self.contents;
+    return;
+  }
+
+  Subtree *copied = ts_calloc(self.capacity, sizeof(Subtree));
+  memcpy(copied, self.contents, self.size * sizeof(Subtree));
+  dest->contents = copied;
+
+  for (uint32_t i = 0; i < self.size; i++) {
+    ts_subtree_retain(copied[i]);
+  }
+}
+
+// Release every subtree held by the array, then free the array's storage.
+void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
+  uint32_t index = 0;
+  while (index < self->size) {
+    ts_subtree_release(pool, self->contents[index]);
+    index++;
+  }
+  array_delete(self);
+}
+
+// Strip the run of `extra` subtrees from the end of `self` and return them as
+// a new array in their original order. `self` is shrunk accordingly; no
+// reference counts are changed.
+SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
+  SubtreeArray extras = array_new();
+
+  uint32_t remaining = self->size;
+  while (remaining > 0) {
+    Subtree last = self->contents[remaining - 1];
+    if (!ts_subtree_extra(last)) break;
+    array_push(&extras, last);
+    remaining--;
+  }
+
+  self->size = remaining;
+  // Extras were collected back-to-front; restore source order.
+  ts_subtree_array_reverse(&extras);
+  return extras;
+}
+
+// Reverse the order of the array's elements in place.
+void ts_subtree_array_reverse(SubtreeArray *self) {
+  if (self->size < 2) return;
+
+  uint32_t front = 0;
+  uint32_t back = self->size - 1;
+  while (front < back) {
+    Subtree held = self->contents[front];
+    self->contents[front] = self->contents[back];
+    self->contents[back] = held;
+    front++;
+    back--;
+  }
+}
+
+// SubtreePool
+
+// Create a subtree pool whose free list has room reserved for `capacity` trees.
+SubtreePool ts_subtree_pool_new(uint32_t capacity) {
+  SubtreePool pool = {array_new(), array_new()};
+  array_reserve(&pool.free_trees, capacity);
+  return pool;
+}
+
+// Free every tree cached in the pool's free list, then the pool's arrays.
+void ts_subtree_pool_delete(SubtreePool *self) {
+  if (self->free_trees.contents) {
+    unsigned cached = self->free_trees.size;
+    for (unsigned i = 0; i < cached; i++) {
+      ts_free(self->free_trees.contents[i].ptr);
+    }
+    array_delete(&self->free_trees);
+  }
+
+  if (self->tree_stack.contents) {
+    array_delete(&self->tree_stack);
+  }
+}
+
+// Hand out storage for one heap subtree: reuse a cached tree when the free
+// list has one, otherwise allocate a new one.
+static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
+  if (self->free_trees.size == 0) {
+    return ts_malloc(sizeof(SubtreeHeapData));
+  }
+  return array_pop(&self->free_trees).ptr;
+}
+
+// Return a heap subtree's storage to the pool. The tree is cached for reuse
+// when the free list has capacity and holds fewer than TS_MAX_TREE_POOL_SIZE
+// trees; otherwise it is freed immediately.
+static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
+  bool can_cache =
+    self->free_trees.capacity > 0 &&
+    self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE;
+
+  if (!can_cache) {
+    ts_free(tree);
+    return;
+  }
+  array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
+}
+
+// Subtree
+
+// Decide whether a leaf with the given padding, size and lookahead can use
+// the inline (non-heap) representation. Every quantity that
+// ts_subtree_new_leaf copies into the inline fields must be below its limit.
+//
+// `size.bytes` is checked explicitly, mirroring the `padding.bytes` check: it
+// is stored in the inline data as well, and relying on `size.extent.column`
+// alone assumes the byte count never exceeds the column count.
+// NOTE(review): the inline field widths are declared outside this file —
+// confirm TS_MAX_INLINE_TREE_LENGTH is the right bound for size_bytes.
+static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
+  return
+    padding.bytes < TS_MAX_INLINE_TREE_LENGTH &&
+    padding.extent.row < 16 &&
+    padding.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
+    size.bytes < TS_MAX_INLINE_TREE_LENGTH &&
+    size.extent.row == 0 &&
+    size.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
+    lookahead_bytes < 16;
+}
+
+// Create a leaf subtree for `symbol` with the given padding, size, lookahead
+// and parse state. Visibility and named-ness come from the language's symbol
+// metadata. The leaf is stored inline (no allocation) when the symbol fits in
+// 8 bits, it has no external tokens, and ts_subtree_can_inline accepts its
+// dimensions; otherwise it is allocated from `pool` with a reference count of 1.
+Subtree ts_subtree_new_leaf(
+ SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
+ uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
+ bool is_keyword, const TSLanguage *language
+) {
+ TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
+ // Only the end-of-input symbol is marked as extra at creation time.
+ bool extra = symbol == ts_builtin_sym_end;
+
+ bool is_inline = (
+ symbol <= UINT8_MAX &&
+ !has_external_tokens &&
+ ts_subtree_can_inline(padding, size, lookahead_bytes)
+ );
+
+ if (is_inline) {
+ return (Subtree) {{
+ .parse_state = parse_state,
+ .symbol = symbol,
+ .padding_bytes = padding.bytes,
+ .padding_rows = padding.extent.row,
+ .padding_columns = padding.extent.column,
+ .size_bytes = size.bytes,
+ .lookahead_bytes = lookahead_bytes,
+ .visible = metadata.visible,
+ .named = metadata.named,
+ .extra = extra,
+ .has_changes = false,
+ .is_missing = false,
+ .is_keyword = is_keyword,
+ .is_inline = true,
+ }};
+ } else {
+ SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
+ *data = (SubtreeHeapData) {
+ .ref_count = 1,
+ .padding = padding,
+ .size = size,
+ .lookahead_bytes = lookahead_bytes,
+ .error_cost = 0,
+ .child_count = 0,
+ .symbol = symbol,
+ .parse_state = parse_state,
+ .visible = metadata.visible,
+ .named = metadata.named,
+ .extra = extra,
+ .fragile_left = false,
+ .fragile_right = false,
+ .has_changes = false,
+ .has_external_tokens = has_external_tokens,
+ .is_missing = false,
+ .is_keyword = is_keyword,
+ .first_leaf = {.symbol = 0, .parse_state = 0},
+ };
+ return (Subtree) {.ptr = data};
+ }
+}
+
+// Change the symbol of a subtree and refresh its `named` / `visible` flags
+// from the language's metadata for the new symbol.
+//
+// For an inline subtree the new symbol must fit the inline representation.
+// The bound matches the one ts_subtree_new_leaf uses when it decides to inline
+// a leaf (`symbol <= UINT8_MAX`), so UINT8_MAX itself is accepted.
+void ts_subtree_set_symbol(
+  MutableSubtree *self,
+  TSSymbol symbol,
+  const TSLanguage *language
+) {
+  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
+  if (self->data.is_inline) {
+    assert(symbol <= UINT8_MAX);
+    self->data.symbol = symbol;
+    self->data.named = metadata.named;
+    self->data.visible = metadata.visible;
+  } else {
+    self->ptr->symbol = symbol;
+    self->ptr->named = metadata.named;
+    self->ptr->visible = metadata.visible;
+  }
+}
+
+// Create an error leaf (symbol ts_builtin_sym_error) covering `size` bytes
+// after `padding`. The leaf is marked fragile on both sides and records the
+// lookahead character it was created for; `bytes_scanned` is passed on as the
+// leaf's lookahead byte count.
+Subtree ts_subtree_new_error(
+ SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
+ uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
+) {
+ Subtree result = ts_subtree_new_leaf(
+ pool, ts_builtin_sym_error, padding, size, bytes_scanned,
+ parse_state, false, false, language
+ );
+ // NOTE(review): this writes through result.ptr, so it assumes the leaf was
+ // heap-allocated, i.e. that ts_builtin_sym_error never qualifies for the
+ // inline representation — confirm.
+ SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
+ data->fragile_left = true;
+ data->fragile_right = true;
+ data->lookahead_char = lookahead_char;
+ return result;
+}
+
+// Obtain a mutable version of `self`, consuming the caller's reference.
+// Inline subtrees and uniquely referenced heap subtrees are returned as-is.
+// A shared heap subtree is cloned: its children array is duplicated (each
+// child retained), or, for a childless tree with external tokens, its scanner
+// state is copied. The clone has a reference count of 1 and the original is
+// released.
+MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
+  if (self.data.is_inline) return (MutableSubtree) {self.data};
+  if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
+
+  const SubtreeHeapData *original = self.ptr;
+  SubtreeHeapData *clone = ts_subtree_pool_allocate(pool);
+  memcpy(clone, original, sizeof(SubtreeHeapData));
+
+  if (clone->child_count > 0) {
+    clone->children = ts_calloc(original->child_count, sizeof(Subtree));
+    memcpy(clone->children, original->children, clone->child_count * sizeof(Subtree));
+    for (uint32_t child_index = 0; child_index < clone->child_count; child_index++) {
+      ts_subtree_retain(clone->children[child_index]);
+    }
+  } else if (clone->has_external_tokens) {
+    clone->external_scanner_state =
+      ts_external_scanner_state_copy(&original->external_scanner_state);
+  }
+
+  clone->ref_count = 1;
+  ts_subtree_release(pool, self);
+  return (MutableSubtree) {.ptr = clone};
+}
+
+// Perform up to `count` rotations along the chain of first children that
+// starts at `self`, using `stack` as scratch space (it is restored to its
+// initial size before returning).
+//
+// A rotation is applied only while the current tree, its first child and that
+// child's first child are all uniquely referenced, have at least two children,
+// and (for the child and grandchild) are heap-allocated and share the symbol
+// of `self`. Each rotation makes the grandchild the tree's first child and
+// moves the child to be the grandchild's last child, with the grandchild's
+// former last child becoming the child's first child.
+static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language,
+ MutableSubtreeArray *stack) {
+ unsigned initial_stack_size = stack->size;
+
+ MutableSubtree tree = self;
+ TSSymbol symbol = tree.ptr->symbol;
+ for (unsigned i = 0; i < count; i++) {
+ if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
+
+ MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
+ if (
+ child.data.is_inline ||
+ child.ptr->child_count < 2 ||
+ child.ptr->ref_count > 1 ||
+ child.ptr->symbol != symbol
+ ) break;
+
+ MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]);
+ if (
+ grandchild.data.is_inline ||
+ grandchild.ptr->child_count < 2 ||
+ grandchild.ptr->ref_count > 1 ||
+ grandchild.ptr->symbol != symbol
+ ) break;
+
+ // Rotate: tree -> grandchild -> ... -> child.
+ tree.ptr->children[0] = ts_subtree_from_mut(grandchild);
+ child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1];
+ grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
+ array_push(stack, tree);
+ tree = grandchild;
+ }
+
+ // Unwind the rotated trees, re-applying ts_subtree_set_children to each
+ // affected node from the innermost outward.
+ // NOTE(review): presumably this recomputes the data each node derives from
+ // its children — confirm against ts_subtree_set_children.
+ while (stack->size > initial_stack_size) {
+ tree = array_pop(stack);
+ MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
+ MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]);
+ ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language);
+ ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language);
+ ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language);
+ }
+}
+
+// Walks `self` and compresses lopsided repeat chains found in it.
+//
+// The traversal is iterative and uses `pool->tree_stack` as its work stack (the stack
+// is cleared on entry). Only subtrees that have children and a ref_count of exactly 1
+// are visited.
+void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
+ array_clear(&pool->tree_stack);
+
+ if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
+ array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
+ }
+
+ while (pool->tree_stack.size > 0) {
+ MutableSubtree tree = array_pop(&pool->tree_stack);
+
+ if (tree.ptr->repeat_depth > 0) {
+ Subtree child1 = tree.ptr->children[0];
+ Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1];
+ if (
+ ts_subtree_child_count(child1) > 0 &&
+ ts_subtree_child_count(child2) > 0 &&
+ child1.ptr->repeat_depth > child2.ptr->repeat_depth
+ ) {
+ // The first child is deeper than the last: compress with step counts that start at
+ // half the depth difference and are halved after each pass. ts_subtree__compress
+ // borrows the same work stack and restores its size before returning.
+ unsigned n = child1.ptr->repeat_depth - child2.ptr->repeat_depth;
+ for (unsigned i = n / 2; i > 0; i /= 2) {
+ ts_subtree__compress(tree, i, language, &pool->tree_stack);
+ // NOTE(review): `n` is updated here but never read again in this function.
+ n -= i;
+ }
+ }
+ }
+
+ // Queue every unshared child that itself has children.
+ for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
+ Subtree child = tree.ptr->children[i];
+ if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
+ array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
+ }
+ }
+ }
+}
+
+// Repeat depth of `self`; a subtree without children reports a depth of zero.
+static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
+  if (ts_subtree_child_count(self) == 0) return 0;
+  return self.ptr->repeat_depth;
+}
+
+// Installs `children` (an array of `child_count` subtrees) as the children of `self`
+// and recomputes every field of `self` that is derived from them: padding, size,
+// lookahead bytes, error cost, dynamic precedence, node count, visible/named child
+// counts, the external-token flag, fragility, first-leaf info and repeat depth.
+//
+// `self` must be a heap subtree. The array pointer is stored as-is (not copied). If
+// `self` previously had children in a different array, that array is freed; the old
+// children themselves are not released here.
+void ts_subtree_set_children(
+ MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language
+) {
+ assert(!self.data.is_inline);
+
+ if (self.ptr->child_count > 0 && children != self.ptr->children) {
+ ts_free(self.ptr->children);
+ }
+
+ // Reset all derived fields before accumulating over the children.
+ self.ptr->child_count = child_count;
+ self.ptr->children = children;
+ self.ptr->named_child_count = 0;
+ self.ptr->visible_child_count = 0;
+ self.ptr->error_cost = 0;
+ self.ptr->repeat_depth = 0;
+ self.ptr->node_count = 1;
+ self.ptr->has_external_tokens = false;
+ self.ptr->dynamic_precedence = 0;
+
+ // Index into `alias_sequence`; it only advances past children that are not extras.
+ uint32_t non_extra_index = 0;
+ const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
+ uint32_t lookahead_end_byte = 0;
+
+ for (uint32_t i = 0; i < self.ptr->child_count; i++) {
+ Subtree child = self.ptr->children[i];
+
+ // The first child supplies this node's padding and initial size; each later child
+ // adds its total extent (padding plus size) to the size.
+ if (i == 0) {
+ self.ptr->padding = ts_subtree_padding(child);
+ self.ptr->size = ts_subtree_size(child);
+ } else {
+ self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
+ }
+
+ // Track the furthest byte any child looked ahead to, measured from the start of
+ // this node's padding.
+ uint32_t child_lookahead_end_byte =
+ self.ptr->padding.bytes +
+ self.ptr->size.bytes +
+ ts_subtree_lookahead_bytes(child);
+ if (child_lookahead_end_byte > lookahead_end_byte) lookahead_end_byte = child_lookahead_end_byte;
+
+ // The error cost of error-repeat children is not added to this node's cost.
+ if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
+ self.ptr->error_cost += ts_subtree_error_cost(child);
+ }
+
+ self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
+ self.ptr->node_count += ts_subtree_node_count(child);
+
+ // Visibility: an aliased non-extra child counts as visible (and as named when the
+ // alias symbol is named); otherwise the child's own flags decide; a hidden child
+ // with children contributes its own visible/named counts instead.
+ if (alias_sequence && alias_sequence[non_extra_index] != 0 && !ts_subtree_extra(child)) {
+ self.ptr->visible_child_count++;
+ if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
+ self.ptr->named_child_count++;
+ }
+ } else if (ts_subtree_visible(child)) {
+ self.ptr->visible_child_count++;
+ if (ts_subtree_named(child)) self.ptr->named_child_count++;
+ } else if (ts_subtree_child_count(child) > 0) {
+ self.ptr->visible_child_count += child.ptr->visible_child_count;
+ self.ptr->named_child_count += child.ptr->named_child_count;
+ }
+
+ if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
+
+ // An error child makes this node fragile on both sides and clears its parse state.
+ if (ts_subtree_is_error(child)) {
+ self.ptr->fragile_left = self.ptr->fragile_right = true;
+ self.ptr->parse_state = TS_TREE_STATE_NONE;
+ }
+
+ if (!ts_subtree_extra(child)) non_extra_index++;
+ }
+
+ // Convert the absolute lookahead end into a distance past the end of this node.
+ self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
+
+ // Error and error-repeat nodes pay a fixed recovery cost plus costs proportional to
+ // the bytes and rows they span and to the trees they contain.
+ if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) {
+ self.ptr->error_cost +=
+ ERROR_COST_PER_RECOVERY +
+ ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
+ ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
+ for (uint32_t i = 0; i < self.ptr->child_count; i++) {
+ Subtree child = self.ptr->children[i];
+ uint32_t grandchild_count = ts_subtree_child_count(child);
+ // Extras and childless error children add no per-tree cost.
+ if (ts_subtree_extra(child)) continue;
+ if (ts_subtree_is_error(child) && grandchild_count == 0) continue;
+ if (ts_subtree_visible(child)) {
+ self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
+ } else if (grandchild_count > 0) {
+ self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
+ }
+ }
+ }
+
+ if (self.ptr->child_count > 0) {
+ Subtree first_child = self.ptr->children[0];
+ Subtree last_child = self.ptr->children[self.ptr->child_count - 1];
+
+ self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
+ self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
+
+ // Fragility is inherited from the outermost children on each side.
+ if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
+ if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
+
+ // A hidden, unnamed node with at least two children whose first child carries the
+ // same symbol gets a repeat depth one greater than the deeper of its first and
+ // last children.
+ if (
+ self.ptr->child_count >= 2 &&
+ !self.ptr->visible &&
+ !self.ptr->named &&
+ ts_subtree_symbol(first_child) == self.ptr->symbol
+ ) {
+ if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
+ self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
+ } else {
+ self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
+ }
+ }
+ }
+}
+
+// Allocates a parent node for `symbol` from `pool` and attaches `children` to it.
+//
+// Visibility and naming come from the language's metadata for `symbol`. Nodes built
+// for the error or error-repeat symbols start out fragile on both sides. All derived
+// fields are filled in by ts_subtree_set_children, which also takes over the
+// `children->contents` array.
+MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
+                                   SubtreeArray *children, unsigned production_id,
+                                   const TSLanguage *language) {
+  TSSymbolMetadata symbol_info = ts_language_symbol_metadata(language, symbol);
+  bool is_error_symbol =
+    symbol == ts_builtin_sym_error ||
+    symbol == ts_builtin_sym_error_repeat;
+
+  SubtreeHeapData *node = ts_subtree_pool_allocate(pool);
+  *node = (SubtreeHeapData) {
+    .ref_count = 1,
+    .symbol = symbol,
+    .production_id = production_id,
+    .visible = symbol_info.visible,
+    .named = symbol_info.named,
+    .has_changes = false,
+    .fragile_left = is_error_symbol,
+    .fragile_right = is_error_symbol,
+    .is_keyword = false,
+    .node_count = 0,
+    .first_leaf = {.symbol = 0, .parse_state = 0},
+  };
+
+  MutableSubtree parent = {.ptr = node};
+  ts_subtree_set_children(parent, children->contents, children->size, language);
+  return parent;
+}
+
+// Builds an error node (symbol `ts_builtin_sym_error`, production 0) over `children`
+// and sets its `extra` flag to the given value.
+Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
+                                  bool extra, const TSLanguage *language) {
+  MutableSubtree error_node =
+    ts_subtree_new_node(pool, ts_builtin_sym_error, children, 0, language);
+  error_node.ptr->extra = extra;
+  return ts_subtree_from_mut(error_node);
+}
+
+// Creates a zero-sized leaf for `symbol` with the given padding and flags it as
+// missing, whichever representation (inline or heap) the leaf ended up with.
+Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
+                                    const TSLanguage *language) {
+  Subtree leaf = ts_subtree_new_leaf(
+    pool, symbol, padding, length_zero(), 0,
+    0, false, false, language
+  );
+
+  if (!leaf.data.is_inline) {
+    // `Subtree.ptr` is const-qualified; the leaf was just created, so cast to mutate it.
+    ((SubtreeHeapData *)leaf.ptr)->is_missing = true;
+  } else {
+    leaf.data.is_missing = true;
+  }
+
+  return leaf;
+}
+
+// Takes one more reference on `self`. Inline subtrees carry no count and are ignored.
+void ts_subtree_retain(Subtree self) {
+  if (!self.data.is_inline) {
+    assert(self.ptr->ref_count > 0);
+    atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
+    // The count must not have wrapped around to zero.
+    assert(self.ptr->ref_count != 0);
+  }
+}
+
+// Drops one reference to `self`. When the count reaches zero the node is handed back
+// to `pool`, and the same is done (iteratively, via `pool->tree_stack`) for every
+// descendant whose count reaches zero as a result.
+void ts_subtree_release(SubtreePool *pool, Subtree self) {
+  if (self.data.is_inline) return;
+
+  MutableSubtreeArray *pending = &pool->tree_stack;
+  array_clear(pending);
+
+  assert(self.ptr->ref_count > 0);
+  if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) != 0) return;
+  array_push(pending, ts_subtree_to_mut_unsafe(self));
+
+  while (pending->size > 0) {
+    MutableSubtree dead = array_pop(pending);
+    uint32_t dead_child_count = dead.ptr->child_count;
+
+    if (dead_child_count == 0) {
+      // Childless nodes may own external scanner state instead of a child array.
+      if (dead.ptr->has_external_tokens) {
+        ts_external_scanner_state_delete(&dead.ptr->external_scanner_state);
+      }
+    } else {
+      for (uint32_t i = 0; i < dead_child_count; i++) {
+        Subtree child = dead.ptr->children[i];
+        if (child.data.is_inline) continue;
+        assert(child.ptr->ref_count > 0);
+        if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
+          array_push(pending, ts_subtree_to_mut_unsafe(child));
+        }
+      }
+      ts_free(dead.ptr->children);
+    }
+
+    ts_subtree_pool_free(pool, dead.ptr);
+  }
+}
+
+// Structural equality of two subtrees.
+//
+// If either side is inline, the two are compared bytewise. Two null subtrees are equal;
+// a null and a non-null one are not. Heap nodes are compared by symbol, visibility,
+// naming, padding bytes and size bytes; error nodes are then decided by their
+// lookahead character alone, and all other nodes by their child counts and,
+// recursively, their children.
+bool ts_subtree_eq(Subtree self, Subtree other) {
+  if (self.data.is_inline || other.data.is_inline) {
+    return memcmp(&self, &other, sizeof(SubtreeInlineData)) == 0;
+  }
+
+  if (!self.ptr || !other.ptr) return !self.ptr && !other.ptr;
+
+  const SubtreeHeapData *a = self.ptr;
+  const SubtreeHeapData *b = other.ptr;
+
+  if (
+    a->symbol != b->symbol ||
+    a->visible != b->visible ||
+    a->named != b->named ||
+    a->padding.bytes != b->padding.bytes ||
+    a->size.bytes != b->size.bytes
+  ) return false;
+
+  if (a->symbol == ts_builtin_sym_error) return a->lookahead_char == b->lookahead_char;
+  if (a->child_count != b->child_count) return false;
+  if (a->child_count == 0) return true;
+
+  if (
+    a->visible_child_count != b->visible_child_count ||
+    a->named_child_count != b->named_child_count
+  ) return false;
+
+  for (uint32_t i = 0; i < a->child_count; i++) {
+    if (!ts_subtree_eq(a->children[i], b->children[i])) return false;
+  }
+  return true;
+}
+
+// Three-way ordering of two subtrees: by symbol first, then by child count, then by
+// the first pair of children that differ. Returns -1, 0 or 1.
+int ts_subtree_compare(Subtree left, Subtree right) {
+  TSSymbol left_symbol = ts_subtree_symbol(left);
+  TSSymbol right_symbol = ts_subtree_symbol(right);
+  if (left_symbol != right_symbol) return left_symbol < right_symbol ? -1 : 1;
+
+  uint32_t left_count = ts_subtree_child_count(left);
+  uint32_t right_count = ts_subtree_child_count(right);
+  if (left_count != right_count) return left_count < right_count ? -1 : 1;
+
+  for (uint32_t i = 0; i < left_count; i++) {
+    int order = ts_subtree_compare(left.ptr->children[i], right.ptr->children[i]);
+    if (order == -1 || order == 1) return order;
+  }
+  return 0;
+}
+
+// Sets the `has_changes` flag on `*self` in whichever representation it uses.
+static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
+  if (!self->data.is_inline) {
+    self->ptr->has_changes = true;
+    return;
+  }
+  self->data.has_changes = true;
+}
+
+// Applies `edit` to the subtree rooted at `self` and returns the edited root.
+//
+// Every node the edit reaches is made mutable with ts_subtree_make_mut, gets its
+// padding and size adjusted, and is flagged `has_changes`. The traversal is iterative:
+// a local stack holds pairs of (slot containing a subtree, edit expressed relative to
+// the start of that subtree).
+Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool) {
+ typedef struct {
+ Subtree *tree;
+ Edit edit;
+ } StackEntry;
+
+ Array(StackEntry) stack = array_new();
+ array_push(&stack, ((StackEntry) {
+ .tree = &self,
+ .edit = (Edit) {
+ .start = {edit->start_byte, edit->start_point},
+ .old_end = {edit->old_end_byte, edit->old_end_point},
+ .new_end = {edit->new_end_byte, edit->new_end_point},
+ },
+ }));
+
+ while (stack.size) {
+ StackEntry entry = array_pop(&stack);
+ // Shadows the function parameter: from here on `edit` is the entry's local edit.
+ Edit edit = entry.edit;
+ bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
+ bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
+
+ Length size = ts_subtree_size(*entry.tree);
+ Length padding = ts_subtree_padding(*entry.tree);
+ uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
+ uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes;
+ // Skip subtrees whose extent (including lookahead) ends before the edit starts.
+ if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
+
+ // If the edit is entirely within the space before this subtree, then shift this
+ // subtree over according to the edit without changing its size.
+ if (edit.old_end.bytes <= padding.bytes) {
+ padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
+ }
+
+ // If the edit starts in the space before this subtree and extends into this subtree,
+ // shrink the subtree's content to compensate for the change in the space before it.
+ else if (edit.start.bytes < padding.bytes) {
+ size = length_sub(size, length_sub(edit.old_end, padding));
+ padding = edit.new_end;
+ }
+
+ // If the edit is a pure insertion right at the start of the subtree,
+ // shift the subtree over according to the insertion.
+ else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
+ padding = edit.new_end;
+ }
+
+ // If the edit is within this subtree, resize the subtree to reflect the edit.
+ else {
+ uint32_t total_bytes = padding.bytes + size.bytes;
+ if (edit.start.bytes < total_bytes ||
+ (edit.start.bytes == total_bytes && is_pure_insertion)) {
+ size = length_add(
+ length_sub(edit.new_end, padding),
+ length_sub(size, length_sub(edit.old_end, padding))
+ );
+ }
+ }
+
+ MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
+
+ // Inline nodes are updated in place when the new extents still fit the inline
+ // layout; otherwise the node is promoted to a freshly allocated heap node that
+ // carries over the inline node's symbol, parse state and flags.
+ if (result.data.is_inline) {
+ if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
+ result.data.padding_bytes = padding.bytes;
+ result.data.padding_rows = padding.extent.row;
+ result.data.padding_columns = padding.extent.column;
+ result.data.size_bytes = size.bytes;
+ } else {
+ SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
+ data->ref_count = 1;
+ data->padding = padding;
+ data->size = size;
+ data->lookahead_bytes = lookahead_bytes;
+ data->error_cost = 0;
+ data->child_count = 0;
+ data->symbol = result.data.symbol;
+ data->parse_state = result.data.parse_state;
+ data->visible = result.data.visible;
+ data->named = result.data.named;
+ data->extra = result.data.extra;
+ data->fragile_left = false;
+ data->fragile_right = false;
+ data->has_changes = false;
+ data->has_external_tokens = false;
+ data->is_missing = result.data.is_missing;
+ data->is_keyword = result.data.is_keyword;
+ result.ptr = data;
+ }
+ } else {
+ result.ptr->padding = padding;
+ result.ptr->size = size;
+ }
+
+ ts_subtree_set_has_changes(&result);
+ *entry.tree = ts_subtree_from_mut(result);
+
+ // `child_left` is assigned at the top of every iteration before it is read.
+ Length child_left, child_right = length_zero();
+ for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
+ Subtree *child = &result.ptr->children[i];
+ Length child_size = ts_subtree_total_size(*child);
+ child_left = child_right;
+ child_right = length_add(child_left, child_size);
+
+ // If this child ends before the edit, it is not affected.
+ if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
+
+ // If this child starts after the edit, then we're done processing children.
+ if (child_left.bytes > edit.old_end.bytes ||
+ (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
+
+ // Transform edit into the child's coordinate space.
+ Edit child_edit = {
+ .start = length_sub(edit.start, child_left),
+ .old_end = length_sub(edit.old_end, child_left),
+ .new_end = length_sub(edit.new_end, child_left),
+ };
+
+ // Clamp child_edit to the child's bounds.
+ if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
+ if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
+ if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
+ if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
+
+ // Interpret all inserted text as applying to the *first* child that touches the edit.
+ // Subsequent children never have any text inserted into them; they are only
+ // shrunk to compensate for the edit.
+ if (child_right.bytes > edit.start.bytes ||
+ (child_right.bytes == edit.start.bytes && is_pure_insertion)) {
+ edit.new_end = edit.start;
+ }
+
+ // Children that occur before the edit are not reshaped by the edit.
+ else {
+ child_edit.old_end = child_edit.start;
+ child_edit.new_end = child_edit.start;
+ }
+
+ // Queue processing of this child's subtree.
+ array_push(&stack, ((StackEntry) {
+ .tree = child,
+ .edit = child_edit,
+ }));
+ }
+ }
+
+ array_delete(&stack);
+ return self;
+}
+
+// Descends from `tree` to a childless subtree flagged as having external tokens,
+// always taking the right-most child that carries the flag. Returns NULL_SUBTREE when
+// `tree` itself does not carry the flag.
+Subtree ts_subtree_last_external_token(Subtree tree) {
+  if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
+
+  while (tree.ptr->child_count > 0) {
+    // Scan the children from last to first.
+    uint32_t i = tree.ptr->child_count;
+    while (i-- > 0) {
+      Subtree candidate = tree.ptr->children[i];
+      if (ts_subtree_has_external_tokens(candidate)) {
+        tree = candidate;
+        break;
+      }
+    }
+  }
+
+  return tree;
+}
+
+// Formats the code point `c` into `s` (capacity `n`) and returns snprintf's result.
+//
+// 0 is written as EOF and -1 as INVALID; newline, tab and carriage return are written
+// as quoted escape sequences; printable ASCII is written as a quoted character; every
+// other value is written as a decimal number.
+static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
+  switch (c) {
+    case 0:
+      return snprintf(s, n, "EOF");
+    case -1:
+      return snprintf(s, n, "INVALID");
+    case '\n':
+      return snprintf(s, n, "'\\n'");
+    case '\t':
+      return snprintf(s, n, "'\\t'");
+    case '\r':
+      return snprintf(s, n, "'\\r'");
+    default:
+      break;
+  }
+
+  bool is_printable_ascii = 0 < c && c < 128 && isprint(c);
+  if (is_printable_ascii) return snprintf(s, n, "'%c'", c);
+  return snprintf(s, n, "%d", c);
+}
+
+// Writes `string` to `f`, replacing each double quote with \" and each newline with
+// the two characters \n. All other characters are written unchanged.
+static void ts_subtree__write_dot_string(FILE *f, const char *string) {
+  const char *p = string;
+  while (*p != '\0') {
+    switch (*p) {
+      case '"':
+        fputs("\\\"", f);
+        break;
+      case '\n':
+        fputs("\\n", f);
+        break;
+      default:
+        fputc(*p, f);
+        break;
+    }
+    p++;
+  }
+}
+
+// Sentinel passed as the field name of the outermost ts_subtree__write_to_string call.
+// It is recognised by pointer identity, not by its text.
+static const char *ROOT_FIELD = "__ROOT__";
+
+// Writes an S-expression for `self` and returns the number of characters the complete
+// output needs (the sum of the snprintf return values).
+//
+// With `limit == 0` every snprintf call targets `string` with size 0, so nothing is
+// stored and the call only measures. With `limit > 0` the text is appended at a cursor
+// that advances through `string`.
+//
+// NOTE(review): `limit` is passed unchanged to every snprintf and recursive call even
+// though the cursor advances, so it does not bound the space that remains. The caller
+// has to size the buffer from a prior measuring pass.
+//
+// `alias_symbol` / `alias_is_named` override the node's own symbol and naming when
+// `alias_symbol` is non-zero. `field_name` is the field label to print before the
+// node, or ROOT_FIELD for the outermost call.
+static size_t ts_subtree__write_to_string(
+ Subtree self, char *string, size_t limit,
+ const TSLanguage *language, bool include_all,
+ TSSymbol alias_symbol, bool alias_is_named, const char *field_name
+) {
+ if (!self.ptr) return snprintf(string, limit, "(NULL)");
+
+ char *cursor = string;
+ // In measuring mode always write at `string`; otherwise write at the moving cursor.
+ char **writer = (limit > 0) ? &cursor : &string;
+ bool is_root = field_name == ROOT_FIELD;
+ // A node is printed when everything is requested, when it is a missing node, or
+ // when it (or its alias, if any) is named; an unaliased node must also be visible.
+ bool is_visible =
+ include_all ||
+ ts_subtree_missing(self) ||
+ (
+ alias_symbol
+ ? alias_is_named
+ : ts_subtree_visible(self) && ts_subtree_named(self)
+ );
+
+ if (is_visible) {
+ if (!is_root) {
+ cursor += snprintf(*writer, limit, " ");
+ if (field_name) {
+ cursor += snprintf(*writer, limit, "%s: ", field_name);
+ }
+ }
+
+ // Childless error nodes with a non-zero size print the unexpected character.
+ if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
+ cursor += snprintf(*writer, limit, "(UNEXPECTED ");
+ cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
+ } else {
+ TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
+ const char *symbol_name = ts_language_symbol_name(language, symbol);
+ if (ts_subtree_missing(self)) {
+ cursor += snprintf(*writer, limit, "(MISSING ");
+ // Named symbols are printed bare; anonymous ones are quoted.
+ if (alias_is_named || ts_subtree_named(self)) {
+ cursor += snprintf(*writer, limit, "%s", symbol_name);
+ } else {
+ cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
+ }
+ } else {
+ cursor += snprintf(*writer, limit, "(%s", symbol_name);
+ }
+ }
+ } else if (is_root) {
+ // A root that would otherwise be hidden is still printed, as a quoted symbol name.
+ TSSymbol symbol = ts_subtree_symbol(self);
+ const char *symbol_name = ts_language_symbol_name(language, symbol);
+ cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
+ }
+
+ if (ts_subtree_child_count(self)) {
+ const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
+ const TSFieldMapEntry *field_map, *field_map_end;
+ ts_language_field_map(
+ language,
+ self.ptr->production_id,
+ &field_map,
+ &field_map_end
+ );
+
+ // Counts only non-extra children; used to index the alias sequence and field map.
+ uint32_t structural_child_index = 0;
+ for (uint32_t i = 0; i < self.ptr->child_count; i++) {
+ Subtree child = self.ptr->children[i];
+ if (ts_subtree_extra(child)) {
+ // Extras are written without an alias or a field name.
+ cursor += ts_subtree__write_to_string(
+ child, *writer, limit,
+ language, include_all,
+ 0, false, NULL
+ );
+ } else {
+ TSSymbol alias_symbol = alias_sequence
+ ? alias_sequence[structural_child_index]
+ : 0;
+ bool alias_is_named = alias_symbol
+ ? ts_language_symbol_metadata(language, alias_symbol).named
+ : false;
+
+ // A hidden node hands its own field name down to its children, unless the field
+ // map has a non-inherited entry for this child position.
+ const char *child_field_name = is_visible ? NULL : field_name;
+ for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
+ if (!i->inherited && i->child_index == structural_child_index) {
+ child_field_name = language->field_names[i->field_id];
+ break;
+ }
+ }
+
+ cursor += ts_subtree__write_to_string(
+ child, *writer, limit,
+ language, include_all,
+ alias_symbol, alias_is_named, child_field_name
+ );
+ structural_child_index++;
+ }
+ }
+ }
+
+ if (is_visible) cursor += snprintf(*writer, limit, ")");
+
+ return cursor - string;
+}
+
+// Renders `self` as a NUL-terminated S-expression in a buffer obtained from malloc().
+//
+// The tree is walked twice: once with a zero limit to measure the output and once to
+// fill the exactly-sized buffer. `include_all` makes every node visible in the output.
+// Returns NULL if the buffer cannot be allocated.
+char *ts_subtree_string(
+  Subtree self,
+  const TSLanguage *language,
+  bool include_all
+) {
+  // Measuring pass: with a limit of 0 nothing is written into the scratch buffer.
+  char scratch_string[1];
+  size_t size = ts_subtree__write_to_string(
+    self, scratch_string, 0,
+    language, include_all,
+    0, false, ROOT_FIELD
+  ) + 1;
+
+  char *result = malloc(size * sizeof(char));
+  // Do not hand a null buffer with a non-zero limit to snprintf.
+  if (!result) return NULL;
+
+  ts_subtree__write_to_string(
+    self, result, size,
+    language, include_all,
+    0, false, ROOT_FIELD
+  );
+  return result;
+}
+
+// Recursively writes the Graphviz node for `*self`, then its children and the edges
+// leading to them, to `f`.
+//
+// Nodes are identified by the address of their `Subtree` slot. `start_offset` is the
+// byte offset at which this subtree (padding included) begins. A non-zero
+// `alias_symbol` replaces the subtree's own symbol in the label.
+void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
+                                 const TSLanguage *language, TSSymbol alias_symbol,
+                                 FILE *f) {
+  TSSymbol subtree_symbol = ts_subtree_symbol(*self);
+  TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
+  uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
+  // `%p` requires a `void *` argument; any other pointer type is undefined behaviour.
+  fprintf(f, "tree_%p [label=\"", (void *)self);
+  ts_subtree__write_dot_string(f, ts_language_symbol_name(language, symbol));
+  fprintf(f, "\"");
+
+  if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
+  if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
+
+  fprintf(f, ", tooltip=\""
+    "range: %u - %u\n"
+    "state: %d\n"
+    "error-cost: %u\n"
+    "has-changes: %u\n"
+    "repeat-depth: %u\n"
+    "lookahead-bytes: %u",
+    start_offset, end_offset,
+    ts_subtree_parse_state(*self),
+    ts_subtree_error_cost(*self),
+    ts_subtree_has_changes(*self),
+    ts_subtree_repeat_depth(*self),
+    ts_subtree_lookahead_bytes(*self)
+  );
+
+  // Childless error nodes also report the character stored in the node.
+  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) {
+    fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
+  }
+
+  fprintf(f, "\"]\n");
+
+  uint32_t child_start_offset = start_offset;
+  // Index into the language's flat alias table for this node's production. Aliases
+  // are only looked up while this offset is non-zero.
+  uint32_t child_info_offset =
+    language->max_alias_sequence_length *
+    ts_subtree_production_id(*self);
+  for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
+    const Subtree *child = &self->ptr->children[i];
+    TSSymbol child_alias_symbol = 0;
+    if (!ts_subtree_extra(*child) && child_info_offset) {
+      child_alias_symbol = language->alias_sequences[child_info_offset];
+      child_info_offset++;
+    }
+    ts_subtree__print_dot_graph(child, child_start_offset, language, child_alias_symbol, f);
+    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i);
+    child_start_offset += ts_subtree_total_bytes(*child);
+  }
+}
+
+// Writes a complete Graphviz `digraph` describing `self` to `f`.
+void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
+  fputs("digraph tree {\n", f);
+  fputs("edge [arrowhead=none]\n", f);
+  ts_subtree__print_dot_graph(&self, 0, language, 0, f);
+  fputs("}\n", f);
+}
+
+// Compares the external scanner states attached to two subtrees. A subtree that is
+// null, has no external tokens, or has children is treated as holding the empty state.
+bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
+  bool self_has_state =
+    self.ptr && ts_subtree_has_external_tokens(self) && self.ptr->child_count == 0;
+  bool other_has_state =
+    other.ptr && ts_subtree_has_external_tokens(other) && other.ptr->child_count == 0;
+
+  const ExternalScannerState *lhs =
+    self_has_state ? &self.ptr->external_scanner_state : &empty_state;
+  const ExternalScannerState *rhs =
+    other_has_state ? &other.ptr->external_scanner_state : &empty_state;
+
+  return ts_external_scanner_state_eq(lhs, rhs);
+}
diff --git a/src/tree_sitter/subtree.h b/src/tree_sitter/subtree.h
new file mode 100644
index 0000000000..79ccd92390
--- /dev/null
+++ b/src/tree_sitter/subtree.h
@@ -0,0 +1,281 @@
+#ifndef TREE_SITTER_SUBTREE_H_
+#define TREE_SITTER_SUBTREE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include "./length.h"
+#include "./array.h"
+#include "./error_costs.h"
+#include "tree_sitter/api.h"
+#include "tree_sitter/parser.h"
+
+// Parse-state value meaning "no state".
+static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
+// The null subtree: a Subtree whose pointer member is NULL.
+#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
+
+typedef union Subtree Subtree;
+typedef union MutableSubtree MutableSubtree;
+
+// Byte buffer holding an external scanner's state, plus its length.
+// NOTE(review): presumably `length` decides whether the bytes live in `short_data` or
+// behind `long_data` — confirm against the helpers in subtree.c.
+typedef struct {
+ union {
+ char *long_data;
+ char short_data[24];
+ };
+ uint32_t length;
+} ExternalScannerState;
+
+// Compact leaf representation stored directly inside the Subtree union rather than
+// behind a pointer. The accessors in this header test `is_inline` to decide which
+// union member to read. An inline node has a size confined to a single row
+// (see ts_subtree_size).
+// NOTE(review): for `is_inline` to discriminate reliably it has to overlap a bit that
+// is always zero in a valid SubtreeHeapData pointer (presumably a low alignment bit on
+// little-endian targets) — confirm for other layouts.
+typedef struct {
+ bool is_inline : 1;
+ bool visible : 1;
+ bool named : 1;
+ bool extra : 1;
+ bool has_changes : 1;
+ bool is_missing : 1;
+ bool is_keyword : 1;
+ uint8_t symbol;
+ uint8_t padding_bytes;
+ uint8_t size_bytes;
+ uint8_t padding_columns;
+ uint8_t padding_rows : 4;
+ uint8_t lookahead_bytes : 4;
+ uint16_t parse_state;
+} SubtreeInlineData;
+
+// Heap-allocated, reference-counted subtree. The common fields come first. The
+// trailing anonymous union holds data specific to the kind of node, and the comment
+// on each member states when that member is the valid one.
+typedef struct {
+ volatile uint32_t ref_count;
+ Length padding;
+ Length size;
+ uint32_t lookahead_bytes;
+ uint32_t error_cost;
+ uint32_t child_count;
+ TSSymbol symbol;
+ TSStateId parse_state;
+
+ bool visible : 1;
+ bool named : 1;
+ bool extra : 1;
+ bool fragile_left : 1;
+ bool fragile_right : 1;
+ bool has_changes : 1;
+ bool has_external_tokens : 1;
+ bool is_missing : 1;
+ bool is_keyword : 1;
+
+ union {
+ // Non-terminal subtrees (`child_count > 0`)
+ struct {
+ Subtree *children;
+ uint32_t visible_child_count;
+ uint32_t named_child_count;
+ uint32_t node_count;
+ uint32_t repeat_depth;
+ int32_t dynamic_precedence;
+ uint16_t production_id;
+ // Symbol and parse state of the first leaf beneath this node.
+ struct {
+ TSSymbol symbol;
+ TSStateId parse_state;
+ } first_leaf;
+ };
+
+ // External terminal subtrees (`child_count == 0 && has_external_tokens`)
+ ExternalScannerState external_scanner_state;
+
+ // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
+ int32_t lookahead_char;
+ };
+} SubtreeHeapData;
+
+// Read-only handle to a subtree: either inline data or a pointer to const heap data.
+union Subtree {
+ SubtreeInlineData data;
+ const SubtreeHeapData *ptr;
+};
+
+// Same layout as Subtree, but the heap data may be modified through `ptr`.
+union MutableSubtree {
+ SubtreeInlineData data;
+ SubtreeHeapData *ptr;
+};
+
+typedef Array(Subtree) SubtreeArray;
+typedef Array(MutableSubtree) MutableSubtreeArray;
+
+// Allocation pool for heap subtrees.
+typedef struct {
+ // NOTE(review): presumably released nodes kept for reuse — confirm in subtree.c.
+ MutableSubtreeArray free_trees;
+ // Scratch work stack used by ts_subtree_release and ts_subtree_balance.
+ MutableSubtreeArray tree_stack;
+} SubtreePool;
+
+// External scanner state helpers.
+void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
+const char *ts_external_scanner_state_data(const ExternalScannerState *);
+
+// SubtreeArray helpers.
+void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
+void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
+SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
+void ts_subtree_array_reverse(SubtreeArray *);
+
+// Subtree pool lifetime.
+SubtreePool ts_subtree_pool_new(uint32_t capacity);
+void ts_subtree_pool_delete(SubtreePool *);
+
+// Subtree construction.
+Subtree ts_subtree_new_leaf(
+ SubtreePool *, TSSymbol, Length, Length, uint32_t,
+ TSStateId, bool, bool, const TSLanguage *
+);
+Subtree ts_subtree_new_error(
+ SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
+);
+MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
+Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *);
+Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
+// Mutation, reference counting, comparison, editing and printing.
+MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
+void ts_subtree_retain(Subtree);
+void ts_subtree_release(SubtreePool *, Subtree);
+bool ts_subtree_eq(Subtree, Subtree);
+int ts_subtree_compare(Subtree, Subtree);
+void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
+void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *);
+void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
+Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
+// The returned string is allocated with malloc().
+char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
+void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
+Subtree ts_subtree_last_external_token(Subtree);
+bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
+
+// Reads field `name` from whichever representation (inline or heap) `tree` uses.
+// The macro only exists for the accessors below and is undefined right after them.
+#define SUBTREE_FIELD(tree, name) \
+  ((tree).data.is_inline ? (tree).data.name : (tree).ptr->name)
+
+static inline TSSymbol ts_subtree_symbol(Subtree self) {
+  return SUBTREE_FIELD(self, symbol);
+}
+
+static inline bool ts_subtree_visible(Subtree self) {
+  return SUBTREE_FIELD(self, visible);
+}
+
+static inline bool ts_subtree_named(Subtree self) {
+  return SUBTREE_FIELD(self, named);
+}
+
+static inline bool ts_subtree_extra(Subtree self) {
+  return SUBTREE_FIELD(self, extra);
+}
+
+static inline bool ts_subtree_has_changes(Subtree self) {
+  return SUBTREE_FIELD(self, has_changes);
+}
+
+static inline bool ts_subtree_missing(Subtree self) {
+  return SUBTREE_FIELD(self, is_missing);
+}
+
+static inline bool ts_subtree_is_keyword(Subtree self) {
+  return SUBTREE_FIELD(self, is_keyword);
+}
+
+static inline TSStateId ts_subtree_parse_state(Subtree self) {
+  return SUBTREE_FIELD(self, parse_state);
+}
+
+static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) {
+  return SUBTREE_FIELD(self, lookahead_bytes);
+}
+
+#undef SUBTREE_FIELD
+
+// Sets the `extra` flag on `*self` in whichever representation it uses.
+static inline void ts_subtree_set_extra(MutableSubtree *self) {
+  if (!self->data.is_inline) {
+    self->ptr->extra = true;
+    return;
+  }
+  self->data.extra = true;
+}
+
+// Symbol of the first leaf under `self`: the node's own symbol when it has no
+// children, otherwise the cached `first_leaf.symbol`.
+static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
+  if (self.data.is_inline) return self.data.symbol;
+  return self.ptr->child_count > 0 ? self.ptr->first_leaf.symbol : self.ptr->symbol;
+}
+
+// Parse state of the first leaf under `self`: the node's own state when it has no
+// children, otherwise the cached `first_leaf.parse_state`.
+static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
+  if (self.data.is_inline) return self.data.parse_state;
+  return self.ptr->child_count > 0 ? self.ptr->first_leaf.parse_state : self.ptr->parse_state;
+}
+
+// Padding in front of `self`. For inline nodes the Length is rebuilt from the packed
+// byte, row and column fields.
+static inline Length ts_subtree_padding(Subtree self) {
+  if (!self.data.is_inline) return self.ptr->padding;
+
+  Length inline_padding = {
+    self.data.padding_bytes,
+    {self.data.padding_rows, self.data.padding_columns}
+  };
+  return inline_padding;
+}
+
+// Size of `self` without its padding. An inline node spans zero rows and as many
+// columns as it has bytes.
+static inline Length ts_subtree_size(Subtree self) {
+  if (!self.data.is_inline) return self.ptr->size;
+
+  Length inline_size = {self.data.size_bytes, {0, self.data.size_bytes}};
+  return inline_size;
+}
+
+// Padding plus size of `self`.
+static inline Length ts_subtree_total_size(Subtree self) {
+  Length padding = ts_subtree_padding(self);
+  Length size = ts_subtree_size(self);
+  return length_add(padding, size);
+}
+
+// Byte count of `self` including its padding.
+static inline uint32_t ts_subtree_total_bytes(Subtree self) {
+  Length total = ts_subtree_total_size(self);
+  return total.bytes;
+}
+
+// Number of children of `self`; inline subtrees never have any.
+static inline uint32_t ts_subtree_child_count(Subtree self) {
+  if (self.data.is_inline) return 0;
+  return self.ptr->child_count;
+}
+
+// Cached node count of `self`; inline and childless subtrees count as one node.
+static inline uint32_t ts_subtree_node_count(Subtree self) {
+  if (self.data.is_inline) return 1;
+  if (self.ptr->child_count == 0) return 1;
+  return self.ptr->node_count;
+}
+
+// Cached visible child count of `self`, or zero when it has no children.
+static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
+  return ts_subtree_child_count(self) > 0 ? self.ptr->visible_child_count : 0;
+}
+
+// Error cost of `self`. Missing nodes always cost one missing tree plus one recovery;
+// other inline nodes cost nothing; heap nodes report their stored cost.
+static inline uint32_t ts_subtree_error_cost(Subtree self) {
+  if (ts_subtree_missing(self)) {
+    return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
+  }
+  if (self.data.is_inline) return 0;
+  return self.ptr->error_cost;
+}
+
+// Cached dynamic precedence of `self`; zero for inline and childless subtrees.
+static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
+  if (self.data.is_inline) return 0;
+  if (self.ptr->child_count == 0) return 0;
+  return self.ptr->dynamic_precedence;
+}
+
+// Production id of `self`, or zero when it has no children.
+static inline uint16_t ts_subtree_production_id(Subtree self) {
+  return ts_subtree_child_count(self) > 0 ? self.ptr->production_id : 0;
+}
+
+// Whether `self` is fragile on its left side; inline subtrees never are.
+static inline bool ts_subtree_fragile_left(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->fragile_left;
+}
+
+// Whether `self` is fragile on its right side; inline subtrees never are.
+static inline bool ts_subtree_fragile_right(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->fragile_right;
+}
+
+// Whether `self` is flagged as containing external tokens; inline subtrees never are.
+static inline bool ts_subtree_has_external_tokens(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->has_external_tokens;
+}
+
+// Whether `self` is fragile on either side; inline subtrees never are.
+static inline bool ts_subtree_is_fragile(Subtree self) {
+  if (self.data.is_inline) return false;
+  return self.ptr->fragile_left || self.ptr->fragile_right;
+}
+
+// Whether the symbol of `self` is the built-in error symbol.
+static inline bool ts_subtree_is_error(Subtree self) {
+  TSSymbol symbol = ts_subtree_symbol(self);
+  return symbol == ts_builtin_sym_error;
+}
+
+// Whether the symbol of `self` is the built-in end-of-input symbol.
+static inline bool ts_subtree_is_eof(Subtree self) {
+  TSSymbol symbol = ts_subtree_symbol(self);
+  return symbol == ts_builtin_sym_end;
+}
+
+// Reinterprets a mutable handle as a read-only one by copying its inline-data view.
+static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
+  Subtree readonly_view;
+  readonly_view.data = self.data;
+  return readonly_view;
+}
+
+// Reinterprets a read-only handle as a mutable one by copying its inline-data view.
+// No ref_count check is made here, so the caller is responsible for knowing that the
+// node may be modified.
+static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
+  MutableSubtree mutable_view;
+  mutable_view.data = self.data;
+  return mutable_view;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_SUBTREE_H_
diff --git a/src/tree_sitter/tree.c b/src/tree_sitter/tree.c
new file mode 100644
index 0000000000..04cb1d242f
--- /dev/null
+++ b/src/tree_sitter/tree.c
@@ -0,0 +1,149 @@
+#include "tree_sitter/api.h"
+#include "./array.h"
+#include "./get_changed_ranges.h"
+#include "./subtree.h"
+#include "./tree_cursor.h"
+#include "./tree.h"
+
+// Number of slots allocated for a tree's parent cache; cache indices are
+// reduced modulo this value.
+static const unsigned PARENT_CACHE_CAPACITY = 32;
+
+// Allocates a new tree wrapping `root` for `language`, and stores a private
+// copy of the `included_range_count` entries of `included_ranges`.
+//
+// The parent cache starts out unallocated and empty. `root` is stored as-is;
+// this function does not retain it.
+TSTree *ts_tree_new(
+  Subtree root, const TSLanguage *language,
+  const TSRange *included_ranges, unsigned included_range_count
+) {
+  TSTree *result = ts_malloc(sizeof(TSTree));
+  result->root = root;
+  result->language = language;
+  result->parent_cache = NULL;
+  result->parent_cache_start = 0;
+  result->parent_cache_size = 0;
+  result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
+  // Skip the copy for an empty range list: either pointer may be NULL in that
+  // case, and memcpy has undefined behavior for NULL arguments even when the
+  // size is zero.
+  if (included_range_count > 0) {
+    memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
+  }
+  result->included_range_count = included_range_count;
+  return result;
+}
+
+// Creates a second tree that shares `self`'s root subtree. The root is
+// retained first, and the included ranges are duplicated by ts_tree_new.
+TSTree *ts_tree_copy(const TSTree *self) {
+  ts_subtree_retain(self->root);
+  TSTree *duplicate = ts_tree_new(
+    self->root,
+    self->language,
+    self->included_ranges,
+    self->included_range_count
+  );
+  return duplicate;
+}
+
+// Releases the tree's root subtree and frees the tree together with its
+// included ranges and parent cache. Passing NULL is a no-op.
+void ts_tree_delete(TSTree *self) {
+  if (self == NULL) {
+    return;
+  }
+
+  // A temporary pool is needed only for the duration of the release.
+  SubtreePool scratch_pool = ts_subtree_pool_new(0);
+  ts_subtree_release(&scratch_pool, self->root);
+  ts_subtree_pool_delete(&scratch_pool);
+
+  ts_free(self->included_ranges);
+  if (self->parent_cache != NULL) {
+    ts_free(self->parent_cache);
+  }
+  ts_free(self);
+}
+
+// Builds the node for the tree's root subtree. The node is positioned at the
+// root's padding and carries no alias symbol.
+TSNode ts_tree_root_node(const TSTree *self) {
+  Length root_start = ts_subtree_padding(self->root);
+  return ts_node_new(self, &self->root, root_start, 0);
+}
+
+// Returns the language pointer stored in the tree.
+const TSLanguage *ts_tree_language(const TSTree *self) {
+  const TSLanguage *language = self->language;
+  return language;
+}
+
+// Applies `edit` to the tree: shifts the stored included ranges that end at or
+// after the edited region, rewrites the root subtree via ts_subtree_edit, and
+// empties the parent cache (the cache buffer itself is kept).
+void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
+  for (unsigned i = 0; i < self->included_range_count; i++) {
+    TSRange *range = &self->included_ranges[i];
+    // Ranges that end before the old end of the edit are left untouched.
+    if (range->end_byte >= edit->old_end_byte) {
+      // An end of UINT32_MAX is left as-is rather than shifted.
+      if (range->end_byte != UINT32_MAX) {
+        range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
+        range->end_point = point_add(
+          edit->new_end_point,
+          point_sub(range->end_point, edit->old_end_point)
+        );
+        // The offset added above is non-negative, so the sum can only be
+        // smaller than new_end_byte if the unsigned addition wrapped; clamp
+        // to the maximum in that case.
+        if (range->end_byte < edit->new_end_byte) {
+          range->end_byte = UINT32_MAX;
+          range->end_point = POINT_MAX;
+        }
+      }
+      // The start is shifted only when it also lies at or after the old end
+      // of the edit.
+      if (range->start_byte >= edit->old_end_byte) {
+        range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
+        range->start_point = point_add(
+          edit->new_end_point,
+          point_sub(range->start_point, edit->old_end_point)
+        );
+        // Same wrap-around clamp as for the end position.
+        if (range->start_byte < edit->new_end_byte) {
+          range->start_byte = UINT32_MAX;
+          range->start_point = POINT_MAX;
+        }
+      }
+    }
+  }
+
+  SubtreePool pool = ts_subtree_pool_new(0);
+  self->root = ts_subtree_edit(self->root, edit, &pool);
+  // Reset the cache counters so no previously cached entry is consulted.
+  self->parent_cache_start = 0;
+  self->parent_cache_size = 0;
+  ts_subtree_pool_delete(&pool);
+}
+
+// Computes the ranges that differ between the trees `self` and `other`.
+//
+// The differences between the two trees' included ranges are collected first
+// and handed to ts_subtree_get_changed_ranges together with both root
+// subtrees. Returns the array produced by that call and stores the value it
+// returns in `*count`.
+TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
+  TSRange *result;
+  TreeCursor cursor1 = {NULL, array_new()};
+  TreeCursor cursor2 = {NULL, array_new()};
+  // Each cursor is bound to its own tree: cursor1 starts at the root of
+  // `self`, cursor2 at the root of `other`.
+  ts_tree_cursor_init(&cursor1, ts_tree_root_node(self));
+  ts_tree_cursor_init(&cursor2, ts_tree_root_node(other));
+
+  TSRangeArray included_range_differences = array_new();
+  ts_range_array_get_changed_ranges(
+    self->included_ranges, self->included_range_count,
+    other->included_ranges, other->included_range_count,
+    &included_range_differences
+  );
+
+  *count = ts_subtree_get_changed_ranges(
+    &self->root, &other->root, &cursor1, &cursor2,
+    self->language, &included_range_differences, &result
+  );
+
+  // Only the scratch arrays are freed here; `result` is handed to the caller.
+  array_delete(&included_range_differences);
+  array_delete(&cursor1.stack);
+  array_delete(&cursor2.stack);
+  return result;
+}
+
+// Forwards the tree's root subtree and language to
+// ts_subtree_print_dot_graph, writing to `file`.
+void ts_tree_print_dot_graph(const TSTree *self, FILE *file) {
+  ts_subtree_print_dot_graph(
+    self->root,
+    self->language,
+    file
+  );
+}
+
+// Looks `node` up in the tree's parent cache. Entries are scanned from
+// `parent_cache_start`, wrapping modulo PARENT_CACHE_CAPACITY. On a hit the
+// cached parent node is rebuilt; on a miss the result is a node built from a
+// NULL tree and a NULL subtree.
+TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) {
+  uint32_t offset = 0;
+  while (offset < self->parent_cache_size) {
+    uint32_t slot = (self->parent_cache_start + offset) % PARENT_CACHE_CAPACITY;
+    ParentCacheEntry *cached = &self->parent_cache[slot];
+    if (cached->child == node->id) {
+      return ts_node_new(self, cached->parent, cached->position, cached->alias_symbol);
+    }
+    offset++;
+  }
+  return ts_node_new(NULL, NULL, length_zero(), 0);
+}
+
+// Records `parent` as the parent of `node` in the tree's parent cache. The
+// cache buffer is allocated on first use. Once PARENT_CACHE_CAPACITY entries
+// are stored, the new entry lands on the slot at `parent_cache_start` and the
+// start index advances.
+//
+// The tree is taken as const but is mutated through a cast.
+void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) {
+  TSTree *self = (TSTree *)_self;
+  if (self->parent_cache == NULL) {
+    self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry));
+  }
+
+  // Rebuild the parent's position and alias from the node's context words.
+  ParentCacheEntry fresh = {
+    .child = node->id,
+    .parent = (const Subtree *)parent->id,
+    .position = {
+      parent->context[0],
+      {parent->context[1], parent->context[2]}
+    },
+    .alias_symbol = parent->context[3],
+  };
+
+  uint32_t slot = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY;
+  self->parent_cache[slot] = fresh;
+
+  if (self->parent_cache_size != PARENT_CACHE_CAPACITY) {
+    self->parent_cache_size++;
+  } else {
+    self->parent_cache_start++;
+  }
+}
diff --git a/src/tree_sitter/tree.h b/src/tree_sitter/tree.h
new file mode 100644
index 0000000000..92a7e64179
--- /dev/null
+++ b/src/tree_sitter/tree.h
@@ -0,0 +1,34 @@
+#ifndef TREE_SITTER_TREE_H_
+#define TREE_SITTER_TREE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// One remembered child/parent pair in a tree's parent cache.
+typedef struct {
+  const Subtree *child;   // compared against a node's `id` on lookup
+  const Subtree *parent;  // subtree used to rebuild the parent node
+  Length position;        // position passed to ts_node_new for the parent
+  TSSymbol alias_symbol;  // alias passed to ts_node_new for the parent
+} ParentCacheEntry;
+
+// A syntax tree: a root subtree plus the language it belongs to, a
+// lazily-allocated parent cache, and a private copy of the included ranges.
+struct TSTree {
+  Subtree root;
+  const TSLanguage *language;
+  ParentCacheEntry *parent_cache;  // NULL until the first entry is stored
+  uint32_t parent_cache_start;     // offset of the first entry scanned on lookup
+  uint32_t parent_cache_size;      // number of entries currently stored
+  TSRange *included_ranges;
+  unsigned included_range_count;
+};
+
+// Allocates a tree for the given root subtree and language, copying the given
+// array of included ranges (pointer and element count).
+TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
+// Builds a node from a tree, a subtree, a position and an alias symbol.
+// NOTE(review): defined outside tree.c; confirm the exact semantics there.
+TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
+// Looks up the cached parent of a node in the tree's parent cache.
+TSNode ts_tree_get_cached_parent(const TSTree *, const TSNode *);
+// Stores a (node, parent) pair in the tree's parent cache.
+void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_TREE_H_
diff --git a/src/tree_sitter/tree_cursor.c b/src/tree_sitter/tree_cursor.c
new file mode 100644
index 0000000000..7103fc411d
--- /dev/null
+++ b/src/tree_sitter/tree_cursor.c
@@ -0,0 +1,302 @@
+#include "tree_sitter/api.h"
+#include "./alloc.h"
+#include "./tree_cursor.h"
+#include "./language.h"
+#include "./tree.h"
+
+// Iteration state for walking the children of one subtree.
+// Field order matters: the struct is also initialized positionally.
+typedef struct {
+  Subtree parent;                   // subtree whose children are visited; NULL_SUBTREE when it has none
+  const TSTree *tree;
+  Length position;                  // position reported for the next child
+  uint32_t child_index;             // index of the next child in `parent`
+  uint32_t structural_child_index;  // advanced only for non-extra children while an alias sequence is present
+  const TSSymbol *alias_sequence;   // alias sequence of the parent's production, or NULL
+} CursorChildIterator;
+
+// CursorChildIterator
+
+// Creates an iterator over the children of the entry on top of the cursor's
+// stack. When that subtree has no children the iterator carries NULL_SUBTREE
+// as its parent, so the first call to the `next` function returns false.
+static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
+  TreeCursorEntry *top = array_back(&self->stack);
+  Subtree parent = *top->subtree;
+
+  if (ts_subtree_child_count(parent) == 0) {
+    return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL};
+  }
+
+  return (CursorChildIterator) {
+    .parent = parent,
+    .tree = self->tree,
+    .position = top->position,
+    .child_index = 0,
+    .structural_child_index = 0,
+    .alias_sequence = ts_language_alias_sequence(
+      self->tree->language,
+      parent.ptr->production_id
+    ),
+  };
+}
+
+// Yields the next child of the iterator's parent.
+//
+// On success, `*result` describes the child (subtree, position and both
+// indices as they were before advancing) and `*visible` tells whether the
+// child is visible, either by itself or through an alias. Returns false when
+// the iterator has no parent or all children have been visited.
+static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
+                                                      TreeCursorEntry *result,
+                                                      bool *visible) {
+  if (self->parent.ptr == NULL) return false;
+  if (self->child_index == self->parent.ptr->child_count) return false;
+
+  const Subtree *current = &self->parent.ptr->children[self->child_index];
+  *result = (TreeCursorEntry) {
+    .subtree = current,
+    .position = self->position,
+    .child_index = self->child_index,
+    .structural_child_index = self->structural_child_index,
+  };
+
+  *visible = ts_subtree_visible(*current);
+  bool is_extra = ts_subtree_extra(*current);
+  // Only non-extra children consume a slot in the alias sequence.
+  if (!is_extra && self->alias_sequence) {
+    *visible |= self->alias_sequence[self->structural_child_index];
+    self->structural_child_index++;
+  }
+
+  // Step past this child's content ...
+  self->position = length_add(self->position, ts_subtree_size(*current));
+  self->child_index++;
+
+  // ... and past the padding in front of the following child, if there is one.
+  if (self->child_index < self->parent.ptr->child_count) {
+    Subtree following = self->parent.ptr->children[self->child_index];
+    self->position = length_add(self->position, ts_subtree_padding(following));
+  }
+
+  return true;
+}
+
+// TSTreeCursor - lifecycle
+
+// Returns a new cursor, zero-initialized and then positioned on `node`.
+TSTreeCursor ts_tree_cursor_new(TSNode node) {
+  TSTreeCursor cursor = {NULL, NULL, {0, 0}};
+  TreeCursor *internal = (TreeCursor *)&cursor;
+  ts_tree_cursor_init(internal, node);
+  return cursor;
+}
+
+// Re-positions an existing cursor on `node` by re-running the initializer.
+void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
+  TreeCursor *self = (TreeCursor *)_self;
+  ts_tree_cursor_init(self, node);
+}
+
+// Points the cursor at `node`: adopts the node's tree and replaces the stack
+// contents with a single entry for the node, located at the node's start byte
+// and start point.
+void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
+  TreeCursorEntry root_entry = {
+    .subtree = (const Subtree *)node.id,
+    .position = {
+      ts_node_start_byte(node),
+      ts_node_start_point(node)
+    },
+    .child_index = 0,
+    .structural_child_index = 0,
+  };
+
+  self->tree = node.tree;
+  array_clear(&self->stack);
+  array_push(&self->stack, root_entry);
+}
+
+// Frees the storage held by the cursor's stack.
+void ts_tree_cursor_delete(TSTreeCursor *_self) {
+  TreeCursor *cursor = (TreeCursor *)_self;
+  array_delete(&cursor->stack);
+}
+
+// TSTreeCursor - walking the tree
+
+// Moves the cursor to the first visible child of the current node.
+//
+// Invisible children that report visible children of their own are pushed and
+// descended into. Returns true once a visible child has been pushed, and false
+// when no child at the current level can be entered.
+bool ts_tree_cursor_goto_first_child(TSTreeCursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+
+  for (;;) {
+    bool descended = false;
+    bool is_visible;
+    TreeCursorEntry candidate;
+    CursorChildIterator children = ts_tree_cursor_iterate_children(self);
+
+    while (ts_tree_cursor_child_iterator_next(&children, &candidate, &is_visible)) {
+      if (is_visible) {
+        array_push(&self->stack, candidate);
+        return true;
+      }
+
+      // Invisible wrapper with visible content: step into it and rescan.
+      if (ts_subtree_visible_child_count(*candidate.subtree) > 0) {
+        array_push(&self->stack, candidate);
+        descended = true;
+        break;
+      }
+    }
+
+    if (!descended) {
+      return false;
+    }
+  }
+}
+
+// Moves the cursor to the first visible child whose end byte lies beyond
+// `goal_byte`, descending through invisible children on the way.
+//
+// Returns the running count of visible children skipped before the chosen
+// one, or -1 when no such child is found; in the -1 case the stack is
+// truncated back to its size on entry.
+int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t goal_byte) {
+  TreeCursor *self = (TreeCursor *)_self;
+  uint32_t initial_size = self->stack.size;
+  uint32_t visible_child_index = 0;
+
+  bool did_descend;
+  do {
+    did_descend = false;
+
+    bool visible;
+    TreeCursorEntry entry;
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+    while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+      uint32_t end_byte = entry.position.bytes + ts_subtree_size(*entry.subtree).bytes;
+      // A child qualifies when it ends strictly after the goal byte.
+      bool at_goal = end_byte > goal_byte;
+      uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
+
+      if (at_goal) {
+        if (visible) {
+          array_push(&self->stack, entry);
+          return visible_child_index;
+        }
+
+        // Invisible child with visible descendants: continue the search inside it.
+        if (visible_child_count > 0) {
+          array_push(&self->stack, entry);
+          did_descend = true;
+          break;
+        }
+      } else if (visible) {
+        // Skipped visible child: counts as one.
+        visible_child_index++;
+      } else {
+        // Skipped invisible child: counts as many as its visible children.
+        visible_child_index += visible_child_count;
+      }
+    }
+  } while (did_descend);
+
+  // If the search descended but found nothing at the last level, fall back to
+  // the next sibling of the entry the cursor ended up on.
+  if (self->stack.size > initial_size &&
+      ts_tree_cursor_goto_next_sibling((TSTreeCursor *)self)) {
+    return visible_child_index;
+  }
+
+  self->stack.size = initial_size;
+  return -1;
+}
+
+// Moves the cursor to the next visible sibling of the current node.
+//
+// Entries are popped one at a time; for each popped entry the remaining
+// children of its parent are scanned. Returns true once a visible node has
+// been pushed. Returns false, with the stack size restored to its value on
+// entry, when no sibling is found.
+bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+  uint32_t initial_size = self->stack.size;
+
+  // The bottom entry (the cursor's starting node) is never popped.
+  while (self->stack.size > 1) {
+    TreeCursorEntry entry = array_pop(&self->stack);
+    // Resume iteration over the parent's children at the popped entry.
+    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
+    iterator.child_index = entry.child_index;
+    iterator.structural_child_index = entry.structural_child_index;
+    iterator.position = entry.position;
+
+    // The first call re-yields the popped entry itself, which advances the
+    // iterator past it and reports whether that entry is visible.
+    bool visible = false;
+    ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible);
+    // A visible entry other than the original current node ends the search:
+    // siblings are not looked for above a visible ancestor.
+    if (visible && self->stack.size + 1 < initial_size) break;
+
+    while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
+      if (visible) {
+        array_push(&self->stack, entry);
+        return true;
+      }
+
+      // Invisible sibling with visible descendants: enter it and stop on its
+      // first visible child.
+      if (ts_subtree_visible_child_count(*entry.subtree)) {
+        array_push(&self->stack, entry);
+        ts_tree_cursor_goto_first_child(_self);
+        return true;
+      }
+    }
+  }
+
+  self->stack.size = initial_size;
+  return false;
+}
+
+// Moves the cursor to the nearest ancestor that is visible or aliased.
+// Returns true and truncates the stack to that ancestor when one is found;
+// otherwise returns false and leaves the stack unchanged.
+bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
+  TreeCursor *self = (TreeCursor *)_self;
+  // Walk from the entry below the top down to index 0. `i` is unsigned, so
+  // `i + 1 > 0` becomes false once `i` wraps past zero; this also skips the
+  // loop entirely when the stack holds a single entry.
+  for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
+    TreeCursorEntry *entry = &self->stack.contents[i];
+    bool is_aliased = false;
+    // The bottom entry has no parent on the stack, so no alias is checked for it.
+    if (i > 0) {
+      TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
+      const TSSymbol *alias_sequence = ts_language_alias_sequence(
+        self->tree->language,
+        parent_entry->subtree->ptr->production_id
+      );
+      is_aliased = alias_sequence && alias_sequence[entry->structural_child_index];
+    }
+    if (ts_subtree_visible(*entry->subtree) || is_aliased) {
+      self->stack.size = i + 1;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Builds the node for the entry on top of the cursor's stack. When the entry
+// has a parent on the stack and is not an extra, its alias symbol is taken
+// from the alias sequence of the parent's production; otherwise the alias is 0.
+TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
+  const TreeCursor *self = (const TreeCursor *)_self;
+  TreeCursorEntry *current = array_back(&self->stack);
+  TSSymbol alias = 0;
+
+  if (self->stack.size > 1) {
+    TreeCursorEntry *above = &self->stack.contents[self->stack.size - 2];
+    const TSSymbol *aliases = ts_language_alias_sequence(
+      self->tree->language,
+      above->subtree->ptr->production_id
+    );
+    if (aliases && !ts_subtree_extra(*current->subtree)) {
+      alias = aliases[current->structural_child_index];
+    }
+  }
+
+  return ts_node_new(self->tree, current->subtree, current->position, alias);
+}
+
+// Returns the field id associated with the cursor's current node, or 0 when
+// no matching field map entry is found.
+TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
+  const TreeCursor *self = (const TreeCursor *)_self;
+
+  // Walk up the tree, visiting the current node and its invisible ancestors.
+  for (unsigned i = self->stack.size - 1; i > 0; i--) {
+    TreeCursorEntry *entry = &self->stack.contents[i];
+    TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
+
+    // Stop walking up when another visible node is found.
+    // The current node itself (top of the stack) is exempt from this check.
+    if (i != self->stack.size - 1) {
+      if (ts_subtree_visible(*entry->subtree)) break;
+      const TSSymbol *alias_sequence = ts_language_alias_sequence(
+        self->tree->language,
+        parent_entry->subtree->ptr->production_id
+      );
+      // An aliased ancestor also ends the walk.
+      if (alias_sequence && alias_sequence[entry->structural_child_index]) {
+        break;
+      }
+    }
+
+    // Fetch the field map of the parent's production.
+    const TSFieldMapEntry *field_map, *field_map_end;
+    ts_language_field_map(
+      self->tree->language,
+      parent_entry->subtree->ptr->production_id,
+      &field_map, &field_map_end
+    );
+
+    // Return the first non-inherited field that names this child's slot.
+    while (field_map < field_map_end) {
+      if (
+        !field_map->inherited &&
+        field_map->child_index == entry->structural_child_index
+      ) return field_map->field_id;
+      field_map++;
+    }
+  }
+  return 0;
+}
+
+// Returns the name of the current node's field, looked up in the language's
+// `field_names` table, or NULL when the current field id is 0.
+const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
+  TSFieldId field_id = ts_tree_cursor_current_field_id(_self);
+  if (!field_id) {
+    return NULL;
+  }
+
+  const TreeCursor *self = (const TreeCursor *)_self;
+  return self->tree->language->field_names[field_id];
+}
+
+// Returns a new cursor on the same tree whose stack holds a copy of every
+// entry of the source cursor's stack.
+TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
+  const TreeCursor *source = (const TreeCursor *)_cursor;
+  TSTreeCursor duplicate = {NULL, NULL, {0, 0}};
+  TreeCursor *target = (TreeCursor *)&duplicate;
+
+  target->tree = source->tree;
+  array_push_all(&target->stack, &source->stack);
+  return duplicate;
+}
diff --git a/src/tree_sitter/tree_cursor.h b/src/tree_sitter/tree_cursor.h
new file mode 100644
index 0000000000..55bdad86da
--- /dev/null
+++ b/src/tree_sitter/tree_cursor.h
@@ -0,0 +1,20 @@
+#ifndef TREE_SITTER_TREE_CURSOR_H_
+#define TREE_SITTER_TREE_CURSOR_H_
+
+#include "./subtree.h"
+
+// One level of a tree cursor's stack: a subtree together with where it sits
+// inside its parent.
+typedef struct {
+  const Subtree *subtree;
+  Length position;                  // position recorded for this subtree
+  uint32_t child_index;             // index among all of the parent's children
+  uint32_t structural_child_index;  // index used to look up the parent's alias sequence and field map
+} TreeCursorEntry;
+
+// Internal representation of a tree cursor: the tree being walked and a stack
+// of entries whose last element is the current node.
+// Field order matters: the struct is also initialized positionally.
+typedef struct {
+  const TSTree *tree;
+  Array(TreeCursorEntry) stack;
+} TreeCursor;
+
+// Points the cursor at the given node: adopts the node's tree and replaces the
+// stack contents with a single entry for that node.
+void ts_tree_cursor_init(TreeCursor *, TSNode);
+
+#endif // TREE_SITTER_TREE_CURSOR_H_
diff --git a/src/tree_sitter/utf16.c b/src/tree_sitter/utf16.c
new file mode 100644
index 0000000000..3956c01cb9
--- /dev/null
+++ b/src/tree_sitter/utf16.c
@@ -0,0 +1,33 @@
+#include "./utf16.h"
+
+// Decodes one code point from a UTF-16 buffer read as 16-bit units in the
+// host's byte order.
+//
+// `length` is the number of bytes available in `string`. The decoded value is
+// stored in `*code_point`, or -1 when the input is too short or not a valid
+// sequence. Returns the number of bytes consumed: 0 when fewer than two bytes
+// are available, 4 for a valid surrogate pair, and 2 otherwise.
+utf8proc_ssize_t utf16_iterate(
+  const utf8proc_uint8_t *string,
+  utf8proc_ssize_t length,
+  utf8proc_int32_t *code_point
+) {
+  // Not even one complete 16-bit unit.
+  if (length < 2) {
+    *code_point = -1;
+    return 0;
+  }
+
+  const uint16_t *units = (const uint16_t *)string;
+  uint16_t lead = units[0];
+
+  // Anything outside the surrogate block stands for itself.
+  bool is_surrogate = lead >= 0xd800 && lead < 0xe000;
+  if (!is_surrogate) {
+    *code_point = lead;
+    return 2;
+  }
+
+  // A high surrogate must be followed by a low surrogate to form a pair.
+  bool is_high_surrogate = lead < 0xdc00;
+  if (is_high_surrogate && length >= 4) {
+    uint16_t trail = units[1];
+    if (trail >= 0xdc00 && trail < 0xe000) {
+      *code_point = 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
+      return 4;
+    }
+  }
+
+  // Unpaired surrogate: report an error and skip one unit.
+  *code_point = -1;
+  return 2;
+}
diff --git a/src/tree_sitter/utf16.h b/src/tree_sitter/utf16.h
new file mode 100644
index 0000000000..32fd05e6db
--- /dev/null
+++ b/src/tree_sitter/utf16.h
@@ -0,0 +1,21 @@
+#ifndef TREE_SITTER_UTF16_H_
+#define TREE_SITTER_UTF16_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+#include "utf8proc.h"
+
+// Analogous to utf8proc's utf8proc_iterate function. Reads one code point from
+// the given UTF16 string and stores it in the location pointed to by `code_point`.
+// Returns the number of bytes in `string` that were read.
+utf8proc_ssize_t utf16_iterate(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_int32_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // TREE_SITTER_UTF16_H_