aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Vigouroux <tomvig38@gmail.com>2020-09-17 11:25:22 +0200
committerThomas Vigouroux <tomvig38@gmail.com>2020-11-03 10:39:35 +0100
commit3eb241d8310f453ce5e5485f12796a0ae43a0a18 (patch)
treefa214d8724a4c3d3646b1c1c5e12137757464d6f
parenta061d53e18168130aad537a9e8012390834ff8c2 (diff)
downloadrneovim-3eb241d8310f453ce5e5485f12796a0ae43a0a18.tar.gz
rneovim-3eb241d8310f453ce5e5485f12796a0ae43a0a18.tar.bz2
rneovim-3eb241d8310f453ce5e5485f12796a0ae43a0a18.zip
bundle: move tree-sitter as a bundled dep
fixup! bundle: move tree-sitter as a bundled dep fixup! bundle: move tree-sitter as a bundled dep
-rw-r--r--CMakeLists.txt3
-rw-r--r--cmake/FindTreesitter.cmake11
-rw-r--r--codecov.yml3
-rwxr-xr-xscripts/update-ts-runtime.sh39
-rw-r--r--src/nvim/CMakeLists.txt18
-rw-r--r--src/tree_sitter/LICENSE21
-rw-r--r--src/tree_sitter/README.md16
-rw-r--r--src/tree_sitter/alloc.h95
-rw-r--r--src/tree_sitter/api.h876
-rw-r--r--src/tree_sitter/array.h158
-rw-r--r--src/tree_sitter/atomic.h42
-rw-r--r--src/tree_sitter/bits.h29
-rw-r--r--src/tree_sitter/clock.h141
-rw-r--r--src/tree_sitter/error_costs.h11
-rw-r--r--src/tree_sitter/get_changed_ranges.c482
-rw-r--r--src/tree_sitter/get_changed_ranges.h36
-rw-r--r--src/tree_sitter/language.c149
-rw-r--r--src/tree_sitter/language.h143
-rw-r--r--src/tree_sitter/length.h44
-rw-r--r--src/tree_sitter/lexer.c391
-rw-r--r--src/tree_sitter/lexer.h48
-rw-r--r--src/tree_sitter/lib.c17
-rw-r--r--src/tree_sitter/node.c677
-rw-r--r--src/tree_sitter/parser.c1906
-rw-r--r--src/tree_sitter/parser.h235
-rw-r--r--src/tree_sitter/point.h54
-rw-r--r--src/tree_sitter/query.c2143
-rw-r--r--src/tree_sitter/reduce_action.h34
-rw-r--r--src/tree_sitter/reusable_node.h88
-rw-r--r--src/tree_sitter/stack.c857
-rw-r--r--src/tree_sitter/stack.h135
-rw-r--r--src/tree_sitter/subtree.c982
-rw-r--r--src/tree_sitter/subtree.h285
-rw-r--r--src/tree_sitter/tree.c148
-rw-r--r--src/tree_sitter/tree.h34
-rw-r--r--src/tree_sitter/tree_cursor.c367
-rw-r--r--src/tree_sitter/tree_cursor.h21
-rw-r--r--src/tree_sitter/treesitter_commit_hash.txt1
-rw-r--r--src/tree_sitter/unicode.h50
-rw-r--r--src/tree_sitter/unicode/ICU_SHA1
-rw-r--r--src/tree_sitter/unicode/LICENSE414
-rw-r--r--src/tree_sitter/unicode/README.md29
-rw-r--r--src/tree_sitter/unicode/ptypes.h1
-rw-r--r--src/tree_sitter/unicode/umachine.h448
-rw-r--r--src/tree_sitter/unicode/urename.h1
-rw-r--r--src/tree_sitter/unicode/utf.h1
-rw-r--r--src/tree_sitter/unicode/utf16.h733
-rw-r--r--src/tree_sitter/unicode/utf8.h881
-rw-r--r--third-party/CMakeLists.txt8
-rw-r--r--third-party/cmake/BuildTreesitter.cmake22
50 files changed, 48 insertions, 13281 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 87dff54d06..e290500175 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -374,6 +374,9 @@ include_directories(SYSTEM ${MSGPACK_INCLUDE_DIRS})
find_package(LibLUV 1.30.0 REQUIRED)
include_directories(SYSTEM ${LIBLUV_INCLUDE_DIRS})
+find_package(Treesitter REQUIRED)
+include_directories(SYSTEM ${TREESITTER_INCLUDE_DIRS})
+
# Note: The test lib requires LuaJIT; it will be skipped if LuaJIT is missing.
option(PREFER_LUA "Prefer Lua over LuaJIT in the nvim executable." OFF)
diff --git a/cmake/FindTreesitter.cmake b/cmake/FindTreesitter.cmake
new file mode 100644
index 0000000000..8ba3b72d28
--- /dev/null
+++ b/cmake/FindTreesitter.cmake
@@ -0,0 +1,11 @@
+# - Try to find tree-sitter
+# Once done, this will define
+#
+# TREESITTER_FOUND - system has tree-sitter
+# TREESITTER_INCLUDE_DIRS - the tree-sitter include directories
+# TREESITTER_LIBRARIES - link these to use tree-sitter
+
+include(LibFindMacros)
+
+libfind_pkg_detect(TREESITTER tree-sitter FIND_PATH tree_sitter/api.h FIND_LIBRARY tree-sitter)
+libfind_process(TREESITTER)
diff --git a/codecov.yml b/codecov.yml
index 0f867db668..a83fd916ee 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -25,6 +25,3 @@ coverage:
changes: no
comment: off
-
-ignore:
- - "src/tree_sitter"
diff --git a/scripts/update-ts-runtime.sh b/scripts/update-ts-runtime.sh
deleted file mode 100755
index 1a947e0ac9..0000000000
--- a/scripts/update-ts-runtime.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/sh
-#
-# This script will update the treesitter runtime to the provided commit.
-# Usage :
-# $0 <tree-sitter commit sha>
-set -e
-
-ts_source_dir="/tmp/tree-sitter"
-ts_url="https://github.com/tree-sitter/tree-sitter.git"
-
-base_dir="$(cd "$(dirname $(dirname $0))" && pwd)"
-ts_dest_dir="$base_dir/src/tree_sitter/"
-ts_current_commit="$ts_dest_dir/treesitter_commit_hash.txt"
-
-echo "Updating treesitter runtime from $(cat "$ts_current_commit") to $1..."
-
-if [ ! -d "$ts_source_dir" ]; then
- echo "Cloning treesitter..."
- git clone "$ts_url" "$ts_source_dir"
-else
- echo "Found a non-empty $ts_source_dir directory..."
- git -C "$ts_source_dir" fetch
-fi
-
-echo "Checking out $1..."
-git -C "$ts_source_dir" checkout $1
-
-echo "Removing old files..."
-find "$ts_dest_dir" -not -name "LICENSE" -not -name "README.md" -not -type d -delete
-
-echo "Copying files..."
-cp -t "$ts_dest_dir" -r "$ts_source_dir/lib/src"/*
-cp -t "$ts_dest_dir" "$ts_source_dir/lib/include/tree_sitter"/*
-
-echo "$1" > "$ts_current_commit"
-
-make
-TEST_FILE="$base_dir/test/functional/lua/treesitter_spec.lua" make test
-
diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt
index 2d98f1a659..46f70f850c 100644
--- a/src/nvim/CMakeLists.txt
+++ b/src/nvim/CMakeLists.txt
@@ -87,10 +87,6 @@ file(GLOB NVIM_HEADERS *.h)
file(GLOB XDIFF_SOURCES xdiff/*.c)
file(GLOB XDIFF_HEADERS xdiff/*.h)
-file(GLOB TREESITTER_SOURCES ../tree_sitter/*.c)
-file(GLOB TS_SOURCE_AMALGAM ../tree_sitter/lib.c)
-list(REMOVE_ITEM TREESITTER_SOURCES ${TS_SOURCE_AMALGAM})
-
foreach(subdir
os
api
@@ -187,13 +183,6 @@ if(NOT MSVC)
set_source_files_properties(
eval/funcs.c PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -Wno-conversion")
endif()
-
- # tree-sitter: inlined external project, we don't maintain it. #10124
- set(TS_FLAGS "-Wno-conversion -Wno-pedantic -Wno-shadow -Wno-missing-prototypes -Wno-unused-variable")
- if(HAVE_WIMPLICIT_FALLTHROUGH_FLAG)
- set(TS_FLAGS "${TS_FLAGS} -Wno-implicit-fallthrough")
- endif()
- set_source_files_properties(${TREESITTER_SOURCES} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${TS_FLAGS}")
endif()
if(NOT "${MIN_LOG_LEVEL}" MATCHES "^$")
@@ -453,6 +442,7 @@ list(APPEND NVIM_LINK_LIBRARIES
${LIBTERMKEY_LIBRARIES}
${UNIBILIUM_LIBRARIES}
${UTF8PROC_LIBRARIES}
+ ${TREESITTER_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT}
)
@@ -472,7 +462,7 @@ endif()
add_executable(nvim ${NVIM_GENERATED_FOR_SOURCES} ${NVIM_GENERATED_FOR_HEADERS}
${NVIM_GENERATED_SOURCES} ${NVIM_SOURCES} ${NVIM_HEADERS}
- ${XDIFF_SOURCES} ${XDIFF_HEADERS} ${TREESITTER_SOURCES})
+ ${XDIFF_SOURCES} ${XDIFF_HEADERS})
target_link_libraries(nvim ${NVIM_EXEC_LINK_LIBRARIES})
install_helper(TARGETS nvim)
@@ -570,7 +560,7 @@ add_library(
EXCLUDE_FROM_ALL
${NVIM_SOURCES} ${NVIM_GENERATED_SOURCES}
${NVIM_HEADERS} ${NVIM_GENERATED_FOR_SOURCES} ${NVIM_GENERATED_FOR_HEADERS}
- ${XDIFF_SOURCES} ${XDIFF_HEADERS} ${TREESITTER_SOURCES}
+ ${XDIFF_SOURCES} ${XDIFF_HEADERS}
)
set_property(TARGET libnvim APPEND PROPERTY
INCLUDE_DIRECTORIES ${LUA_PREFERRED_INCLUDE_DIRS})
@@ -600,7 +590,7 @@ else()
EXCLUDE_FROM_ALL
${NVIM_SOURCES} ${NVIM_GENERATED_SOURCES}
${NVIM_HEADERS} ${NVIM_GENERATED_FOR_SOURCES} ${NVIM_GENERATED_FOR_HEADERS}
- ${XDIFF_SOURCES} ${XDIFF_HEADERS} ${TREESITTER_SOURCES}
+ ${XDIFF_SOURCES} ${XDIFF_HEADERS}
${UNIT_TEST_FIXTURES}
)
target_link_libraries(nvim-test ${NVIM_TEST_LINK_LIBRARIES})
diff --git a/src/tree_sitter/LICENSE b/src/tree_sitter/LICENSE
deleted file mode 100644
index 971b81f9a8..0000000000
--- a/src/tree_sitter/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2018 Max Brunsfeld
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/src/tree_sitter/README.md b/src/tree_sitter/README.md
deleted file mode 100644
index 20cb35e7c3..0000000000
--- a/src/tree_sitter/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-Tree-sitter vendor runtime
-==========================
-
-This is the vendor runtime code for treesitter.
-
-The original code can be found [here](https://github.com/tree-sitter/tree-sitter).
-
-As this code is not ours, if you find any bugs, feel free to open an issue, so that we can
-investigate and determine if this should go upstream.
-
-# Updating
-
-To update the treesitter runtime, use the `update-ts-runtime.sh` script in the `scripts` directory:
-```sh
-./scripts/update-ts-runtime.sh <commit you want to update to>
-```
diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h
deleted file mode 100644
index 32c90f23c8..0000000000
--- a/src/tree_sitter/alloc.h
+++ /dev/null
@@ -1,95 +0,0 @@
-#ifndef TREE_SITTER_ALLOC_H_
-#define TREE_SITTER_ALLOC_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdlib.h>
-#include <stdbool.h>
-#include <stdio.h>
-
-#include "nvim/memory.h"
-
-#if 1
-
-static inline bool ts_toggle_allocation_recording(bool value) {
- return false;
-}
-
-#define ts_malloc xmalloc
-#define ts_calloc xcalloc
-#define ts_realloc xrealloc
-#define ts_free xfree
-
-#elif defined(TREE_SITTER_TEST)
-
-void *ts_record_malloc(size_t);
-void *ts_record_calloc(size_t, size_t);
-void *ts_record_realloc(void *, size_t);
-void ts_record_free(void *);
-bool ts_toggle_allocation_recording(bool);
-
-static inline void *ts_malloc(size_t size) {
- return ts_record_malloc(size);
-}
-
-static inline void *ts_calloc(size_t count, size_t size) {
- return ts_record_calloc(count, size);
-}
-
-static inline void *ts_realloc(void *buffer, size_t size) {
- return ts_record_realloc(buffer, size);
-}
-
-static inline void ts_free(void *buffer) {
- ts_record_free(buffer);
-}
-
-#else
-
-#include <stdlib.h>
-
-static inline bool ts_toggle_allocation_recording(bool value) {
- (void)value;
- return false;
-}
-
-static inline void *ts_malloc(size_t size) {
- void *result = malloc(size);
- if (size > 0 && !result) {
- fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
- exit(1);
- }
- return result;
-}
-
-static inline void *ts_calloc(size_t count, size_t size) {
- void *result = calloc(count, size);
- if (count > 0 && !result) {
- fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
- exit(1);
- }
- return result;
-}
-
-static inline void *ts_realloc(void *buffer, size_t size) {
- void *result = realloc(buffer, size);
- if (size > 0 && !result) {
- fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
- exit(1);
- }
- return result;
-}
-
-static inline void ts_free(void *buffer) {
- free(buffer);
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_ALLOC_H_
diff --git a/src/tree_sitter/api.h b/src/tree_sitter/api.h
deleted file mode 100644
index 9d832e6ec4..0000000000
--- a/src/tree_sitter/api.h
+++ /dev/null
@@ -1,876 +0,0 @@
-#ifndef TREE_SITTER_API_H_
-#define TREE_SITTER_API_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-
-/****************************/
-/* Section - ABI Versioning */
-/****************************/
-
-/**
- * The latest ABI version that is supported by the current version of the
- * library. When Languages are generated by the Tree-sitter CLI, they are
- * assigned an ABI version number that corresponds to the current CLI version.
- * The Tree-sitter library is generally backwards-compatible with languages
- * generated using older CLI versions, but is not forwards-compatible.
- */
-#define TREE_SITTER_LANGUAGE_VERSION 11
-
-/**
- * The earliest ABI version that is supported by the current version of the
- * library.
- */
-#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9
-
-/*******************/
-/* Section - Types */
-/*******************/
-
-typedef uint16_t TSSymbol;
-typedef uint16_t TSFieldId;
-typedef struct TSLanguage TSLanguage;
-typedef struct TSParser TSParser;
-typedef struct TSTree TSTree;
-typedef struct TSQuery TSQuery;
-typedef struct TSQueryCursor TSQueryCursor;
-
-typedef enum {
- TSInputEncodingUTF8,
- TSInputEncodingUTF16,
-} TSInputEncoding;
-
-typedef enum {
- TSSymbolTypeRegular,
- TSSymbolTypeAnonymous,
- TSSymbolTypeAuxiliary,
-} TSSymbolType;
-
-typedef struct {
- uint32_t row;
- uint32_t column;
-} TSPoint;
-
-typedef struct {
- TSPoint start_point;
- TSPoint end_point;
- uint32_t start_byte;
- uint32_t end_byte;
-} TSRange;
-
-typedef struct {
- void *payload;
- const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
- TSInputEncoding encoding;
-} TSInput;
-
-typedef enum {
- TSLogTypeParse,
- TSLogTypeLex,
-} TSLogType;
-
-typedef struct {
- void *payload;
- void (*log)(void *payload, TSLogType, const char *);
-} TSLogger;
-
-typedef struct {
- uint32_t start_byte;
- uint32_t old_end_byte;
- uint32_t new_end_byte;
- TSPoint start_point;
- TSPoint old_end_point;
- TSPoint new_end_point;
-} TSInputEdit;
-
-typedef struct {
- uint32_t context[4];
- const void *id;
- const TSTree *tree;
-} TSNode;
-
-typedef struct {
- const void *tree;
- const void *id;
- uint32_t context[2];
-} TSTreeCursor;
-
-typedef struct {
- TSNode node;
- uint32_t index;
-} TSQueryCapture;
-
-typedef struct {
- uint32_t id;
- uint16_t pattern_index;
- uint16_t capture_count;
- const TSQueryCapture *captures;
-} TSQueryMatch;
-
-typedef enum {
- TSQueryPredicateStepTypeDone,
- TSQueryPredicateStepTypeCapture,
- TSQueryPredicateStepTypeString,
-} TSQueryPredicateStepType;
-
-typedef struct {
- TSQueryPredicateStepType type;
- uint32_t value_id;
-} TSQueryPredicateStep;
-
-typedef enum {
- TSQueryErrorNone = 0,
- TSQueryErrorSyntax,
- TSQueryErrorNodeType,
- TSQueryErrorField,
- TSQueryErrorCapture,
-} TSQueryError;
-
-/********************/
-/* Section - Parser */
-/********************/
-
-/**
- * Create a new parser.
- */
-TSParser *ts_parser_new(void);
-
-/**
- * Delete the parser, freeing all of the memory that it used.
- */
-void ts_parser_delete(TSParser *parser);
-
-/**
- * Set the language that the parser should use for parsing.
- *
- * Returns a boolean indicating whether or not the language was successfully
- * assigned. True means assignment succeeded. False means there was a version
- * mismatch: the language was generated with an incompatible version of the
- * Tree-sitter CLI. Check the language's version using `ts_language_version`
- * and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
- * `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
- */
-bool ts_parser_set_language(TSParser *self, const TSLanguage *language);
-
-/**
- * Get the parser's current language.
- */
-const TSLanguage *ts_parser_language(const TSParser *self);
-
-/**
- * Set the ranges of text that the parser should include when parsing.
- *
- * By default, the parser will always include entire documents. This function
- * allows you to parse only a *portion* of a document but still return a syntax
- * tree whose ranges match up with the document as a whole. You can also pass
- * multiple disjoint ranges.
- *
- * The second and third parameters specify the location and length of an array
- * of ranges. The parser does *not* take ownership of these ranges; it copies
- * the data, so it doesn't matter how these ranges are allocated.
- *
- * If `length` is zero, then the entire document will be parsed. Otherwise,
- * the given ranges must be ordered from earliest to latest in the document,
- * and they must not overlap. That is, the following must hold for all
- * `i` < `length - 1`:
- *
- * ranges[i].end_byte <= ranges[i + 1].start_byte
- *
- * If this requirement is not satisfied, the operation will fail, the ranges
- * will not be assigned, and this function will return `false`. On success,
- * this function returns `true`
- */
-bool ts_parser_set_included_ranges(
- TSParser *self,
- const TSRange *ranges,
- uint32_t length
-);
-
-/**
- * Get the ranges of text that the parser will include when parsing.
- *
- * The returned pointer is owned by the parser. The caller should not free it
- * or write to it. The length of the array will be written to the given
- * `length` pointer.
- */
-const TSRange *ts_parser_included_ranges(
- const TSParser *self,
- uint32_t *length
-);
-
-/**
- * Use the parser to parse some source code and create a syntax tree.
- *
- * If you are parsing this document for the first time, pass `NULL` for the
- * `old_tree` parameter. Otherwise, if you have already parsed an earlier
- * version of this document and the document has since been edited, pass the
- * previous syntax tree so that the unchanged parts of it can be reused.
- * This will save time and memory. For this to work correctly, you must have
- * already edited the old syntax tree using the `ts_tree_edit` function in a
- * way that exactly matches the source code changes.
- *
- * The `TSInput` parameter lets you specify how to read the text. It has the
- * following three fields:
- * 1. `read`: A function to retrieve a chunk of text at a given byte offset
- * and (row, column) position. The function should return a pointer to the
- * text and write its length to the `bytes_read` pointer. The parser
- * does not take ownership of this buffer; it just borrows it until it has
- * finished reading it. The function should write a zero value to the
- * `bytes_read` pointer to indicate the end of the document.
- * 2. `payload`: An arbitrary pointer that will be passed to each invocation
- * of the `read` function.
- * 3. `encoding`: An indication of how the text is encoded. Either
- * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.
- *
- * This function returns a syntax tree on success, and `NULL` on failure. There
- * are three possible reasons for failure:
- * 1. The parser does not have a language assigned. Check for this using the
- * `ts_parser_language` function.
- * 2. Parsing was cancelled due to a timeout that was set by an earlier call to
- * the `ts_parser_set_timeout_micros` function. You can resume parsing from
- * where the parser left off by calling `ts_parser_parse` again with the
- * same arguments. Or you can start parsing from scratch by first calling
- * `ts_parser_reset`.
- * 3. Parsing was cancelled using a cancellation flag that was set by an
- * earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing
- * from where the parser left off by calling `ts_parser_parse` again with
- * the same arguments.
- */
-TSTree *ts_parser_parse(
- TSParser *self,
- const TSTree *old_tree,
- TSInput input
-);
-
-/**
- * Use the parser to parse some source code stored in one contiguous buffer.
- * The first two parameters are the same as in the `ts_parser_parse` function
- * above. The second two parameters indicate the location of the buffer and its
- * length in bytes.
- */
-TSTree *ts_parser_parse_string(
- TSParser *self,
- const TSTree *old_tree,
- const char *string,
- uint32_t length
-);
-
-/**
- * Use the parser to parse some source code stored in one contiguous buffer with
- * a given encoding. The first four parameters work the same as in the
- * `ts_parser_parse_string` method above. The final parameter indicates whether
- * the text is encoded as UTF8 or UTF16.
- */
-TSTree *ts_parser_parse_string_encoding(
- TSParser *self,
- const TSTree *old_tree,
- const char *string,
- uint32_t length,
- TSInputEncoding encoding
-);
-
-/**
- * Instruct the parser to start the next parse from the beginning.
- *
- * If the parser previously failed because of a timeout or a cancellation, then
- * by default, it will resume where it left off on the next call to
- * `ts_parser_parse` or other parsing functions. If you don't want to resume,
- * and instead intend to use this parser to parse some other document, you must
- * call `ts_parser_reset` first.
- */
-void ts_parser_reset(TSParser *self);
-
-/**
- * Set the maximum duration in microseconds that parsing should be allowed to
- * take before halting.
- *
- * If parsing takes longer than this, it will halt early, returning NULL.
- * See `ts_parser_parse` for more information.
- */
-void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout);
-
-/**
- * Get the duration in microseconds that parsing is allowed to take.
- */
-uint64_t ts_parser_timeout_micros(const TSParser *self);
-
-/**
- * Set the parser's current cancellation flag pointer.
- *
- * If a non-null pointer is assigned, then the parser will periodically read
- * from this pointer during parsing. If it reads a non-zero value, it will
- * halt early, returning NULL. See `ts_parser_parse` for more information.
- */
-void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);
-
-/**
- * Get the parser's current cancellation flag pointer.
- */
-const size_t *ts_parser_cancellation_flag(const TSParser *self);
-
-/**
- * Set the logger that a parser should use during parsing.
- *
- * The parser does not take ownership over the logger payload. If a logger was
- * previously assigned, the caller is responsible for releasing any memory
- * owned by the previous logger.
- */
-void ts_parser_set_logger(TSParser *self, TSLogger logger);
-
-/**
- * Get the parser's current logger.
- */
-TSLogger ts_parser_logger(const TSParser *self);
-
-/**
- * Set the file descriptor to which the parser should write debugging graphs
- * during parsing. The graphs are formatted in the DOT language. You may want
- * to pipe these graphs directly to a `dot(1)` process in order to generate
- * SVG output. You can turn off this logging by passing a negative number.
- */
-void ts_parser_print_dot_graphs(TSParser *self, int file);
-
-/******************/
-/* Section - Tree */
-/******************/
-
-/**
- * Create a shallow copy of the syntax tree. This is very fast.
- *
- * You need to copy a syntax tree in order to use it on more than one thread at
- * a time, as syntax trees are not thread safe.
- */
-TSTree *ts_tree_copy(const TSTree *self);
-
-/**
- * Delete the syntax tree, freeing all of the memory that it used.
- */
-void ts_tree_delete(TSTree *self);
-
-/**
- * Get the root node of the syntax tree.
- */
-TSNode ts_tree_root_node(const TSTree *self);
-
-/**
- * Get the language that was used to parse the syntax tree.
- */
-const TSLanguage *ts_tree_language(const TSTree *);
-
-/**
- * Edit the syntax tree to keep it in sync with source code that has been
- * edited.
- *
- * You must describe the edit both in terms of byte offsets and in terms of
- * (row, column) coordinates.
- */
-void ts_tree_edit(TSTree *self, const TSInputEdit *edit);
-
-/**
- * Compare an old edited syntax tree to a new syntax tree representing the same
- * document, returning an array of ranges whose syntactic structure has changed.
- *
- * For this to work correctly, the old syntax tree must have been edited such
- * that its ranges match up to the new tree. Generally, you'll want to call
- * this function right after calling one of the `ts_parser_parse` functions.
- * You need to pass the old tree that was passed to parse, as well as the new
- * tree that was returned from that function.
- *
- * The returned array is allocated using `malloc` and the caller is responsible
- * for freeing it using `free`. The length of the array will be written to the
- * given `length` pointer.
- */
-TSRange *ts_tree_get_changed_ranges(
- const TSTree *old_tree,
- const TSTree *new_tree,
- uint32_t *length
-);
-
-/**
- * Write a DOT graph describing the syntax tree to the given file.
- */
-void ts_tree_print_dot_graph(const TSTree *, FILE *);
-
-/******************/
-/* Section - Node */
-/******************/
-
-/**
- * Get the node's type as a null-terminated string.
- */
-const char *ts_node_type(TSNode);
-
-/**
- * Get the node's type as a numerical id.
- */
-TSSymbol ts_node_symbol(TSNode);
-
-/**
- * Get the node's start byte.
- */
-uint32_t ts_node_start_byte(TSNode);
-
-/**
- * Get the node's start position in terms of rows and columns.
- */
-TSPoint ts_node_start_point(TSNode);
-
-/**
- * Get the node's end byte.
- */
-uint32_t ts_node_end_byte(TSNode);
-
-/**
- * Get the node's end position in terms of rows and columns.
- */
-TSPoint ts_node_end_point(TSNode);
-
-/**
- * Get an S-expression representing the node as a string.
- *
- * This string is allocated with `malloc` and the caller is responsible for
- * freeing it using `free`.
- */
-char *ts_node_string(TSNode);
-
-/**
- * Check if the node is null. Functions like `ts_node_child` and
- * `ts_node_next_sibling` will return a null node to indicate that no such node
- * was found.
- */
-bool ts_node_is_null(TSNode);
-
-/**
- * Check if the node is *named*. Named nodes correspond to named rules in the
- * grammar, whereas *anonymous* nodes correspond to string literals in the
- * grammar.
- */
-bool ts_node_is_named(TSNode);
-
-/**
- * Check if the node is *missing*. Missing nodes are inserted by the parser in
- * order to recover from certain kinds of syntax errors.
- */
-bool ts_node_is_missing(TSNode);
-
-/**
- * Check if the node is *extra*. Extra nodes represent things like comments,
- * which are not required by the grammar, but can appear anywhere.
- */
-bool ts_node_is_extra(TSNode);
-
-/**
- * Check if a syntax node has been edited.
- */
-bool ts_node_has_changes(TSNode);
-
-/**
- * Check if the node is a syntax error or contains any syntax errors.
- */
-bool ts_node_has_error(TSNode);
-
-/**
- * Get the node's immediate parent.
- */
-TSNode ts_node_parent(TSNode);
-
-/**
- * Get the node's child at the given index, where zero represents the first
- * child.
- */
-TSNode ts_node_child(TSNode, uint32_t);
-
-/**
- * Get the node's number of children.
- */
-uint32_t ts_node_child_count(TSNode);
-
-/**
- * Get the node's *named* child at the given index.
- *
- * See also `ts_node_is_named`.
- */
-TSNode ts_node_named_child(TSNode, uint32_t);
-
-/**
- * Get the node's number of *named* children.
- *
- * See also `ts_node_is_named`.
- */
-uint32_t ts_node_named_child_count(TSNode);
-
-/**
- * Get the node's child with the given field name.
- */
-TSNode ts_node_child_by_field_name(
- TSNode self,
- const char *field_name,
- uint32_t field_name_length
-);
-
-/**
- * Get the node's child with the given numerical field id.
- *
- * You can convert a field name to an id using the
- * `ts_language_field_id_for_name` function.
- */
-TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
-
-/**
- * Get the node's next / previous sibling.
- */
-TSNode ts_node_next_sibling(TSNode);
-TSNode ts_node_prev_sibling(TSNode);
-
-/**
- * Get the node's next / previous *named* sibling.
- */
-TSNode ts_node_next_named_sibling(TSNode);
-TSNode ts_node_prev_named_sibling(TSNode);
-
-/**
- * Get the node's first child that extends beyond the given byte offset.
- */
-TSNode ts_node_first_child_for_byte(TSNode, uint32_t);
-
-/**
- * Get the node's first named child that extends beyond the given byte offset.
- */
-TSNode ts_node_first_named_child_for_byte(TSNode, uint32_t);
-
-/**
- * Get the smallest node within this node that spans the given range of bytes
- * or (row, column) positions.
- */
-TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
-TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
-
-/**
- * Get the smallest named node within this node that spans the given range of
- * bytes or (row, column) positions.
- */
-TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
-TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
-
-/**
- * Edit the node to keep it in-sync with source code that has been edited.
- *
- * This function is only rarely needed. When you edit a syntax tree with the
- * `ts_tree_edit` function, all of the nodes that you retrieve from the tree
- * afterward will already reflect the edit. You only need to use `ts_node_edit`
- * when you have a `TSNode` instance that you want to keep and continue to use
- * after an edit.
- */
-void ts_node_edit(TSNode *, const TSInputEdit *);
-
-/**
- * Check if two nodes are identical.
- */
-bool ts_node_eq(TSNode, TSNode);
-
-/************************/
-/* Section - TreeCursor */
-/************************/
-
-/**
- * Create a new tree cursor starting from the given node.
- *
- * A tree cursor allows you to walk a syntax tree more efficiently than is
- * possible using the `TSNode` functions. It is a mutable object that is always
- * on a certain syntax node, and can be moved imperatively to different nodes.
- */
-TSTreeCursor ts_tree_cursor_new(TSNode);
-
-/**
- * Delete a tree cursor, freeing all of the memory that it used.
- */
-void ts_tree_cursor_delete(TSTreeCursor *);
-
-/**
- * Re-initialize a tree cursor to start at a different node.
- */
-void ts_tree_cursor_reset(TSTreeCursor *, TSNode);
-
-/**
- * Get the tree cursor's current node.
- */
-TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
-
-/**
- * Get the field name of the tree cursor's current node.
- *
- * This returns `NULL` if the current node doesn't have a field.
- * See also `ts_node_child_by_field_name`.
- */
-const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
-
-/**
- * Get the field id of the tree cursor's current node.
- *
- * This returns zero if the current node doesn't have a field.
- * See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
- */
-TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
-
-/**
- * Move the cursor to the parent of its current node.
- *
- * This returns `true` if the cursor successfully moved, and returns `false`
- * if there was no parent node (the cursor was already on the root node).
- */
-bool ts_tree_cursor_goto_parent(TSTreeCursor *);
-
-/**
- * Move the cursor to the next sibling of its current node.
- *
- * This returns `true` if the cursor successfully moved, and returns `false`
- * if there was no next sibling node.
- */
-bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
-
-/**
- * Move the cursor to the first child of its current node.
- *
- * This returns `true` if the cursor successfully moved, and returns `false`
- * if there were no children.
- */
-bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
-
-/**
- * Move the cursor to the first child of its current node that extends beyond
- * the given byte offset.
- *
- * This returns the index of the child node if one was found, and returns -1
- * if no such child was found.
- */
-int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
-
-TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *);
-
-/*******************/
-/* Section - Query */
-/*******************/
-
-/**
- * Create a new query from a string containing one or more S-expression
- * patterns. The query is associated with a particular language, and can
- * only be run on syntax nodes parsed with that language.
- *
- * If all of the given patterns are valid, this returns a `TSQuery`.
- * If a pattern is invalid, this returns `NULL`, and provides two pieces
- * of information about the problem:
- * 1. The byte offset of the error is written to the `error_offset` parameter.
- * 2. The type of error is written to the `error_type` parameter.
- */
-TSQuery *ts_query_new(
- const TSLanguage *language,
- const char *source,
- uint32_t source_len,
- uint32_t *error_offset,
- TSQueryError *error_type
-);
-
-/**
- * Delete a query, freeing all of the memory that it used.
- */
-void ts_query_delete(TSQuery *);
-
-/**
- * Get the number of patterns, captures, or string literals in the query.
- */
-uint32_t ts_query_pattern_count(const TSQuery *);
-uint32_t ts_query_capture_count(const TSQuery *);
-uint32_t ts_query_string_count(const TSQuery *);
-
-/**
- * Get the byte offset where the given pattern starts in the query's source.
- *
- * This can be useful when combining queries by concatenating their source
- * code strings.
- */
-uint32_t ts_query_start_byte_for_pattern(const TSQuery *, uint32_t);
-
-/**
- * Get all of the predicates for the given pattern in the query.
- *
- * The predicates are represented as a single array of steps. There are three
- * types of steps in this array, which correspond to the three legal values for
- * the `type` field:
- * - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names
- * of captures. Their `value_id` can be used with the
- * `ts_query_capture_name_for_id` function to obtain the name of the capture.
- * - `TSQueryPredicateStepTypeString` - Steps with this type represent literal
- * strings. Their `value_id` can be used with the
- * `ts_query_string_value_for_id` function to obtain their string value.
- * - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*
- * that represent the end of an individual predicate. If a pattern has two
- * predicates, then there will be two steps with this `type` in the array.
- */
-const TSQueryPredicateStep *ts_query_predicates_for_pattern(
- const TSQuery *self,
- uint32_t pattern_index,
- uint32_t *length
-);
-
-/**
- * Get the name and length of one of the query's captures, or one of the
- * query's string literals. Each capture and string is associated with a
- * numeric id based on the order that it appeared in the query's source.
- */
-const char *ts_query_capture_name_for_id(
- const TSQuery *,
- uint32_t id,
- uint32_t *length
-);
-const char *ts_query_string_value_for_id(
- const TSQuery *,
- uint32_t id,
- uint32_t *length
-);
-
-/**
- * Disable a certain capture within a query.
- *
- * This prevents the capture from being returned in matches, and also avoids
- * any resource usage associated with recording the capture. Currently, there
- * is no way to undo this.
- */
-void ts_query_disable_capture(TSQuery *, const char *, uint32_t);
-
-/**
- * Disable a certain pattern within a query.
- *
- * This prevents the pattern from matching and removes most of the overhead
- * associated with the pattern. Currently, there is no way to undo this.
- */
-void ts_query_disable_pattern(TSQuery *, uint32_t);
-
-/**
- * Create a new cursor for executing a given query.
- *
- * The cursor stores the state that is needed to iteratively search
- * for matches. To use the query cursor, first call `ts_query_cursor_exec`
- * to start running a given query on a given syntax node. Then, there are
- * two options for consuming the results of the query:
- * 1. Repeatedly call `ts_query_cursor_next_match` to iterate over all of the
- * *matches* in the order that they were found. Each match contains the
- * index of the pattern that matched, and an array of captures. Because
- * multiple patterns can match the same set of nodes, one match may contain
- * captures that appear *before* some of the captures from a previous match.
- * 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the
- * individual *captures* in the order that they appear. This is useful if
- * you don't care about which pattern matched, and just want a single ordered
- * sequence of captures.
- *
- * If you don't care about consuming all of the results, you can stop calling
- * `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point.
- * You can then start executing another query on another node by calling
- * `ts_query_cursor_exec` again.
- */
-TSQueryCursor *ts_query_cursor_new(void);
-
-/**
- * Delete a query cursor, freeing all of the memory that it used.
- */
-void ts_query_cursor_delete(TSQueryCursor *);
-
-/**
- * Start running a given query on a given node.
- */
-void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode);
-
-/**
- * Set the range of bytes or (row, column) positions in which the query
- * will be executed.
- */
-void ts_query_cursor_set_byte_range(TSQueryCursor *, uint32_t, uint32_t);
-void ts_query_cursor_set_point_range(TSQueryCursor *, TSPoint, TSPoint);
-
-/**
- * Advance to the next match of the currently running query.
- *
- * If there is a match, write it to `*match` and return `true`.
- * Otherwise, return `false`.
- */
-bool ts_query_cursor_next_match(TSQueryCursor *, TSQueryMatch *match);
-void ts_query_cursor_remove_match(TSQueryCursor *, uint32_t id);
-
-/**
- * Advance to the next capture of the currently running query.
- *
- * If there is a capture, write its match to `*match` and its index within
- * the match's capture list to `*capture_index`. Otherwise, return `false`.
- */
-bool ts_query_cursor_next_capture(
- TSQueryCursor *,
- TSQueryMatch *match,
- uint32_t *capture_index
-);
-
-/**********************/
-/* Section - Language */
-/**********************/
-
-/**
- * Get the number of distinct node types in the language.
- */
-uint32_t ts_language_symbol_count(const TSLanguage *);
-
-/**
- * Get a node type string for the given numerical id.
- */
-const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
-
-/**
- * Get the numerical id for the given node type string.
- */
-TSSymbol ts_language_symbol_for_name(
- const TSLanguage *self,
- const char *string,
- uint32_t length,
- bool is_named
-);
-
-/**
- * Get the number of distinct field names in the language.
- */
-uint32_t ts_language_field_count(const TSLanguage *);
-
-/**
- * Get the field name string for the given numerical id.
- */
-const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
-
-/**
- * Get the numerical id for the given field name string.
- */
-TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
-
-/**
- * Check whether the given node type id belongs to named nodes, anonymous nodes,
- * or hidden nodes.
- *
- * See also `ts_node_is_named`. Hidden nodes are never returned from the API.
- */
-TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol);
-
-/**
- * Get the ABI version number for this language. This version number is used
- * to ensure that languages were generated by a compatible version of
- * Tree-sitter.
- *
- * See also `ts_parser_set_language`.
- */
-uint32_t ts_language_version(const TSLanguage *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_API_H_
diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h
deleted file mode 100644
index 26cb8448f1..0000000000
--- a/src/tree_sitter/array.h
+++ /dev/null
@@ -1,158 +0,0 @@
-#ifndef TREE_SITTER_ARRAY_H_
-#define TREE_SITTER_ARRAY_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <string.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <assert.h>
-#include <stdbool.h>
-#include "./alloc.h"
-
-// A growable, typed array. `T` is the element type, `size` is the number of
-// elements in use, and `capacity` is the number of elements that `contents`
-// has room for.
-#define Array(T)       \
-  struct {             \
-    T *contents;       \
-    uint32_t size;     \
-    uint32_t capacity; \
-  }
-
-// Reset every field of an array to the empty state. Nothing is freed.
-#define array_init(self) \
-  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
-
-// Brace initializer for an empty array, in field order
-// (contents, size, capacity).
-#define array_new() \
-  { NULL, 0, 0 }
-
-// Get a pointer to the element at `index`, asserting that it is in bounds.
-// `index` is parenthesized before the cast so that an expression argument
-// such as `i - 1` is converted as a whole rather than only its first operand.
-#define array_get(self, index) \
-  (assert((uint32_t)(index) < (self)->size), &(self)->contents[index])
-
-// Get a pointer to the first element (asserts that the array is non-empty).
-#define array_front(self) array_get(self, 0)
-
-// Get a pointer to the last element (asserts that the array is non-empty).
-#define array_back(self) array_get(self, (self)->size - 1)
-
-// Drop all elements while keeping the allocated storage.
-#define array_clear(self) ((self)->size = 0)
-
-// Make sure the array has room for at least `new_capacity` elements.
-#define array_reserve(self, new_capacity) \
-  array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity)
-
-// Remove the element at `index`, shifting the following elements down.
-#define array_erase(self, index) \
-  array__erase((VoidArray *)(self), array__elem_size(self), index)
-
-// Free the array's storage and reset it to the empty state.
-// `self` is parenthesized so that the cast applies to the whole argument.
-#define array_delete(self) array__delete((VoidArray *)(self))
-
-// Append `element`, growing the storage first if it is full.
-#define array_push(self, element)                               \
-  (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \
-   (self)->contents[(self)->size++] = (element))
-
-// Append `count` zero-filled elements to the end of the array.
-#define array_grow_by(self, count)                                               \
-  (array__grow((VoidArray *)(self), count, array__elem_size(self)),              \
-   memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \
-   (self)->size += (count))
-
-// Append every element of the array `other`.
-#define array_push_all(self, other) \
-  array_splice((self), (self)->size, 0, (other)->size, (other)->contents)
-
-// Replace the `old_count` elements starting at `index` with `new_count`
-// elements copied from `new_contents`. When `new_contents` is NULL, the
-// inserted elements are zero-filled instead.
-#define array_splice(self, index, old_count, new_count, new_contents)          \
-  array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
-                new_count, new_contents)
-
-// Insert a copy of `element` at `index`. `element` must be an lvalue because
-// its address is taken; it is parenthesized so that `&` applies to the whole
-// argument expression.
-#define array_insert(self, index, element) \
-  array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &(element))
-
-// Remove the last element and evaluate to it. No bounds check is performed,
-// so the array must not be empty.
-#define array_pop(self) ((self)->contents[--(self)->size])
-
-// Overwrite the contents of `self` with a copy of the contents of `other`.
-#define array_assign(self, other) \
-  array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))
-
-// Private
-
-typedef Array(void) VoidArray;
-
-#define array__elem_size(self) sizeof(*(self)->contents)
-
-static inline void array__delete(VoidArray *self) {
- ts_free(self->contents);
- self->contents = NULL;
- self->size = 0;
- self->capacity = 0;
-}
-
-static inline void array__erase(VoidArray *self, size_t element_size,
- uint32_t index) {
- assert(index < self->size);
- char *contents = (char *)self->contents;
- memmove(contents + index * element_size, contents + (index + 1) * element_size,
- (self->size - index - 1) * element_size);
- self->size--;
-}
-
-static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) {
- if (new_capacity > self->capacity) {
- if (self->contents) {
- self->contents = ts_realloc(self->contents, new_capacity * element_size);
- } else {
- self->contents = ts_calloc(new_capacity, element_size);
- }
- self->capacity = new_capacity;
- }
-}
-
-static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) {
- array__reserve(self, element_size, other->size);
- self->size = other->size;
- memcpy(self->contents, other->contents, self->size * element_size);
-}
-
-static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
- size_t new_size = self->size + count;
- if (new_size > self->capacity) {
- size_t new_capacity = self->capacity * 2;
- if (new_capacity < 8) new_capacity = 8;
- if (new_capacity < new_size) new_capacity = new_size;
- array__reserve(self, element_size, new_capacity);
- }
-}
-
-static inline void array__splice(VoidArray *self, size_t element_size,
- uint32_t index, uint32_t old_count,
- uint32_t new_count, const void *elements) {
- uint32_t new_size = self->size + new_count - old_count;
- uint32_t old_end = index + old_count;
- uint32_t new_end = index + new_count;
- assert(old_end <= self->size);
-
- array__reserve(self, element_size, new_size);
-
- char *contents = (char *)self->contents;
- if (self->size > old_end) {
- memmove(
- contents + new_end * element_size,
- contents + old_end * element_size,
- (self->size - old_end) * element_size
- );
- }
- if (new_count > 0) {
- if (elements) {
- memcpy(
- (contents + index * element_size),
- elements,
- new_count * element_size
- );
- } else {
- memset(
- (contents + index * element_size),
- 0,
- new_count * element_size
- );
- }
- }
- self->size += new_count - old_count;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_ARRAY_H_
diff --git a/src/tree_sitter/atomic.h b/src/tree_sitter/atomic.h
deleted file mode 100644
index 7bd0e850a9..0000000000
--- a/src/tree_sitter/atomic.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef TREE_SITTER_ATOMIC_H_
-#define TREE_SITTER_ATOMIC_H_
-
-#include <stdint.h>
-
-#ifdef _WIN32
-
-#include <windows.h>
-
-static inline size_t atomic_load(const volatile size_t *p) {
- return *p;
-}
-
-static inline uint32_t atomic_inc(volatile uint32_t *p) {
- return InterlockedIncrement((long volatile *)p);
-}
-
-static inline uint32_t atomic_dec(volatile uint32_t *p) {
- return InterlockedDecrement((long volatile *)p);
-}
-
-#else
-
-static inline size_t atomic_load(const volatile size_t *p) {
-#ifdef __ATOMIC_RELAXED
- return __atomic_load_n(p, __ATOMIC_RELAXED);
-#else
- return __sync_fetch_and_add((volatile size_t *)p, 0);
-#endif
-}
-
-static inline uint32_t atomic_inc(volatile uint32_t *p) {
- return __sync_add_and_fetch(p, 1u);
-}
-
-static inline uint32_t atomic_dec(volatile uint32_t *p) {
- return __sync_sub_and_fetch(p, 1u);
-}
-
-#endif
-
-#endif // TREE_SITTER_ATOMIC_H_
diff --git a/src/tree_sitter/bits.h b/src/tree_sitter/bits.h
deleted file mode 100644
index ce7a715567..0000000000
--- a/src/tree_sitter/bits.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef TREE_SITTER_BITS_H_
-#define TREE_SITTER_BITS_H_
-
-#include <stdint.h>
-
-static inline uint32_t bitmask_for_index(uint16_t id) {
- return (1u << (31 - id));
-}
-
-#if defined _WIN32 && !defined __GNUC__
-
-#include <intrin.h>
-
-static inline uint32_t count_leading_zeros(uint32_t x) {
- if (x == 0) return 32;
- uint32_t result;
- _BitScanReverse(&result, x);
- return 31 - result;
-}
-
-#else
-
-static inline uint32_t count_leading_zeros(uint32_t x) {
- if (x == 0) return 32;
- return __builtin_clz(x);
-}
-
-#endif
-#endif // TREE_SITTER_BITS_H_
diff --git a/src/tree_sitter/clock.h b/src/tree_sitter/clock.h
deleted file mode 100644
index 94545f3566..0000000000
--- a/src/tree_sitter/clock.h
+++ /dev/null
@@ -1,141 +0,0 @@
-#ifndef TREE_SITTER_CLOCK_H_
-#define TREE_SITTER_CLOCK_H_
-
-#include <stdint.h>
-
-typedef uint64_t TSDuration;
-
-#ifdef _WIN32
-
-// Windows:
-// * Represent a time as a performance counter value.
-// * Represent a duration as a number of performance counter ticks.
-
-#include <windows.h>
-typedef uint64_t TSClock;
-
-static inline TSDuration duration_from_micros(uint64_t micros) {
- LARGE_INTEGER frequency;
- QueryPerformanceFrequency(&frequency);
- return micros * (uint64_t)frequency.QuadPart / 1000000;
-}
-
-static inline uint64_t duration_to_micros(TSDuration self) {
- LARGE_INTEGER frequency;
- QueryPerformanceFrequency(&frequency);
- return self * 1000000 / (uint64_t)frequency.QuadPart;
-}
-
-static inline TSClock clock_null(void) {
- return 0;
-}
-
-static inline TSClock clock_now(void) {
- LARGE_INTEGER result;
- QueryPerformanceCounter(&result);
- return (uint64_t)result.QuadPart;
-}
-
-static inline TSClock clock_after(TSClock base, TSDuration duration) {
- return base + duration;
-}
-
-static inline bool clock_is_null(TSClock self) {
- return !self;
-}
-
-static inline bool clock_is_gt(TSClock self, TSClock other) {
- return self > other;
-}
-
-#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
-
-// POSIX with monotonic clock support (Linux)
-// * Represent a time as a monotonic (seconds, nanoseconds) pair.
-// * Represent a duration as a number of microseconds.
-//
-// On these platforms, parse timeouts will correspond accurately to
-// real time, regardless of what other processes are running.
-
-#include <time.h>
-typedef struct timespec TSClock;
-
-static inline TSDuration duration_from_micros(uint64_t micros) {
- return micros;
-}
-
-static inline uint64_t duration_to_micros(TSDuration self) {
- return self;
-}
-
-static inline TSClock clock_now(void) {
- TSClock result;
- clock_gettime(CLOCK_MONOTONIC, &result);
- return result;
-}
-
-static inline TSClock clock_null(void) {
- return (TSClock) {0, 0};
-}
-
-// Return the time that lies `duration` microseconds after `base`.
-//
-// The nanosecond field is normalized: if adding the sub-second part of the
-// duration pushes `tv_nsec` to one second or more, the excess is carried
-// into `tv_sec`. Without the carry, `clock_is_gt`, which compares seconds
-// before nanoseconds, could order two timestamps incorrectly.
-//
-// A single carry is enough as long as `base.tv_nsec` is itself below one
-// second, because the added amount is always below one second.
-static inline TSClock clock_after(TSClock base, TSDuration duration) {
-  TSClock result = base;
-  result.tv_sec += duration / 1000000;
-  result.tv_nsec += (duration % 1000000) * 1000;
-  if (result.tv_nsec >= 1000000000) {
-    result.tv_nsec -= 1000000000;
-    ++(result.tv_sec);
-  }
-  return result;
-}
-
-static inline bool clock_is_null(TSClock self) {
- return !self.tv_sec;
-}
-
-static inline bool clock_is_gt(TSClock self, TSClock other) {
- if (self.tv_sec > other.tv_sec) return true;
- if (self.tv_sec < other.tv_sec) return false;
- return self.tv_nsec > other.tv_nsec;
-}
-
-#else
-
-// macOS or POSIX without monotonic clock support
-// * Represent a time as a process clock value.
-// * Represent a duration as a number of process clock ticks.
-//
-// On these platforms, parse timeouts may be affected by other processes,
-// which is not ideal, but is better than using a non-monotonic time API
-// like `gettimeofday`.
-
-#include <time.h>
-typedef uint64_t TSClock;
-
-static inline TSDuration duration_from_micros(uint64_t micros) {
- return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
-}
-
-static inline uint64_t duration_to_micros(TSDuration self) {
- return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
-}
-
-static inline TSClock clock_null(void) {
- return 0;
-}
-
-static inline TSClock clock_now(void) {
- return (uint64_t)clock();
-}
-
-static inline TSClock clock_after(TSClock base, TSDuration duration) {
- return base + duration;
-}
-
-static inline bool clock_is_null(TSClock self) {
- return !self;
-}
-
-static inline bool clock_is_gt(TSClock self, TSClock other) {
- return self > other;
-}
-
-#endif
-
-#endif // TREE_SITTER_CLOCK_H_
diff --git a/src/tree_sitter/error_costs.h b/src/tree_sitter/error_costs.h
deleted file mode 100644
index 32d3666a66..0000000000
--- a/src/tree_sitter/error_costs.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef TREE_SITTER_ERROR_COSTS_H_
-#define TREE_SITTER_ERROR_COSTS_H_
-
-#define ERROR_STATE 0
-#define ERROR_COST_PER_RECOVERY 500
-#define ERROR_COST_PER_MISSING_TREE 110
-#define ERROR_COST_PER_SKIPPED_TREE 100
-#define ERROR_COST_PER_SKIPPED_LINE 30
-#define ERROR_COST_PER_SKIPPED_CHAR 1
-
-#endif
diff --git a/src/tree_sitter/get_changed_ranges.c b/src/tree_sitter/get_changed_ranges.c
deleted file mode 100644
index 5bd1d814bd..0000000000
--- a/src/tree_sitter/get_changed_ranges.c
+++ /dev/null
@@ -1,482 +0,0 @@
-#include "./get_changed_ranges.h"
-#include "./subtree.h"
-#include "./language.h"
-#include "./error_costs.h"
-#include "./tree_cursor.h"
-#include <assert.h>
-
-// #define DEBUG_GET_CHANGED_RANGES
-
-// Record the range from `start` to `end` in the range array `self`.
-//
-// If the array is non-empty and `start` begins at or before the end byte of
-// the last stored range, that last range is extended in place instead of a
-// new entry being added. Otherwise a new range is appended, but only when it
-// is non-empty (`start.bytes < end.bytes`).
-static void ts_range_array_add(TSRangeArray *self, Length start, Length end) {
- if (self->size > 0) {
- TSRange *last_range = array_back(self);
- if (start.bytes <= last_range->end_byte) {
- // The new range touches or overlaps the last one: merge by overwriting
- // the last range's end. NOTE(review): the end is assigned
- // unconditionally, which presumably relies on callers adding ranges in
- // ascending order - confirm.
- last_range->end_byte = end.bytes;
- last_range->end_point = end.extent;
- return;
- }
- }
-
- if (start.bytes < end.bytes) {
- // Initializer order: start point, end point, start byte, end byte.
- TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
- array_push(self, range);
- }
-}
-
-// Check whether any range in `self`, starting the scan at `start_index`,
-// overlaps the byte span from `start_byte` to `end_byte`.
-//
-// Returns true if the first range that ends after `start_byte` also starts
-// before `end_byte`, and false otherwise.
-// NOTE(review): only the first such range is examined, which presumably
-// assumes the ranges are sorted by position - confirm against callers.
-bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index,
- uint32_t start_byte, uint32_t end_byte) {
- for (unsigned i = start_index; i < self->size; i++) {
- TSRange *range = &self->contents[i];
- // Skip ranges that end at or before the start of the span.
- if (range->end_byte > start_byte) {
- // This range ends after the span starts; it overlaps only if it also
- // starts before the span ends.
- if (range->start_byte >= end_byte) break;
- return true;
- }
- }
- return false;
-}
-
-void ts_range_array_get_changed_ranges(
- const TSRange *old_ranges, unsigned old_range_count,
- const TSRange *new_ranges, unsigned new_range_count,
- TSRangeArray *differences
-) {
- unsigned new_index = 0;
- unsigned old_index = 0;
- Length current_position = length_zero();
- bool in_old_range = false;
- bool in_new_range = false;
-
- while (old_index < old_range_count || new_index < new_range_count) {
- const TSRange *old_range = &old_ranges[old_index];
- const TSRange *new_range = &new_ranges[new_index];
-
- Length next_old_position;
- if (in_old_range) {
- next_old_position = (Length) {old_range->end_byte, old_range->end_point};
- } else if (old_index < old_range_count) {
- next_old_position = (Length) {old_range->start_byte, old_range->start_point};
- } else {
- next_old_position = LENGTH_MAX;
- }
-
- Length next_new_position;
- if (in_new_range) {
- next_new_position = (Length) {new_range->end_byte, new_range->end_point};
- } else if (new_index < new_range_count) {
- next_new_position = (Length) {new_range->start_byte, new_range->start_point};
- } else {
- next_new_position = LENGTH_MAX;
- }
-
- if (next_old_position.bytes < next_new_position.bytes) {
- if (in_old_range != in_new_range) {
- ts_range_array_add(differences, current_position, next_old_position);
- }
- if (in_old_range) old_index++;
- current_position = next_old_position;
- in_old_range = !in_old_range;
- } else if (next_new_position.bytes < next_old_position.bytes) {
- if (in_old_range != in_new_range) {
- ts_range_array_add(differences, current_position, next_new_position);
- }
- if (in_new_range) new_index++;
- current_position = next_new_position;
- in_new_range = !in_new_range;
- } else {
- if (in_old_range != in_new_range) {
- ts_range_array_add(differences, current_position, next_new_position);
- }
- if (in_old_range) old_index++;
- if (in_new_range) new_index++;
- in_old_range = !in_old_range;
- in_new_range = !in_new_range;
- current_position = next_new_position;
- }
- }
-}
-
-typedef struct {
- TreeCursor cursor;
- const TSLanguage *language;
- unsigned visible_depth;
- bool in_padding;
-} Iterator;
-
-static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language) {
- array_clear(&cursor->stack);
- array_push(&cursor->stack, ((TreeCursorEntry){
- .subtree = tree,
- .position = length_zero(),
- .child_index = 0,
- .structural_child_index = 0,
- }));
- return (Iterator) {
- .cursor = *cursor,
- .language = language,
- .visible_depth = 1,
- .in_padding = false,
- };
-}
-
-static bool iterator_done(Iterator *self) {
- return self->cursor.stack.size == 0;
-}
-
-static Length iterator_start_position(Iterator *self) {
- TreeCursorEntry entry = *array_back(&self->cursor.stack);
- if (self->in_padding) {
- return entry.position;
- } else {
- return length_add(entry.position, ts_subtree_padding(*entry.subtree));
- }
-}
-
-static Length iterator_end_position(Iterator *self) {
- TreeCursorEntry entry = *array_back(&self->cursor.stack);
- Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree));
- if (self->in_padding) {
- return result;
- } else {
- return length_add(result, ts_subtree_size(*entry.subtree));
- }
-}
-
-static bool iterator_tree_is_visible(const Iterator *self) {
- TreeCursorEntry entry = *array_back(&self->cursor.stack);
- if (ts_subtree_visible(*entry.subtree)) return true;
- if (self->cursor.stack.size > 1) {
- Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
- const TSSymbol *alias_sequence = ts_language_alias_sequence(
- self->language,
- parent.ptr->production_id
- );
- return alias_sequence && alias_sequence[entry.structural_child_index] != 0;
- }
- return false;
-}
-
-static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
- TSSymbol *alias_symbol, uint32_t *start_byte) {
- uint32_t i = self->cursor.stack.size - 1;
-
- if (self->in_padding) {
- if (i == 0) return;
- i--;
- }
-
- for (; i + 1 > 0; i--) {
- TreeCursorEntry entry = self->cursor.stack.contents[i];
-
- if (i > 0) {
- const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
- const TSSymbol *alias_sequence = ts_language_alias_sequence(
- self->language,
- parent->ptr->production_id
- );
- if (alias_sequence) {
- *alias_symbol = alias_sequence[entry.structural_child_index];
- }
- }
-
- if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
- *tree = *entry.subtree;
- *start_byte = entry.position.bytes;
- break;
- }
- }
-}
-
-static void iterator_ascend(Iterator *self) {
- if (iterator_done(self)) return;
- if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
- if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
- self->cursor.stack.size--;
-}
-
-static bool iterator_descend(Iterator *self, uint32_t goal_position) {
- if (self->in_padding) return false;
-
- bool did_descend;
- do {
- did_descend = false;
- TreeCursorEntry entry = *array_back(&self->cursor.stack);
- Length position = entry.position;
- uint32_t structural_child_index = 0;
- for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
- const Subtree *child = &entry.subtree->ptr->children[i];
- Length child_left = length_add(position, ts_subtree_padding(*child));
- Length child_right = length_add(child_left, ts_subtree_size(*child));
-
- if (child_right.bytes > goal_position) {
- array_push(&self->cursor.stack, ((TreeCursorEntry){
- .subtree = child,
- .position = position,
- .child_index = i,
- .structural_child_index = structural_child_index,
- }));
-
- if (iterator_tree_is_visible(self)) {
- if (child_left.bytes > goal_position) {
- self->in_padding = true;
- } else {
- self->visible_depth++;
- }
- return true;
- }
-
- did_descend = true;
- break;
- }
-
- position = child_right;
- if (!ts_subtree_extra(*child)) structural_child_index++;
- }
- } while (did_descend);
-
- return false;
-}
-
-static void iterator_advance(Iterator *self) {
- if (self->in_padding) {
- self->in_padding = false;
- if (iterator_tree_is_visible(self)) {
- self->visible_depth++;
- } else {
- iterator_descend(self, 0);
- }
- return;
- }
-
- for (;;) {
- if (iterator_tree_is_visible(self)) self->visible_depth--;
- TreeCursorEntry entry = array_pop(&self->cursor.stack);
- if (iterator_done(self)) return;
-
- const Subtree *parent = array_back(&self->cursor.stack)->subtree;
- uint32_t child_index = entry.child_index + 1;
- if (ts_subtree_child_count(*parent) > child_index) {
- Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
- uint32_t structural_child_index = entry.structural_child_index;
- if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
- const Subtree *next_child = &parent->ptr->children[child_index];
-
- array_push(&self->cursor.stack, ((TreeCursorEntry){
- .subtree = next_child,
- .position = position,
- .child_index = child_index,
- .structural_child_index = structural_child_index,
- }));
-
- if (iterator_tree_is_visible(self)) {
- if (ts_subtree_padding(*next_child).bytes > 0) {
- self->in_padding = true;
- } else {
- self->visible_depth++;
- }
- } else {
- iterator_descend(self, 0);
- }
- break;
- }
- }
-}
-
-typedef enum {
- IteratorDiffers,
- IteratorMayDiffer,
- IteratorMatches,
-} IteratorComparison;
-
-static IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) {
- Subtree old_tree = NULL_SUBTREE;
- Subtree new_tree = NULL_SUBTREE;
- uint32_t old_start = 0;
- uint32_t new_start = 0;
- TSSymbol old_alias_symbol = 0;
- TSSymbol new_alias_symbol = 0;
- iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
- iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
-
- if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
- if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
-
- if (
- old_alias_symbol == new_alias_symbol &&
- ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
- ) {
- if (old_start == new_start &&
- !ts_subtree_has_changes(old_tree) &&
- ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
- ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
- ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
- ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
- (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
- (ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
- return IteratorMatches;
- } else {
- return IteratorMayDiffer;
- }
- }
-
- return IteratorDiffers;
-}
-
-#ifdef DEBUG_GET_CHANGED_RANGES
-static inline void iterator_print_state(Iterator *self) {
- TreeCursorEntry entry = *array_back(&self->cursor.stack);
- TSPoint start = iterator_start_position(self).extent;
- TSPoint end = iterator_end_position(self).extent;
- const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
- printf(
- "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
- name, self->in_padding ? "(p)" : " ",
- self->visible_depth,
- start.row + 1, start.column,
- end.row + 1, end.column
- );
-}
-#endif
-
-unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree,
- TreeCursor *cursor1, TreeCursor *cursor2,
- const TSLanguage *language,
- const TSRangeArray *included_range_differences,
- TSRange **ranges) {
- TSRangeArray results = array_new();
-
- Iterator old_iter = iterator_new(cursor1, old_tree, language);
- Iterator new_iter = iterator_new(cursor2, new_tree, language);
-
- unsigned included_range_difference_index = 0;
-
- Length position = iterator_start_position(&old_iter);
- Length next_position = iterator_start_position(&new_iter);
- if (position.bytes < next_position.bytes) {
- ts_range_array_add(&results, position, next_position);
- position = next_position;
- } else if (position.bytes > next_position.bytes) {
- ts_range_array_add(&results, next_position, position);
- next_position = position;
- }
-
- do {
- #ifdef DEBUG_GET_CHANGED_RANGES
- printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
- iterator_print_state(&old_iter);
- printf("\tvs\t");
- iterator_print_state(&new_iter);
- puts("");
- #endif
-
- // Compare the old and new subtrees.
- IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
-
- // Even if the two subtrees appear to be identical, they could differ
- // internally if they contain a range of text that was previously
- // excluded from the parse, and is now included, or vice-versa.
- if (comparison == IteratorMatches && ts_range_array_intersects(
- included_range_differences,
- included_range_difference_index,
- position.bytes,
- iterator_end_position(&old_iter).bytes
- )) {
- comparison = IteratorMayDiffer;
- }
-
- bool is_changed = false;
- switch (comparison) {
- // If the subtrees are definitely identical, move to the end
- // of both subtrees.
- case IteratorMatches:
- next_position = iterator_end_position(&old_iter);
- break;
-
- // If the subtrees might differ internally, descend into both
- // subtrees, finding the first child that spans the current position.
- case IteratorMayDiffer:
- if (iterator_descend(&old_iter, position.bytes)) {
- if (!iterator_descend(&new_iter, position.bytes)) {
- is_changed = true;
- next_position = iterator_end_position(&old_iter);
- }
- } else if (iterator_descend(&new_iter, position.bytes)) {
- is_changed = true;
- next_position = iterator_end_position(&new_iter);
- } else {
- next_position = length_min(
- iterator_end_position(&old_iter),
- iterator_end_position(&new_iter)
- );
- }
- break;
-
- // If the subtrees are different, record a change and then move
- // to the end of both subtrees.
- case IteratorDiffers:
- is_changed = true;
- next_position = length_min(
- iterator_end_position(&old_iter),
- iterator_end_position(&new_iter)
- );
- break;
- }
-
- // Ensure that both iterators are caught up to the current position.
- while (
- !iterator_done(&old_iter) &&
- iterator_end_position(&old_iter).bytes <= next_position.bytes
- ) iterator_advance(&old_iter);
- while (
- !iterator_done(&new_iter) &&
- iterator_end_position(&new_iter).bytes <= next_position.bytes
- ) iterator_advance(&new_iter);
-
- // Ensure that both iterators are at the same depth in the tree.
- while (old_iter.visible_depth > new_iter.visible_depth) {
- iterator_ascend(&old_iter);
- }
- while (new_iter.visible_depth > old_iter.visible_depth) {
- iterator_ascend(&new_iter);
- }
-
- if (is_changed) {
- #ifdef DEBUG_GET_CHANGED_RANGES
- printf(
- " change: [[%u, %u] - [%u, %u]]\n",
- position.extent.row + 1, position.extent.column,
- next_position.extent.row + 1, next_position.extent.column
- );
- #endif
-
- ts_range_array_add(&results, position, next_position);
- }
-
- position = next_position;
-
- // Keep track of the current position in the included range differences
- // array in order to avoid scanning the entire array on each iteration.
- while (included_range_difference_index < included_range_differences->size) {
- const TSRange *range = &included_range_differences->contents[
- included_range_difference_index
- ];
- if (range->end_byte <= position.bytes) {
- included_range_difference_index++;
- } else {
- break;
- }
- }
- } while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
-
- Length old_size = ts_subtree_total_size(*old_tree);
- Length new_size = ts_subtree_total_size(*new_tree);
- if (old_size.bytes < new_size.bytes) {
- ts_range_array_add(&results, old_size, new_size);
- } else if (new_size.bytes < old_size.bytes) {
- ts_range_array_add(&results, new_size, old_size);
- }
-
- *cursor1 = old_iter.cursor;
- *cursor2 = new_iter.cursor;
- *ranges = results.contents;
- return results.size;
-}
diff --git a/src/tree_sitter/get_changed_ranges.h b/src/tree_sitter/get_changed_ranges.h
deleted file mode 100644
index a1f1dbb430..0000000000
--- a/src/tree_sitter/get_changed_ranges.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
-#define TREE_SITTER_GET_CHANGED_RANGES_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./tree_cursor.h"
-#include "./subtree.h"
-
-typedef Array(TSRange) TSRangeArray;
-
-void ts_range_array_get_changed_ranges(
- const TSRange *old_ranges, unsigned old_range_count,
- const TSRange *new_ranges, unsigned new_range_count,
- TSRangeArray *differences
-);
-
-bool ts_range_array_intersects(
- const TSRangeArray *self, unsigned start_index,
- uint32_t start_byte, uint32_t end_byte
-);
-
-// Compares `old_tree` with `new_tree` and collects the ranges that differ.
-// On return `*cursor1` and `*cursor2` receive the tree cursors used by the
-// old-tree and new-tree iterators, `*ranges` points at the contents of the
-// result array, and the return value is the number of ranges stored there.
-// NOTE(review): the caller presumably becomes responsible for `*ranges` --
-// confirm against the call site.
-unsigned ts_subtree_get_changed_ranges(
- const Subtree *old_tree, const Subtree *new_tree,
- TreeCursor *cursor1, TreeCursor *cursor2,
- const TSLanguage *language,
- const TSRangeArray *included_range_differences,
- TSRange **ranges
-);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_GET_CHANGED_RANGES_H_
diff --git a/src/tree_sitter/language.c b/src/tree_sitter/language.c
deleted file mode 100644
index c00c49e3c0..0000000000
--- a/src/tree_sitter/language.c
+++ /dev/null
@@ -1,149 +0,0 @@
-#include "./language.h"
-#include "./subtree.h"
-#include "./error_costs.h"
-#include <string.h>
-
-// Sum of the language's `symbol_count` and `alias_count` fields.
-uint32_t ts_language_symbol_count(const TSLanguage *self) {
-  uint32_t total = self->symbol_count;
-  total += self->alias_count;
-  return total;
-}
-
-// Value of the `version` field stored in the language.
-uint32_t ts_language_version(const TSLanguage *self) {
-  const uint32_t stored_version = self->version;
-  return stored_version;
-}
-
-// Number of fields in the language, or 0 when the language's version is
-// older than TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS.
-uint32_t ts_language_field_count(const TSLanguage *self) {
-  if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) return 0;
-  return self->field_count;
-}
-
-// Fill `result` with the parse actions for `symbol` in `state`.
-// The two built-in error symbols always yield an empty, non-reusable entry.
-// For any other symbol the entry header found through ts_language_lookup
-// supplies the count and reusable flag, and the actions start right after it.
-void ts_language_table_entry(
-  const TSLanguage *self,
-  TSStateId state,
-  TSSymbol symbol,
-  TableEntry *result
-) {
-  bool is_error_symbol =
-    symbol == ts_builtin_sym_error ||
-    symbol == ts_builtin_sym_error_repeat;
-  if (is_error_symbol) {
-    result->actions = NULL;
-    result->action_count = 0;
-    result->is_reusable = false;
-    return;
-  }
-
-  assert(symbol < self->token_count);
-  uint32_t header_index = ts_language_lookup(self, state, symbol);
-  const TSParseActionEntry *header = self->parse_actions + header_index;
-  result->actions = (const TSParseAction *)(header + 1);
-  result->action_count = header->entry.count;
-  result->is_reusable = header->entry.reusable;
-}
-
-TSSymbolMetadata ts_language_symbol_metadata(
- const TSLanguage *self,
- TSSymbol symbol
-) {
- if (symbol == ts_builtin_sym_error) {
- return (TSSymbolMetadata){.visible = true, .named = true};
- } else if (symbol == ts_builtin_sym_error_repeat) {
- return (TSSymbolMetadata){.visible = false, .named = false};
- } else {
- return self->symbol_metadata[symbol];
- }
-}
-
-// Translate `symbol` through the language's `public_symbol_map` when the
-// language version provides one. The built-in error symbol, and every
-// symbol of an older language, is returned unchanged.
-TSSymbol ts_language_public_symbol(
-  const TSLanguage *self,
-  TSSymbol symbol
-) {
-  if (symbol == ts_builtin_sym_error) return symbol;
-
-  bool has_public_map =
-    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING;
-  return has_public_map ? self->public_symbol_map[symbol] : symbol;
-}
-
-const char *ts_language_symbol_name(
- const TSLanguage *self,
- TSSymbol symbol
-) {
- if (symbol == ts_builtin_sym_error) {
- return "ERROR";
- } else if (symbol == ts_builtin_sym_error_repeat) {
- return "_ERROR";
- } else if (symbol < ts_language_symbol_count(self)) {
- return self->symbol_names[symbol];
- } else {
- return NULL;
- }
-}
-
-// Find the symbol id for a name.
-//
-// Only the first `length` bytes of `string` are examined. `is_named` selects
-// whether a named or an anonymous symbol is wanted; invisible symbols are
-// never returned.
-//
-// Returns ts_builtin_sym_error for the exact name "ERROR", otherwise the id
-// of the first visible symbol whose name is exactly those `length` bytes
-// (mapped through `public_symbol_map` when the language version has one),
-// or 0 when nothing matches.
-TSSymbol ts_language_symbol_for_name(
-  const TSLanguage *self,
-  const char *string,
-  uint32_t length,
-  bool is_named
-) {
-  // The length must match too: strncmp stops after `length` bytes, so
-  // comparing with the caller's length alone would also accept every proper
-  // prefix of "ERROR" ("", "E", "ER", ...) as the error symbol.
-  if (length == 5 && !strncmp(string, "ERROR", 5)) return ts_builtin_sym_error;
-
-  uint32_t count = ts_language_symbol_count(self);
-  for (TSSymbol i = 0; i < count; i++) {
-    TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
-    if (!metadata.visible || metadata.named != is_named) continue;
-    const char *symbol_name = self->symbol_names[i];
-    // Equal prefix plus a terminator at `length` means the names are equal.
-    if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
-      if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {
-        return self->public_symbol_map[i];
-      } else {
-        return i;
-      }
-    }
-  }
-  return 0;
-}
-
-// Classify `symbol` from its metadata: named symbols are regular, symbols
-// that are visible but unnamed are anonymous, and the rest are auxiliary.
-TSSymbolType ts_language_symbol_type(
-  const TSLanguage *self,
-  TSSymbol symbol
-) {
-  TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol);
-  if (metadata.named) return TSSymbolTypeRegular;
-  return metadata.visible ? TSSymbolTypeAnonymous : TSSymbolTypeAuxiliary;
-}
-
-// Name of the field with the given id, or NULL when the language has no
-// fields or `id` is greater than the field count.
-const char *ts_language_field_name_for_id(
-  const TSLanguage *self,
-  TSFieldId id
-) {
-  uint32_t field_count = ts_language_field_count(self);
-  if (field_count == 0 || id > field_count) return NULL;
-  return self->field_names[id];
-}
-
-// Find the id of the field whose name is exactly the first `name_length`
-// bytes of `name`. Field ids run from 1 to ts_language_field_count()
-// inclusive; 0 is returned when no field matches.
-//
-// The scan stops as soon as `name` compares lower than a candidate.
-// NOTE(review): that early exit, already present in the original `case -1`,
-// relies on `field_names` being stored in ascending order -- confirm against
-// the parser generator.
-TSFieldId ts_language_field_id_for_name(
-  const TSLanguage *self,
-  const char *name,
-  uint32_t name_length
-) {
-  uint32_t count = ts_language_field_count(self);
-  for (TSSymbol i = 1; i < count + 1; i++) {
-    // strncmp is only specified to return a negative, zero or positive
-    // value, so test the sign instead of comparing against exactly -1.
-    int comparison = strncmp(name, self->field_names[i], name_length);
-    if (comparison < 0) return 0;
-    // An equal prefix is a match only if the candidate ends there as well.
-    if (comparison == 0 && self->field_names[i][name_length] == 0) return i;
-  }
-  return 0;
-}
diff --git a/src/tree_sitter/language.h b/src/tree_sitter/language.h
deleted file mode 100644
index 341f0f85af..0000000000
--- a/src/tree_sitter/language.h
+++ /dev/null
@@ -1,143 +0,0 @@
-#ifndef TREE_SITTER_LANGUAGE_H_
-#define TREE_SITTER_LANGUAGE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./subtree.h"
-#include "tree_sitter/parser.h"
-
-#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
-#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
-#define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11
-#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11
-
-// Result of a parse-table lookup, filled in by ts_language_table_entry().
-typedef struct {
- // First action of the entry; NULL for the built-in error symbols.
- const TSParseAction *actions;
- // Number of actions reachable through `actions`.
- uint32_t action_count;
- // Copy of the `reusable` flag from the parse-action entry header.
- bool is_reusable;
-} TableEntry;
-
-void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
-
-TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
-
-TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
-
-// True when `symbol` is greater than zero and below
-// `external_token_count + 1`.
-static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
-  if (!(0 < symbol)) return false;
-  return symbol < self->external_token_count + 1;
-}
-
-// Convenience wrapper around ts_language_table_entry(): stores the number
-// of actions in `*count` and returns a pointer to the first one.
-static inline const TSParseAction *ts_language_actions(
-  const TSLanguage *self,
-  TSStateId state,
-  TSSymbol symbol,
-  uint32_t *count
-) {
-  TableEntry looked_up;
-  ts_language_table_entry(self, state, symbol, &looked_up);
-  const TSParseAction *first_action = looked_up.actions;
-  *count = looked_up.action_count;
-  return first_action;
-}
-
-// True when the table entry for `symbol` in `state` holds at least one
-// action.
-static inline bool ts_language_has_actions(
-  const TSLanguage *self,
-  TSStateId state,
-  TSSymbol symbol
-) {
-  TableEntry looked_up;
-  ts_language_table_entry(self, state, symbol, &looked_up);
-  return looked_up.action_count != 0;
-}
-
-// True when the table entry for `symbol` in `state` is non-empty and its
-// first action is a reduce action.
-static inline bool ts_language_has_reduce_action(
-  const TSLanguage *self,
-  TSStateId state,
-  TSSymbol symbol
-) {
-  TableEntry looked_up;
-  ts_language_table_entry(self, state, symbol, &looked_up);
-  if (looked_up.action_count == 0) return false;
-  return looked_up.actions[0].type == TSParseActionTypeReduce;
-}
-
-// Raw parse-table lookup for (`state`, `symbol`).
-//
-// Languages older than TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES, and
-// states below `large_state_count`, index the `parse_table` array directly.
-// Other states are read from `small_parse_table`: a group count followed by
-// groups, each made of a value, a symbol count, and that many symbols. The
-// value of the first group listing `symbol` is returned, or 0 if none does.
-static inline uint16_t ts_language_lookup(
-  const TSLanguage *self,
-  TSStateId state,
-  TSSymbol symbol
-) {
-  bool uses_dense_table =
-    self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES ||
-    state < self->large_state_count;
-  if (uses_dense_table) {
-    return self->parse_table[state * self->symbol_count + symbol];
-  }
-
-  uint32_t offset = self->small_parse_table_map[state - self->large_state_count];
-  const uint16_t *cursor = &self->small_parse_table[offset];
-  uint16_t group_count = *(cursor++);
-  for (unsigned group = 0; group < group_count; group++) {
-    uint16_t group_value = *(cursor++);
-    uint16_t member_count = *(cursor++);
-    for (unsigned member = 0; member < member_count; member++) {
-      if (*(cursor++) == symbol) return group_value;
-    }
-  }
-  return 0;
-}
-
-// State reached from `state` on `symbol`.
-// The built-in error symbols always give 0. For symbols below `token_count`
-// the last action of the table entry decides: a shift yields its target
-// state (or `state` itself when the shift is marked extra), anything else
-// yields 0. Every other symbol is resolved by a direct table lookup.
-static inline TSStateId ts_language_next_state(
-  const TSLanguage *self,
-  TSStateId state,
-  TSSymbol symbol
-) {
-  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
-    return 0;
-  }
-  if (!(symbol < self->token_count)) {
-    return ts_language_lookup(self, state, symbol);
-  }
-
-  uint32_t action_count;
-  const TSParseAction *actions = ts_language_actions(self, state, symbol, &action_count);
-  if (action_count == 0) return 0;
-
-  TSParseAction last_action = actions[action_count - 1];
-  if (last_action.type != TSParseActionTypeShift) return 0;
-  return last_action.params.shift.extra ? state : last_action.params.shift.state;
-}
-
-// Row of the external scanner's `states` table for `external_scanner_state`
-// (each row is `external_token_count` entries long), or NULL for state 0.
-static inline const bool *
-ts_language_enabled_external_tokens(const TSLanguage *self,
-                                    unsigned external_scanner_state) {
-  if (external_scanner_state != 0) {
-    return self->external_scanner.states + self->external_token_count * external_scanner_state;
-  }
-  return NULL;
-}
-
-// Start of the alias sequence for `production_id` (rows of
-// `max_alias_sequence_length` symbols), or NULL for production 0.
-static inline const TSSymbol *
-ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) {
-  if (production_id > 0) {
-    return self->alias_sequences + production_id * self->max_alias_sequence_length;
-  }
-  return NULL;
-}
-
-// Report the half-open range [*start, *end) of field-map entries for
-// `production_id`. Both pointers are set to NULL when the language predates
-// field support or declares no fields.
-static inline void ts_language_field_map(
-  const TSLanguage *self,
-  uint32_t production_id,
-  const TSFieldMapEntry **start,
-  const TSFieldMapEntry **end
-) {
-  bool has_fields =
-    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS &&
-    self->field_count != 0;
-  if (!has_fields) {
-    *start = NULL;
-    *end = NULL;
-    return;
-  }
-
-  TSFieldMapSlice slice = self->field_map_slices[production_id];
-  const TSFieldMapEntry *first_entry = &self->field_map_entries[slice.index];
-  *start = first_entry;
-  *end = first_entry + slice.length;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_LANGUAGE_H_
diff --git a/src/tree_sitter/length.h b/src/tree_sitter/length.h
deleted file mode 100644
index 61de9fc1d5..0000000000
--- a/src/tree_sitter/length.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef TREE_SITTER_LENGTH_H_
-#define TREE_SITTER_LENGTH_H_
-
-#include <stdlib.h>
-#include <stdbool.h>
-#include "./point.h"
-#include "tree_sitter/api.h"
-
-// A position or size expressed two ways at once: as a byte count and as a
-// TSPoint extent.
-typedef struct {
- uint32_t bytes;
- TSPoint extent;
-} Length;
-
-// Sentinel value: zero bytes combined with a non-zero second extent member,
-// which is exactly the combination length_is_undefined() tests for.
-static const Length LENGTH_UNDEFINED = {0, {0, 1}};
-// Largest representable length: every member is UINT32_MAX.
-static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
-
-// True for lengths that have zero bytes but a non-zero column, the shape of
-// the LENGTH_UNDEFINED sentinel.
-static inline bool length_is_undefined(Length length) {
-  if (length.bytes != 0) return false;
-  return length.extent.column != 0;
-}
-
-// Whichever argument has fewer bytes; `len2` when the byte counts are equal.
-static inline Length length_min(Length len1, Length len2) {
-  if (len1.bytes < len2.bytes) {
-    return len1;
-  }
-  return len2;
-}
-
-// Member-wise sum: byte counts are added and the extents are combined with
-// point_add().
-static inline Length length_add(Length len1, Length len2) {
-  Length sum = {
-    len1.bytes + len2.bytes,
-    point_add(len1.extent, len2.extent),
-  };
-  return sum;
-}
-
-// Member-wise difference: byte counts are subtracted and the extents are
-// combined with point_sub().
-static inline Length length_sub(Length len1, Length len2) {
-  Length difference = {
-    len1.bytes - len2.bytes,
-    point_sub(len1.extent, len2.extent),
-  };
-  return difference;
-}
-
-// The zero length: no bytes, row 0, column 0.
-static inline Length length_zero(void) {
-  Length zero;
-  zero.bytes = 0;
-  zero.extent.row = 0;
-  zero.extent.column = 0;
-  return zero;
-}
-
-#endif
diff --git a/src/tree_sitter/lexer.c b/src/tree_sitter/lexer.c
deleted file mode 100644
index a3c29544d3..0000000000
--- a/src/tree_sitter/lexer.c
+++ /dev/null
@@ -1,391 +0,0 @@
-#include <stdio.h>
-#include "./lexer.h"
-#include "./subtree.h"
-#include "./length.h"
-#include "./unicode.h"
-
-#define LOG(message, character) \
- if (self->logger.log) { \
- snprintf( \
- self->debug_buffer, \
- TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
- 32 <= character && character < 127 ? \
- message " character:'%c'" : \
- message " character:%d", \
- character \
- ); \
- self->logger.log( \
- self->logger.payload, \
- TSLogTypeLex, \
- self->debug_buffer \
- ); \
- }
-
-// Code point U+FEFF; ts_lexer_start() skips it when it is the lookahead at
-// byte offset 0.
-static const int32_t BYTE_ORDER_MARK = 0xFEFF;
-
-// Range installed by ts_lexer_set_included_ranges() when the caller supplies
-// no ranges: it starts at byte 0 / point (0, 0) and ends at the maximum
-// representable byte and point.
-static const TSRange DEFAULT_RANGE = {
- .start_point = {
- .row = 0,
- .column = 0,
- },
- .end_point = {
- .row = UINT32_MAX,
- .column = UINT32_MAX,
- },
- .start_byte = 0,
- .end_byte = UINT32_MAX
-};
-
-// Report whether the lexer is at EOF. EOF is not stored as a separate flag:
-// it is the state in which `current_included_range_index` has moved past
-// the last included range and therefore equals `included_range_count`.
-static bool ts_lexer__eof(const TSLexer *_self) {
-  const Lexer *lexer = (const Lexer *)_self;
-  return lexer->included_range_count == lexer->current_included_range_index;
-}
-
-// Forget the cached chunk of source text. Used when the lexer's position
-// changes and the cached chunk no longer applies.
-static void ts_lexer__clear_chunk(Lexer *self) {
-  self->chunk_start = 0;
-  self->chunk_size = 0;
-  self->chunk = NULL;
-}
-
-// Ask the input's `read` callback for the chunk of source text that starts
-// at the current position. An empty chunk puts the lexer in the EOF state
-// (range index == range count) and leaves no chunk cached.
-static void ts_lexer__get_chunk(Lexer *self) {
-  uint32_t start_byte = self->current_position.bytes;
-  self->chunk_start = start_byte;
-  self->chunk = self->input.read(
-    self->input.payload,
-    start_byte,
-    self->current_position.extent,
-    &self->chunk_size
-  );
-
-  if (self->chunk_size == 0) {
-    self->chunk = NULL;
-    self->current_included_range_index = self->included_range_count;
-  }
-}
-
-// Decode the next unicode character in the current chunk of source code.
-// This assumes that the lexer has already retrieved a chunk of source
-// code that spans the current position.
-// Stores the decoded code point in `data.lookahead` and its size in bytes
-// in `lookahead_size`.
-static void ts_lexer__get_lookahead(Lexer *self) {
- uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
- uint32_t size = self->chunk_size - position_in_chunk;
-
- // Nothing left in the chunk: report a NUL lookahead of size 1.
- if (size == 0) {
- self->lookahead_size = 1;
- self->data.lookahead = '\0';
- return;
- }
-
- // Pick the decoder that matches the input encoding.
- const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
- UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
- ? ts_decode_utf8
- : ts_decode_utf16;
-
- self->lookahead_size = decode(chunk, size, &self->data.lookahead);
-
- // If this chunk ended in the middle of a multi-byte character,
- // try again with a fresh chunk.
- // NOTE(review): if the fresh chunk is empty, ts_lexer__get_chunk leaves
- // `self->chunk` NULL and `chunk_size` 0 before `decode` runs -- confirm the
- // decoders tolerate a NULL pointer with size 0.
- if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
- ts_lexer__get_chunk(self);
- chunk = (const uint8_t *)self->chunk;
- size = self->chunk_size;
- self->lookahead_size = decode(chunk, size, &self->data.lookahead);
- }
-
- // An undecodable sequence is consumed one byte at a time.
- if (self->data.lookahead == TS_DECODE_ERROR) {
- self->lookahead_size = 1;
- }
-}
-
-// Advance to the next character in the source code, retrieving a new
-// chunk of source code if needed.
-// When `skip` is true the token start position is moved to the new
-// position. Does nothing when no chunk is currently loaded.
-static void ts_lexer__advance(TSLexer *_self, bool skip) {
- Lexer *self = (Lexer *)_self;
- if (!self->chunk) return;
-
- if (skip) {
- LOG("skip", self->data.lookahead);
- } else {
- LOG("consume", self->data.lookahead);
- }
-
- // Step over the current lookahead: a newline starts a new row at column
- // 0, any other character widens the column by its size in bytes.
- if (self->lookahead_size) {
- self->current_position.bytes += self->lookahead_size;
- if (self->data.lookahead == '\n') {
- self->current_position.extent.row++;
- self->current_position.extent.column = 0;
- } else {
- self->current_position.extent.column += self->lookahead_size;
- }
- }
-
- // When the end of the current included range is reached, jump to the
- // start of the next range; `current_range` stays NULL when none is left.
- const TSRange *current_range = NULL;
- if (self->current_included_range_index < self->included_range_count) {
- current_range = &self->included_ranges[self->current_included_range_index];
- if (self->current_position.bytes == current_range->end_byte) {
- self->current_included_range_index++;
- if (self->current_included_range_index < self->included_range_count) {
- current_range++;
- self->current_position = (Length) {
- current_range->start_byte,
- current_range->start_point,
- };
- } else {
- current_range = NULL;
- }
- }
- }
-
- if (skip) self->token_start_position = self->current_position;
-
- if (current_range) {
- // Fetch a new chunk once the position has run past the cached one, then
- // decode the next character.
- if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
- ts_lexer__get_chunk(self);
- }
- ts_lexer__get_lookahead(self);
- } else {
- // Past the last included range: drop the chunk and report NUL.
- ts_lexer__clear_chunk(self);
- self->data.lookahead = '\0';
- self->lookahead_size = 1;
- }
-}
-
-// Record the end of the current token. May be called repeatedly; a later
-// call made after a longer match simply overwrites the previous end.
-//
-// Normally the token ends at the current position. The exception: when the
-// lexer is not at EOF and sits exactly at the start of an included range
-// other than the first, the token is taken to end at the *end* of the
-// preceding included range instead.
-static void ts_lexer__mark_end(TSLexer *_self) {
-  Lexer *self = (Lexer *)_self;
-  Length token_end = self->current_position;
-
-  if (!ts_lexer__eof(&self->data)) {
-    TSRange *range = &self->included_ranges[self->current_included_range_index];
-    bool at_later_range_start =
-      self->current_included_range_index > 0 &&
-      self->current_position.bytes == range->start_byte;
-    if (at_later_range_start) {
-      TSRange *previous_range = range - 1;
-      token_end = (Length) {
-        previous_range->end_byte,
-        previous_range->end_point,
-      };
-    }
-  }
-
-  self->token_end_position = token_end;
-}
-
-// Count how many ts_lexer__advance() steps separate the current position
-// from the position obtained by moving back by the current column offset.
-// The lexer is rewound by that offset (re-reading a chunk if the rewound
-// position precedes the cached one) and then advanced until it is back at
-// the byte where it started.
-static uint32_t ts_lexer__get_column(TSLexer *_self) {
-  Lexer *self = (Lexer *)_self;
-  const uint32_t target_byte = self->current_position.bytes;
-
-  self->current_position.bytes = target_byte - self->current_position.extent.column;
-  self->current_position.extent.column = 0;
-
-  if (self->current_position.bytes < self->chunk_start) {
-    ts_lexer__get_chunk(self);
-  }
-
-  uint32_t steps = 0;
-  for (; self->current_position.bytes < target_byte; steps++) {
-    ts_lexer__advance(&self->data, false);
-  }
-  return steps;
-}
-
-// Is the lexer positioned exactly at the first byte of its current included
-// range? This is exposed through the TSLexer interface because some
-// languages' external scanners need to perform custom actions at the
-// boundaries between disjoint included ranges. Always false at EOF.
-static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
-  const Lexer *self = (const Lexer *)_self;
-  if (self->current_included_range_index >= self->included_range_count) {
-    return false;
-  }
-  const TSRange *range = &self->included_ranges[self->current_included_range_index];
-  return range->start_byte == self->current_position.bytes;
-}
-
-// Initialize a lexer: install the TSLexer callbacks, zero the chunk,
-// position and logger state, and finally install the default included range
-// by calling ts_lexer_set_included_ranges() with no ranges.
-void ts_lexer_init(Lexer *self) {
- *self = (Lexer) {
- .data = {
- // The lexer's methods are stored as struct fields so that generated
- // parsers can call them without needing to be linked against this
- // library.
- .advance = ts_lexer__advance,
- .mark_end = ts_lexer__mark_end,
- .get_column = ts_lexer__get_column,
- .is_at_included_range_start = ts_lexer__is_at_included_range_start,
- .eof = ts_lexer__eof,
- .lookahead = 0,
- .result_symbol = 0,
- },
- .chunk = NULL,
- .chunk_size = 0,
- .chunk_start = 0,
- .current_position = {0, {0, 0}},
- .logger = {
- .payload = NULL,
- .log = NULL
- },
- // Left empty here; the call below allocates and fills the array.
- .included_ranges = NULL,
- .included_range_count = 0,
- .current_included_range_index = 0,
- };
- ts_lexer_set_included_ranges(self, NULL, 0);
-}
-
-// Release the lexer's included-ranges array with ts_free().
-void ts_lexer_delete(Lexer *self) {
-  TSRange *owned_ranges = self->included_ranges;
-  ts_free(owned_ranges);
-}
-
-// Move the lexer to `position`, snapping forward to the start of the next
-// included range when `position` falls in a gap between ranges, or to the
-// EOF state when it lies beyond every range.
-static void ts_lexer_goto(Lexer *self, Length position) {
- self->current_position = position;
- bool found_included_range = false;
-
- // Move to the first valid position at or after the given position.
- for (unsigned i = 0; i < self->included_range_count; i++) {
- TSRange *included_range = &self->included_ranges[i];
- if (included_range->end_byte > position.bytes) {
- // The position precedes this range: jump to the range's start.
- if (included_range->start_byte > position.bytes) {
- self->current_position = (Length) {
- .bytes = included_range->start_byte,
- .extent = included_range->start_point,
- };
- }
-
- self->current_included_range_index = i;
- found_included_range = true;
- break;
- }
- }
-
- if (found_included_range) {
- // If the current position is outside of the current chunk of text,
- // then clear out the current chunk of text.
- if (self->chunk && (
- position.bytes < self->chunk_start ||
- position.bytes >= self->chunk_start + self->chunk_size
- )) {
- ts_lexer__clear_chunk(self);
- }
-
- // A zero lookahead size makes ts_lexer_start() decode a fresh lookahead.
- self->lookahead_size = 0;
- self->data.lookahead = '\0';
- }
-
- // If the given position is beyond any of included ranges, move to the EOF
- // state - past the end of the included ranges.
- else {
- self->current_included_range_index = self->included_range_count;
- // NOTE(review): indexes element `included_range_count - 1`, so this
- // relies on at least one included range being installed -- confirm.
- TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
- self->current_position = (Length) {
- .bytes = last_included_range->end_byte,
- .extent = last_included_range->end_point,
- };
- ts_lexer__clear_chunk(self);
- self->lookahead_size = 1;
- self->data.lookahead = '\0';
- }
-}
-
-// Switch the lexer to a new input source: drop the cached chunk, store the
-// input, and re-seat the lexer at its current position.
-void ts_lexer_set_input(Lexer *self, TSInput input) {
-  ts_lexer__clear_chunk(self);
-  self->input = input;
-
-  Length resume_position = self->current_position;
-  ts_lexer_goto(self, resume_position);
-}
-
-// Move the lexer to the given position. Nothing happens when the lexer's
-// current byte offset already equals the requested one.
-void ts_lexer_reset(Lexer *self, Length position) {
-  if (position.bytes == self->current_position.bytes) return;
-  ts_lexer_goto(self, position);
-}
-
-// Prepare to lex a new token at the current position: reset the token
-// bounds and result symbol, then (unless at EOF) make sure a chunk and a
-// lookahead character are available. A byte order mark at byte offset 0 is
-// skipped.
-void ts_lexer_start(Lexer *self) {
-  self->data.result_symbol = 0;
-  self->token_end_position = LENGTH_UNDEFINED;
-  self->token_start_position = self->current_position;
-  if (ts_lexer__eof(&self->data)) return;
-
-  if (self->chunk_size == 0) ts_lexer__get_chunk(self);
-  if (self->lookahead_size == 0) ts_lexer__get_lookahead(self);
-
-  bool starts_with_bom =
-    self->current_position.bytes == 0 &&
-    self->data.lookahead == BYTE_ORDER_MARK;
-  if (starts_with_bom) ts_lexer__advance(&self->data, true);
-}
-
-// Finish lexing a token. If no end was marked, the token ends at the
-// current position. `*lookahead_end_byte` is raised (never lowered) to the
-// byte just past the last byte the lexer examined.
-void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
-  if (length_is_undefined(self->token_end_position)) {
-    ts_lexer__mark_end(&self->data);
-  }
-
-  // In order to determine that a byte sequence is invalid UTF8 or UTF16,
-  // the character decoding algorithm may have looked at the following byte.
-  // Therefore, for an invalid character one extra byte counts as examined.
-  uint32_t bytes_examined = self->data.lookahead == TS_DECODE_ERROR ? 2 : 1;
-  uint32_t examined_end_byte = self->current_position.bytes + bytes_examined;
-
-  if (*lookahead_end_byte < examined_end_byte) {
-    *lookahead_end_byte = examined_end_byte;
-  }
-}
-
-// Consume characters until the lexer no longer has a chunk loaded.
-void ts_lexer_advance_to_end(Lexer *self) {
-  TSLexer *interface = &self->data;
-  while (self->chunk != NULL) {
-    ts_lexer__advance(interface, false);
-  }
-}
-
-// Public entry point for the mark_end callback, taking a Lexer directly.
-void ts_lexer_mark_end(Lexer *self) {
-  TSLexer *interface = &self->data;
-  ts_lexer__mark_end(interface);
-}
-
-// Replace the lexer's included ranges with a copy of `ranges`.
-//
-// Passing no ranges (`count` of 0 or a NULL pointer) installs DEFAULT_RANGE.
-// Otherwise the ranges are validated first: each must start at or after the
-// end of the one before it and must not end before it starts. On a
-// validation failure nothing is changed and false is returned. On success
-// the lexer is re-seated at its current position and true is returned.
-bool ts_lexer_set_included_ranges(
-  Lexer *self,
-  const TSRange *ranges,
-  uint32_t count
-) {
-  if (count == 0 || !ranges) {
-    ranges = &DEFAULT_RANGE;
-    count = 1;
-  } else {
-    uint32_t minimum_start = 0;
-    for (const TSRange *range = ranges; range != ranges + count; range++) {
-      if (range->start_byte < minimum_start) return false;
-      if (range->end_byte < range->start_byte) return false;
-      minimum_start = range->end_byte;
-    }
-  }
-
-  size_t byte_count = count * sizeof(TSRange);
-  self->included_ranges = ts_realloc(self->included_ranges, byte_count);
-  memcpy(self->included_ranges, ranges, byte_count);
-  self->included_range_count = count;
-  ts_lexer_goto(self, self->current_position);
-  return true;
-}
-
-// Return the lexer's own included-ranges array (not a copy) and store the
-// number of ranges in `*count`.
-TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
-  TSRange *ranges = self->included_ranges;
-  *count = self->included_range_count;
-  return ranges;
-}
-
-#undef LOG
diff --git a/src/tree_sitter/lexer.h b/src/tree_sitter/lexer.h
deleted file mode 100644
index 5e39294529..0000000000
--- a/src/tree_sitter/lexer.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef TREE_SITTER_LEXER_H_
-#define TREE_SITTER_LEXER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./length.h"
-#include "./subtree.h"
-#include "tree_sitter/api.h"
-#include "tree_sitter/parser.h"
-
-// Full lexer state: the TSLexer interface handed to generated parsers plus
-// the position, included-range and chunk bookkeeping behind it.
-typedef struct {
- // First member: the ts_lexer__* callbacks cast the TSLexer pointer they
- // receive back to a Lexer pointer.
- TSLexer data;
- // Position of the lookahead character.
- Length current_position;
- // Bounds of the token being lexed; the end is LENGTH_UNDEFINED until marked.
- Length token_start_position;
- Length token_end_position;
-
- // Array of ranges the lexer may read from, released by ts_lexer_delete().
- TSRange *included_ranges;
- size_t included_range_count;
- // Index of the range containing the current position; equal to
- // `included_range_count` when the lexer is at EOF.
- size_t current_included_range_index;
-
- // Most recent chunk returned by the input's read callback, the byte
- // offset at which it starts, and its size in bytes.
- const char *chunk;
- uint32_t chunk_start;
- uint32_t chunk_size;
- // Size in bytes of the current lookahead character.
- uint32_t lookahead_size;
-
- TSInput input;
- TSLogger logger;
- // Scratch buffer in which log messages are formatted.
- char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
-} Lexer;
-
-void ts_lexer_init(Lexer *);
-void ts_lexer_delete(Lexer *);
-void ts_lexer_set_input(Lexer *, TSInput);
-void ts_lexer_reset(Lexer *, Length);
-void ts_lexer_start(Lexer *);
-void ts_lexer_finish(Lexer *, uint32_t *);
-void ts_lexer_advance_to_end(Lexer *);
-void ts_lexer_mark_end(Lexer *);
-bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
-TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_LEXER_H_
diff --git a/src/tree_sitter/lib.c b/src/tree_sitter/lib.c
deleted file mode 100644
index 289d32f4c5..0000000000
--- a/src/tree_sitter/lib.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// The Tree-sitter library can be built by compiling this one source file.
-//
-// The following directories must be added to the include path:
-// - include
-
-#define _POSIX_C_SOURCE 200112L
-
-#include "./get_changed_ranges.c"
-#include "./language.c"
-#include "./lexer.c"
-#include "./node.c"
-#include "./parser.c"
-#include "./query.c"
-#include "./stack.c"
-#include "./subtree.c"
-#include "./tree_cursor.c"
-#include "./tree.c"
diff --git a/src/tree_sitter/node.c b/src/tree_sitter/node.c
deleted file mode 100644
index 576f3ef38e..0000000000
--- a/src/tree_sitter/node.c
+++ /dev/null
@@ -1,677 +0,0 @@
-#include <stdbool.h>
-#include "./subtree.h"
-#include "./tree.h"
-#include "./language.h"
-
-// Cursor over the direct children of one subtree, producing a TSNode for
-// each child in order.
-typedef struct {
- // Subtree whose children are visited; NULL_SUBTREE when it has none.
- Subtree parent;
- const TSTree *tree;
- // Running position: start of the parent initially, advanced past each
- // child's padding and size as the iteration proceeds.
- Length position;
- // Index of the next child to return.
- uint32_t child_index;
- // Number of non-extra children returned so far; indexes `alias_sequence`.
- uint32_t structural_child_index;
- // Alias sequence of the parent's production, or NULL if there is none.
- const TSSymbol *alias_sequence;
-} NodeChildIterator;
-
-// TSNode - constructors
-
-// Build a TSNode handle. The four context slots hold, in order, the start
-// byte, the start row, the start column and the alias symbol; they are
-// followed by the subtree pointer (the node's id) and the owning tree.
-TSNode ts_node_new(
-  const TSTree *tree,
-  const Subtree *subtree,
-  Length position,
-  TSSymbol alias
-) {
-  TSNode node = {
-    {position.bytes, position.extent.row, position.extent.column, alias},
-    subtree,
-    tree,
-  };
-  return node;
-}
-
-// The null node: no tree, no subtree, zero position and no alias.
-static inline TSNode ts_node__null(void) {
-  Length origin = length_zero();
-  return ts_node_new(NULL, NULL, origin, 0);
-}
-
-// TSNode - accessors
-
-// Byte offset at which the node starts (context slot 0).
-uint32_t ts_node_start_byte(TSNode self) {
-  uint32_t start_byte = self.context[0];
-  return start_byte;
-}
-
-// Point at which the node starts, built from context slots 1 and 2.
-TSPoint ts_node_start_point(TSNode self) {
-  TSPoint start = {self.context[1], self.context[2]};
-  return start;
-}
-
-// Alias symbol stored in the node (context slot 3); 0 means no alias.
-static inline uint32_t ts_node__alias(const TSNode *self) {
-  uint32_t alias = self->context[3];
-  return alias;
-}
-
-// The subtree a node refers to: the node's `id` points at a Subtree.
-static inline Subtree ts_node__subtree(TSNode self) {
-  const Subtree *entry = (const Subtree *)self.id;
-  return *entry;
-}
-
-// NodeChildIterator
-
-// Create an iterator over the direct children of `node`. A node without
-// children yields an iterator whose parent is NULL_SUBTREE, so the first
-// call to ts_node_child_iterator_next() returns false. Otherwise iteration
-// starts at the node's own start position, using the alias sequence of the
-// node's production.
-static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
-  Subtree subtree = ts_node__subtree(*node);
-  bool is_leaf = ts_subtree_child_count(subtree) == 0;
-  if (is_leaf) {
-    NodeChildIterator empty = {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
-    return empty;
-  }
-
-  NodeChildIterator iterator = {
-    .parent = subtree,
-    .tree = node->tree,
-    .position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
-    .child_index = 0,
-    .structural_child_index = 0,
-    .alias_sequence = ts_language_alias_sequence(
-      node->tree->language,
-      subtree.ptr->production_id
-    ),
-  };
-  return iterator;
-}
-
-// True once every child of the parent has been returned. The parent
-// pointer is dereferenced, so it must not be NULL.
-static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
-  return self->parent.ptr->child_count == self->child_index;
-}
-
-// Produce the next child of the iterator's parent in `*result`.
-// Returns false, leaving `*result` untouched, when the parent is null or
-// all children have been returned.
-static inline bool ts_node_child_iterator_next(
- NodeChildIterator *self,
- TSNode *result
-) {
- if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
- const Subtree *child = &self->parent.ptr->children[self->child_index];
- TSSymbol alias_symbol = 0;
- // Only non-extra children take a slot in the alias sequence.
- if (!ts_subtree_extra(*child)) {
- if (self->alias_sequence) {
- alias_symbol = self->alias_sequence[self->structural_child_index];
- }
- self->structural_child_index++;
- }
- // The first child's padding is not added: the starting position is the
- // parent node's own start.
- if (self->child_index > 0) {
- self->position = length_add(self->position, ts_subtree_padding(*child));
- }
- *result = ts_node_new(
- self->tree,
- child,
- self->position,
- alias_symbol
- );
- // Leave `position` at the end of this child for the next call.
- self->position = length_add(self->position, ts_subtree_size(*child));
- self->child_index++;
- return true;
-}
-
-// TSNode - private
-
-// Decide whether a node counts for the public child/sibling APIs.
-// With `include_anonymous` set, a node is relevant if its subtree is
-// visible or it carries an alias. Otherwise it must be named: an aliased
-// node is judged by the alias symbol's metadata, an unaliased one by its
-// own subtree being both visible and named.
-static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
-  Subtree tree = ts_node__subtree(self);
-  if (include_anonymous) {
-    if (ts_subtree_visible(tree)) return true;
-    return ts_node__alias(&self) != 0;
-  }
-
-  TSSymbol alias = ts_node__alias(&self);
-  if (!alias) {
-    return ts_subtree_visible(tree) && ts_subtree_named(tree);
-  }
-  return ts_language_symbol_metadata(self.tree->language, alias).named;
-}
-
-// Number of relevant children recorded on the node's subtree: the visible
-// child count when anonymous nodes are included, the named child count
-// otherwise. Nodes without children report 0.
-static inline uint32_t ts_node__relevant_child_count(
-  TSNode self,
-  bool include_anonymous
-) {
-  Subtree tree = ts_node__subtree(self);
-  if (ts_subtree_child_count(tree) == 0) return 0;
-  return include_anonymous
-    ? tree.ptr->visible_child_count
-    : tree.ptr->named_child_count;
-}
-
-// Return the relevant child of `self` at position `child_index`, counting
-// only relevant nodes (see ts_node__is_relevant). Children that are not
-// relevant are looked through: their relevant children are counted as if
-// they were direct children of `self`. Returns the null node when the
-// index is out of range.
-static inline TSNode ts_node__child(
- TSNode self,
- uint32_t child_index,
- bool include_anonymous
-) {
- TSNode result = self;
- bool did_descend = true;
-
- // Each pass scans the children of `result`; a pass that descends into a
- // non-relevant child triggers another pass over that child.
- while (did_descend) {
- did_descend = false;
-
- TSNode child;
- uint32_t index = 0;
- NodeChildIterator iterator = ts_node_iterate_children(&result);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- if (ts_node__is_relevant(child, include_anonymous)) {
- if (index == child_index) {
- // Record `self` as the child's parent in the tree's parent cache.
- if (ts_node__is_relevant(self, true)) {
- ts_tree_set_cached_parent(self.tree, &child, &self);
- }
- return child;
- }
- index++;
- } else {
- // A non-relevant child contributes its own relevant children. If
- // the target index falls among them, continue the search inside it.
- uint32_t grandchild_index = child_index - index;
- uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
- if (grandchild_index < grandchild_count) {
- did_descend = true;
- result = child;
- child_index = grandchild_index;
- break;
- }
- index += grandchild_count;
- }
- }
- }
-
- return ts_node__null();
-}
-
-// Report whether `other` is found among the trailing zero-byte descendants
-// of `self`: children are scanned from last to first, recursing into each,
-// and the scan stops at the first child whose total size is non-zero.
-static bool ts_subtree_has_trailing_empty_descendant(
- Subtree self,
- Subtree other
-) {
- // `i` is unsigned, so the loop ends when decrementing past 0 wraps it
- // around and `i + 1` becomes 0. With no children the body never runs.
- for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) {
- Subtree child = self.ptr->children[i];
- if (ts_subtree_total_bytes(child) > 0) break;
- if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) {
- return true;
- }
- }
- return false;
-}
-
-// Find the nearest relevant node that precedes `self`, searching from
-// ts_node_parent(self) downward. Non-relevant nodes with relevant children
-// are looked through. Returns the null node when there is no such node.
-static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
- Subtree self_subtree = ts_node__subtree(self);
- bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
- uint32_t target_end_byte = ts_node_end_byte(self);
-
- TSNode node = ts_node_parent(self);
- // Best candidate remembered from an outer level, and whether it is
- // relevant itself or merely contains relevant children.
- TSNode earlier_node = ts_node__null();
- bool earlier_node_is_relevant = false;
-
- while (!ts_node_is_null(node)) {
- TSNode earlier_child = ts_node__null();
- bool earlier_child_is_relevant = false;
- bool found_child_containing_target = false;
-
- TSNode child;
- NodeChildIterator iterator = ts_node_iterate_children(&node);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- // Reached `self`: everything remembered so far precedes it.
- if (child.id == self.id) break;
- // `iterator.position` is the end of `child`; a child ending after the
- // target contains it.
- if (iterator.position.bytes > target_end_byte) {
- found_child_containing_target = true;
- break;
- }
-
- // A child ending exactly at the target's end contains it unless the
- // target is empty; an empty target must be found among the child's
- // trailing empty descendants.
- if (iterator.position.bytes == target_end_byte &&
- (!self_is_empty ||
- ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
- found_child_containing_target = true;
- break;
- }
-
- // Remember the latest preceding child that is relevant or that has
- // relevant children.
- if (ts_node__is_relevant(child, include_anonymous)) {
- earlier_child = child;
- earlier_child_is_relevant = true;
- } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
- earlier_child = child;
- earlier_child_is_relevant = false;
- }
- }
-
- if (found_child_containing_target) {
- // Keep the candidate and continue inside the containing child.
- if (!ts_node_is_null(earlier_child)) {
- earlier_node = earlier_child;
- earlier_node_is_relevant = earlier_child_is_relevant;
- }
- node = child;
- } else if (earlier_child_is_relevant) {
- return earlier_child;
- } else if (!ts_node_is_null(earlier_child)) {
- // Not relevant itself but has relevant children: search inside it.
- node = earlier_child;
- } else if (earlier_node_is_relevant) {
- return earlier_node;
- } else {
- // Fall back to the outer candidate; the loop ends if it is null.
- node = earlier_node;
- }
- }
-
- return ts_node__null();
-}
-
-// Find the nearest relevant node that follows `self`, searching from
-// ts_node_parent(self) downward. Non-relevant nodes with relevant children
-// are looked through. Returns the null node when there is no such node.
-static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
- uint32_t target_end_byte = ts_node_end_byte(self);
-
- TSNode node = ts_node_parent(self);
- // Best candidate remembered from an outer level, and whether it is
- // relevant itself or merely contains relevant children.
- TSNode later_node = ts_node__null();
- bool later_node_is_relevant = false;
-
- while (!ts_node_is_null(node)) {
- TSNode later_child = ts_node__null();
- bool later_child_is_relevant = false;
- TSNode child_containing_target = ts_node__null();
-
- TSNode child;
- NodeChildIterator iterator = ts_node_iterate_children(&node);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- // Skip children that end before the target ends.
- if (iterator.position.bytes < target_end_byte) continue;
- if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
- // This child spans the target; remember it unless it is the target
- // itself.
- if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
- child_containing_target = child;
- }
- } else if (ts_node__is_relevant(child, include_anonymous)) {
- later_child = child;
- later_child_is_relevant = true;
- break;
- } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
- later_child = child;
- later_child_is_relevant = false;
- break;
- }
- }
-
- if (!ts_node_is_null(child_containing_target)) {
- // Keep the candidate and continue inside the containing child.
- if (!ts_node_is_null(later_child)) {
- later_node = later_child;
- later_node_is_relevant = later_child_is_relevant;
- }
- node = child_containing_target;
- } else if (later_child_is_relevant) {
- return later_child;
- } else if (!ts_node_is_null(later_child)) {
- // Not relevant itself but has relevant children: search inside it.
- node = later_child;
- } else if (later_node_is_relevant) {
- return later_node;
- } else {
- // Fall back to the outer candidate; the loop ends if it is null.
- node = later_node;
- }
- }
-
- return ts_node__null();
-}
-
-static inline TSNode ts_node__first_child_for_byte(
- TSNode self,
- uint32_t goal,
- bool include_anonymous
-) {
- TSNode node = self;
- bool did_descend = true;
-
- while (did_descend) {
- did_descend = false;
-
- TSNode child;
- NodeChildIterator iterator = ts_node_iterate_children(&node);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- if (ts_node_end_byte(child) > goal) {
- if (ts_node__is_relevant(child, include_anonymous)) {
- return child;
- } else if (ts_node_child_count(child) > 0) {
- did_descend = true;
- node = child;
- break;
- }
- }
- }
- }
-
- return ts_node__null();
-}
-
-static inline TSNode ts_node__descendant_for_byte_range(
- TSNode self,
- uint32_t range_start,
- uint32_t range_end,
- bool include_anonymous
-) {
- TSNode node = self;
- TSNode last_visible_node = self;
-
- bool did_descend = true;
- while (did_descend) {
- did_descend = false;
-
- TSNode child;
- NodeChildIterator iterator = ts_node_iterate_children(&node);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- uint32_t node_end = iterator.position.bytes;
-
- // The end of this node must extend far enough forward to touch
- // the end of the range and exceed the start of the range.
- if (node_end < range_end) continue;
- if (node_end <= range_start) continue;
-
- // The start of this node must extend far enough backward to
- // touch the start of the range.
- if (range_start < ts_node_start_byte(child)) break;
-
- node = child;
- if (ts_node__is_relevant(node, include_anonymous)) {
- ts_tree_set_cached_parent(self.tree, &child, &last_visible_node);
- last_visible_node = node;
- }
- did_descend = true;
- break;
- }
- }
-
- return last_visible_node;
-}
-
-static inline TSNode ts_node__descendant_for_point_range(
- TSNode self,
- TSPoint range_start,
- TSPoint range_end,
- bool include_anonymous
-) {
- TSNode node = self;
- TSNode last_visible_node = self;
-
- bool did_descend = true;
- while (did_descend) {
- did_descend = false;
-
- TSNode child;
- NodeChildIterator iterator = ts_node_iterate_children(&node);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- TSPoint node_end = iterator.position.extent;
-
- // The end of this node must extend far enough forward to touch
- // the end of the range and exceed the start of the range.
- if (point_lt(node_end, range_end)) continue;
- if (point_lte(node_end, range_start)) continue;
-
- // The start of this node must extend far enough backward to
- // touch the start of the range.
- if (point_lt(range_start, ts_node_start_point(child))) break;
-
- node = child;
- if (ts_node__is_relevant(node, include_anonymous)) {
- ts_tree_set_cached_parent(self.tree, &child, &last_visible_node);
- last_visible_node = node;
- }
- did_descend = true;
- break;
- }
- }
-
- return last_visible_node;
-}
-
-// TSNode - public
-
-uint32_t ts_node_end_byte(TSNode self) {
- return ts_node_start_byte(self) + ts_subtree_size(ts_node__subtree(self)).bytes;
-}
-
-TSPoint ts_node_end_point(TSNode self) {
- return point_add(ts_node_start_point(self), ts_subtree_size(ts_node__subtree(self)).extent);
-}
-
-TSSymbol ts_node_symbol(TSNode self) {
- TSSymbol symbol = ts_node__alias(&self);
- if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self));
- return ts_language_public_symbol(self.tree->language, symbol);
-}
-
-const char *ts_node_type(TSNode self) {
- TSSymbol symbol = ts_node__alias(&self);
- if (!symbol) symbol = ts_subtree_symbol(ts_node__subtree(self));
- return ts_language_symbol_name(self.tree->language, symbol);
-}
-
-char *ts_node_string(TSNode self) {
- return ts_subtree_string(ts_node__subtree(self), self.tree->language, false);
-}
-
-bool ts_node_eq(TSNode self, TSNode other) {
- return self.tree == other.tree && self.id == other.id;
-}
-
-bool ts_node_is_null(TSNode self) {
- return self.id == 0;
-}
-
-bool ts_node_is_extra(TSNode self) {
- return ts_subtree_extra(ts_node__subtree(self));
-}
-
-bool ts_node_is_named(TSNode self) {
- TSSymbol alias = ts_node__alias(&self);
- return alias
- ? ts_language_symbol_metadata(self.tree->language, alias).named
- : ts_subtree_named(ts_node__subtree(self));
-}
-
-bool ts_node_is_missing(TSNode self) {
- return ts_subtree_missing(ts_node__subtree(self));
-}
-
-bool ts_node_has_changes(TSNode self) {
- return ts_subtree_has_changes(ts_node__subtree(self));
-}
-
-bool ts_node_has_error(TSNode self) {
- return ts_subtree_error_cost(ts_node__subtree(self)) > 0;
-}
-
-TSNode ts_node_parent(TSNode self) {
- TSNode node = ts_tree_get_cached_parent(self.tree, &self);
- if (node.id) return node;
-
- node = ts_tree_root_node(self.tree);
- uint32_t end_byte = ts_node_end_byte(self);
- if (node.id == self.id) return ts_node__null();
-
- TSNode last_visible_node = node;
- bool did_descend = true;
- while (did_descend) {
- did_descend = false;
-
- TSNode child;
- NodeChildIterator iterator = ts_node_iterate_children(&node);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- if (
- ts_node_start_byte(child) > ts_node_start_byte(self) ||
- child.id == self.id
- ) break;
- if (iterator.position.bytes >= end_byte) {
- node = child;
- if (ts_node__is_relevant(child, true)) {
- ts_tree_set_cached_parent(self.tree, &node, &last_visible_node);
- last_visible_node = node;
- }
- did_descend = true;
- break;
- }
- }
- }
-
- return last_visible_node;
-}
-
-TSNode ts_node_child(TSNode self, uint32_t child_index) {
- return ts_node__child(self, child_index, true);
-}
-
-TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
- return ts_node__child(self, child_index, false);
-}
-
-TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
-recur:
- if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
-
- const TSFieldMapEntry *field_map, *field_map_end;
- ts_language_field_map(
- self.tree->language,
- ts_node__subtree(self).ptr->production_id,
- &field_map,
- &field_map_end
- );
- if (field_map == field_map_end) return ts_node__null();
-
- // The field mappings are sorted by their field id. Scan all
- // the mappings to find the ones for the given field id.
- while (field_map->field_id < field_id) {
- field_map++;
- if (field_map == field_map_end) return ts_node__null();
- }
- while (field_map_end[-1].field_id > field_id) {
- field_map_end--;
- if (field_map == field_map_end) return ts_node__null();
- }
-
- TSNode child;
- NodeChildIterator iterator = ts_node_iterate_children(&self);
- while (ts_node_child_iterator_next(&iterator, &child)) {
- if (!ts_subtree_extra(ts_node__subtree(child))) {
- uint32_t index = iterator.structural_child_index - 1;
- if (index < field_map->child_index) continue;
-
- // Hidden nodes' fields are "inherited" by their visible parent.
- if (field_map->inherited) {
-
- // If this is the *last* possible child node for this field,
- // then perform a tail call to avoid recursion.
- if (field_map + 1 == field_map_end) {
- self = child;
- goto recur;
- }
-
- // Otherwise, descend into this child, but if it doesn't contain
- // the field, continue searching subsequent children.
- else {
- TSNode result = ts_node_child_by_field_id(child, field_id);
- if (result.id) return result;
- field_map++;
- if (field_map == field_map_end) return ts_node__null();
- }
- }
-
- else if (ts_node__is_relevant(child, true)) {
- return child;
- }
-
- // If the field refers to a hidden node, return its first visible
- // child.
- else {
- return ts_node_child(child, 0);
- }
- }
- }
-
- return ts_node__null();
-}
-
-TSNode ts_node_child_by_field_name(
- TSNode self,
- const char *name,
- uint32_t name_length
-) {
- TSFieldId field_id = ts_language_field_id_for_name(
- self.tree->language,
- name,
- name_length
- );
- return ts_node_child_by_field_id(self, field_id);
-}
-
-uint32_t ts_node_child_count(TSNode self) {
- Subtree tree = ts_node__subtree(self);
- if (ts_subtree_child_count(tree) > 0) {
- return tree.ptr->visible_child_count;
- } else {
- return 0;
- }
-}
-
-uint32_t ts_node_named_child_count(TSNode self) {
- Subtree tree = ts_node__subtree(self);
- if (ts_subtree_child_count(tree) > 0) {
- return tree.ptr->named_child_count;
- } else {
- return 0;
- }
-}
-
-TSNode ts_node_next_sibling(TSNode self) {
- return ts_node__next_sibling(self, true);
-}
-
-TSNode ts_node_next_named_sibling(TSNode self) {
- return ts_node__next_sibling(self, false);
-}
-
-TSNode ts_node_prev_sibling(TSNode self) {
- return ts_node__prev_sibling(self, true);
-}
-
-TSNode ts_node_prev_named_sibling(TSNode self) {
- return ts_node__prev_sibling(self, false);
-}
-
-TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
- return ts_node__first_child_for_byte(self, byte, true);
-}
-
-TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
- return ts_node__first_child_for_byte(self, byte, false);
-}
-
-TSNode ts_node_descendant_for_byte_range(
- TSNode self,
- uint32_t start,
- uint32_t end
-) {
- return ts_node__descendant_for_byte_range(self, start, end, true);
-}
-
-TSNode ts_node_named_descendant_for_byte_range(
- TSNode self,
- uint32_t start,
- uint32_t end
-) {
- return ts_node__descendant_for_byte_range(self, start, end, false);
-}
-
-TSNode ts_node_descendant_for_point_range(
- TSNode self,
- TSPoint start,
- TSPoint end
-) {
- return ts_node__descendant_for_point_range(self, start, end, true);
-}
-
-TSNode ts_node_named_descendant_for_point_range(
- TSNode self,
- TSPoint start,
- TSPoint end
-) {
- return ts_node__descendant_for_point_range(self, start, end, false);
-}
-
-void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
- uint32_t start_byte = ts_node_start_byte(*self);
- TSPoint start_point = ts_node_start_point(*self);
-
- if (start_byte >= edit->old_end_byte) {
- start_byte = edit->new_end_byte + (start_byte - edit->old_end_byte);
- start_point = point_add(edit->new_end_point, point_sub(start_point, edit->old_end_point));
- } else if (start_byte > edit->start_byte) {
- start_byte = edit->new_end_byte;
- start_point = edit->new_end_point;
- }
-
- self->context[0] = start_byte;
- self->context[1] = start_point.row;
- self->context[2] = start_point.column;
-}
diff --git a/src/tree_sitter/parser.c b/src/tree_sitter/parser.c
deleted file mode 100644
index 79cad797a0..0000000000
--- a/src/tree_sitter/parser.c
+++ /dev/null
@@ -1,1906 +0,0 @@
-#include <time.h>
-#include <assert.h>
-#include <stdio.h>
-#include <limits.h>
-#include <stdbool.h>
-#include "tree_sitter/api.h"
-#include "./alloc.h"
-#include "./array.h"
-#include "./atomic.h"
-#include "./clock.h"
-#include "./error_costs.h"
-#include "./get_changed_ranges.h"
-#include "./language.h"
-#include "./length.h"
-#include "./lexer.h"
-#include "./reduce_action.h"
-#include "./reusable_node.h"
-#include "./stack.h"
-#include "./subtree.h"
-#include "./tree.h"
-
-#define LOG(...) \
- if (self->lexer.logger.log || self->dot_graph_file) { \
- snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
- ts_parser__log(self); \
- }
-
-#define LOG_STACK() \
- if (self->dot_graph_file) { \
- ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
- fputs("\n\n", self->dot_graph_file); \
- }
-
-#define LOG_TREE(tree) \
- if (self->dot_graph_file) { \
- ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file); \
- fputs("\n", self->dot_graph_file); \
- }
-
-#define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
-
-#define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
-
-static const unsigned MAX_VERSION_COUNT = 6;
-static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
-static const unsigned MAX_SUMMARY_DEPTH = 16;
-static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
-static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100;
-
-typedef struct {
- Subtree token;
- Subtree last_external_token;
- uint32_t byte_index;
-} TokenCache;
-
-struct TSParser {
- Lexer lexer;
- Stack *stack;
- SubtreePool tree_pool;
- const TSLanguage *language;
- ReduceActionSet reduce_actions;
- Subtree finished_tree;
- SubtreeHeapData scratch_tree_data;
- MutableSubtree scratch_tree;
- TokenCache token_cache;
- ReusableNode reusable_node;
- void *external_scanner_payload;
- FILE *dot_graph_file;
- TSClock end_clock;
- TSDuration timeout_duration;
- unsigned accept_count;
- unsigned operation_count;
- const volatile size_t *cancellation_flag;
- Subtree old_tree;
- TSRangeArray included_range_differences;
- unsigned included_range_difference_index;
-};
-
-typedef struct {
- unsigned cost;
- unsigned node_count;
- int dynamic_precedence;
- bool is_in_error;
-} ErrorStatus;
-
-typedef enum {
- ErrorComparisonTakeLeft,
- ErrorComparisonPreferLeft,
- ErrorComparisonNone,
- ErrorComparisonPreferRight,
- ErrorComparisonTakeRight,
-} ErrorComparison;
-
-typedef struct {
- const char *string;
- uint32_t length;
-} TSStringInput;
-
-// StringInput
-
-static const char *ts_string_input_read(
- void *_self,
- uint32_t byte,
- TSPoint pt,
- uint32_t *length
-) {
- (void)pt;
- TSStringInput *self = (TSStringInput *)_self;
- if (byte >= self->length) {
- *length = 0;
- return "";
- } else {
- *length = self->length - byte;
- return self->string + byte;
- }
-}
-
-// Parser - Private
-
-static void ts_parser__log(TSParser *self) {
- if (self->lexer.logger.log) {
- self->lexer.logger.log(
- self->lexer.logger.payload,
- TSLogTypeParse,
- self->lexer.debug_buffer
- );
- }
-
- if (self->dot_graph_file) {
- fprintf(self->dot_graph_file, "graph {\nlabel=\"");
- for (char *c = &self->lexer.debug_buffer[0]; *c != 0; c++) {
- if (*c == '"') fputc('\\', self->dot_graph_file);
- fputc(*c, self->dot_graph_file);
- }
- fprintf(self->dot_graph_file, "\"\n}\n\n");
- }
-}
-
-static bool ts_parser__breakdown_top_of_stack(
- TSParser *self,
- StackVersion version
-) {
- bool did_break_down = false;
- bool pending = false;
-
- do {
- StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
- if (!pop.size) break;
-
- did_break_down = true;
- pending = false;
- for (uint32_t i = 0; i < pop.size; i++) {
- StackSlice slice = pop.contents[i];
- TSStateId state = ts_stack_state(self->stack, slice.version);
- Subtree parent = *array_front(&slice.subtrees);
-
- for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
- Subtree child = parent.ptr->children[j];
- pending = ts_subtree_child_count(child) > 0;
-
- if (ts_subtree_is_error(child)) {
- state = ERROR_STATE;
- } else if (!ts_subtree_extra(child)) {
- state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
- }
-
- ts_subtree_retain(child);
- ts_stack_push(self->stack, slice.version, child, pending, state);
- }
-
- for (uint32_t j = 1; j < slice.subtrees.size; j++) {
- Subtree tree = slice.subtrees.contents[j];
- ts_stack_push(self->stack, slice.version, tree, false, state);
- }
-
- ts_subtree_release(&self->tree_pool, parent);
- array_delete(&slice.subtrees);
-
- LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
- LOG_STACK();
- }
- } while (pending);
-
- return did_break_down;
-}
-
-static void ts_parser__breakdown_lookahead(
- TSParser *self,
- Subtree *lookahead,
- TSStateId state,
- ReusableNode *reusable_node
-) {
- bool did_descend = false;
- Subtree tree = reusable_node_tree(reusable_node);
- while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
- LOG("state_mismatch sym:%s", TREE_NAME(tree));
- reusable_node_descend(reusable_node);
- tree = reusable_node_tree(reusable_node);
- did_descend = true;
- }
-
- if (did_descend) {
- ts_subtree_release(&self->tree_pool, *lookahead);
- *lookahead = tree;
- ts_subtree_retain(*lookahead);
- }
-}
-
-static ErrorComparison ts_parser__compare_versions(
- TSParser *self,
- ErrorStatus a,
- ErrorStatus b
-) {
- (void)self;
- if (!a.is_in_error && b.is_in_error) {
- if (a.cost < b.cost) {
- return ErrorComparisonTakeLeft;
- } else {
- return ErrorComparisonPreferLeft;
- }
- }
-
- if (a.is_in_error && !b.is_in_error) {
- if (b.cost < a.cost) {
- return ErrorComparisonTakeRight;
- } else {
- return ErrorComparisonPreferRight;
- }
- }
-
- if (a.cost < b.cost) {
- if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
- return ErrorComparisonTakeLeft;
- } else {
- return ErrorComparisonPreferLeft;
- }
- }
-
- if (b.cost < a.cost) {
- if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
- return ErrorComparisonTakeRight;
- } else {
- return ErrorComparisonPreferRight;
- }
- }
-
- if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
- if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
- return ErrorComparisonNone;
-}
-
-static ErrorStatus ts_parser__version_status(
- TSParser *self,
- StackVersion version
-) {
- unsigned cost = ts_stack_error_cost(self->stack, version);
- bool is_paused = ts_stack_is_paused(self->stack, version);
- if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
- return (ErrorStatus) {
- .cost = cost,
- .node_count = ts_stack_node_count_since_error(self->stack, version),
- .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
- .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
- };
-}
-
-static bool ts_parser__better_version_exists(
- TSParser *self,
- StackVersion version,
- bool is_in_error,
- unsigned cost
-) {
- if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
- return true;
- }
-
- Length position = ts_stack_position(self->stack, version);
- ErrorStatus status = {
- .cost = cost,
- .is_in_error = is_in_error,
- .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
- .node_count = ts_stack_node_count_since_error(self->stack, version),
- };
-
- for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
- if (i == version ||
- !ts_stack_is_active(self->stack, i) ||
- ts_stack_position(self->stack, i).bytes < position.bytes) continue;
- ErrorStatus status_i = ts_parser__version_status(self, i);
- switch (ts_parser__compare_versions(self, status, status_i)) {
- case ErrorComparisonTakeRight:
- return true;
- case ErrorComparisonPreferRight:
- if (ts_stack_can_merge(self->stack, i, version)) return true;
- break;
- default:
- break;
- }
- }
-
- return false;
-}
-
-static void ts_parser__restore_external_scanner(
- TSParser *self,
- Subtree external_token
-) {
- if (external_token.ptr) {
- self->language->external_scanner.deserialize(
- self->external_scanner_payload,
- ts_external_scanner_state_data(&external_token.ptr->external_scanner_state),
- external_token.ptr->external_scanner_state.length
- );
- } else {
- self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
- }
-}
-
-static bool ts_parser__can_reuse_first_leaf(
- TSParser *self,
- TSStateId state,
- Subtree tree,
- TableEntry *table_entry
-) {
- TSLexMode current_lex_mode = self->language->lex_modes[state];
- TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
- TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
- TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
-
- // At the end of a non-terminal extra node, the lexer normally returns
- // NULL, which indicates that the parser should look for a reduce action
- // at symbol `0`. Avoid reusing tokens in this situation to ensure that
- // the same thing happens when incrementally reparsing.
- if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
-
- // If the token was created in a state with the same set of lookaheads, it is reusable.
- if (
- table_entry->action_count > 0 &&
- memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
- (
- leaf_symbol != self->language->keyword_capture_token ||
- (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state)
- )
- ) return true;
-
- // Empty tokens are not reusable in states with different lookaheads.
- if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
-
- // If the current state allows external tokens or other tokens that conflict with this
- // token, this token is not reusable.
- return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
-}
-
-static Subtree ts_parser__lex(
- TSParser *self,
- StackVersion version,
- TSStateId parse_state
-) {
- TSLexMode lex_mode = self->language->lex_modes[parse_state];
- if (lex_mode.lex_state == (uint16_t)-1) {
- LOG("no_lookahead_after_non_terminal_extra");
- return NULL_SUBTREE;
- }
-
- Length start_position = ts_stack_position(self->stack, version);
- Subtree external_token = ts_stack_last_external_token(self->stack, version);
- const bool *valid_external_tokens = ts_language_enabled_external_tokens(
- self->language,
- lex_mode.external_lex_state
- );
-
- bool found_external_token = false;
- bool error_mode = parse_state == ERROR_STATE;
- bool skipped_error = false;
- int32_t first_error_character = 0;
- Length error_start_position = length_zero();
- Length error_end_position = length_zero();
- uint32_t lookahead_end_byte = 0;
- ts_lexer_reset(&self->lexer, start_position);
-
- for (;;) {
- Length current_position = self->lexer.current_position;
-
- if (valid_external_tokens) {
- LOG(
- "lex_external state:%d, row:%u, column:%u",
- lex_mode.external_lex_state,
- current_position.extent.row + 1,
- current_position.extent.column
- );
- ts_lexer_start(&self->lexer);
- ts_parser__restore_external_scanner(self, external_token);
- bool found_token = self->language->external_scanner.scan(
- self->external_scanner_payload,
- &self->lexer.data,
- valid_external_tokens
- );
- ts_lexer_finish(&self->lexer, &lookahead_end_byte);
-
- // Zero-length external tokens are generally allowed, but they're not
- // allowed right after a syntax error. This is for two reasons:
- // 1. After a syntax error, the lexer is looking for any possible token,
- // as opposed to the specific set of tokens that are valid in some
- // parse state. In this situation, it's very easy for an external
- // scanner to produce unwanted zero-length tokens.
- // 2. The parser sometimes inserts *missing* tokens to recover from
- // errors. These tokens are also zero-length. If we allow more
- // zero-length tokens to be created after missing tokens, it
- // can lead to infinite loops. Forbidding zero-length tokens
- // right at the point of error recovery is a conservative strategy
- // for preventing this kind of infinite loop.
- if (found_token && (
- self->lexer.token_end_position.bytes > current_position.bytes ||
- (!error_mode && ts_stack_has_advanced_since_error(self->stack, version))
- )) {
- found_external_token = true;
- break;
- }
-
- ts_lexer_reset(&self->lexer, current_position);
- }
-
- LOG(
- "lex_internal state:%d, row:%u, column:%u",
- lex_mode.lex_state,
- current_position.extent.row + 1,
- current_position.extent.column
- );
- ts_lexer_start(&self->lexer);
- bool found_token = self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
- ts_lexer_finish(&self->lexer, &lookahead_end_byte);
- if (found_token) break;
-
- if (!error_mode) {
- error_mode = true;
- lex_mode = self->language->lex_modes[ERROR_STATE];
- valid_external_tokens = ts_language_enabled_external_tokens(
- self->language,
- lex_mode.external_lex_state
- );
- ts_lexer_reset(&self->lexer, start_position);
- continue;
- }
-
- if (!skipped_error) {
- LOG("skip_unrecognized_character");
- skipped_error = true;
- error_start_position = self->lexer.token_start_position;
- error_end_position = self->lexer.token_start_position;
- first_error_character = self->lexer.data.lookahead;
- }
-
- if (self->lexer.current_position.bytes == error_end_position.bytes) {
- if (self->lexer.data.eof(&self->lexer.data)) {
- self->lexer.data.result_symbol = ts_builtin_sym_error;
- break;
- }
- self->lexer.data.advance(&self->lexer.data, false);
- }
-
- error_end_position = self->lexer.current_position;
- }
-
- Subtree result;
- if (skipped_error) {
- Length padding = length_sub(error_start_position, start_position);
- Length size = length_sub(error_end_position, error_start_position);
- uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
- result = ts_subtree_new_error(
- &self->tree_pool,
- first_error_character,
- padding,
- size,
- lookahead_bytes,
- parse_state,
- self->language
- );
-
- LOG(
- "lexed_lookahead sym:%s, size:%u, character:'%c'",
- SYM_NAME(ts_subtree_symbol(result)),
- ts_subtree_total_size(result).bytes,
- first_error_character
- );
- } else {
- if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
- self->lexer.token_start_position = self->lexer.token_end_position;
- }
-
- bool is_keyword = false;
- TSSymbol symbol = self->lexer.data.result_symbol;
- Length padding = length_sub(self->lexer.token_start_position, start_position);
- Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
- uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
-
- if (found_external_token) {
- symbol = self->language->external_scanner.symbol_map[symbol];
- } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
- uint32_t end_byte = self->lexer.token_end_position.bytes;
- ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
- ts_lexer_start(&self->lexer);
- if (
- self->language->keyword_lex_fn(&self->lexer.data, 0) &&
- self->lexer.token_end_position.bytes == end_byte &&
- ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
- ) {
- is_keyword = true;
- symbol = self->lexer.data.result_symbol;
- }
- }
-
- result = ts_subtree_new_leaf(
- &self->tree_pool,
- symbol,
- padding,
- size,
- lookahead_bytes,
- parse_state,
- found_external_token,
- is_keyword,
- self->language
- );
-
- if (found_external_token) {
- unsigned length = self->language->external_scanner.serialize(
- self->external_scanner_payload,
- self->lexer.debug_buffer
- );
- ts_external_scanner_state_init(
- &((SubtreeHeapData *)result.ptr)->external_scanner_state,
- self->lexer.debug_buffer,
- length
- );
- }
-
- LOG(
- "lexed_lookahead sym:%s, size:%u",
- SYM_NAME(ts_subtree_symbol(result)),
- ts_subtree_total_size(result).bytes
- );
- }
-
- return result;
-}
-
-static Subtree ts_parser__get_cached_token(
- TSParser *self,
- TSStateId state,
- size_t position,
- Subtree last_external_token,
- TableEntry *table_entry
-) {
- TokenCache *cache = &self->token_cache;
- if (
- cache->token.ptr && cache->byte_index == position &&
- ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
- ) {
- ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
- if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
- ts_subtree_retain(cache->token);
- return cache->token;
- }
- }
- return NULL_SUBTREE;
-}
-
-static void ts_parser__set_cached_token(
- TSParser *self,
- size_t byte_index,
- Subtree last_external_token,
- Subtree token
-) {
- TokenCache *cache = &self->token_cache;
- if (token.ptr) ts_subtree_retain(token);
- if (last_external_token.ptr) ts_subtree_retain(last_external_token);
- if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
- if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token);
- cache->token = token;
- cache->byte_index = byte_index;
- cache->last_external_token = last_external_token;
-}
-
-static bool ts_parser__has_included_range_difference(
- const TSParser *self,
- uint32_t start_position,
- uint32_t end_position
-) {
- return ts_range_array_intersects(
- &self->included_range_differences,
- self->included_range_difference_index,
- start_position,
- end_position
- );
-}
-
-static Subtree ts_parser__reuse_node(
- TSParser *self,
- StackVersion version,
- TSStateId *state,
- uint32_t position,
- Subtree last_external_token,
- TableEntry *table_entry
-) {
- Subtree result;
- while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
- uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
- uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
-
- // Do not reuse an EOF node if the included ranges array has changes
- // later on in the file.
- if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
-
- if (byte_offset > position) {
- LOG("before_reusable_node symbol:%s", TREE_NAME(result));
- break;
- }
-
- if (byte_offset < position) {
- LOG("past_reusable_node symbol:%s", TREE_NAME(result));
- if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
- reusable_node_advance(&self->reusable_node);
- }
- continue;
- }
-
- if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
- LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
- reusable_node_advance(&self->reusable_node);
- continue;
- }
-
- const char *reason = NULL;
- if (ts_subtree_has_changes(result)) {
- reason = "has_changes";
- } else if (ts_subtree_is_error(result)) {
- reason = "is_error";
- } else if (ts_subtree_missing(result)) {
- reason = "is_missing";
- } else if (ts_subtree_is_fragile(result)) {
- reason = "is_fragile";
- } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
- reason = "contains_different_included_range";
- }
-
- if (reason) {
- LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
- if (!reusable_node_descend(&self->reusable_node)) {
- reusable_node_advance(&self->reusable_node);
- ts_parser__breakdown_top_of_stack(self, version);
- *state = ts_stack_state(self->stack, version);
- }
- continue;
- }
-
- TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
- ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
- if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
- LOG(
- "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
- TREE_NAME(result),
- SYM_NAME(leaf_symbol)
- );
- reusable_node_advance_past_leaf(&self->reusable_node);
- break;
- }
-
- LOG("reuse_node symbol:%s", TREE_NAME(result));
- ts_subtree_retain(result);
- return result;
- }
-
- return NULL_SUBTREE;
-}
-
-static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
- if (!left.ptr) return true;
- if (!right.ptr) return false;
-
- if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
- LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
- return true;
- }
-
- if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
- LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
- return false;
- }
-
- if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) {
- LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
- TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
- ts_subtree_dynamic_precedence(left));
- return true;
- }
-
- if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) {
- LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
- TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right),
- ts_subtree_dynamic_precedence(right));
- return false;
- }
-
- if (ts_subtree_error_cost(left) > 0) return true;
-
- int comparison = ts_subtree_compare(left, right);
- switch (comparison) {
- case -1:
- LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
- return false;
- break;
- case 1:
- LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
- return true;
- default:
- LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
- return false;
- }
-}
-
-static void ts_parser__shift(
- TSParser *self,
- StackVersion version,
- TSStateId state,
- Subtree lookahead,
- bool extra
-) {
- Subtree subtree_to_push;
- if (extra != ts_subtree_extra(lookahead)) {
- MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
- ts_subtree_set_extra(&result);
- subtree_to_push = ts_subtree_from_mut(result);
- } else {
- subtree_to_push = lookahead;
- }
-
- bool is_pending = ts_subtree_child_count(subtree_to_push) > 0;
- ts_stack_push(self->stack, version, subtree_to_push, is_pending, state);
- if (ts_subtree_has_external_tokens(subtree_to_push)) {
- ts_stack_set_last_external_token(
- self->stack, version, ts_subtree_last_external_token(subtree_to_push)
- );
- }
-}
-
-static bool ts_parser__replace_children(
- TSParser *self,
- MutableSubtree *tree,
- SubtreeArray *children
-) {
- *self->scratch_tree.ptr = *tree->ptr;
- self->scratch_tree.ptr->child_count = 0;
- ts_subtree_set_children(self->scratch_tree, children->contents, children->size, self->language);
- if (ts_parser__select_tree(self, ts_subtree_from_mut(*tree), ts_subtree_from_mut(self->scratch_tree))) {
- *tree->ptr = *self->scratch_tree.ptr;
- return true;
- } else {
- return false;
- }
-}
-
-static StackVersion ts_parser__reduce(
- TSParser *self,
- StackVersion version,
- TSSymbol symbol,
- uint32_t count,
- int dynamic_precedence,
- uint16_t production_id,
- bool is_fragile,
- bool end_of_non_terminal_extra
-) {
- uint32_t initial_version_count = ts_stack_version_count(self->stack);
-
- // Pop the given number of nodes from the given version of the parse stack.
- // If stack versions have previously merged, then there may be more than one
- // path back through the stack. For each path, create a new parent node to
- // contain the popped children, and push it onto the stack in place of the
- // children.
- StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
- uint32_t removed_version_count = 0;
- for (uint32_t i = 0; i < pop.size; i++) {
- StackSlice slice = pop.contents[i];
- StackVersion slice_version = slice.version - removed_version_count;
-
- // This is where new versions are added to the parse stack. The versions
- // will all be sorted and truncated at the end of the outer parsing loop.
- // Allow the maximum version count to be temporarily exceeded, but only
- // by a limited threshold.
- if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
- ts_stack_remove_version(self->stack, slice_version);
- ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
- removed_version_count++;
- while (i + 1 < pop.size) {
- StackSlice next_slice = pop.contents[i + 1];
- if (next_slice.version != slice.version) break;
- ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
- i++;
- }
- continue;
- }
-
- // Extra tokens on top of the stack should not be included in this new parent
- // node. They will be re-pushed onto the stack after the parent node is
- // created and pushed.
- SubtreeArray children = slice.subtrees;
- while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) {
- children.size--;
- }
-
- MutableSubtree parent = ts_subtree_new_node(&self->tree_pool,
- symbol, &children, production_id, self->language
- );
-
- // This pop operation may have caused multiple stack versions to collapse
- // into one, because they all diverged from a common state. In that case,
- // choose one of the arrays of trees to be the parent node's children, and
- // delete the rest of the tree arrays.
- while (i + 1 < pop.size) {
- StackSlice next_slice = pop.contents[i + 1];
- if (next_slice.version != slice.version) break;
- i++;
-
- SubtreeArray children = next_slice.subtrees;
- while (children.size > 0 && ts_subtree_extra(children.contents[children.size - 1])) {
- children.size--;
- }
-
- if (ts_parser__replace_children(self, &parent, &children)) {
- ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
- slice = next_slice;
- } else {
- ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
- }
- }
-
- parent.ptr->dynamic_precedence += dynamic_precedence;
- parent.ptr->production_id = production_id;
-
- TSStateId state = ts_stack_state(self->stack, slice_version);
- TSStateId next_state = ts_language_next_state(self->language, state, symbol);
- if (end_of_non_terminal_extra && next_state == state) {
- parent.ptr->extra = true;
- }
- if (is_fragile || pop.size > 1 || initial_version_count > 1) {
- parent.ptr->fragile_left = true;
- parent.ptr->fragile_right = true;
- parent.ptr->parse_state = TS_TREE_STATE_NONE;
- } else {
- parent.ptr->parse_state = state;
- }
-
- // Push the parent node onto the stack, along with any extra tokens that
- // were previously on top of the stack.
- ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
- for (uint32_t j = parent.ptr->child_count; j < slice.subtrees.size; j++) {
- ts_stack_push(self->stack, slice_version, slice.subtrees.contents[j], false, next_state);
- }
-
- for (StackVersion j = 0; j < slice_version; j++) {
- if (j == version) continue;
- if (ts_stack_merge(self->stack, j, slice_version)) {
- removed_version_count++;
- break;
- }
- }
- }
-
- // Return the first new stack version that was created.
- return ts_stack_version_count(self->stack) > initial_version_count
- ? initial_version_count
- : STACK_VERSION_NONE;
-}
-
-static void ts_parser__accept(
- TSParser *self,
- StackVersion version,
- Subtree lookahead
-) {
- assert(ts_subtree_is_eof(lookahead));
- ts_stack_push(self->stack, version, lookahead, false, 1);
-
- StackSliceArray pop = ts_stack_pop_all(self->stack, version);
- for (uint32_t i = 0; i < pop.size; i++) {
- SubtreeArray trees = pop.contents[i].subtrees;
-
- Subtree root = NULL_SUBTREE;
- for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
- Subtree child = trees.contents[j];
- if (!ts_subtree_extra(child)) {
- assert(!child.data.is_inline);
- uint32_t child_count = ts_subtree_child_count(child);
- for (uint32_t k = 0; k < child_count; k++) {
- ts_subtree_retain(child.ptr->children[k]);
- }
- array_splice(&trees, j, 1, child_count, child.ptr->children);
- root = ts_subtree_from_mut(ts_subtree_new_node(
- &self->tree_pool,
- ts_subtree_symbol(child),
- &trees,
- child.ptr->production_id,
- self->language
- ));
- ts_subtree_release(&self->tree_pool, child);
- break;
- }
- }
-
- assert(root.ptr);
- self->accept_count++;
-
- if (self->finished_tree.ptr) {
- if (ts_parser__select_tree(self, self->finished_tree, root)) {
- ts_subtree_release(&self->tree_pool, self->finished_tree);
- self->finished_tree = root;
- } else {
- ts_subtree_release(&self->tree_pool, root);
- }
- } else {
- self->finished_tree = root;
- }
- }
-
- ts_stack_remove_version(self->stack, pop.contents[0].version);
- ts_stack_halt(self->stack, version);
-}
-
-static bool ts_parser__do_all_potential_reductions(
- TSParser *self,
- StackVersion starting_version,
- TSSymbol lookahead_symbol
-) {
- uint32_t initial_version_count = ts_stack_version_count(self->stack);
-
- bool can_shift_lookahead_symbol = false;
- StackVersion version = starting_version;
- for (unsigned i = 0; true; i++) {
- uint32_t version_count = ts_stack_version_count(self->stack);
- if (version >= version_count) break;
-
- bool merged = false;
- for (StackVersion i = initial_version_count; i < version; i++) {
- if (ts_stack_merge(self->stack, i, version)) {
- merged = true;
- break;
- }
- }
- if (merged) continue;
-
- TSStateId state = ts_stack_state(self->stack, version);
- bool has_shift_action = false;
- array_clear(&self->reduce_actions);
-
- TSSymbol first_symbol, end_symbol;
- if (lookahead_symbol != 0) {
- first_symbol = lookahead_symbol;
- end_symbol = lookahead_symbol + 1;
- } else {
- first_symbol = 1;
- end_symbol = self->language->token_count;
- }
-
- for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
- TableEntry entry;
- ts_language_table_entry(self->language, state, symbol, &entry);
- for (uint32_t i = 0; i < entry.action_count; i++) {
- TSParseAction action = entry.actions[i];
- switch (action.type) {
- case TSParseActionTypeShift:
- case TSParseActionTypeRecover:
- if (!action.params.shift.extra && !action.params.shift.repetition) has_shift_action = true;
- break;
- case TSParseActionTypeReduce:
- if (action.params.reduce.child_count > 0)
- ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
- .symbol = action.params.reduce.symbol,
- .count = action.params.reduce.child_count,
- .dynamic_precedence = action.params.reduce.dynamic_precedence,
- .production_id = action.params.reduce.production_id,
- });
- break;
- default:
- break;
- }
- }
- }
-
- StackVersion reduction_version = STACK_VERSION_NONE;
- for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
- ReduceAction action = self->reduce_actions.contents[i];
-
- reduction_version = ts_parser__reduce(
- self, version, action.symbol, action.count,
- action.dynamic_precedence, action.production_id,
- true, false
- );
- }
-
- if (has_shift_action) {
- can_shift_lookahead_symbol = true;
- } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
- ts_stack_renumber_version(self->stack, reduction_version, version);
- continue;
- } else if (lookahead_symbol != 0) {
- ts_stack_remove_version(self->stack, version);
- }
-
- if (version == starting_version) {
- version = version_count;
- } else {
- version++;
- }
- }
-
- return can_shift_lookahead_symbol;
-}
-
-static void ts_parser__handle_error(
- TSParser *self,
- StackVersion version,
- TSSymbol lookahead_symbol
-) {
- uint32_t previous_version_count = ts_stack_version_count(self->stack);
-
- // Perform any reductions that can happen in this state, regardless of the lookahead. After
- // skipping one or more invalid tokens, the parser might find a token that would have allowed
- // a reduction to take place.
- ts_parser__do_all_potential_reductions(self, version, 0);
- uint32_t version_count = ts_stack_version_count(self->stack);
- Length position = ts_stack_position(self->stack, version);
-
- // Push a discontinuity onto the stack. Merge all of the stack versions that
- // were created in the previous step.
- bool did_insert_missing_token = false;
- for (StackVersion v = version; v < version_count;) {
- if (!did_insert_missing_token) {
- TSStateId state = ts_stack_state(self->stack, v);
- for (TSSymbol missing_symbol = 1;
- missing_symbol < self->language->token_count;
- missing_symbol++) {
- TSStateId state_after_missing_symbol = ts_language_next_state(
- self->language, state, missing_symbol
- );
- if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
- continue;
- }
-
- if (ts_language_has_reduce_action(
- self->language,
- state_after_missing_symbol,
- lookahead_symbol
- )) {
- // In case the parser is currently outside of any included range, the lexer will
- // snap to the beginning of the next included range. The missing token's padding
- // must be assigned to position it within the next included range.
- ts_lexer_reset(&self->lexer, position);
- ts_lexer_mark_end(&self->lexer);
- Length padding = length_sub(self->lexer.token_end_position, position);
-
- StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
- Subtree missing_tree = ts_subtree_new_missing_leaf(
- &self->tree_pool, missing_symbol, padding, self->language
- );
- ts_stack_push(
- self->stack, version_with_missing_tree,
- missing_tree, false,
- state_after_missing_symbol
- );
-
- if (ts_parser__do_all_potential_reductions(
- self, version_with_missing_tree,
- lookahead_symbol
- )) {
- LOG(
- "recover_with_missing symbol:%s, state:%u",
- SYM_NAME(missing_symbol),
- ts_stack_state(self->stack, version_with_missing_tree)
- );
- did_insert_missing_token = true;
- break;
- }
- }
- }
- }
-
- ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
- v = (v == version) ? previous_version_count : v + 1;
- }
-
- for (unsigned i = previous_version_count; i < version_count; i++) {
- bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
- assert(did_merge);
- }
-
- ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
- LOG_STACK();
-}
-
-static bool ts_parser__recover_to_state(
- TSParser *self,
- StackVersion version,
- unsigned depth,
- TSStateId goal_state
-) {
- StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
- StackVersion previous_version = STACK_VERSION_NONE;
-
- for (unsigned i = 0; i < pop.size; i++) {
- StackSlice slice = pop.contents[i];
-
- if (slice.version == previous_version) {
- ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
- array_erase(&pop, i--);
- continue;
- }
-
- if (ts_stack_state(self->stack, slice.version) != goal_state) {
- ts_stack_halt(self->stack, slice.version);
- ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
- array_erase(&pop, i--);
- continue;
- }
-
- SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
- if (error_trees.size > 0) {
- assert(error_trees.size == 1);
- Subtree error_tree = error_trees.contents[0];
- uint32_t error_child_count = ts_subtree_child_count(error_tree);
- if (error_child_count > 0) {
- array_splice(&slice.subtrees, 0, 0, error_child_count, error_tree.ptr->children);
- for (unsigned j = 0; j < error_child_count; j++) {
- ts_subtree_retain(slice.subtrees.contents[j]);
- }
- }
- ts_subtree_array_delete(&self->tree_pool, &error_trees);
- }
-
- SubtreeArray trailing_extras = ts_subtree_array_remove_trailing_extras(&slice.subtrees);
-
- if (slice.subtrees.size > 0) {
- Subtree error = ts_subtree_new_error_node(&self->tree_pool, &slice.subtrees, true, self->language);
- ts_stack_push(self->stack, slice.version, error, false, goal_state);
- } else {
- array_delete(&slice.subtrees);
- }
-
- for (unsigned j = 0; j < trailing_extras.size; j++) {
- Subtree tree = trailing_extras.contents[j];
- ts_stack_push(self->stack, slice.version, tree, false, goal_state);
- }
-
- previous_version = slice.version;
- array_delete(&trailing_extras);
- }
-
- return previous_version != STACK_VERSION_NONE;
-}
-
-static void ts_parser__recover(
- TSParser *self,
- StackVersion version,
- Subtree lookahead
-) {
- bool did_recover = false;
- unsigned previous_version_count = ts_stack_version_count(self->stack);
- Length position = ts_stack_position(self->stack, version);
- StackSummary *summary = ts_stack_get_summary(self->stack, version);
- unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
- unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
-
- // When the parser is in the error state, there are two strategies for recovering with a
- // given lookahead token:
- // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
- // create a new stack version that is in that state again. This entails popping all of the
- // subtrees that have been pushed onto the stack since that previous state, and wrapping
- // them in an ERROR node.
- // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
- // move on to the next lookahead token, remaining in the error state.
- //
- // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
- // of the previous parse states and their depths. Look at each state in the summary, to see
- // if the current lookahead token would be valid in that state.
- if (summary && !ts_subtree_is_error(lookahead)) {
- for (unsigned i = 0; i < summary->size; i++) {
- StackSummaryEntry entry = summary->contents[i];
-
- if (entry.state == ERROR_STATE) continue;
- if (entry.position.bytes == position.bytes) continue;
- unsigned depth = entry.depth;
- if (node_count_since_error > 0) depth++;
-
- // Do not recover in ways that create redundant stack versions.
- bool would_merge = false;
- for (unsigned j = 0; j < previous_version_count; j++) {
- if (
- ts_stack_state(self->stack, j) == entry.state &&
- ts_stack_position(self->stack, j).bytes == position.bytes
- ) {
- would_merge = true;
- break;
- }
- }
- if (would_merge) continue;
-
- // Do not recover if the result would clearly be worse than some existing stack version.
- unsigned new_cost =
- current_error_cost +
- entry.depth * ERROR_COST_PER_SKIPPED_TREE +
- (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
- (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
- if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
-
- // If the current lookahead token is valid in some previous state, recover to that state.
- // Then stop looking for further recoveries.
- if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
- if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
- did_recover = true;
- LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
- LOG_STACK();
- break;
- }
- }
- }
- }
-
- // In the process of attemping to recover, some stack versions may have been created
- // and subsequently halted. Remove those versions.
- for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
- if (!ts_stack_is_active(self->stack, i)) {
- ts_stack_remove_version(self->stack, i--);
- }
- }
-
- // If strategy 1 succeeded, a new stack version will have been created which is able to handle
- // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
- // current lookahead token by wrapping it in an ERROR node.
-
- // Don't pursue this additional strategy if there are already too many stack versions.
- if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
- ts_stack_halt(self->stack, version);
- ts_subtree_release(&self->tree_pool, lookahead);
- return;
- }
-
- // If the parser is still in the error state at the end of the file, just wrap everything
- // in an ERROR node and terminate.
- if (ts_subtree_is_eof(lookahead)) {
- LOG("recover_eof");
- SubtreeArray children = array_new();
- Subtree parent = ts_subtree_new_error_node(&self->tree_pool, &children, false, self->language);
- ts_stack_push(self->stack, version, parent, false, 1);
- ts_parser__accept(self, version, lookahead);
- return;
- }
-
- // Do not recover if the result would clearly be worse than some existing stack version.
- unsigned new_cost =
- current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
- ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
- ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
- if (ts_parser__better_version_exists(self, version, false, new_cost)) {
- ts_stack_halt(self->stack, version);
- ts_subtree_release(&self->tree_pool, lookahead);
- return;
- }
-
- // If the current lookahead token is an extra token, mark it as extra. This means it won't
- // be counted in error cost calculations.
- unsigned n;
- const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
- if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.shift.extra) {
- MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
- ts_subtree_set_extra(&mutable_lookahead);
- lookahead = ts_subtree_from_mut(mutable_lookahead);
- }
-
- // Wrap the lookahead token in an ERROR.
- LOG("skip_token symbol:%s", TREE_NAME(lookahead));
- SubtreeArray children = array_new();
- array_reserve(&children, 1);
- array_push(&children, lookahead);
- MutableSubtree error_repeat = ts_subtree_new_node(
- &self->tree_pool,
- ts_builtin_sym_error_repeat,
- &children,
- 0,
- self->language
- );
-
- // If other tokens have already been skipped, so there is already an ERROR at the top of the
- // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
- // ERROR.
- if (node_count_since_error > 0) {
- StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
-
- // TODO: Figure out how to make this condition occur.
- // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
- // If multiple stack versions have merged at this point, just pick one of the errors
- // arbitrarily and discard the rest.
- if (pop.size > 1) {
- for (unsigned i = 1; i < pop.size; i++) {
- ts_subtree_array_delete(&self->tree_pool, &pop.contents[i].subtrees);
- }
- while (ts_stack_version_count(self->stack) > pop.contents[0].version + 1) {
- ts_stack_remove_version(self->stack, pop.contents[0].version + 1);
- }
- }
-
- ts_stack_renumber_version(self->stack, pop.contents[0].version, version);
- array_push(&pop.contents[0].subtrees, ts_subtree_from_mut(error_repeat));
- error_repeat = ts_subtree_new_node(
- &self->tree_pool,
- ts_builtin_sym_error_repeat,
- &pop.contents[0].subtrees,
- 0,
- self->language
- );
- }
-
- // Push the new ERROR onto the stack.
- ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
- if (ts_subtree_has_external_tokens(lookahead)) {
- ts_stack_set_last_external_token(
- self->stack, version, ts_subtree_last_external_token(lookahead)
- );
- }
-}
-
-static bool ts_parser__advance(
- TSParser *self,
- StackVersion version,
- bool allow_node_reuse
-) {
- TSStateId state = ts_stack_state(self->stack, version);
- uint32_t position = ts_stack_position(self->stack, version).bytes;
- Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
-
- bool did_reuse = true;
- Subtree lookahead = NULL_SUBTREE;
- TableEntry table_entry = {.action_count = 0};
-
- // If possible, reuse a node from the previous syntax tree.
- if (allow_node_reuse) {
- lookahead = ts_parser__reuse_node(
- self, version, &state, position, last_external_token, &table_entry
- );
- }
-
- // If no node from the previous syntax tree could be reused, then try to
- // reuse the token previously returned by the lexer.
- if (!lookahead.ptr) {
- did_reuse = false;
- lookahead = ts_parser__get_cached_token(
- self, state, position, last_external_token, &table_entry
- );
- }
-
- bool needs_lex = !lookahead.ptr;
- for (;;) {
- // Otherwise, re-run the lexer.
- if (needs_lex) {
- needs_lex = false;
- lookahead = ts_parser__lex(self, version, state);
-
- if (lookahead.ptr) {
- ts_parser__set_cached_token(self, position, last_external_token, lookahead);
- ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
- }
-
- // When parsing a non-terminal extra, a null lookahead indicates the
- // end of the rule. The reduction is stored in the EOF table entry.
- // After the reduction, the lexer needs to be run again.
- else {
- ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
- }
- }
-
- // If a cancellation flag or a timeout was provided, then check every
- // time a fixed number of parse actions has been processed.
- if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
- self->operation_count = 0;
- }
- if (
- self->operation_count == 0 &&
- ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
- (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
- ) {
- ts_subtree_release(&self->tree_pool, lookahead);
- return false;
- }
-
- // Process each parse action for the current lookahead token in
- // the current state. If there are multiple actions, then this is
- // an ambiguous state. REDUCE actions always create a new stack
- // version, whereas SHIFT actions update the existing stack version
- // and terminate this loop.
- StackVersion last_reduction_version = STACK_VERSION_NONE;
- for (uint32_t i = 0; i < table_entry.action_count; i++) {
- TSParseAction action = table_entry.actions[i];
-
- switch (action.type) {
- case TSParseActionTypeShift: {
- if (action.params.shift.repetition) break;
- TSStateId next_state;
- if (action.params.shift.extra) {
-
- // TODO: remove when TREE_SITTER_LANGUAGE_VERSION 9 is out.
- if (state == ERROR_STATE) continue;
-
- next_state = state;
- LOG("shift_extra");
- } else {
- next_state = action.params.shift.state;
- LOG("shift state:%u", next_state);
- }
-
- if (ts_subtree_child_count(lookahead) > 0) {
- ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
- next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
- }
-
- ts_parser__shift(self, version, next_state, lookahead, action.params.shift.extra);
- if (did_reuse) reusable_node_advance(&self->reusable_node);
- return true;
- }
-
- case TSParseActionTypeReduce: {
- bool is_fragile = table_entry.action_count > 1;
- bool end_of_non_terminal_extra = lookahead.ptr == NULL;
- LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count);
- StackVersion reduction_version = ts_parser__reduce(
- self, version, action.params.reduce.symbol, action.params.reduce.child_count,
- action.params.reduce.dynamic_precedence, action.params.reduce.production_id,
- is_fragile, end_of_non_terminal_extra
- );
- if (reduction_version != STACK_VERSION_NONE) {
- last_reduction_version = reduction_version;
- }
- break;
- }
-
- case TSParseActionTypeAccept: {
- LOG("accept");
- ts_parser__accept(self, version, lookahead);
- return true;
- }
-
- case TSParseActionTypeRecover: {
- if (ts_subtree_child_count(lookahead) > 0) {
- ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
- }
-
- ts_parser__recover(self, version, lookahead);
- if (did_reuse) reusable_node_advance(&self->reusable_node);
- return true;
- }
- }
- }
-
- // If a reduction was performed, then replace the current stack version
- // with one of the stack versions created by a reduction, and continue
- // processing this version of the stack with the same lookahead symbol.
- if (last_reduction_version != STACK_VERSION_NONE) {
- ts_stack_renumber_version(self->stack, last_reduction_version, version);
- LOG_STACK();
- state = ts_stack_state(self->stack, version);
-
- // At the end of a non-terminal extra rule, the lexer will return a
- // null subtree, because the parser needs to perform a fixed reduction
- // regardless of the lookahead node. After performing that reduction,
- // (and completing the non-terminal extra rule) run the lexer again based
- // on the current parse state.
- if (!lookahead.ptr) {
- needs_lex = true;
- continue;
- }
-
- ts_language_table_entry(
- self->language,
- state,
- ts_subtree_leaf_symbol(lookahead),
- &table_entry
- );
- continue;
- }
-
- if (!lookahead.ptr) {
- ts_stack_pause(self->stack, version, ts_builtin_sym_end);
- return true;
- }
-
- // If there were no parse actions for the current lookahead token, then
- // it is not valid in this state. If the current lookahead token is a
- // keyword, then switch to treating it as the normal word token if that
- // token is valid in this state.
- if (
- ts_subtree_is_keyword(lookahead) &&
- ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
- ) {
- ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
- if (table_entry.action_count > 0) {
- LOG(
- "switch from_keyword:%s, to_word_token:%s",
- TREE_NAME(lookahead),
- SYM_NAME(self->language->keyword_capture_token)
- );
-
- MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
- ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
- lookahead = ts_subtree_from_mut(mutable_lookahead);
- continue;
- }
- }
-
- // If the current lookahead token is not valid and the parser is
- // already in the error state, restart the error recovery process.
- // TODO - can this be unified with the other `RECOVER` case above?
- if (state == ERROR_STATE) {
- ts_parser__recover(self, version, lookahead);
- return true;
- }
-
- // If the current lookahead token is not valid and the previous
- // subtree on the stack was reused from an old tree, it isn't actually
- // valid to reuse it. Remove it from the stack, and in its place,
- // push each of its children. Then try again to process the current
- // lookahead.
- if (ts_parser__breakdown_top_of_stack(self, version)) {
- state = ts_stack_state(self->stack, version);
- ts_subtree_release(&self->tree_pool, lookahead);
- needs_lex = true;
- continue;
- }
-
- // At this point, the current lookahead token is definitely not valid
- // for this parse stack version. Mark this version as paused and continue
- // processing any other stack versions that might exist. If some other
- // version advances successfully, then this version can simply be removed.
- // But if all versions end up paused, then error recovery is needed.
- LOG("detect_error");
- ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead));
- ts_subtree_release(&self->tree_pool, lookahead);
- return true;
- }
-}
-
-static unsigned ts_parser__condense_stack(TSParser *self) {
- bool made_changes = false;
- unsigned min_error_cost = UINT_MAX;
- for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
- // Prune any versions that have been marked for removal.
- if (ts_stack_is_halted(self->stack, i)) {
- ts_stack_remove_version(self->stack, i);
- i--;
- continue;
- }
-
- // Keep track of the minimum error cost of any stack version so
- // that it can be returned.
- ErrorStatus status_i = ts_parser__version_status(self, i);
- if (!status_i.is_in_error && status_i.cost < min_error_cost) {
- min_error_cost = status_i.cost;
- }
-
- // Examine each pair of stack versions, removing any versions that
- // are clearly worse than another version. Ensure that the versions
- // are ordered from most promising to least promising.
- for (StackVersion j = 0; j < i; j++) {
- ErrorStatus status_j = ts_parser__version_status(self, j);
-
- switch (ts_parser__compare_versions(self, status_j, status_i)) {
- case ErrorComparisonTakeLeft:
- made_changes = true;
- ts_stack_remove_version(self->stack, i);
- i--;
- j = i;
- break;
-
- case ErrorComparisonPreferLeft:
- case ErrorComparisonNone:
- if (ts_stack_merge(self->stack, j, i)) {
- made_changes = true;
- i--;
- j = i;
- }
- break;
-
- case ErrorComparisonPreferRight:
- made_changes = true;
- if (ts_stack_merge(self->stack, j, i)) {
- i--;
- j = i;
- } else {
- ts_stack_swap_versions(self->stack, i, j);
- }
- break;
-
- case ErrorComparisonTakeRight:
- made_changes = true;
- ts_stack_remove_version(self->stack, j);
- i--;
- j--;
- break;
- }
- }
- }
-
- // Enfore a hard upper bound on the number of stack versions by
- // discarding the least promising versions.
- while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
- ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
- made_changes = true;
- }
-
- // If the best-performing stack version is currently paused, or all
- // versions are paused, then resume the best paused version and begin
- // the error recovery process. Otherwise, remove the paused versions.
- if (ts_stack_version_count(self->stack) > 0) {
- bool has_unpaused_version = false;
- for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
- if (ts_stack_is_paused(self->stack, i)) {
- if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
- LOG("resume version:%u", i);
- min_error_cost = ts_stack_error_cost(self->stack, i);
- TSSymbol lookahead_symbol = ts_stack_resume(self->stack, i);
- ts_parser__handle_error(self, i, lookahead_symbol);
- has_unpaused_version = true;
- } else {
- ts_stack_remove_version(self->stack, i);
- i--;
- n--;
- }
- } else {
- has_unpaused_version = true;
- }
- }
- }
-
- if (made_changes) {
- LOG("condense");
- LOG_STACK();
- }
-
- return min_error_cost;
-}
-
-static bool ts_parser_has_outstanding_parse(TSParser *self) {
- return (
- ts_stack_state(self->stack, 0) != 1 ||
- ts_stack_node_count_since_error(self->stack, 0) != 0
- );
-}
-
-// Parser - Public
-
-// Allocate a parser with `ts_calloc` and initialize its lexer, reduce-action
-// array, subtree pool, stack and bookkeeping fields. No language is assigned
-// here; callers set one with `ts_parser_set_language`.
-TSParser *ts_parser_new(void) {
-  TSParser *parser = ts_calloc(1, sizeof(TSParser));
-
-  // Owned sub-objects. The stack is built on top of the subtree pool, so the
-  // pool has to exist first.
-  ts_lexer_init(&parser->lexer);
-  array_init(&parser->reduce_actions);
-  array_reserve(&parser->reduce_actions, 4);
-  parser->tree_pool = ts_subtree_pool_new(32);
-  parser->stack = ts_stack_new(&parser->tree_pool);
-  parser->reusable_node = reusable_node_new();
-  parser->included_range_differences = (TSRangeArray) array_new();
-  parser->included_range_difference_index = 0;
-
-  // Tree handles start out empty; the scratch tree points at storage that
-  // lives inside the parser itself.
-  parser->finished_tree = NULL_SUBTREE;
-  parser->old_tree = NULL_SUBTREE;
-  parser->scratch_tree.ptr = &parser->scratch_tree_data;
-
-  // Debugging, cancellation and timeout settings are all disabled.
-  parser->dot_graph_file = NULL;
-  parser->cancellation_flag = NULL;
-  parser->timeout_duration = 0;
-  parser->end_clock = clock_null();
-  parser->operation_count = 0;
-
-  ts_parser__set_cached_token(parser, 0, NULL_SUBTREE, NULL_SUBTREE);
-  return parser;
-}
-
-// Free a parser and the resources it owns. Passing NULL is a no-op.
-void ts_parser_delete(TSParser *self) {
- if (!self) return;
-
- // Clearing the language runs the current external scanner's `destroy`
- // callback (when a payload and callback exist) and resets the parser via
- // `ts_parser_reset`.
- ts_parser_set_language(self, NULL);
- ts_stack_delete(self->stack);
- if (self->reduce_actions.contents) {
- array_delete(&self->reduce_actions);
- }
- if (self->included_range_differences.contents) {
- array_delete(&self->included_range_differences);
- }
- // `ts_parser_reset` (called above through `ts_parser_set_language`) already
- // releases `old_tree`, so this check is a second line of defense.
- if (self->old_tree.ptr) {
- ts_subtree_release(&self->tree_pool, self->old_tree);
- self->old_tree = NULL_SUBTREE;
- }
- ts_lexer_delete(&self->lexer);
- // Clear the cached token before the subtree pool is deleted.
- // NOTE(review): presumably this releases subtrees held by the token cache
- // back into the pool - confirm against ts_parser__set_cached_token.
- ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
- ts_subtree_pool_delete(&self->tree_pool);
- reusable_node_delete(&self->reusable_node);
- ts_free(self);
-}
-
-// Return the language currently stored on the parser (NULL when none is set).
-const TSLanguage *ts_parser_language(const TSParser *self) {
-  const TSLanguage *current = self->language;
-  return current;
-}
-
-// Assign `language` to the parser (NULL clears it).
-//
-// Returns false, leaving the parser untouched, when the language's version
-// lies outside [TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION,
-// TREE_SITTER_LANGUAGE_VERSION]. Otherwise the previous external scanner
-// payload is destroyed, a payload for the new language is created when it
-// provides a `create` callback, the parser is reset, and true is returned.
-bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
-  if (
-    language != NULL && (
-      language->version > TREE_SITTER_LANGUAGE_VERSION ||
-      language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
-    )
-  ) {
-    return false;
-  }
-
-  // Tear down the scanner state that belongs to the outgoing language.
-  if (self->external_scanner_payload && self->language->external_scanner.destroy) {
-    self->language->external_scanner.destroy(self->external_scanner_payload);
-  }
-
-  // Build scanner state for the incoming language, if it has a scanner.
-  void *payload = NULL;
-  if (language && language->external_scanner.create) {
-    payload = language->external_scanner.create();
-  }
-  self->external_scanner_payload = payload;
-
-  self->language = language;
-  ts_parser_reset(self);
-  return true;
-}
-
-// Return the logger stored on the parser's lexer.
-TSLogger ts_parser_logger(const TSParser *self) {
-  TSLogger current = self->lexer.logger;
-  return current;
-}
-
-// Store `logger` on the parser's lexer, replacing the previous one.
-void ts_parser_set_logger(TSParser *self, TSLogger logger) {
-  TSLogger *slot = &self->lexer.logger;
-  *slot = logger;
-}
-
-// Select the file descriptor that dot graphs are written to. Any stream
-// opened by an earlier call is closed first. A non-negative `fd` is wrapped
-// with `fdopen` in append mode; a negative `fd` leaves no stream set.
-void ts_parser_print_dot_graphs(TSParser *self, int fd) {
-  FILE *previous = self->dot_graph_file;
-  if (previous) fclose(previous);
-
-  self->dot_graph_file = (fd >= 0) ? fdopen(fd, "a") : NULL;
-}
-
-// Return the cancellation flag pointer stored on the parser, with the
-// `volatile` qualifier cast away.
-const size_t *ts_parser_cancellation_flag(const TSParser *self) {
-  const volatile size_t *stored = self->cancellation_flag;
-  return (const size_t *)stored;
-}
-
-// Store `flag` as the parser's cancellation flag; it is kept as a pointer to
-// `const volatile size_t`.
-void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
-  const volatile size_t *as_volatile = (const volatile size_t *)flag;
-  self->cancellation_flag = as_volatile;
-}
-
-// Return the parser's timeout, converted from its stored duration to
-// microseconds.
-uint64_t ts_parser_timeout_micros(const TSParser *self) {
-  uint64_t micros = duration_to_micros(self->timeout_duration);
-  return micros;
-}
-
-// Set the parser's timeout from a value in microseconds. `ts_parser_parse`
-// treats a zero duration as "no timeout".
-void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
-  self->timeout_duration = duration_from_micros(timeout_micros);
-}
-
-// Forward the included ranges to the parser's lexer and return the lexer's
-// verdict.
-bool ts_parser_set_included_ranges(
-  TSParser *self,
-  const TSRange *ranges,
-  uint32_t count
-) {
-  bool accepted = ts_lexer_set_included_ranges(&self->lexer, ranges, count);
-  return accepted;
-}
-
-// Return the lexer's current included ranges; the number of ranges is
-// written through `count` by the lexer.
-const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) {
-  const TSRange *ranges = ts_lexer_included_ranges(&self->lexer, count);
-  return ranges;
-}
-
-// Discard all in-progress parse state so that the next call to
-// `ts_parser_parse` starts from scratch. The language, logger, timeout and
-// cancellation flag are not touched.
-void ts_parser_reset(TSParser *self) {
- // Hand the external scanner an empty buffer (NULL, length 0).
- // NOTE(review): presumably this puts the scanner back into its initial
- // state - confirm against the external scanner contract.
- if (self->language && self->language->external_scanner.deserialize) {
- self->language->external_scanner.deserialize(self->external_scanner_payload, NULL, 0);
- }
-
- // Drop the reference taken on the previous tree's root, if any.
- if (self->old_tree.ptr) {
- ts_subtree_release(&self->tree_pool, self->old_tree);
- self->old_tree = NULL_SUBTREE;
- }
-
- reusable_node_clear(&self->reusable_node);
- ts_lexer_reset(&self->lexer, length_zero());
- ts_stack_clear(self->stack);
- ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
- // A finished tree that was never handed out is released here.
- // `ts_parser_parse` clears this handle before calling reset so that the
- // tree it returns is not released.
- if (self->finished_tree.ptr) {
- ts_subtree_release(&self->tree_pool, self->finished_tree);
- self->finished_tree = NULL_SUBTREE;
- }
- self->accept_count = 0;
-}
-
-// Parse the text supplied by `input` and return a new tree.
-//
-// Parameters:
-// - `old_tree`: optional previous tree; when given (and no parse is
-//   outstanding) its root is retained and used for node reuse.
-// - `input`: the text source; `input.read` must be non-NULL.
-//
-// Returns NULL when the parser has no language, when `input.read` is NULL,
-// or when `ts_parser__advance` returns false. In the last case the parser is
-// not reset before returning, so a later call can take the "resume_parsing"
-// branch below.
-TSTree *ts_parser_parse(
- TSParser *self,
- const TSTree *old_tree,
- TSInput input
-) {
- if (!self->language || !input.read) return NULL;
-
- ts_lexer_set_input(&self->lexer, input);
-
- array_clear(&self->included_range_differences);
- self->included_range_difference_index = 0;
-
- // Three ways to start: continue an unfinished parse, re-parse using an
- // old tree, or parse from scratch.
- if (ts_parser_has_outstanding_parse(self)) {
- LOG("resume_parsing");
- } else if (old_tree) {
- ts_subtree_retain(old_tree->root);
- self->old_tree = old_tree->root;
- // Record where the old tree's included ranges differ from the lexer's
- // current ones.
- ts_range_array_get_changed_ranges(
- old_tree->included_ranges, old_tree->included_range_count,
- self->lexer.included_ranges, self->lexer.included_range_count,
- &self->included_range_differences
- );
- reusable_node_reset(&self->reusable_node, old_tree->root);
- LOG("parse_after_edit");
- LOG_TREE(self->old_tree);
- for (unsigned i = 0; i < self->included_range_differences.size; i++) {
- TSRange *range = &self->included_range_differences.contents[i];
- LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
- }
- } else {
- reusable_node_clear(&self->reusable_node);
- LOG("new_parse");
- }
-
- uint32_t position = 0, last_position = 0, version_count = 0;
- self->operation_count = 0;
- // A zero timeout duration means no deadline (null end clock).
- if (self->timeout_duration) {
- self->end_clock = clock_after(clock_now(), self->timeout_duration);
- } else {
- self->end_clock = clock_null();
- }
-
- // Each pass of the outer loop advances every stack version, then condenses
- // the stack. The version count is re-read on every inner-loop test because
- // advancing can change the number of versions.
- do {
- for (StackVersion version = 0;
- version_count = ts_stack_version_count(self->stack), version < version_count;
- version++) {
- // Node reuse is only allowed while there is exactly one stack version.
- bool allow_node_reuse = version_count == 1;
- while (ts_stack_is_active(self->stack, version)) {
- LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
- version, ts_stack_version_count(self->stack),
- ts_stack_state(self->stack, version),
- ts_stack_position(self->stack, version).extent.row + 1,
- ts_stack_position(self->stack, version).extent.column);
-
- if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
- LOG_STACK();
-
- // Stop advancing this version once it has moved past the furthest
- // byte position seen so far (or, for versions other than the first,
- // once it has caught up to it), and move on to the next version.
- position = ts_stack_position(self->stack, version).bytes;
- if (position > last_position || (version > 0 && position == last_position)) {
- last_position = position;
- break;
- }
- }
- }
-
- // Finish as soon as the finished tree's error cost is lower than the
- // minimum error cost reported by condensing the stack.
- unsigned min_error_cost = ts_parser__condense_stack(self);
- if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
- break;
- }
-
- // Skip past included-range differences that end at or before the
- // position most recently read from the stack.
- while (self->included_range_difference_index < self->included_range_differences.size) {
- TSRange *range = &self->included_range_differences.contents[self->included_range_difference_index];
- if (range->end_byte <= position) {
- self->included_range_difference_index++;
- } else {
- break;
- }
- }
- } while (version_count != 0);
-
- ts_subtree_balance(self->finished_tree, &self->tree_pool, self->language);
- LOG("done");
- LOG_TREE(self->finished_tree);
-
- TSTree *result = ts_tree_new(
- self->finished_tree,
- self->language,
- self->lexer.included_ranges,
- self->lexer.included_range_count
- );
- // Clear the handle first so that `ts_parser_reset` does not release the
- // subtree that was just handed to the new tree.
- self->finished_tree = NULL_SUBTREE;
- ts_parser_reset(self);
- return result;
-}
-
-// Parse an in-memory string, treating it as UTF-8. Thin wrapper around
-// `ts_parser_parse_string_encoding`.
-TSTree *ts_parser_parse_string(
-  TSParser *self,
-  const TSTree *old_tree,
-  const char *string,
-  uint32_t length
-) {
-  const TSInputEncoding encoding = TSInputEncodingUTF8;
-  return ts_parser_parse_string_encoding(self, old_tree, string, length, encoding);
-}
-
-// Parse an in-memory string in the given encoding. The string and its length
-// are wrapped in a `TSStringInput` that `ts_string_input_read` reads from,
-// and the resulting `TSInput` is passed to `ts_parser_parse`.
-TSTree *ts_parser_parse_string_encoding(
-  TSParser *self,
-  const TSTree *old_tree,
-  const char *string,
-  uint32_t length,
-  TSInputEncoding encoding
-) {
-  TSStringInput source = {string, length};
-  TSInput input = {
-    &source,
-    ts_string_input_read,
-    encoding,
-  };
-  return ts_parser_parse(self, old_tree, input);
-}
-
-#undef LOG
diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h
deleted file mode 100644
index 11bf4fc42a..0000000000
--- a/src/tree_sitter/parser.h
+++ /dev/null
@@ -1,235 +0,0 @@
-#ifndef TREE_SITTER_PARSER_H_
-#define TREE_SITTER_PARSER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-#define ts_builtin_sym_error ((TSSymbol)-1)
-#define ts_builtin_sym_end 0
-#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
-
-#ifndef TREE_SITTER_API_H_
-typedef uint16_t TSSymbol;
-typedef uint16_t TSFieldId;
-typedef struct TSLanguage TSLanguage;
-#endif
-
-// Element type of a language's `field_map_entries` table (see `TSLanguage`).
-// NOTE(review): presumably ties a field id to a child position within a
-// production - confirm against the code that reads the field map.
-typedef struct {
- TSFieldId field_id;
- uint8_t child_index;
- bool inherited;
-} TSFieldMapEntry;
-
-// Element type of a language's `field_map_slices` table (see `TSLanguage`).
-// NOTE(review): `index`/`length` presumably delimit a run of entries in
-// `field_map_entries` - confirm.
-typedef struct {
- uint16_t index;
- uint16_t length;
-} TSFieldMapSlice;
-
-// Identifier of a parse state; also the type of the state argument passed to
-// a language's `lex_fn` / `keyword_lex_fn`.
-typedef uint16_t TSStateId;
-
-// Per-symbol flags; element type of a language's `symbol_metadata` table.
-typedef struct {
- bool visible : 1;
- bool named : 1;
-} TSSymbolMetadata;
-
-typedef struct TSLexer TSLexer;
-
-// Lexer interface passed to a language's lex functions and to its external
-// scanner's `scan` callback (see `TSLanguage`).
-struct TSLexer {
- // Current lookahead character; read by the START_LEXER macro.
- int32_t lookahead;
- // Symbol of the recognized token; written by the ACCEPT_TOKEN macro.
- TSSymbol result_symbol;
- // Move to the next character. START_LEXER passes its `skip` flag as the
- // second argument.
- void (*advance)(TSLexer *, bool);
- // Called by ACCEPT_TOKEN after the result symbol has been stored.
- void (*mark_end)(TSLexer *);
- uint32_t (*get_column)(TSLexer *);
- bool (*is_at_included_range_start)(const TSLexer *);
- bool (*eof)(const TSLexer *);
-};
-
-// Kinds of parse action; stored in the `type` field of `TSParseAction`.
-typedef enum {
- TSParseActionTypeShift,
- TSParseActionTypeReduce,
- TSParseActionTypeAccept,
- TSParseActionTypeRecover,
-} TSParseActionType;
-
-// A single parse action. `type` selects which member of `params` applies:
-// the SHIFT* macros fill in `params.shift`, REDUCE fills in `params.reduce`,
-// and RECOVER / ACCEPT_INPUT set only `type`.
-typedef struct {
- union {
- struct {
- TSStateId state;
- bool extra : 1;
- bool repetition : 1;
- } shift;
- struct {
- TSSymbol symbol;
- int16_t dynamic_precedence;
- uint8_t child_count;
- uint8_t production_id;
- } reduce;
- } params;
- TSParseActionType type : 4;
-} TSParseAction;
-
-// Element type of a language's `lex_modes` table.
-typedef struct {
- uint16_t lex_state;
- uint16_t external_lex_state;
-} TSLexMode;
-
-// Element type of a language's `parse_actions` table: either an action or an
-// `entry` header carrying a count and a reusable flag.
-// NOTE(review): presumably each header is followed by `count` actions -
-// confirm against the table lookup code.
-typedef union {
- TSParseAction action;
- struct {
- uint8_t count;
- bool reusable : 1;
- } entry;
-} TSParseActionEntry;
-
-// Description of a language: static tables plus lexing and external-scanner
-// callbacks. The parser treats it as read-only (it is always accessed
-// through `const TSLanguage *`).
-struct TSLanguage {
- // Checked by `ts_parser_set_language` against TREE_SITTER_LANGUAGE_VERSION
- // and TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION.
- uint32_t version;
- uint32_t symbol_count;
- uint32_t alias_count;
- uint32_t token_count;
- uint32_t external_token_count;
- const char **symbol_names;
- const TSSymbolMetadata *symbol_metadata;
- const uint16_t *parse_table;
- const TSParseActionEntry *parse_actions;
- const TSLexMode *lex_modes;
- const TSSymbol *alias_sequences;
- uint16_t max_alias_sequence_length;
- bool (*lex_fn)(TSLexer *, TSStateId);
- bool (*keyword_lex_fn)(TSLexer *, TSStateId);
- TSSymbol keyword_capture_token;
- // Optional external scanner. The parser calls `create` when the language
- // is assigned, `destroy` when it is replaced, and `deserialize` with an
- // empty buffer on reset; each callback is checked for NULL before use.
- struct {
- const bool *states;
- const TSSymbol *symbol_map;
- void *(*create)(void);
- void (*destroy)(void *);
- bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
- unsigned (*serialize)(void *, char *);
- void (*deserialize)(void *, const char *, unsigned);
- } external_scanner;
- uint32_t field_count;
- const TSFieldMapSlice *field_map_slices;
- const TSFieldMapEntry *field_map_entries;
- const char **field_names;
- uint32_t large_state_count;
- const uint16_t *small_parse_table;
- const uint32_t *small_parse_table_map;
- const TSSymbol *public_symbol_map;
-};
-
-/*
- * Lexer Macros
- */
-
-// Prologue of a lex function. Declares the locals `result`, `skip`, `eof`
-// and `lookahead`, and defines two labels: `next_state`, which advances the
-// lexer (passing the current `skip` flag), and `start`, which clears `skip`
-// and reloads `lookahead`. Expects a `TSLexer *lexer` to be in scope.
-#define START_LEXER() \
- bool result = false; \
- bool skip = false; \
- bool eof = false; \
- int32_t lookahead; \
- goto start; \
- next_state: \
- lexer->advance(lexer, skip); \
- start: \
- skip = false; \
- lookahead = lexer->lookahead;
-
-// Switch to lex state `state_value` and consume the current character.
-// Expects a variable named `state` in scope.
-#define ADVANCE(state_value) \
- { \
- state = state_value; \
- goto next_state; \
- }
-
-// Like ADVANCE, but sets `skip` so that the lexer's `advance` callback is
-// called with `true` as its second argument.
-#define SKIP(state_value) \
- { \
- skip = true; \
- state = state_value; \
- goto next_state; \
- }
-
-// Record `symbol_value` as the recognized token and mark the token end.
-// Expands to three separate statements that are not wrapped in a block, so
-// it must not be used as the body of an unbraced `if`/`else`/loop.
-#define ACCEPT_TOKEN(symbol_value) \
- result = true; \
- lexer->result_symbol = symbol_value; \
- lexer->mark_end(lexer);
-
-// Return whether a token has been accepted so far.
-#define END_STATE() return result;
-
-/*
- * Parse Table Macros
- */
-
-// Rebase a state id by LARGE_STATE_COUNT. Both the argument and the whole
-// expansion are parenthesized so the macro evaluates as a single value when
-// it is used inside a larger expression (e.g. `2 * SMALL_STATE(n)`) or is
-// given a compound argument (e.g. `SMALL_STATE(a ? b : c)`). For the plain
-// integer arguments used in table designators the result is unchanged.
-#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
-
-// Identity wrappers that label table entries as state ids / action indices.
-#define STATE(id) id
-
-#define ACTIONS(id) id
-
-// The macros below expand to brace-enclosed initializers whose inner braces
-// initialize a `TSParseAction` (via `.params` / `.type`); the outer braces
-// fit the `action` member of `TSParseActionEntry`.
-
-// Shift action that moves to `state_value`.
-#define SHIFT(state_value) \
- { \
- { \
- .params = { \
- .shift = { \
- .state = state_value \
- } \
- }, \
- .type = TSParseActionTypeShift \
- } \
- }
-
-// Shift action that moves to `state_value` with the `repetition` flag set.
-#define SHIFT_REPEAT(state_value) \
- { \
- { \
- .params = { \
- .shift = { \
- .state = state_value, \
- .repetition = true \
- } \
- }, \
- .type = TSParseActionTypeShift \
- } \
- }
-
-// Recover action; only the type is set.
-#define RECOVER() \
- { \
- { .type = TSParseActionTypeRecover } \
- }
-
-// Shift action with the `extra` flag set; `state` is left zero-initialized.
-#define SHIFT_EXTRA() \
- { \
- { \
- .params = { \
- .shift = { \
- .extra = true \
- } \
- }, \
- .type = TSParseActionTypeShift \
- } \
- }
-
-// Reduce action producing `symbol_val` from `child_count_val` children. Any
-// further arguments are pasted into the `.reduce` initializer, so they can
-// set its remaining fields (`dynamic_precedence`, `production_id`).
-#define REDUCE(symbol_val, child_count_val, ...) \
- { \
- { \
- .params = { \
- .reduce = { \
- .symbol = symbol_val, \
- .child_count = child_count_val, \
- __VA_ARGS__ \
- }, \
- }, \
- .type = TSParseActionTypeReduce \
- } \
- }
-
-// Accept action; only the type is set.
-#define ACCEPT_INPUT() \
- { \
- { .type = TSParseActionTypeAccept } \
- }
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_PARSER_H_
diff --git a/src/tree_sitter/point.h b/src/tree_sitter/point.h
deleted file mode 100644
index a50d20214b..0000000000
--- a/src/tree_sitter/point.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef TREE_SITTER_POINT_H_
-#define TREE_SITTER_POINT_H_
-
-#include "tree_sitter/api.h"
-
-#define POINT_ZERO ((TSPoint) {0, 0})
-#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
-
-// Build a TSPoint from a row and a column.
-static inline TSPoint point__new(unsigned row, unsigned column) {
-  return (TSPoint) {row, column};
-}
-
-// Add the extent `b` to the position `a`. When `b` spans at least one row,
-// the rows are summed and the column is taken from `b` alone; otherwise the
-// row of `a` is kept and the columns are summed.
-static inline TSPoint point_add(TSPoint a, TSPoint b) {
-  if (b.row == 0) {
-    return point__new(a.row, a.column + b.column);
-  }
-  return point__new(a.row + b.row, b.column);
-}
-
-// Subtract `b` from `a`. When `a` is on a later row than `b`, the result is
-// the row difference with `a`'s column; otherwise the result is on row 0
-// with the column difference.
-static inline TSPoint point_sub(TSPoint a, TSPoint b) {
-  if (a.row <= b.row) {
-    return point__new(0, a.column - b.column);
-  }
-  return point__new(a.row - b.row, a.column);
-}
-
-// True when `a` is at or before `b`, comparing rows first and then columns.
-static inline bool point_lte(TSPoint a, TSPoint b) {
-  if (a.row != b.row) return a.row < b.row;
-  return a.column <= b.column;
-}
-
-// True when `a` is strictly before `b`, comparing rows first and then
-// columns.
-static inline bool point_lt(TSPoint a, TSPoint b) {
-  if (a.row != b.row) return a.row < b.row;
-  return a.column < b.column;
-}
-
-// True when both points have the same row and the same column.
-static inline bool point_eq(TSPoint a, TSPoint b) {
-  if (a.row != b.row) return false;
-  return a.column == b.column;
-}
-
-// Return the earlier of the two points; `b` is returned when they are equal.
-static inline TSPoint point_min(TSPoint a, TSPoint b) {
-  bool a_is_earlier =
-    a.row < b.row || (a.row == b.row && a.column < b.column);
-  return a_is_earlier ? a : b;
-}
-
-// Return the later of the two points; `b` is returned when they are equal.
-static inline TSPoint point_max(TSPoint a, TSPoint b) {
-  bool a_is_later =
-    a.row > b.row || (a.row == b.row && a.column > b.column);
-  return a_is_later ? a : b;
-}
-
-#endif
diff --git a/src/tree_sitter/query.c b/src/tree_sitter/query.c
deleted file mode 100644
index b887b74ff6..0000000000
--- a/src/tree_sitter/query.c
+++ /dev/null
@@ -1,2143 +0,0 @@
-#include "tree_sitter/api.h"
-#include "./alloc.h"
-#include "./array.h"
-#include "./bits.h"
-#include "./language.h"
-#include "./point.h"
-#include "./tree_cursor.h"
-#include "./unicode.h"
-#include <wctype.h>
-
-// #define LOG(...) fprintf(stderr, __VA_ARGS__)
-#define LOG(...)
-
-#define MAX_CAPTURE_LIST_COUNT 32
-#define MAX_STEP_CAPTURE_COUNT 3
-
-/*
- * Stream - A sequence of unicode characters derived from a UTF8 string.
- * This struct is used in parsing queries from S-expressions.
- */
-typedef struct {
- // Current read position; `stream_advance` moves it forward by `next_size`
- // bytes.
- const char *input;
- // One past the last byte of the string (`string + length` in `stream_new`).
- const char *end;
- // Code point decoded at `input` by `ts_decode_utf8`; set to '\0' once
- // `input` reaches `end`.
- int32_t next;
- // Size in bytes of `next` as reported by `ts_decode_utf8`; 0 once `input`
- // reaches `end`.
- uint8_t next_size;
-} Stream;
-
-/*
- * QueryStep - A step in the process of matching a query. Each node within
- * a query S-expression maps to one of these steps. An entire pattern is
- * represented as a sequence of these steps. Fields:
- *
- * - `symbol` - The grammar symbol to match. A zero value represents the
- * wildcard symbol, '_'.
- * - `field` - The field name to match. A zero value means that a field name
- * was not specified.
- * - `capture_ids` - An array of integers representing the names of captures
- * associated with this node in the pattern, terminated by a `NONE` value.
- * - `depth` - The depth where this node occurs in the pattern. The root node
- * of the pattern has depth zero.
- * - `alternative_index` - The index of a different query step that serves as
- * an alternative to this step.
- */
-typedef struct {
- TSSymbol symbol;
- TSFieldId field;
- // At most MAX_STEP_CAPTURE_COUNT captures per step; unused slots hold NONE
- // (see `query_step__new` and `query_step__add_capture`).
- uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT];
- // NONE when the step has no alternative (initial value in
- // `query_step__new`).
- uint16_t alternative_index;
- uint16_t depth;
- // Computed by `ts_query__finalize_steps`: true when this step, or a deeper
- // step that follows it, has at least one capture.
- bool contains_captures: 1;
- // Set from the `is_immediate` argument of `query_step__new`.
- bool is_immediate: 1;
- // The remaining flags all start out false in `query_step__new`.
- // NOTE(review): their meaning is not documented in the header comment
- // above - TODO describe them there.
- bool is_last_child: 1;
- bool is_pass_through: 1;
- bool is_dead_end: 1;
- bool alternative_is_immediate: 1;
-} QueryStep;
-
-/*
- * Slice - A slice of an external array. Within a query, capture names,
- * literal string values, and predicate step informations are stored in three
- * contiguous arrays. Individual captures, string values, and predicates are
- * represented as slices of these three arrays.
- */
-typedef struct {
- // Index of the slice's first element within the backing array.
- uint32_t offset;
- // Number of elements in the slice. For `SymbolTable` entries this excludes
- // the NUL terminator that is stored after each name.
- uint32_t length;
-} Slice;
-
-/*
- * SymbolTable - a two-way mapping of strings to ids.
- */
-typedef struct {
- // All names stored back to back, each followed by a NUL byte.
- Array(char) characters;
- // One slice per name; a name's id is its index in this array.
- Array(Slice) slices;
-} SymbolTable;
-
-/*
- * PatternEntry - Information about the starting point for matching a
- * particular pattern, consisting of the index of the pattern within the query,
- * and the index of the pattern's first step in the shared `steps` array. These
- * entries are stored in a 'pattern map' - a sorted array that makes it
- * possible to efficiently lookup patterns based on the symbol for their first
- * step.
- */
-typedef struct {
- // Index of the pattern's first step in the query's `steps` array.
- uint16_t step_index;
- // Index of the pattern within the query.
- uint16_t pattern_index;
-} PatternEntry;
-
-/*
- * QueryState - The state of an in-progress match of a particular pattern
- * in a query. While executing, a `TSQueryCursor` must keep track of a number
- * of possible in-progress matches. Each of those possible matches is
- * represented as one of these states. Fields:
- * - `id` - A numeric id that is exposed to the public API. This allows the
- * caller to remove a given match, preventing any more of its captures
- * from being returned.
- * - `start_depth` - The depth in the tree where the first step of the state's
- * pattern was matched.
- * - `pattern_index` - The pattern that the state is matching.
- * - `consumed_capture_count` - The number of captures from this match that
- * have already been returned.
- * - `capture_list_id` - A numeric id that can be used to retrieve the state's
- * list of captures from the `CaptureListPool`.
- * - `seeking_immediate_match` - A flag that indicates that the state's next
- * step must be matched by the very next sibling. This is used when
- * processing repetitions.
- * - `has_in_progress_alternatives` - A flag that indicates that there are
- * other states that have the same captures as this state, but are at
- * different steps in their pattern. This means that in order to obey the
- * 'longest-match' rule, this state should not be returned as a match until
- * it is clear that there can be no longer match.
- */
-typedef struct {
- uint32_t id;
- uint16_t start_depth;
- // NOTE(review): not described in the comment above; presumably the index
- // in the query's `steps` array of the next step to match - confirm.
- uint16_t step_index;
- uint16_t pattern_index;
- uint16_t capture_list_id;
- // 12-bit field: together with the three flags below it fits in 16 bits.
- uint16_t consumed_capture_count: 12;
- bool seeking_immediate_match: 1;
- bool has_in_progress_alternatives: 1;
- // NOTE(review): not described in the comment above - TODO document.
- bool dead: 1;
-} QueryState;
-
-typedef Array(TSQueryCapture) CaptureList;
-
-/*
- * CaptureListPool - A collection of *lists* of captures. Each QueryState
- * needs to maintain its own list of captures. To avoid repeated allocations,
- * the pool reuses a fixed set of capture lists, and keeps track of which ones
- * are currently in use.
- */
-typedef struct {
- // The fixed set of reusable capture lists, indexed by capture list id.
- CaptureList list[MAX_CAPTURE_LIST_COUNT];
- // Returned by `capture_list_pool_get` for ids that are out of range
- // (for example NONE).
- CaptureList empty_list;
- // Bitmask with one bit per list: 1 = free, 0 = in use. Id 0 corresponds to
- // the highest-order bit (see `capture_list_pool_acquire`).
- uint32_t usage_map;
-} CaptureListPool;
-
-/*
- * TSQuery - A tree query, compiled from a string of S-expressions. The query
- * itself is immutable. The mutable state used in the process of executing the
- * query is stored in a `TSQueryCursor`.
- */
-struct TSQuery {
- // Capture names; looked up by name when predicates refer to captures.
- SymbolTable captures;
- // Predicate names and string literals that appear in predicates.
- SymbolTable predicate_values;
- // The steps of all patterns, stored in one shared array.
- Array(QueryStep) steps;
- // Starting points of the patterns, kept sorted for binary search (see
- // `ts_query__pattern_map_search`).
- Array(PatternEntry) pattern_map;
- Array(TSQueryPredicateStep) predicate_steps;
- // One slice of `predicate_steps` per pattern.
- Array(Slice) predicates_by_pattern;
- Array(uint32_t) start_bytes_by_pattern;
- const TSLanguage *language;
- // Index in `pattern_map` at which `ts_query__pattern_map_search` begins
- // its search.
- // NOTE(review): presumably the entries before it belong to patterns whose
- // root is a wildcard - confirm.
- uint16_t wildcard_root_pattern_count;
- TSSymbol *symbol_map;
-};
-
-/*
- * TSQueryCursor - A stateful struct used to execute a query on a tree.
- */
-struct TSQueryCursor {
- // The query being executed; the cursor never modifies it.
- const TSQuery *query;
- TSTreeCursor cursor;
- // In-progress matches and completed matches, respectively.
- // NOTE(review): inferred from the field names and the QueryState comment -
- // confirm against the matching code.
- Array(QueryState) states;
- Array(QueryState) finished_states;
- // Capture lists handed out to states by `capture_list_id`.
- CaptureListPool capture_list_pool;
- uint32_t depth;
- uint32_t start_byte;
- uint32_t end_byte;
- uint32_t next_state_id;
- TSPoint start_point;
- TSPoint end_point;
- bool ascending;
- bool halted;
-};
-
-// NOTE(review): presumably an internal, non-public TSQueryError value used
-// while parsing patterns - confirm at its use sites.
-static const TSQueryError PARENT_DONE = -1;
-// Depth value that `ts_query__finalize_steps` treats as the end of a
-// pattern's steps.
-static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX;
-// "No value" sentinel: fills unused `capture_ids` slots and
-// `alternative_index`, and is returned by `capture_list_pool_acquire` when
-// no list is free.
-static const uint16_t NONE = UINT16_MAX;
-// Symbol value 0 stands for the wildcard `_` (see the QueryStep comment).
-static const TSSymbol WILDCARD_SYMBOL = 0;
-static const TSSymbol NAMED_WILDCARD_SYMBOL = UINT16_MAX - 1;
-
-/**********
- * Stream
- **********/
-
-// Step past the current code point and decode the next one.
-//
-// Returns true when a code point was decoded. Returns false at the end of
-// the input (in which case `next` and `next_size` are zeroed) and when
-// `ts_decode_utf8` reports a size of 0 (in which case `next_size` keeps its
-// previous value).
-static bool stream_advance(Stream *self) {
-  self->input += self->next_size;
-
-  if (self->input >= self->end) {
-    self->next_size = 0;
-    self->next = '\0';
-    return false;
-  }
-
-  uint32_t decoded_size = ts_decode_utf8(
-    (const uint8_t *)self->input,
-    self->end - self->input,
-    &self->next
-  );
-  if (decoded_size == 0) return false;
-
-  self->next_size = decoded_size;
-  return true;
-}
-
-// Reposition the stream at `input`, a pointer into the string being read,
-// and decode the code point found there.
-static void stream_reset(Stream *self, const char *input) {
-  // With a zero `next_size`, the advance below decodes at `input` itself
-  // rather than skipping past it.
-  self->next_size = 0;
-  self->input = input;
-  stream_advance(self);
-}
-
-// Create a stream over `length` bytes starting at `string`, with the first
-// code point already decoded into `next`.
-static Stream stream_new(const char *string, uint32_t length) {
-  Stream stream = {
-    .input = string,
-    .end = string + length,
-    .next = 0,
-    .next_size = 0,
-  };
-  stream_advance(&stream);
-  return stream;
-}
-
-// Skip over whitespace and `;` comments. A comment runs from the `;` up to,
-// but not including, the next newline (or the end of the input).
-static void stream_skip_whitespace(Stream *stream) {
-  for (;;) {
-    if (iswspace(stream->next)) {
-      stream_advance(stream);
-      continue;
-    }
-
-    if (stream->next != ';') return;
-
-    // Consume the ';' and then the rest of the comment line.
-    stream_advance(stream);
-    while (
-      stream->next &&
-      stream->next != '\n' &&
-      stream_advance(stream)
-    ) {}
-  }
-}
-
-// True when the current code point can begin an identifier: an alphanumeric
-// character, '_' or '-'.
-static bool stream_is_ident_start(Stream *stream) {
-  if (iswalnum(stream->next)) return true;
-  return stream->next == '_' || stream->next == '-';
-}
-
-// Consume an identifier. The current code point is always consumed; after
-// that, scanning continues while the next code point is alphanumeric or one
-// of '_', '-', '.', '?', '!'.
-static void stream_scan_identifier(Stream *stream) {
-  for (;;) {
-    stream_advance(stream);
-
-    if (iswalnum(stream->next)) continue;
-    if (stream->next == '_') continue;
-    if (stream->next == '-') continue;
-    if (stream->next == '.') continue;
-    if (stream->next == '?') continue;
-    if (stream->next == '!') continue;
-    break;
-  }
-}
-
-/******************
- * CaptureListPool
- ******************/
-
-// Create a pool in which every list is free (all usage bits set). The lists
-// themselves are zero-initialized.
-static CaptureListPool capture_list_pool_new(void) {
-  CaptureListPool pool = {
-    .usage_map = UINT32_MAX,
-    .empty_list = array_new(),
-  };
-  return pool;
-}
-
-// Empty every list and mark all of them as free.
-static void capture_list_pool_reset(CaptureListPool *self) {
-  unsigned list_index = 0;
-  while (list_index < MAX_CAPTURE_LIST_COUNT) {
-    array_clear(&self->list[list_index]);
-    list_index++;
-  }
-  self->usage_map = UINT32_MAX;
-}
-
-// Delete each of the pool's capture lists.
-static void capture_list_pool_delete(CaptureListPool *self) {
-  unsigned list_index = 0;
-  while (list_index < MAX_CAPTURE_LIST_COUNT) {
-    array_delete(&self->list[list_index]);
-    list_index++;
-  }
-}
-
-// Read-only access to the list with the given id. Ids that are out of range
-// (such as NONE) yield the pool's shared empty list.
-static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
-  return id < MAX_CAPTURE_LIST_COUNT ? &self->list[id] : &self->empty_list;
-}
-
-// Mutable access to the list with the given id, which must be in range
-// (enforced with an assertion).
-static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
-  assert(id < MAX_CAPTURE_LIST_COUNT);
-  CaptureList *list = &self->list[id];
-  return list;
-}
-
-// True when no usage bit is set. Since set bits mark free lists, this means
-// the pool has no free list left to hand out.
-static bool capture_list_pool_is_empty(const CaptureListPool *self) {
-  return !self->usage_map;
-}
-
-// Take a free list out of the pool and return its id, or NONE when the id
-// computed from the usage map is out of range (no list is free).
-//
-// In `usage_map`, set bits mark free lists and clear bits mark lists in use.
-// Id 0 corresponds to the highest-order bit, so counting the leading zeros
-// of the map yields the lowest free id directly.
-static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
-  uint16_t id = count_leading_zeros(self->usage_map);
-  if (id < MAX_CAPTURE_LIST_COUNT) {
-    self->usage_map &= ~bitmask_for_index(id);
-    array_clear(&self->list[id]);
-    return id;
-  }
-  return NONE;
-}
-
-// Return a list to the pool: empty it and mark it free. Ids that are out of
-// range (such as NONE) are ignored.
-static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
-  if (id < MAX_CAPTURE_LIST_COUNT) {
-    self->usage_map |= bitmask_for_index(id);
-    array_clear(&self->list[id]);
-  }
-}
-
-/**************
- * SymbolTable
- **************/
-
-// Create an empty symbol table.
-static SymbolTable symbol_table_new(void) {
-  SymbolTable table = {
-    .slices = array_new(),
-    .characters = array_new(),
-  };
-  return table;
-}
-
-// Delete both arrays that back the table.
-static void symbol_table_delete(SymbolTable *self) {
-  array_delete(&self->slices);
-  array_delete(&self->characters);
-}
-
-// Look up the id of a name in the table.
-//
-// Parameters:
-// - `name` / `length`: the bytes to look for; `name` does not need to be
-//   NUL-terminated.
-//
-// Returns the index of the first stored entry with the same length and the
-// same bytes, or -1 when there is none.
-static int symbol_table_id_for_name(
-  const SymbolTable *self,
-  const char *name,
-  uint32_t length
-) {
-  for (unsigned i = 0; i < self->slices.size; i++) {
-    Slice slice = self->slices.contents[i];
-    if (slice.length != length) continue;
-
-    // Compare all `length` bytes. Stored names can contain NUL bytes (the
-    // `\0` escape handled by `symbol_table_insert_name_with_escapes`), and a
-    // string comparison would stop at the first NUL and report two different
-    // literals as equal.
-    if (memcmp(&self->characters.contents[slice.offset], name, length) == 0) {
-      return i;
-    }
-  }
-  return -1;
-}
-
-// Return a pointer to the stored name with the given id and write its
-// length (terminator excluded) through `length`. `id` is not range-checked.
-static const char *symbol_table_name_for_id(
-  const SymbolTable *self,
-  uint16_t id,
-  uint32_t *length
-) {
-  Slice entry = self->slices.contents[id];
-  const char *name = self->characters.contents + entry.offset;
-  *length = entry.length;
-  return name;
-}
-
-// Return the id of `name`, adding it to the table when it is not present
-// yet. New names are appended to `characters` followed by a NUL byte.
-static uint16_t symbol_table_insert_name(
-  SymbolTable *self,
-  const char *name,
-  uint32_t length
-) {
-  int existing_id = symbol_table_id_for_name(self, name, length);
-  if (existing_id >= 0) return (uint16_t)existing_id;
-
-  // Remember where the new name starts before the buffer grows.
-  Slice new_slice = {
-    .length = length,
-    .offset = self->characters.size,
-  };
-
-  // Room for the name plus its terminator.
-  array_grow_by(&self->characters, length + 1);
-  char *destination = &self->characters.contents[new_slice.offset];
-  memcpy(destination, name, length);
-  destination[length] = 0;
-
-  array_push(&self->slices, new_slice);
-  return self->slices.size - 1;
-}
-
-// Return the id of a string literal given in escaped form, adding it to the
-// table when it is not present yet.
-//
-// The literal is unescaped directly into the `characters` buffer: `\n`,
-// `\r`, `\t` and `\0` become the corresponding control characters, and any
-// other escaped character stands for itself. Unescaping happens before the
-// duplicate check because the check compares unescaped contents.
-static uint16_t symbol_table_insert_name_with_escapes(
-  SymbolTable *self,
-  const char *escaped_name,
-  uint32_t escaped_length
-) {
-  const uint32_t start = self->characters.size;
-  uint32_t unescaped_length = 0;
-
-  // The unescaped text is never longer than the escaped text, so this is
-  // always enough room for the text and its terminator.
-  array_grow_by(&self->characters, escaped_length + 1);
-
-  bool after_backslash = false;
-  for (unsigned i = 0; i < escaped_length; i++) {
-    char c = escaped_name[i];
-
-    if (after_backslash) {
-      if (c == 'n') c = '\n';
-      else if (c == 'r') c = '\r';
-      else if (c == 't') c = '\t';
-      else if (c == '0') c = '\0';
-      after_backslash = false;
-    } else if (c == '\\') {
-      // The backslash itself produces no output.
-      after_backslash = true;
-      continue;
-    }
-
-    self->characters.contents[start + unescaped_length] = c;
-    unescaped_length++;
-  }
-
-  // If the literal is already in the table, give back the space reserved
-  // above and reuse the existing id.
-  int existing_id = symbol_table_id_for_name(
-    self,
-    &self->characters.contents[start],
-    unescaped_length
-  );
-  if (existing_id >= 0) {
-    self->characters.size -= (escaped_length + 1);
-    return existing_id;
-  }
-
-  self->characters.contents[start + unescaped_length] = 0;
-  Slice new_slice = {
-    .offset = start,
-    .length = unescaped_length,
-  };
-  array_push(&self->slices, new_slice);
-  return self->slices.size - 1;
-}
-
-/************
- * QueryStep
- ************/
-
-// Create a step for `symbol` at the given depth. The step starts without a
-// field, without captures and without an alternative, and all of its flags
-// are cleared except `is_immediate`, which is taken from the argument.
-static QueryStep query_step__new(
-  TSSymbol symbol,
-  uint16_t depth,
-  bool is_immediate
-) {
-  QueryStep step = {
-    .symbol = symbol,
-    .field = 0,
-    .depth = depth,
-    .is_immediate = is_immediate,
-    .capture_ids = {NONE, NONE, NONE},
-    .alternative_index = NONE,
-    .alternative_is_immediate = false,
-    .contains_captures = false,
-    .is_last_child = false,
-    .is_pass_through = false,
-    .is_dead_end = false,
-  };
-  return step;
-}
-
-// Store `capture_id` in the first unused capture slot of the step. When all
-// MAX_STEP_CAPTURE_COUNT slots are taken, the capture is silently dropped.
-static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
-  unsigned slot = 0;
-  while (slot < MAX_STEP_CAPTURE_COUNT && self->capture_ids[slot] != NONE) {
-    slot++;
-  }
-  if (slot < MAX_STEP_CAPTURE_COUNT) {
-    self->capture_ids[slot] = capture_id;
-  }
-}
-
-// Remove the first occurrence of `capture_id` from the step's captures and
-// close the gap, so that the used slots stay contiguous at the front of
-// `capture_ids` with NONE in the unused slots. Does nothing when the id is
-// not present.
-static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
- for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
- if (self->capture_ids[i] == capture_id) {
- self->capture_ids[i] = NONE;
- // Shift the captures that follow one slot to the left, stopping at the
- // first unused slot. This reuses the outer loop's index `i`.
- while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
- if (self->capture_ids[i + 1] == NONE) break;
- self->capture_ids[i] = self->capture_ids[i + 1];
- self->capture_ids[i + 1] = NONE;
- i++;
- }
- // Only the first occurrence is removed.
- break;
- }
- }
-}
-
-/*********
- * Query
- *********/
-
-// The `pattern_map` contains a mapping from TSSymbol values to indices in the
-// `steps` array. For a given syntax node, the `pattern_map` makes it possible
-// to quickly find the starting steps of all of the patterns whose root matches
-// that node. Each entry has two fields: a `pattern_index`, which identifies one
-// of the patterns in the query, and a `step_index`, which indicates the start
-// offset of that pattern's steps within the `steps` array.
-//
-// The entries are sorted by the patterns' root symbols, and lookups use a
-// binary search. This ensures that the cost of this initial lookup step
-// scales logarithmically with the number of patterns in the query.
-//
-// This returns `true` if the symbol is present and `false` otherwise.
-// If the symbol is not present `*result` is set to the index where the
-// symbol should be inserted.
-static inline bool ts_query__pattern_map_search(
- const TSQuery *self,
- TSSymbol needle,
- uint32_t *result
-) {
- // Only the entries from `wildcard_root_pattern_count` onward are searched.
- uint32_t base_index = self->wildcard_root_pattern_count;
- uint32_t size = self->pattern_map.size - base_index;
- if (size == 0) {
- *result = base_index;
- return false;
- }
- // Binary search: each pass moves `base_index` up to the midpoint when the
- // needle is greater than the midpoint's symbol, and shrinks the window by
- // half either way, until a single candidate entry remains.
- while (size > 1) {
- uint32_t half_size = size / 2;
- uint32_t mid_index = base_index + half_size;
- TSSymbol mid_symbol = self->steps.contents[
- self->pattern_map.contents[mid_index].step_index
- ].symbol;
- if (needle > mid_symbol) base_index = mid_index;
- size -= half_size;
- }
-
- TSSymbol symbol = self->steps.contents[
- self->pattern_map.contents[base_index].step_index
- ].symbol;
-
- // If the remaining candidate is smaller than the needle, the needle's
- // position is the slot after it; re-read the symbol there when that slot
- // exists, so that the equality test below looks at the right entry.
- if (needle > symbol) {
- base_index++;
- if (base_index < self->pattern_map.size) {
- symbol = self->steps.contents[
- self->pattern_map.contents[base_index].step_index
- ].symbol;
- }
- }
-
- *result = base_index;
- return needle == symbol;
-}
-
-// Add an entry for a pattern to the pattern map, at a position that keeps
-// the map ordered by root symbol and, among entries with the same symbol,
-// by pattern index.
-static inline void ts_query__pattern_map_insert(
-  TSQuery *self,
-  TSSymbol symbol,
-  uint32_t start_step_index,
-  uint32_t pattern_index
-) {
-  uint32_t insert_at;
-  ts_query__pattern_map_search(self, symbol, &insert_at);
-
-  // Walk past existing entries for the same symbol that belong to earlier
-  // patterns. Keeping entries ordered by pattern index means states for
-  // earlier patterns are initiated first, which allows the ordering of the
-  // states array to be maintained more efficiently.
-  for (; insert_at < self->pattern_map.size; insert_at++) {
-    PatternEntry *existing = &self->pattern_map.contents[insert_at];
-    TSSymbol existing_symbol = self->steps.contents[existing->step_index].symbol;
-    if (existing_symbol != symbol) break;
-    if (existing->pattern_index >= pattern_index) break;
-  }
-
-  PatternEntry new_entry = {
-    .step_index = start_step_index,
-    .pattern_index = pattern_index,
-  };
-  array_insert(&self->pattern_map, insert_at, new_entry);
-}
-
-// Compute `contains_captures` for every step. A step contains captures when
-// it has a capture of its own, or when one of the deeper steps that follow
-// it does. The scan over following steps ends at the first step that is at
-// the same or a shallower depth, or whose depth is PATTERN_DONE_MARKER.
-static void ts_query__finalize_steps(TSQuery *self) {
-  for (unsigned step_index = 0; step_index < self->steps.size; step_index++) {
-    QueryStep *step = &self->steps.contents[step_index];
-    uint32_t depth = step->depth;
-    bool has_captures = step->capture_ids[0] != NONE;
-
-    if (!has_captures) {
-      unsigned next_index = step_index + 1;
-      while (next_index < self->steps.size) {
-        QueryStep *descendant = &self->steps.contents[next_index];
-        if (descendant->depth == PATTERN_DONE_MARKER) break;
-        if (descendant->depth <= depth) break;
-        if (descendant->capture_ids[0] != NONE) has_captures = true;
-        next_index++;
-      }
-    }
-
-    step->contains_captures = has_captures;
-  }
-}
-
-// Parse one predicate belonging to the pattern that is currently being
-// parsed. Every parsed element is appended to the query's `predicate_steps`
-// array, and the length of the last `predicates_by_pattern` slice grows by
-// one for each of them. The stream must be positioned on the predicate's
-// name. The arguments that follow may be '@'-prefixed capture names,
-// double-quoted strings, or bare symbols (which are stored as strings); a
-// ')' ends the predicate and appends a `Done` step. Predicates are only
-// recorded here; interpreting them is left to a higher level, such as the
-// Rust/JavaScript bindings.
-//
-// Returns 0 on success, `TSQueryErrorSyntax` for malformed input, or
-// `TSQueryErrorCapture` when a capture name is not in the capture table.
-static TSQueryError ts_query__parse_predicate(
-  TSQuery *self,
-  Stream *stream
-) {
-  // The predicate's own name becomes the first string step.
-  if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
-  const char *name_start = stream->input;
-  stream_scan_identifier(stream);
-  uint32_t name_length = stream->input - name_start;
-  uint16_t name_id = symbol_table_insert_name(
-    &self->predicate_values,
-    name_start,
-    name_length
-  );
-  array_back(&self->predicates_by_pattern)->length++;
-  array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
-    .type = TSQueryPredicateStepTypeString,
-    .value_id = name_id,
-  }));
-  stream_skip_whitespace(stream);
-
-  for (;;) {
-    // A closing parenthesis terminates the predicate.
-    if (stream->next == ')') {
-      stream_advance(stream);
-      stream_skip_whitespace(stream);
-      array_back(&self->predicates_by_pattern)->length++;
-      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
-        .type = TSQueryPredicateStepTypeDone,
-        .value_id = 0,
-      }));
-      break;
-    }
-
-    // Argument: a reference to a capture, written as '@name'.
-    else if (stream->next == '@') {
-      stream_advance(stream);
-
-      if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
-      const char *capture_start = stream->input;
-      stream_scan_identifier(stream);
-      uint32_t capture_length = stream->input - capture_start;
-
-      // The capture must already exist; on failure, rewind to the name so
-      // that the reported position points at it.
-      int found_id = symbol_table_id_for_name(
-        &self->captures,
-        capture_start,
-        capture_length
-      );
-      if (found_id == -1) {
-        stream_reset(stream, capture_start);
-        return TSQueryErrorCapture;
-      }
-
-      array_back(&self->predicates_by_pattern)->length++;
-      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
-        .type = TSQueryPredicateStepTypeCapture,
-        .value_id = found_id,
-      }));
-    }
-
-    // Argument: a double-quoted string literal.
-    else if (stream->next == '"') {
-      stream_advance(stream);
-
-      // Scan up to the closing quote. A character that follows a backslash
-      // is skipped without inspection; an unescaped newline or the end of
-      // the input is a syntax error reported at the opening quote.
-      bool after_backslash = false;
-      const char *literal_start = stream->input;
-      for (;;) {
-        if (after_backslash) {
-          after_backslash = false;
-        } else if (stream->next == '"') {
-          break;
-        } else if (stream->next == '\n') {
-          stream_reset(stream, literal_start - 1);
-          return TSQueryErrorSyntax;
-        } else {
-          after_backslash = stream->next == '\\';
-        }
-        if (!stream_advance(stream)) {
-          stream_reset(stream, literal_start - 1);
-          return TSQueryErrorSyntax;
-        }
-      }
-      uint32_t literal_length = stream->input - literal_start;
-
-      uint16_t literal_id = symbol_table_insert_name_with_escapes(
-        &self->predicate_values,
-        literal_start,
-        literal_length
-      );
-      array_back(&self->predicates_by_pattern)->length++;
-      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
-        .type = TSQueryPredicateStepTypeString,
-        .value_id = literal_id,
-      }));
-
-      if (stream->next != '"') return TSQueryErrorSyntax;
-      stream_advance(stream);
-    }
-
-    // Argument: a bare symbol, stored the same way as a string.
-    else if (stream_is_ident_start(stream)) {
-      const char *word_start = stream->input;
-      stream_scan_identifier(stream);
-      uint32_t word_length = stream->input - word_start;
-      uint16_t word_id = symbol_table_insert_name(
-        &self->predicate_values,
-        word_start,
-        word_length
-      );
-      array_back(&self->predicates_by_pattern)->length++;
-      array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
-        .type = TSQueryPredicateStepTypeString,
-        .value_id = word_id,
-      }));
-    }
-
-    // Anything else cannot appear inside a predicate.
-    else {
-      return TSQueryErrorSyntax;
-    }
-
-    stream_skip_whitespace(stream);
-  }
-
-  return 0;
-}
-
-// Read one S-expression pattern from the stream, and incorporate it into
-// the query's internal state machine representation. For nested patterns,
-// this function calls itself recursively.
-//
-// Parameters:
-//   self          - the query whose `steps` array receives the parsed steps.
-//   stream        - input positioned at the start of the pattern.
-//   depth         - depth recorded on the steps created for this pattern;
-//                   children of a named node are parsed with `depth + 1`.
-//   capture_count - incremented once for every '@' capture suffix parsed.
-//   is_immediate  - forwarded to `query_step__new` for the step created for
-//                   this pattern; callers pass `true` when a '.' directly
-//                   preceded the pattern.
-//
-// Returns 0 on success, `PARENT_DONE` when the stream is positioned on a
-// ')' or ']' that closes the enclosing construct, or a `TSQueryError` value
-// describing the failure.
-static TSQueryError ts_query__parse_pattern(
-  TSQuery *self,
-  Stream *stream,
-  uint32_t depth,
-  uint32_t *capture_count,
-  bool is_immediate
-) {
-  const uint32_t starting_step_index = self->steps.size;
-
-  if (stream->next == 0) return TSQueryErrorSyntax;
-
-  // Finish the parent S-expression.
-  if (stream->next == ')' || stream->next == ']') {
-    return PARENT_DONE;
-  }
-
-  // An open bracket is the start of an alternation.
-  else if (stream->next == '[') {
-    stream_advance(stream);
-    stream_skip_whitespace(stream);
-
-    // Parse each branch, and add a placeholder step in between the branches.
-    Array(uint32_t) branch_step_indices = array_new();
-    for (;;) {
-      uint32_t start_index = self->steps.size;
-      TSQueryError e = ts_query__parse_pattern(
-        self,
-        stream,
-        depth,
-        capture_count,
-        is_immediate
-      );
-
-      // An alternation must contain at least one branch before its ']'.
-      if (e == PARENT_DONE && stream->next == ']' && branch_step_indices.size > 0) {
-        stream_advance(stream);
-        break;
-      } else if (e) {
-        array_delete(&branch_step_indices);
-        return e;
-      }
-
-      array_push(&branch_step_indices, start_index);
-      array_push(&self->steps, query_step__new(0, depth, false));
-    }
-    // Drop the placeholder that was pushed after the final branch.
-    (void)array_pop(&self->steps);
-
-    // For all of the branches except for the last one, add the subsequent branch as an
-    // alternative, and link the end of the branch to the current end of the steps.
-    for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
-      uint32_t step_index = branch_step_indices.contents[i];
-      uint32_t next_step_index = branch_step_indices.contents[i + 1];
-      QueryStep *start_step = &self->steps.contents[step_index];
-      QueryStep *end_step = &self->steps.contents[next_step_index - 1];
-      start_step->alternative_index = next_step_index;
-      end_step->alternative_index = self->steps.size;
-      end_step->is_dead_end = true;
-    }
-
-    array_delete(&branch_step_indices);
-  }
-
-  // An open parenthesis can be the start of three possible constructs:
-  // * A grouped sequence
-  // * A predicate
-  // * A named node
-  else if (stream->next == '(') {
-    stream_advance(stream);
-    stream_skip_whitespace(stream);
-
-    // If this parenthesis is followed by a node, then it represents a grouped sequence.
-    if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
-      bool child_is_immediate = false;
-      for (;;) {
-        if (stream->next == '.') {
-          child_is_immediate = true;
-          stream_advance(stream);
-          stream_skip_whitespace(stream);
-        }
-        TSQueryError e = ts_query__parse_pattern(
-          self,
-          stream,
-          depth,
-          capture_count,
-          child_is_immediate
-        );
-        if (e == PARENT_DONE && stream->next == ')') {
-          stream_advance(stream);
-          break;
-        } else if (e) {
-          return e;
-        }
-
-        child_is_immediate = false;
-      }
-    }
-
-    // A dot/pound character indicates the start of a predicate.
-    else if (stream->next == '.' || stream->next == '#') {
-      stream_advance(stream);
-      return ts_query__parse_predicate(self, stream);
-    }
-
-    // Otherwise, this parenthesis is the start of a named node.
-    else {
-      TSSymbol symbol;
-
-      // Parse the wildcard symbol
-      if (
-        stream->next == '_' ||
-
-        // TODO - remove.
-        // For temporary backward compatibility, handle '*' as a wildcard.
-        stream->next == '*'
-      ) {
-        symbol = depth > 0 ? NAMED_WILDCARD_SYMBOL : WILDCARD_SYMBOL;
-        stream_advance(stream);
-      }
-
-      // Parse a normal node name
-      else if (stream_is_ident_start(stream)) {
-        const char *node_name = stream->input;
-        stream_scan_identifier(stream);
-        uint32_t length = stream->input - node_name;
-
-        // TODO - remove.
-        // For temporary backward compatibility, handle predicates without the leading '#' sign.
-        if (length > 0 && (node_name[length - 1] == '!' || node_name[length - 1] == '?')) {
-          stream_reset(stream, node_name);
-          return ts_query__parse_predicate(self, stream);
-        }
-
-        symbol = ts_language_symbol_for_name(
-          self->language,
-          node_name,
-          length,
-          true
-        );
-        if (!symbol) {
-          stream_reset(stream, node_name);
-          return TSQueryErrorNodeType;
-        }
-      } else {
-        return TSQueryErrorSyntax;
-      }
-
-      // Add a step for the node.
-      array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
-
-      // Parse the child patterns
-      stream_skip_whitespace(stream);
-      bool child_is_immediate = false;
-      // Kept as a 32-bit value so that it cannot be truncated relative to
-      // `steps.size`.
-      uint32_t child_start_step_index = self->steps.size;
-      for (;;) {
-        if (stream->next == '.') {
-          child_is_immediate = true;
-          stream_advance(stream);
-          stream_skip_whitespace(stream);
-        }
-
-        TSQueryError e = ts_query__parse_pattern(
-          self,
-          stream,
-          depth + 1,
-          capture_count,
-          child_is_immediate
-        );
-        if (e == PARENT_DONE && stream->next == ')') {
-          // A trailing '.' marks the first child step as `is_last_child`.
-          // When the node has no child steps at all (e.g. `(node .)`), that
-          // index is one past the end of the array, so there is nothing to
-          // mark and the write must be skipped.
-          if (child_is_immediate && child_start_step_index < self->steps.size) {
-            self->steps.contents[child_start_step_index].is_last_child = true;
-          }
-          stream_advance(stream);
-          break;
-        } else if (e) {
-          return e;
-        }
-
-        child_is_immediate = false;
-      }
-    }
-  }
-
-  // Parse a wildcard pattern
-  else if (
-    stream->next == '_' ||
-
-    // TODO remove.
-    // For temporary backward compatibility, handle '*' as a wildcard.
-    stream->next == '*'
-  ) {
-    stream_advance(stream);
-    stream_skip_whitespace(stream);
-
-    // Add a step that matches any kind of node
-    array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
-  }
-
-  // Parse a double-quoted anonymous leaf node expression
-  else if (stream->next == '"') {
-    stream_advance(stream);
-
-    // Parse the string content
-    const char *string_content = stream->input;
-    while (stream->next != '"') {
-      if (!stream_advance(stream)) {
-        stream_reset(stream, string_content - 1);
-        return TSQueryErrorSyntax;
-      }
-    }
-    uint32_t length = stream->input - string_content;
-
-    // Add a step for the node
-    TSSymbol symbol = ts_language_symbol_for_name(
-      self->language,
-      string_content,
-      length,
-      false
-    );
-    if (!symbol) {
-      stream_reset(stream, string_content);
-      return TSQueryErrorNodeType;
-    }
-    array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
-
-    if (stream->next != '"') return TSQueryErrorSyntax;
-    stream_advance(stream);
-  }
-
-  // Parse a field-prefixed pattern
-  else if (stream_is_ident_start(stream)) {
-    // Parse the field name
-    const char *field_name = stream->input;
-    stream_scan_identifier(stream);
-    uint32_t length = stream->input - field_name;
-    stream_skip_whitespace(stream);
-
-    if (stream->next != ':') {
-      stream_reset(stream, field_name);
-      return TSQueryErrorSyntax;
-    }
-    stream_advance(stream);
-    stream_skip_whitespace(stream);
-
-    // Parse the pattern
-    TSQueryError e = ts_query__parse_pattern(
-      self,
-      stream,
-      depth,
-      capture_count,
-      is_immediate
-    );
-    if (e == PARENT_DONE) return TSQueryErrorSyntax;
-    if (e) return e;
-
-    // Add the field name to the first step of the pattern
-    TSFieldId field_id = ts_language_field_id_for_name(
-      self->language,
-      field_name,
-      length
-    );
-    if (!field_id) {
-      // Rewind through `stream_reset`, like every other error path here.
-      stream_reset(stream, field_name);
-      return TSQueryErrorField;
-    }
-
-    // Apply the field to the first step and to each alternative of it that
-    // lies within the steps parsed for this pattern.
-    uint32_t step_index = starting_step_index;
-    QueryStep *step = &self->steps.contents[step_index];
-    for (;;) {
-      step->field = field_id;
-      if (
-        step->alternative_index != NONE &&
-        step->alternative_index > step_index &&
-        step->alternative_index < self->steps.size
-      ) {
-        step_index = step->alternative_index;
-        step = &self->steps.contents[step_index];
-      } else {
-        break;
-      }
-    }
-  }
-
-  else {
-    return TSQueryErrorSyntax;
-  }
-
-  stream_skip_whitespace(stream);
-
-  // Parse suffixes modifiers for this pattern
-  for (;;) {
-    // Parse the one-or-more operator.
-    if (stream->next == '+') {
-      stream_advance(stream);
-      stream_skip_whitespace(stream);
-
-      QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
-      repeat_step.alternative_index = starting_step_index;
-      repeat_step.is_pass_through = true;
-      repeat_step.alternative_is_immediate = true;
-      array_push(&self->steps, repeat_step);
-    }
-
-    // Parse the zero-or-more repetition operator.
-    else if (stream->next == '*') {
-      stream_advance(stream);
-      stream_skip_whitespace(stream);
-
-      QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
-      repeat_step.alternative_index = starting_step_index;
-      repeat_step.is_pass_through = true;
-      repeat_step.alternative_is_immediate = true;
-      array_push(&self->steps, repeat_step);
-
-      // Look the first step up only after the push above: growing the
-      // array may move its storage, which would invalidate a pointer
-      // obtained earlier.
-      QueryStep *step = &self->steps.contents[starting_step_index];
-      while (step->alternative_index != NONE) {
-        step = &self->steps.contents[step->alternative_index];
-      }
-      step->alternative_index = self->steps.size;
-    }
-
-    // Parse the optional operator.
-    else if (stream->next == '?') {
-      stream_advance(stream);
-      stream_skip_whitespace(stream);
-
-      QueryStep *step = &self->steps.contents[starting_step_index];
-      while (step->alternative_index != NONE) {
-        step = &self->steps.contents[step->alternative_index];
-      }
-      step->alternative_index = self->steps.size;
-    }
-
-    // Parse an '@'-prefixed capture pattern
-    else if (stream->next == '@') {
-      stream_advance(stream);
-      if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
-      const char *capture_name = stream->input;
-      stream_scan_identifier(stream);
-      uint32_t length = stream->input - capture_name;
-      stream_skip_whitespace(stream);
-
-      // Add the capture id to the first step of the pattern
-      uint16_t capture_id = symbol_table_insert_name(
-        &self->captures,
-        capture_name,
-        length
-      );
-
-      // The capture is also added to each alternative of the first step
-      // that lies within the steps parsed for this pattern.
-      uint32_t step_index = starting_step_index;
-      QueryStep *step = &self->steps.contents[step_index];
-      for (;;) {
-        query_step__add_capture(step, capture_id);
-        if (
-          step->alternative_index != NONE &&
-          step->alternative_index > step_index &&
-          step->alternative_index < self->steps.size
-        ) {
-          step_index = step->alternative_index;
-          step = &self->steps.contents[step_index];
-        } else {
-          break;
-        }
-      }
-
-      (*capture_count)++;
-    }
-
-    // No more suffix modifiers
-    else {
-      break;
-    }
-  }
-
-  return 0;
-}
-
-// Build a query for `language` from the S-expression patterns in `source`
-// (`source_len` bytes). Returns the new query, or NULL when a pattern fails
-// to parse; in that case `*error_type` holds the error kind and
-// `*error_offset` the byte offset in `source` where the stream stopped.
-TSQuery *ts_query_new(
-  const TSLanguage *language,
-  const char *source,
-  uint32_t source_len,
-  uint32_t *error_offset,
-  TSQueryError *error_type
-) {
-  // Languages older than the symbol-deduping ABI version can associate
-  // several symbols with the same name, due to "simple aliases". For those,
-  // map every symbol to the first symbol sharing its name and type. In the
-  // next language ABI version, this map will be contained in the language's
-  // `public_symbol_map` field.
-  TSSymbol *symbol_map = NULL;
-  if (ts_language_version(language) < TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) {
-    uint32_t symbol_count = ts_language_symbol_count(language);
-    symbol_map = ts_malloc(sizeof(TSSymbol) * symbol_count);
-    for (unsigned symbol = 0; symbol < symbol_count; symbol++) {
-      const char *name = ts_language_symbol_name(language, symbol);
-      const TSSymbolType symbol_type = ts_language_symbol_type(language, symbol);
-
-      // Stop at the first earlier symbol with the same type and name; if
-      // there is none, the symbol maps to itself.
-      unsigned canonical = 0;
-      while (canonical < symbol) {
-        if (
-          ts_language_symbol_type(language, canonical) == symbol_type &&
-          !strcmp(name, ts_language_symbol_name(language, canonical))
-        ) {
-          break;
-        }
-        canonical++;
-      }
-      symbol_map[symbol] = canonical;
-    }
-  }
-
-  TSQuery *self = ts_malloc(sizeof(TSQuery));
-  *self = (TSQuery) {
-    .steps = array_new(),
-    .pattern_map = array_new(),
-    .captures = symbol_table_new(),
-    .predicate_values = symbol_table_new(),
-    .predicate_steps = array_new(),
-    .predicates_by_pattern = array_new(),
-    .symbol_map = symbol_map,
-    .wildcard_root_pattern_count = 0,
-    .language = language,
-  };
-
-  // Consume the source one top-level S-expression at a time.
-  Stream stream = stream_new(source, source_len);
-  stream_skip_whitespace(&stream);
-  while (stream.input < stream.end) {
-    uint32_t pattern_index = self->predicates_by_pattern.size;
-    uint32_t start_step_index = self->steps.size;
-    uint32_t capture_count = 0;
-    array_push(&self->start_bytes_by_pattern, stream.input - source);
-    array_push(&self->predicates_by_pattern, ((Slice) {
-      .offset = self->predicate_steps.size,
-      .length = 0,
-    }));
-    *error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count, false);
-    array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
-
-    // Give up on the first pattern that fails to parse. A stray closing
-    // delimiter at the top level is reported as a syntax error.
-    if (*error_type) {
-      if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
-      *error_offset = stream.input - source;
-      ts_query_delete(self);
-      return NULL;
-    }
-
-    // If a pattern has a wildcard at its root, optimize the matching process
-    // by starting at the following step instead, provided that step is
-    // neither a wildcard nor the end-of-pattern marker.
-    if (self->steps.contents[start_step_index].symbol == WILDCARD_SYMBOL) {
-      const QueryStep *following = &self->steps.contents[start_step_index + 1];
-      bool following_is_concrete =
-        following->symbol != WILDCARD_SYMBOL &&
-        following->depth != PATTERN_DONE_MARKER;
-      if (following_is_concrete) start_step_index += 1;
-    }
-
-    // Register the pattern under its root symbol. Alternatives or options at
-    // the root yield one pattern map entry each; the link between them is
-    // cleared once it has been followed.
-    for (;;) {
-      QueryStep *root = &self->steps.contents[start_step_index];
-      ts_query__pattern_map_insert(self, root->symbol, start_step_index, pattern_index);
-      if (root->symbol == WILDCARD_SYMBOL) {
-        self->wildcard_root_pattern_count++;
-      }
-
-      if (root->alternative_index == NONE) break;
-      start_step_index = root->alternative_index;
-      root->alternative_index = NONE;
-    }
-  }
-
-  ts_query__finalize_steps(self);
-  return self;
-}
-
-// Release a query and everything it owns. Passing NULL is a no-op.
-void ts_query_delete(TSQuery *self) {
-  if (!self) return;
-
-  // Step and predicate storage.
-  array_delete(&self->steps);
-  array_delete(&self->pattern_map);
-  array_delete(&self->predicate_steps);
-  array_delete(&self->predicates_by_pattern);
-  array_delete(&self->start_bytes_by_pattern);
-
-  // Name tables for captures and predicate strings.
-  symbol_table_delete(&self->captures);
-  symbol_table_delete(&self->predicate_values);
-
-  ts_free(self->symbol_map);
-  ts_free(self);
-}
-
-// Number of entries in the query's `predicates_by_pattern` array.
-uint32_t ts_query_pattern_count(const TSQuery *self) {
-  const uint32_t pattern_count = self->predicates_by_pattern.size;
-  return pattern_count;
-}
-
-// Number of names stored in the query's capture table.
-uint32_t ts_query_capture_count(const TSQuery *self) {
-  const uint32_t capture_count = self->captures.slices.size;
-  return capture_count;
-}
-
-// Number of strings stored in the query's predicate value table.
-uint32_t ts_query_string_count(const TSQuery *self) {
-  const uint32_t string_count = self->predicate_values.slices.size;
-  return string_count;
-}
-
-// Look up entry `index` of the query's capture table; the lookup, including
-// the handling of `length`, is delegated to `symbol_table_name_for_id`.
-const char *ts_query_capture_name_for_id(
-  const TSQuery *self,
-  uint32_t index,
-  uint32_t *length
-) {
-  const char *capture_name = symbol_table_name_for_id(&self->captures, index, length);
-  return capture_name;
-}
-
-// Look up entry `index` of the query's predicate value table; the lookup,
-// including the handling of `length`, is delegated to
-// `symbol_table_name_for_id`.
-const char *ts_query_string_value_for_id(
-  const TSQuery *self,
-  uint32_t index,
-  uint32_t *length
-) {
-  const char *string_value = symbol_table_name_for_id(&self->predicate_values, index, length);
-  return string_value;
-}
-
-// Return the predicate steps recorded for pattern `pattern_index` and store
-// how many there are in `*step_count`. Returns NULL when the query has no
-// predicate step storage at all; `*step_count` is written in either case.
-const TSQueryPredicateStep *ts_query_predicates_for_pattern(
-  const TSQuery *self,
-  uint32_t pattern_index,
-  uint32_t *step_count
-) {
-  const Slice *range = &self->predicates_by_pattern.contents[pattern_index];
-  *step_count = range->length;
-  return self->predicate_steps.contents != NULL
-    ? &self->predicate_steps.contents[range->offset]
-    : NULL;
-}
-
-// Return the `start_bytes_by_pattern` entry for pattern `pattern_index`.
-// The index is not range-checked.
-uint32_t ts_query_start_byte_for_pattern(
-  const TSQuery *self,
-  uint32_t pattern_index
-) {
-  const uint32_t start_byte = self->start_bytes_by_pattern.contents[pattern_index];
-  return start_byte;
-}
-
-// Strip the capture called `name` (`length` bytes) from every step of the
-// query, then refresh the steps' derived flags. Unknown names are ignored.
-void ts_query_disable_capture(
-  TSQuery *self,
-  const char *name,
-  uint32_t length
-) {
-  int capture_id = symbol_table_id_for_name(&self->captures, name, length);
-  if (capture_id == -1) return;
-
-  QueryStep *steps = self->steps.contents;
-  for (unsigned index = 0; index < self->steps.size; index++) {
-    query_step__remove_capture(&steps[index], capture_id);
-  }
-  ts_query__finalize_steps(self);
-}
-
-// Drop every pattern map entry that belongs to `pattern_index`. The
-// pattern's steps stay in the `steps` array, but they will never be read.
-void ts_query_disable_pattern(
-  TSQuery *self,
-  uint32_t pattern_index
-) {
-  unsigned index = 0;
-  while (index < self->pattern_map.size) {
-    PatternEntry *entry = &self->pattern_map.contents[index];
-    if (entry->pattern_index == pattern_index) {
-      // Erasing shifts the next entry into this slot, so stay on it.
-      array_erase(&self->pattern_map, index);
-    } else {
-      index++;
-    }
-  }
-}
-
-/***************
- * QueryCursor
- ***************/
-
-// Allocate a query cursor whose byte and point ranges initially span from
-// zero to the maximum value, and reserve room for 8 entries in both of its
-// state arrays.
-TSQueryCursor *ts_query_cursor_new(void) {
-  TSQueryCursor *cursor = ts_malloc(sizeof(TSQueryCursor));
-  *cursor = (TSQueryCursor) {
-    .states = array_new(),
-    .finished_states = array_new(),
-    .capture_list_pool = capture_list_pool_new(),
-    .ascending = false,
-    .halted = false,
-    .start_byte = 0,
-    .end_byte = UINT32_MAX,
-    .start_point = {0, 0},
-    .end_point = POINT_MAX,
-  };
-  array_reserve(&cursor->states, 8);
-  array_reserve(&cursor->finished_states, 8);
-  return cursor;
-}
-
-// Release a query cursor together with its capture list pool, tree cursor
-// and state arrays. `self` is dereferenced unconditionally.
-void ts_query_cursor_delete(TSQueryCursor *self) {
-  capture_list_pool_delete(&self->capture_list_pool);
-  ts_tree_cursor_delete(&self->cursor);
-  array_delete(&self->finished_states);
-  array_delete(&self->states);
-  ts_free(self);
-}
-
-// Prepare the cursor to run `query` starting at `node`: bind the query,
-// reset the traversal bookkeeping, and discard the states and capture lists
-// left over from an earlier run.
-void ts_query_cursor_exec(
-  TSQueryCursor *self,
-  const TSQuery *query,
-  TSNode node
-) {
-  // Bind the query and rewind the traversal bookkeeping.
-  self->query = query;
-  self->halted = false;
-  self->ascending = false;
-  self->depth = 0;
-  self->next_state_id = 0;
-
-  // Discard leftovers and point the tree cursor at the new root node.
-  array_clear(&self->states);
-  array_clear(&self->finished_states);
-  ts_tree_cursor_reset(&self->cursor, node);
-  capture_list_pool_reset(&self->capture_list_pool);
-}
-
-// Store the byte range for the cursor. An `end_byte` of zero selects the
-// full range, 0 to UINT32_MAX, regardless of `start_byte`.
-void ts_query_cursor_set_byte_range(
-  TSQueryCursor *self,
-  uint32_t start_byte,
-  uint32_t end_byte
-) {
-  const bool use_full_range = end_byte == 0;
-  self->start_byte = use_full_range ? 0 : start_byte;
-  self->end_byte = use_full_range ? UINT32_MAX : end_byte;
-}
-
-// Store the point range for the cursor. An `end_point` at row 0, column 0
-// selects the full range, POINT_ZERO to POINT_MAX, regardless of
-// `start_point`.
-void ts_query_cursor_set_point_range(
-  TSQueryCursor *self,
-  TSPoint start_point,
-  TSPoint end_point
-) {
-  if (end_point.row != 0 || end_point.column != 0) {
-    self->start_point = start_point;
-    self->end_point = end_point;
-  } else {
-    self->start_point = POINT_ZERO;
-    self->end_point = POINT_MAX;
-  }
-}
-
-// Among the in-progress states that are not dead and hold at least one
-// capture, find the one whose first captured node starts earliest in the
-// document; ties are resolved in favor of the lower pattern index. The
-// three outputs are set to UINT32_MAX and `false` is returned when no state
-// qualifies.
-static bool ts_query_cursor__first_in_progress_capture(
-  TSQueryCursor *self,
-  uint32_t *state_index,
-  uint32_t *byte_offset,
-  uint32_t *pattern_index
-) {
-  bool found = false;
-  *state_index = UINT32_MAX;
-  *byte_offset = UINT32_MAX;
-  *pattern_index = UINT32_MAX;
-
-  for (unsigned index = 0; index < self->states.size; index++) {
-    const QueryState *candidate = &self->states.contents[index];
-    if (candidate->dead) continue;
-
-    const CaptureList *list = capture_list_pool_get(
-      &self->capture_list_pool,
-      candidate->capture_list_id
-    );
-    if (list->size == 0) continue;
-
-    uint32_t first_byte = ts_node_start_byte(list->contents[0].node);
-    bool is_earlier =
-      !found ||
-      first_byte < *byte_offset ||
-      (first_byte == *byte_offset && candidate->pattern_index < *pattern_index);
-    if (!is_earlier) continue;
-
-    found = true;
-    *state_index = index;
-    *byte_offset = first_byte;
-    *pattern_index = candidate->pattern_index;
-  }
-
-  return found;
-}
-
-// Order two nodes as a depth-first traversal would visit them: the node
-// that starts earlier comes first, and of two nodes starting at the same
-// byte the one that ends later comes first. Returns -1 when `left` comes
-// first, 1 when `right` does, and 0 when they are the same node or cover
-// the same byte range.
-int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
-  if (left.id == right.id) return 0;
-
-  uint32_t left_start = ts_node_start_byte(left);
-  uint32_t right_start = ts_node_start_byte(right);
-  if (left_start != right_start) {
-    return left_start < right_start ? -1 : 1;
-  }
-
-  uint32_t left_end = ts_node_end_byte(left);
-  uint32_t right_end = ts_node_end_byte(right);
-  if (left_end != right_end) {
-    return left_end > right_end ? -1 : 1;
-  }
-
-  return 0;
-}
-
-// Determine if either state contains a superset of the other state's
-// captures. Both capture lists are walked in parallel: a capture found on
-// one side only clears the flag saying that the other side contains it.
-void ts_query_cursor__compare_captures(
-  TSQueryCursor *self,
-  QueryState *left_state,
-  QueryState *right_state,
-  bool *left_contains_right,
-  bool *right_contains_left
-) {
-  const CaptureList *left_list = capture_list_pool_get(
-    &self->capture_list_pool,
-    left_state->capture_list_id
-  );
-  const CaptureList *right_list = capture_list_pool_get(
-    &self->capture_list_pool,
-    right_state->capture_list_id
-  );
-  *left_contains_right = true;
-  *right_contains_left = true;
-
-  unsigned li = 0, ri = 0;
-  while (li < left_list->size && ri < right_list->size) {
-    TSQueryCapture *lhs = &left_list->contents[li];
-    TSQueryCapture *rhs = &right_list->contents[ri];
-
-    // Identical capture on both sides: nothing to record.
-    if (lhs->node.id == rhs->node.id && lhs->index == rhs->index) {
-      li++;
-      ri++;
-      continue;
-    }
-
-    int order = ts_query_cursor__compare_nodes(lhs->node, rhs->node);
-    if (order == -1) {
-      // The left capture comes first, so the right list lacks it.
-      *right_contains_left = false;
-      li++;
-    } else if (order == 1) {
-      // The right capture comes first, so the left list lacks it.
-      *left_contains_right = false;
-      ri++;
-    } else {
-      // The nodes compare equal but the captures differ: each side holds
-      // something the other does not.
-      *right_contains_left = false;
-      *left_contains_right = false;
-      li++;
-      ri++;
-    }
-  }
-
-  // Whatever remains on one side is missing from the other.
-  if (li < left_list->size) {
-    *right_contains_left = false;
-  } else if (ri < right_list->size) {
-    *left_contains_right = false;
-  }
-}
-
-// Start a new in-progress state for `pattern` at the cursor's current
-// depth, unless an equivalent state is already present.
-static void ts_query_cursor__add_state(
-  TSQueryCursor *self,
-  const PatternEntry *pattern
-) {
-  QueryStep *first_step = &self->query->steps.contents[pattern->step_index];
-  uint32_t start_depth = self->depth - first_step->depth;
-
-  // The states array is kept sorted by ascending start_depth, then by
-  // ascending pattern_index, so that it can be processed more efficiently
-  // elsewhere. Most of the time the new state simply belongs at the end:
-  // * States with lower start_depth are naturally added first due to the
-  //   order in which nodes are visited.
-  // * Earlier patterns are naturally added first because of the ordering of
-  //   the pattern_map data structure that's used to initiate matches.
-  //
-  // The backwards scan below only does real work when both of these hold:
-  // * A pattern consists of more than one sibling node, so that its states
-  //   remain in progress after exiting the node that started the match.
-  // * The first node in the pattern matches against multiple nodes at the
-  //   same depth.
-  //
-  // An example of this is the pattern '((comment)* (function))'. If multiple
-  // `comment` nodes appear in a row, a new state for the pattern may be
-  // started while another state for the same pattern is still in progress.
-  // With several such patterns in a query, the scan is what keeps the
-  // states ordered by pattern_index.
-  uint32_t insert_at = self->states.size;
-  for (; insert_at > 0; insert_at--) {
-    const QueryState *prior = &self->states.contents[insert_at - 1];
-    if (prior->start_depth < start_depth) break;
-    if (prior->start_depth == start_depth) {
-      if (prior->pattern_index < pattern->pattern_index) break;
-      // Skip the insertion of a duplicate state, which would be immediately
-      // pruned by the longest-match criteria.
-      if (
-        prior->pattern_index == pattern->pattern_index &&
-        prior->step_index == pattern->step_index
-      ) {
-        return;
-      }
-    }
-  }
-
-  LOG(
-    " start state. pattern:%u, step:%u\n",
-    pattern->pattern_index,
-    pattern->step_index
-  );
-  array_insert(&self->states, insert_at, ((QueryState) {
-    .capture_list_id = NONE,
-    .step_index = pattern->step_index,
-    .pattern_index = pattern->pattern_index,
-    .start_depth = start_depth,
-    .consumed_capture_count = 0,
-    .seeking_immediate_match = true,
-    .has_in_progress_alternatives = false,
-    .dead = false,
-  }));
-}
-
-// Make sure that `state` owns a capture list and return it. A state that
-// has none first tries to acquire one from the pool. When the pool is
-// exhausted, the list is taken from the state whose first capture is
-// earliest in the document; that state is marked 'dead' and the list is
-// cleared. Returns NULL when no such state exists or when it is the state
-// at `state_index_to_preserve`.
-static CaptureList *ts_query_cursor__prepare_to_capture(
-  TSQueryCursor *self,
-  QueryState *state,
-  unsigned state_index_to_preserve
-) {
-  // Common case: the state already has a list.
-  if (state->capture_list_id != NONE) {
-    return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
-  }
-
-  state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
-  if (state->capture_list_id != NONE) {
-    return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
-  }
-
-  // The pool is exhausted: look for a state to take a list from.
-  uint32_t state_index, byte_offset, pattern_index;
-  bool has_victim = ts_query_cursor__first_in_progress_capture(
-    self,
-    &state_index,
-    &byte_offset,
-    &pattern_index
-  );
-  if (!has_victim || state_index == state_index_to_preserve) {
-    LOG(" ran out of capture lists");
-    return NULL;
-  }
-
-  LOG(
-    " abandon state. index:%u, pattern:%u, offset:%u.\n",
-    state_index, pattern_index, byte_offset
-  );
-  QueryState *victim = &self->states.contents[state_index];
-  state->capture_list_id = victim->capture_list_id;
-  victim->capture_list_id = NONE;
-  victim->dead = true;
-
-  CaptureList *taken = capture_list_pool_get_mut(
-    &self->capture_list_pool,
-    state->capture_list_id
-  );
-  array_clear(taken);
-  return taken;
-}
-
-// Clone the state referred to by `*state_ref` and insert the clone directly
-// after it in the `states` array. Since the insertion may move the array's
-// storage, `*state_ref` is refreshed to point at the original state again.
-// Returns the clone, or NULL when the original has captures and no capture
-// list could be obtained for the clone (nothing is inserted in that case).
-static QueryState *ts_query_cursor__copy_state(
-  TSQueryCursor *self,
-  QueryState **state_ref
-) {
-  const QueryState *original = *state_ref;
-  uint32_t original_index = original - self->states.contents;
-
-  // The clone starts without a capture list of its own.
-  QueryState duplicate = *original;
-  duplicate.capture_list_id = NONE;
-
-  // Give the clone its own copy of the original's captures, if any.
-  if (original->capture_list_id != NONE) {
-    CaptureList *target = ts_query_cursor__prepare_to_capture(self, &duplicate, original_index);
-    if (!target) return NULL;
-    const CaptureList *source = capture_list_pool_get(
-      &self->capture_list_pool,
-      original->capture_list_id
-    );
-    array_push_all(target, source);
-  }
-
-  array_insert(&self->states, original_index + 1, duplicate);
-
-  // Re-derive both pointers from the array's current storage.
-  QueryState *states = self->states.contents;
-  *state_ref = &states[original_index];
-  return &states[original_index + 1];
-}
-
-// Walk the tree, processing patterns until at least one pattern finishes,
-// If one or more patterns finish, return `true` and store their states in the
-// `finished_states` array. Multiple patterns can finish on the same node. If
-// there are no more matches, return `false`.
-static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
- bool did_match = false;
- for (;;) {
- if (self->halted) {
- while (self->states.size > 0) {
- QueryState state = array_pop(&self->states);
- capture_list_pool_release(
- &self->capture_list_pool,
- state.capture_list_id
- );
- }
- }
-
- if (did_match || self->halted) return did_match;
-
- if (self->ascending) {
- LOG("leave node. type:%s\n", ts_node_type(ts_tree_cursor_current_node(&self->cursor)));
-
- // Leave this node by stepping to its next sibling or to its parent.
- if (ts_tree_cursor_goto_next_sibling(&self->cursor)) {
- self->ascending = false;
- } else if (ts_tree_cursor_goto_parent(&self->cursor)) {
- self->depth--;
- } else {
- self->halted = true;
- }
-
- // After leaving a node, remove any states that cannot make further progress.
- uint32_t deleted_count = 0;
- for (unsigned i = 0, n = self->states.size; i < n; i++) {
- QueryState *state = &self->states.contents[i];
- QueryStep *step = &self->query->steps.contents[state->step_index];
-
- // If a state completed its pattern inside of this node, but was deferred from finishing
- // in order to search for longer matches, mark it as finished.
- if (step->depth == PATTERN_DONE_MARKER) {
- if (state->start_depth > self->depth || self->halted) {
- LOG(" finish pattern %u\n", state->pattern_index);
- state->id = self->next_state_id++;
- array_push(&self->finished_states, *state);
- did_match = true;
- deleted_count++;
- continue;
- }
- }
-
- // If a state needed to match something within this node, then remove that state
- // as it has failed to match.
- else if ((uint32_t)state->start_depth + (uint32_t)step->depth > self->depth) {
- LOG(
- " failed to match. pattern:%u, step:%u\n",
- state->pattern_index,
- state->step_index
- );
- capture_list_pool_release(
- &self->capture_list_pool,
- state->capture_list_id
- );
- deleted_count++;
- continue;
- }
-
- if (deleted_count > 0) {
- self->states.contents[i - deleted_count] = *state;
- }
- }
- self->states.size -= deleted_count;
- } else {
- // If this node is before the selected range, then avoid descending into it.
- TSNode node = ts_tree_cursor_current_node(&self->cursor);
- if (
- ts_node_end_byte(node) <= self->start_byte ||
- point_lte(ts_node_end_point(node), self->start_point)
- ) {
- if (!ts_tree_cursor_goto_next_sibling(&self->cursor)) {
- self->ascending = true;
- }
- continue;
- }
-
- // If this node is after the selected range, then stop walking.
- if (
- self->end_byte <= ts_node_start_byte(node) ||
- point_lte(self->end_point, ts_node_start_point(node))
- ) {
- self->halted = true;
- continue;
- }
-
- // Get the properties of the current node.
- TSSymbol symbol = ts_node_symbol(node);
- bool is_named = ts_node_is_named(node);
- if (symbol != ts_builtin_sym_error && self->query->symbol_map) {
- symbol = self->query->symbol_map[symbol];
- }
- bool can_have_later_siblings;
- bool can_have_later_siblings_with_this_field;
- TSFieldId field_id = ts_tree_cursor_current_status(
- &self->cursor,
- &can_have_later_siblings,
- &can_have_later_siblings_with_this_field
- );
- LOG(
- "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
- ts_node_type(node),
- ts_language_field_name_for_id(self->query->language, field_id),
- ts_node_start_point(node).row,
- self->states.size,
- self->finished_states.size
- );
-
- // Add new states for any patterns whose root node is a wildcard.
- for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
- PatternEntry *pattern = &self->query->pattern_map.contents[i];
- QueryStep *step = &self->query->steps.contents[pattern->step_index];
-
- // If this node matches the first step of the pattern, then add a new
- // state at the start of this pattern.
- if (step->field && field_id != step->field) continue;
- ts_query_cursor__add_state(self, pattern);
- }
-
- // Add new states for any patterns whose root node matches this node.
- unsigned i;
- if (ts_query__pattern_map_search(self->query, symbol, &i)) {
- PatternEntry *pattern = &self->query->pattern_map.contents[i];
- QueryStep *step = &self->query->steps.contents[pattern->step_index];
- do {
- // If this node matches the first step of the pattern, then add a new
- // state at the start of this pattern.
- if (step->field && field_id != step->field) continue;
- ts_query_cursor__add_state(self, pattern);
-
- // Advance to the next pattern whose root node matches this node.
- i++;
- if (i == self->query->pattern_map.size) break;
- pattern = &self->query->pattern_map.contents[i];
- step = &self->query->steps.contents[pattern->step_index];
- } while (step->symbol == symbol);
- }
-
- // Update all of the in-progress states with current node.
- for (unsigned i = 0, copy_count = 0; i < self->states.size; i += 1 + copy_count) {
- QueryState *state = &self->states.contents[i];
- QueryStep *step = &self->query->steps.contents[state->step_index];
- state->has_in_progress_alternatives = false;
- copy_count = 0;
-
- // Check that the node matches all of the criteria for the next
- // step of the pattern.
- if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
-
- // Determine if this node matches this step of the pattern, and also
- // if this node can have later siblings that match this step of the
- // pattern.
- bool node_does_match =
- step->symbol == symbol ||
- step->symbol == WILDCARD_SYMBOL ||
- (step->symbol == NAMED_WILDCARD_SYMBOL && is_named);
- bool later_sibling_can_match = can_have_later_siblings;
- if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
- later_sibling_can_match = false;
- }
- if (step->is_last_child && can_have_later_siblings) {
- node_does_match = false;
- }
- if (step->field) {
- if (step->field == field_id) {
- if (!can_have_later_siblings_with_this_field) {
- later_sibling_can_match = false;
- }
- } else {
- node_does_match = false;
- }
- }
-
- // Remove states immediately if it is ever clear that they cannot match.
- if (!node_does_match) {
- if (!later_sibling_can_match) {
- LOG(
- " discard state. pattern:%u, step:%u\n",
- state->pattern_index,
- state->step_index
- );
- capture_list_pool_release(
- &self->capture_list_pool,
- state->capture_list_id
- );
- array_erase(&self->states, i);
- i--;
- }
- continue;
- }
-
- // Some patterns can match their root node in multiple ways, capturing different
- // children. If this pattern step could match later children within the same
- // parent, then this query state cannot simply be updated in place. It must be
- // split into two states: one that matches this node, and one which skips over
- // this node, to preserve the possibility of matching later siblings.
- if (later_sibling_can_match && step->contains_captures) {
- if (ts_query_cursor__copy_state(self, &state)) {
- LOG(
- " split state for capture. pattern:%u, step:%u\n",
- state->pattern_index,
- state->step_index
- );
- copy_count++;
- }
- }
-
- // If the current node is captured in this pattern, add it to the capture list.
- if (step->capture_ids[0] != NONE) {
- CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
- if (!capture_list) {
- array_erase(&self->states, i);
- i--;
- continue;
- }
-
- for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
- uint16_t capture_id = step->capture_ids[j];
- if (step->capture_ids[j] == NONE) break;
- array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
- LOG(
- " capture node. pattern:%u, capture_id:%u, capture_count:%u\n",
- state->pattern_index,
- capture_id,
- capture_list->size
- );
- }
- }
-
- // Advance this state to the next step of its pattern.
- state->step_index++;
- state->seeking_immediate_match = false;
- LOG(
- " advance state. pattern:%u, step:%u\n",
- state->pattern_index,
- state->step_index
- );
-
- // If this state's next step has an alternative step, then copy the state in order
- // to pursue both alternatives. The alternative step itself may have an alternative,
- // so this is an interative process.
- unsigned end_index = i + 1;
- for (unsigned j = i; j < end_index; j++) {
- QueryState *state = &self->states.contents[j];
- QueryStep *next_step = &self->query->steps.contents[state->step_index];
- if (next_step->alternative_index != NONE) {
- if (next_step->is_dead_end) {
- state->step_index = next_step->alternative_index;
- j--;
- continue;
- }
-
- if (next_step->is_pass_through) {
- state->step_index++;
- j--;
- }
-
- QueryState *copy = ts_query_cursor__copy_state(self, &state);
- if (copy) {
- LOG(
- " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
- copy->pattern_index,
- copy->step_index,
- next_step->alternative_index,
- next_step->alternative_is_immediate,
- capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
- );
- end_index++;
- copy_count++;
- copy->step_index = next_step->alternative_index;
- if (next_step->alternative_is_immediate) {
- copy->seeking_immediate_match = true;
- }
- }
- }
- }
- }
-
- for (unsigned i = 0; i < self->states.size; i++) {
- QueryState *state = &self->states.contents[i];
- if (state->dead) {
- array_erase(&self->states, i);
- i--;
- continue;
- }
-
- // Enfore the longest-match criteria. When a query pattern contains optional or
- // repeated nodes, this is necessary to avoid multiple redundant states, where
- // one state has a strict subset of another state's captures.
- bool did_remove = false;
- for (unsigned j = i + 1; j < self->states.size; j++) {
- QueryState *other_state = &self->states.contents[j];
-
- // Query states are kept in ascending order of start_depth and pattern_index.
- // Since the longest-match criteria is only used for deduping matches of the same
- // pattern and root node, we only need to perform pairwise comparisons within a
- // small slice of the states array.
- if (
- other_state->start_depth != state->start_depth ||
- other_state->pattern_index != state->pattern_index
- ) break;
-
- bool left_contains_right, right_contains_left;
- ts_query_cursor__compare_captures(
- self,
- state,
- other_state,
- &left_contains_right,
- &right_contains_left
- );
- if (left_contains_right) {
- if (state->step_index == other_state->step_index) {
- LOG(
- " drop shorter state. pattern: %u, step_index: %u\n",
- state->pattern_index,
- state->step_index
- );
- capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
- array_erase(&self->states, j);
- j--;
- continue;
- }
- other_state->has_in_progress_alternatives = true;
- }
- if (right_contains_left) {
- if (state->step_index == other_state->step_index) {
- LOG(
- " drop shorter state. pattern: %u, step_index: %u\n",
- state->pattern_index,
- state->step_index
- );
- capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
- array_erase(&self->states, i);
- i--;
- did_remove = true;
- break;
- }
- state->has_in_progress_alternatives = true;
- }
- }
-
- // If there the state is at the end of its pattern, remove it from the list
- // of in-progress states and add it to the list of finished states.
- if (!did_remove) {
- LOG(
- " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
- state->pattern_index,
- state->start_depth,
- state->step_index,
- capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
- );
- QueryStep *next_step = &self->query->steps.contents[state->step_index];
- if (next_step->depth == PATTERN_DONE_MARKER) {
- if (state->has_in_progress_alternatives) {
- LOG(" defer finishing pattern %u\n", state->pattern_index);
- } else {
- LOG(" finish pattern %u\n", state->pattern_index);
- state->id = self->next_state_id++;
- array_push(&self->finished_states, *state);
- array_erase(&self->states, state - self->states.contents);
- did_match = true;
- i--;
- }
- }
- }
- }
-
- // Continue descending if possible.
- if (ts_tree_cursor_goto_first_child(&self->cursor)) {
- self->depth++;
- } else {
- self->ascending = true;
- }
- }
- }
-}
-
-// Produce the next finished match, advancing the cursor when none is queued.
-//
-// Returns false when the finished queue is empty and
-// ts_query_cursor__advance() reports failure. Otherwise the first finished
-// state is copied into `match`, its capture list is handed back to the pool,
-// and the state is removed from the queue.
-// NOTE(review): `match->captures` points at the storage of the list that was
-// just released, so it is presumably only valid until the cursor is used
-// again - confirm against the pool implementation.
-bool ts_query_cursor_next_match(
-  TSQueryCursor *self,
-  TSQueryMatch *match
-) {
-  bool queue_is_empty = self->finished_states.size == 0;
-  if (queue_is_empty && !ts_query_cursor__advance(self)) return false;
-
-  QueryState *finished = &self->finished_states.contents[0];
-  const CaptureList *list = capture_list_pool_get(
-    &self->capture_list_pool,
-    finished->capture_list_id
-  );
-
-  match->id = finished->id;
-  match->pattern_index = finished->pattern_index;
-  match->captures = list->contents;
-  match->capture_count = list->size;
-
-  capture_list_pool_release(&self->capture_list_pool, finished->capture_list_id);
-  array_erase(&self->finished_states, 0);
-  return true;
-}
-
-// Discard the finished match whose id equals `match_id`, if one is queued.
-//
-// The match's capture list is released to the pool and the state is erased
-// from the finished queue. Only the first state with a matching id is
-// removed; nothing happens when no state matches.
-void ts_query_cursor_remove_match(
-  TSQueryCursor *self,
-  uint32_t match_id
-) {
-  for (unsigned index = 0; index < self->finished_states.size; index++) {
-    const QueryState *candidate = &self->finished_states.contents[index];
-    if (candidate->id != match_id) continue;
-
-    capture_list_pool_release(
-      &self->capture_list_pool,
-      candidate->capture_list_id
-    );
-    array_erase(&self->finished_states, index);
-    return;
-  }
-}
-
-// Return the next capture, one at a time, trying to preserve source order.
-//
-// On success, `match` is filled from the finished state that owns the
-// capture, `*capture_index` is the index of the capture within
-// `match->captures`, and the function returns true. The owning state's
-// consumed_capture_count is incremented so the same capture is not returned
-// again. Finished states whose captures have all been consumed are released
-// and erased along the way.
-//
-// Returns false once ts_query_cursor__advance() fails and no finished states
-// remain.
-bool ts_query_cursor_next_capture(
- TSQueryCursor *self,
- TSQueryMatch *match,
- uint32_t *capture_index
-) {
- for (;;) {
- // The goal here is to return captures in order, even though they may not
- // be discovered in order, because patterns can overlap. If there are any
- // finished patterns, then try to find one that contains a capture that
- // is *definitely* before any capture in an *unfinished* pattern.
- if (self->finished_states.size > 0) {
- // First, identify the position of the earliest capture in an unfinished
- // match. For a finished capture to be returned, it must be *before*
- // this position.
- uint32_t first_unfinished_capture_byte;
- uint32_t first_unfinished_pattern_index;
- uint32_t first_unfinished_state_index;
- ts_query_cursor__first_in_progress_capture(
- self,
- &first_unfinished_state_index,
- &first_unfinished_capture_byte,
- &first_unfinished_pattern_index
- );
-
- // Find the earliest capture in a finished match.
- // The search starts from the unfinished position, so a finished capture is
- // only selected when it sorts strictly before it (by byte, then pattern).
- int first_finished_state_index = -1;
- uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
- uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
- for (unsigned i = 0; i < self->finished_states.size; i++) {
- const QueryState *state = &self->finished_states.contents[i];
- const CaptureList *captures = capture_list_pool_get(
- &self->capture_list_pool,
- state->capture_list_id
- );
- if (captures->size > state->consumed_capture_count) {
- uint32_t capture_byte = ts_node_start_byte(
- captures->contents[state->consumed_capture_count].node
- );
- if (
- capture_byte < first_finished_capture_byte ||
- (
- capture_byte == first_finished_capture_byte &&
- state->pattern_index < first_finished_pattern_index
- )
- ) {
- first_finished_state_index = i;
- first_finished_capture_byte = capture_byte;
- first_finished_pattern_index = state->pattern_index;
- }
- } else {
- // Every capture of this finished state has been consumed: drop it.
- capture_list_pool_release(
- &self->capture_list_pool,
- state->capture_list_id
- );
- array_erase(&self->finished_states, i);
- i--;
- }
- }
-
- // If there is a finished capture that is clearly before any unfinished
- // capture, then return its match, and its capture index. Internally
- // record the fact that the capture has been 'consumed'.
- if (first_finished_state_index != -1) {
- QueryState *state = &self->finished_states.contents[
- first_finished_state_index
- ];
- match->id = state->id;
- match->pattern_index = state->pattern_index;
- const CaptureList *captures = capture_list_pool_get(
- &self->capture_list_pool,
- state->capture_list_id
- );
- match->captures = captures->contents;
- match->capture_count = captures->size;
- *capture_index = state->consumed_capture_count;
- state->consumed_capture_count++;
- return true;
- }
-
- // Nothing could be returned and the capture list pool reports empty:
- // give up on the earliest in-progress state to free its capture list.
- // NOTE(review): this indexes self->states with the value written by
- // ts_query_cursor__first_in_progress_capture(); confirm what that helper
- // writes when there are no in-progress states.
- if (capture_list_pool_is_empty(&self->capture_list_pool)) {
- LOG(
- " abandon state. index:%u, pattern:%u, offset:%u.\n",
- first_unfinished_state_index,
- first_unfinished_pattern_index,
- first_unfinished_capture_byte
- );
- capture_list_pool_release(
- &self->capture_list_pool,
- self->states.contents[first_unfinished_state_index].capture_list_id
- );
- array_erase(&self->states, first_unfinished_state_index);
- }
- }
-
- // If there are no finished matches that are ready to be returned, then
- // continue finding more matches.
- if (
- !ts_query_cursor__advance(self) &&
- self->finished_states.size == 0
- ) return false;
- }
-}
-
-#undef LOG
diff --git a/src/tree_sitter/reduce_action.h b/src/tree_sitter/reduce_action.h
deleted file mode 100644
index 72aff08d73..0000000000
--- a/src/tree_sitter/reduce_action.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef TREE_SITTER_REDUCE_ACTION_H_
-#define TREE_SITTER_REDUCE_ACTION_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./array.h"
-#include "tree_sitter/api.h"
-
-// Describes a single reduce action.
-// ts_reduce_action_set_add() treats two actions as duplicates when both
-// `symbol` and `count` are equal; the other fields are not compared.
-typedef struct {
- uint32_t count;
- TSSymbol symbol;
- int dynamic_precedence;
- unsigned short production_id;
-} ReduceAction;
-
-// Growable array of reduce actions, kept duplicate-free by
-// ts_reduce_action_set_add().
-typedef Array(ReduceAction) ReduceActionSet;
-
-// Append `new_action` to the set unless an action with the same symbol and
-// count is already present. The remaining fields do not take part in the
-// duplicate check.
-static inline void ts_reduce_action_set_add(ReduceActionSet *self,
-                                            ReduceAction new_action) {
-  uint32_t index = 0;
-  while (index < self->size) {
-    const ReduceAction *existing = &self->contents[index];
-    bool is_duplicate =
-      existing->symbol == new_action.symbol &&
-      existing->count == new_action.count;
-    if (is_duplicate) return;
-    index++;
-  }
-  array_push(self, new_action);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_REDUCE_ACTION_H_
diff --git a/src/tree_sitter/reusable_node.h b/src/tree_sitter/reusable_node.h
deleted file mode 100644
index 9cba951909..0000000000
--- a/src/tree_sitter/reusable_node.h
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "./subtree.h"
-
-// One level of the traversal path: a subtree, its index among its parent's
-// children, and the byte offset recorded for it.
-typedef struct {
- Subtree tree;
- uint32_t child_index;
- uint32_t byte_offset;
-} StackEntry;
-
-// A cursor over a subtree. `stack` holds the path of entries from the tree
-// passed to reusable_node_reset() down to the current node;
-// `last_external_token` is updated by reusable_node_advance().
-typedef struct {
- Array(StackEntry) stack;
- Subtree last_external_token;
-} ReusableNode;
-
-// Build an empty cursor: no stack entries and no external token.
-static inline ReusableNode reusable_node_new(void) {
-  ReusableNode fresh = {
-    .stack = array_new(),
-    .last_external_token = NULL_SUBTREE,
-  };
-  return fresh;
-}
-
-// Reset the cursor to the empty state: forget the last external token and
-// clear the entry stack.
-static inline void reusable_node_clear(ReusableNode *self) {
-  self->last_external_token = NULL_SUBTREE;
-  array_clear(&self->stack);
-}
-
-// Point the cursor at `tree`: clear any previous state, then push a single
-// entry for `tree` with child index 0 and byte offset 0.
-static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
-  reusable_node_clear(self);
-
-  StackEntry root_entry = {
-    .tree = tree,
-    .child_index = 0,
-    .byte_offset = 0,
-  };
-  array_push(&self->stack, root_entry);
-}
-
-// Return the subtree at the top of the stack, or NULL_SUBTREE when the
-// stack is empty.
-static inline Subtree reusable_node_tree(ReusableNode *self) {
-  if (self->stack.size == 0) return NULL_SUBTREE;
-  return self->stack.contents[self->stack.size - 1].tree;
-}
-
-// Return the byte offset of the entry at the top of the stack, or
-// UINT32_MAX when the stack is empty.
-static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
-  if (self->stack.size == 0) return UINT32_MAX;
-  return self->stack.contents[self->stack.size - 1].byte_offset;
-}
-
-// Free the storage of the entry stack. `last_external_token` is not touched.
-static inline void reusable_node_delete(ReusableNode *self) {
- array_delete(&self->stack);
-}
-
-// Step past the current node to the next node that follows it.
-//
-// The current entry is popped, along with every ancestor that has no child
-// after the popped one; the next sibling is then pushed at the byte offset
-// just past the original node. If the stack empties while popping, the
-// function returns and leaves it empty. When the node being left contains
-// external tokens, `last_external_token` is updated from it.
-static inline void reusable_node_advance(ReusableNode *self) {
-  StackEntry current = *array_back(&self->stack);
-
-  if (ts_subtree_has_external_tokens(current.tree)) {
-    self->last_external_token = ts_subtree_last_external_token(current.tree);
-  }
-  uint32_t next_offset = current.byte_offset + ts_subtree_total_bytes(current.tree);
-
-  Subtree parent;
-  uint32_t sibling_index;
-  for (;;) {
-    sibling_index = array_pop(&self->stack).child_index + 1;
-    if (self->stack.size == 0) return;
-    parent = array_back(&self->stack)->tree;
-    if (sibling_index < ts_subtree_child_count(parent)) break;
-  }
-
-  StackEntry sibling = {
-    .tree = parent.ptr->children[sibling_index],
-    .child_index = sibling_index,
-    .byte_offset = next_offset,
-  };
-  array_push(&self->stack, sibling);
-}
-
-// Move to the first child of the current node.
-//
-// Returns false, leaving the stack unchanged, when the current node has no
-// children. Otherwise pushes the first child at the parent's byte offset
-// and returns true.
-static inline bool reusable_node_descend(ReusableNode *self) {
-  StackEntry top = *array_back(&self->stack);
-  if (ts_subtree_child_count(top.tree) == 0) return false;
-
-  StackEntry first_child = {
-    .tree = top.tree.ptr->children[0],
-    .child_index = 0,
-    .byte_offset = top.byte_offset,
-  };
-  array_push(&self->stack, first_child);
-  return true;
-}
-
-// Descend to the first leaf under the current node, then advance past it.
-static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
-  bool descended;
-  do {
-    descended = reusable_node_descend(self);
-  } while (descended);
-  reusable_node_advance(self);
-}
diff --git a/src/tree_sitter/stack.c b/src/tree_sitter/stack.c
deleted file mode 100644
index 6a8d897c37..0000000000
--- a/src/tree_sitter/stack.c
+++ /dev/null
@@ -1,857 +0,0 @@
-#include "./alloc.h"
-#include "./language.h"
-#include "./subtree.h"
-#include "./array.h"
-#include "./stack.h"
-#include "./length.h"
-#include <assert.h>
-#include <stdio.h>
-
-// Capacity of StackNode.links; stack_node_add_link() drops new links once a
-// node already has this many.
-#define MAX_LINK_COUNT 8
-// Upper bound on the number of released nodes kept for reuse in the pool.
-#define MAX_NODE_POOL_SIZE 50
-// Upper bound on concurrently live iterators in stack__iter().
-#define MAX_ITERATOR_COUNT 64
-
-// From here on in this file, `inline` also implies internal linkage and
-// forced inlining.
-#if defined _WIN32 && !defined __GNUC__
-#define inline __forceinline
-#else
-#define inline static inline __attribute__((always_inline))
-#endif
-
-typedef struct StackNode StackNode;
-
-// An edge from a node to one of its predecessor nodes, labelled with the
-// subtree that was pushed to get from the predecessor to the node.
-typedef struct {
- StackNode *node;
- Subtree subtree;
- bool is_pending;
-} StackLink;
-
-// A reference-counted stack node. `position`, `error_cost`, `node_count` and
-// `dynamic_precedence` are carried over from the predecessor and increased by
-// the pushed subtree in stack_node_new().
-struct StackNode {
- TSStateId state;
- Length position;
- StackLink links[MAX_LINK_COUNT];
- short unsigned int link_count;
- uint32_t ref_count;
- unsigned error_cost;
- unsigned node_count;
- int dynamic_precedence;
-};
-
-// Traversal state for one path explored by stack__iter().
-typedef struct {
- StackNode *node;
- SubtreeArray subtrees;
- uint32_t subtree_count;
- bool is_pending;
-} StackIterator;
-
-// Bundles the user callback and its payload for ts_stack_iterate().
-typedef struct {
- void *payload;
- StackIterateCallback callback;
-} StackIterateSession;
-
-typedef Array(StackNode *) StackNodeArray;
-
-typedef enum {
- StackStatusActive,
- StackStatusPaused,
- StackStatusHalted,
-} StackStatus;
-
-// Per-version state: the top node of that version plus bookkeeping.
-typedef struct {
- StackNode *node;
- Subtree last_external_token;
- StackSummary *summary;
- unsigned node_count_at_last_error;
- TSSymbol lookahead_when_paused;
- StackStatus status;
-} StackHead;
-
-// The stack itself: one head per version, scratch arrays reused by
-// stack__iter() (`slices`, `iterators`), and a pool of recycled nodes.
-struct Stack {
- Array(StackHead) heads;
- StackSliceArray slices;
- Array(StackIterator) iterators;
- StackNodeArray node_pool;
- StackNode *base_node;
- SubtreePool *subtree_pool;
-};
-
-// Bit flags returned by a StackCallback; stack__iter() tests them with `&`,
-// so Stop and Pop may be combined.
-typedef unsigned StackAction;
-enum {
- StackActionNone,
- StackActionStop = 1,
- StackActionPop = 2,
-};
-
-// Callback invoked by stack__iter() for each iterator on each pass.
-typedef StackAction (*StackCallback)(void *, const StackIterator *);
-
-// Take an additional reference on `self`. A NULL node is ignored.
-// The asserts require the node to be alive already and catch wrap-around of
-// the counter.
-static void stack_node_retain(StackNode *self) {
-  if (self) {
-    assert(self->ref_count > 0);
-    self->ref_count++;
-    assert(self->ref_count != 0);
-  }
-}
-
-// Drop one reference to `self`. When the count reaches zero, release the
-// subtrees and predecessor nodes of all links, then recycle the node into
-// `pool` (or free it if the pool already holds MAX_NODE_POOL_SIZE nodes).
-//
-// Predecessors reached through links 1..link_count-1 are released by
-// recursive calls; the predecessor of link 0 is released by jumping back to
-// `recur` rather than recursing.
-static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
-recur:
- assert(self->ref_count != 0);
- self->ref_count--;
- if (self->ref_count > 0) return;
-
- StackNode *first_predecessor = NULL;
- if (self->link_count > 0) {
- // Walk links from last down to 1; link 0 is handled after the loop.
- for (unsigned i = self->link_count - 1; i > 0; i--) {
- StackLink link = self->links[i];
- if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
- stack_node_release(link.node, pool, subtree_pool);
- }
- StackLink link = self->links[0];
- if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
- first_predecessor = self->links[0].node;
- }
-
- if (pool->size < MAX_NODE_POOL_SIZE) {
- array_push(pool, self);
- } else {
- ts_free(self);
- }
-
- // Continue with link 0's predecessor without growing the call stack.
- if (first_predecessor) {
- self = first_predecessor;
- goto recur;
- }
-}
-
-// Create a node in `state`, reusing a pooled node when one is available.
-//
-// With a `previous_node`, the new node gets a single link to it labelled by
-// `subtree`/`is_pending`, inherits its position, error cost, node count and
-// dynamic precedence, and adds the subtree's contribution to each when
-// `subtree` is non-null. Without one, the node starts at position zero with
-// no error cost. The returned node has a reference count of 1; no reference
-// is taken on `previous_node` or `subtree` here.
-static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree,
-                                 bool is_pending, TSStateId state, StackNodeArray *pool) {
-  StackNode *node;
-  if (pool->size > 0) {
-    node = array_pop(pool);
-  } else {
-    node = ts_malloc(sizeof(StackNode));
-  }
-  *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state};
-
-  if (!previous_node) {
-    node->position = length_zero();
-    node->error_cost = 0;
-    return node;
-  }
-
-  node->link_count = 1;
-  node->links[0] = (StackLink){
-    .node = previous_node,
-    .subtree = subtree,
-    .is_pending = is_pending,
-  };
-
-  node->position = previous_node->position;
-  node->error_cost = previous_node->error_cost;
-  node->dynamic_precedence = previous_node->dynamic_precedence;
-  node->node_count = previous_node->node_count;
-
-  if (subtree.ptr) {
-    node->error_cost += ts_subtree_error_cost(subtree);
-    node->position = length_add(node->position, ts_subtree_total_size(subtree));
-    node->node_count += ts_subtree_node_count(subtree);
-    node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
-  }
-
-  return node;
-}
-
-// Decide whether two link subtrees can be treated as the same for merging.
-//
-// Identical pointers (including two nulls) are equivalent. Otherwise both
-// must be non-null with the same symbol, and either both carry a non-zero
-// error cost, or they agree on padding bytes, size bytes, child count, the
-// extra flag and external scanner state.
-static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
-  if (left.ptr == right.ptr) return true;
-  if (!left.ptr || !right.ptr) return false;
-  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;
-
-  if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) {
-    return true;
-  }
-
-  return
-    ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes &&
-    ts_subtree_size(left).bytes == ts_subtree_size(right).bytes &&
-    ts_subtree_child_count(left) == ts_subtree_child_count(right) &&
-    ts_subtree_extra(left) == ts_subtree_extra(right) &&
-    ts_subtree_external_scanner_state_eq(left, right);
-}
-
-// Add `link` as an additional predecessor link of `self`, merging it with an
-// existing equivalent link where possible.
-//
-// - A link from a node to itself is ignored.
-// - If an equivalent link to the same predecessor exists, only the subtree
-//   with the higher dynamic precedence is kept.
-// - If an equivalent link leads to a predecessor with the same state and
-//   byte position, the new predecessor's links are merged into the existing
-//   predecessor recursively.
-// - Otherwise the link is appended, unless the node already has
-//   MAX_LINK_COUNT links, in which case it is silently dropped.
-static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) {
- if (link.node == self) return;
-
- for (int i = 0; i < self->link_count; i++) {
- StackLink *existing_link = &self->links[i];
- if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
- // In general, we preserve ambiguities until they are removed from the stack
- // during a pop operation where multiple paths lead to the same node. But in
- // the special case where two links directly connect the same pair of nodes,
- // we can safely remove the ambiguity ahead of time without changing behavior.
- if (existing_link->node == link.node) {
- if (
- ts_subtree_dynamic_precedence(link.subtree) >
- ts_subtree_dynamic_precedence(existing_link->subtree)
- ) {
- // Retain the replacement before releasing the subtree it replaces.
- ts_subtree_retain(link.subtree);
- ts_subtree_release(subtree_pool, existing_link->subtree);
- existing_link->subtree = link.subtree;
- self->dynamic_precedence =
- link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
- }
- return;
- }
-
- // If the previous nodes are mergeable, merge them recursively.
- if (existing_link->node->state == link.node->state &&
- existing_link->node->position.bytes == link.node->position.bytes) {
- for (int j = 0; j < link.node->link_count; j++) {
- stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
- }
- int32_t dynamic_precedence = link.node->dynamic_precedence;
- if (link.subtree.ptr) {
- dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
- }
- if (dynamic_precedence > self->dynamic_precedence) {
- self->dynamic_precedence = dynamic_precedence;
- }
- return;
- }
- }
- }
-
- if (self->link_count == MAX_LINK_COUNT) return;
-
- // Append the link, taking references on its node and subtree, and raise
- // this node's node_count / dynamic_precedence to the maximum over paths.
- stack_node_retain(link.node);
- unsigned node_count = link.node->node_count;
- int dynamic_precedence = link.node->dynamic_precedence;
- self->links[self->link_count++] = link;
-
- if (link.subtree.ptr) {
- ts_subtree_retain(link.subtree);
- node_count += ts_subtree_node_count(link.subtree);
- dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
- }
-
- if (node_count > self->node_count) self->node_count = node_count;
- if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
-}
-
-// Release everything owned by a head: its last external token, its summary
-// (array storage and the struct itself) and its reference to the top node.
-// A head without a node is left untouched.
-static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
-  if (!self->node) return;
-
-  if (self->last_external_token.ptr) {
-    ts_subtree_release(subtree_pool, self->last_external_token);
-  }
-
-  if (self->summary) {
-    array_delete(self->summary);
-    ts_free(self->summary);
-  }
-
-  stack_node_release(self->node, pool, subtree_pool);
-}
-
-// Append a new active head whose top node is `node`.
-//
-// The error bookkeeping and last external token are copied from
-// `original_version`. The new head takes its own references on `node` and on
-// the external token. Returns the index of the new version.
-static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
-                                          StackNode *node) {
-  const StackHead *original = &self->heads.contents[original_version];
-  StackHead head = {
-    .node = node,
-    .node_count_at_last_error = original->node_count_at_last_error,
-    .last_external_token = original->last_external_token,
-    .status = StackStatusActive,
-    .lookahead_when_paused = 0,
-  };
-  // `original` may dangle once the array grows; it is not used past here.
-  array_push(&self->heads, head);
-
-  stack_node_retain(node);
-  if (head.last_external_token.ptr) {
-    ts_subtree_retain(head.last_external_token);
-  }
-
-  return (StackVersion)(self->heads.size - 1);
-}
-
-// Record a popped slice ending at `node`.
-//
-// If an existing slice's version already has `node` as its top node, the new
-// slice is inserted right after the last such slice and shares that version.
-// Otherwise a new version is created for `node` and the slice is appended.
-static void ts_stack__add_slice(Stack *self, StackVersion original_version,
-                                StackNode *node, SubtreeArray *subtrees) {
-  // Scan from the back; `position` is one past the slice being examined.
-  for (uint32_t position = self->slices.size; position > 0; position--) {
-    StackVersion existing_version = self->slices.contents[position - 1].version;
-    if (self->heads.contents[existing_version].node != node) continue;
-
-    StackSlice slice = {*subtrees, existing_version};
-    array_insert(&self->slices, position, slice);
-    return;
-  }
-
-  StackVersion new_version = ts_stack__add_version(self, original_version, node);
-  StackSlice slice = { *subtrees, new_version };
-  array_push(&self->slices, slice);
-}
-
-// Walk backwards from the top node of `version` along predecessor links,
-// calling `callback(payload, iterator)` for every live iterator on each pass.
-//
-// The callback's return value controls the walk:
-//  - StackActionPop: the subtrees collected so far are reversed and recorded
-//    as a slice via ts_stack__add_slice().
-//  - StackActionStop (or reaching a node with no links): the iterator is
-//    removed.
-// When a node has several links, link 0 is followed by the current iterator
-// and each further link by a copy of it, up to MAX_ITERATOR_COUNT iterators;
-// links beyond that limit are skipped.
-//
-// `goal_subtree_count` >= 0 enables collecting subtrees and is used as the
-// initial capacity of the subtree array; a negative value disables
-// collection. Returns `self->slices`, which is cleared at the start of every
-// call.
-inline StackSliceArray stack__iter(Stack *self, StackVersion version,
- StackCallback callback, void *payload,
- int goal_subtree_count) {
- array_clear(&self->slices);
- array_clear(&self->iterators);
-
- StackHead *head = array_get(&self->heads, version);
- StackIterator iterator = {
- .node = head->node,
- .subtrees = array_new(),
- .subtree_count = 0,
- .is_pending = true,
- };
-
- bool include_subtrees = false;
- if (goal_subtree_count >= 0) {
- include_subtrees = true;
- array_reserve(&iterator.subtrees, goal_subtree_count);
- }
-
- array_push(&self->iterators, iterator);
-
- while (self->iterators.size > 0) {
- // `size` is captured up front so iterators pushed during this pass are
- // first visited on the next pass.
- for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
- StackIterator *iterator = &self->iterators.contents[i];
- StackNode *node = iterator->node;
-
- StackAction action = callback(payload, iterator);
- bool should_pop = action & StackActionPop;
- bool should_stop = action & StackActionStop || node->link_count == 0;
-
- if (should_pop) {
- SubtreeArray subtrees = iterator->subtrees;
- // If the iterator keeps going, the slice needs its own copy of the array.
- if (!should_stop)
- ts_subtree_array_copy(subtrees, &subtrees);
- ts_subtree_array_reverse(&subtrees);
- ts_stack__add_slice(
- self,
- version,
- node,
- &subtrees
- );
- }
-
- if (should_stop) {
- // When popped, the array was handed to the slice above, so it is only
- // deleted here when it was not popped.
- if (!should_pop)
- ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
- array_erase(&self->iterators, i);
- i--, size--;
- continue;
- }
-
- // j runs 1..link_count: the forks for links 1.. are created first, and
- // the final round (j == link_count) advances this iterator along link 0.
- for (uint32_t j = 1; j <= node->link_count; j++) {
- StackIterator *next_iterator;
- StackLink link;
- if (j == node->link_count) {
- link = node->links[0];
- next_iterator = &self->iterators.contents[i];
- } else {
- if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
- link = node->links[j];
- StackIterator current_iterator = self->iterators.contents[i];
- array_push(&self->iterators, current_iterator);
- next_iterator = array_back(&self->iterators);
- ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
- }
-
- next_iterator->node = link.node;
- if (link.subtree.ptr) {
- if (include_subtrees) {
- array_push(&next_iterator->subtrees, link.subtree);
- ts_subtree_retain(link.subtree);
- }
-
- // Extra subtrees are collected but do not count towards subtree_count.
- if (!ts_subtree_extra(link.subtree)) {
- next_iterator->subtree_count++;
- if (!link.is_pending) {
- next_iterator->is_pending = false;
- }
- }
- } else {
- next_iterator->subtree_count++;
- next_iterator->is_pending = false;
- }
- }
- }
- }
-
- return self->slices;
-}
-
-// Allocate a stack that uses `subtree_pool` for subtree bookkeeping.
-//
-// All internal arrays are initialised and given a starting capacity, the
-// base node is created in state 1 with no predecessor, and ts_stack_clear()
-// is called before the stack is returned.
-Stack *ts_stack_new(SubtreePool *subtree_pool) {
-  Stack *stack = ts_calloc(1, sizeof(Stack));
-
-  array_init(&stack->heads);
-  array_reserve(&stack->heads, 4);
-
-  array_init(&stack->slices);
-  array_reserve(&stack->slices, 4);
-
-  array_init(&stack->iterators);
-  array_reserve(&stack->iterators, 4);
-
-  array_init(&stack->node_pool);
-  array_reserve(&stack->node_pool, MAX_NODE_POOL_SIZE);
-
-  stack->subtree_pool = subtree_pool;
-  stack->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &stack->node_pool);
-  ts_stack_clear(stack);
-
-  return stack;
-}
-
-// Destroy the stack and everything it owns.
-//
-// Order matters: the base node and the heads are released first, which may
-// push nodes into the node pool; only then are the pooled nodes freed.
-void ts_stack_delete(Stack *self) {
-  StackNodeArray *pool = &self->node_pool;
-
-  if (self->slices.contents) {
-    array_delete(&self->slices);
-  }
-  if (self->iterators.contents) {
-    array_delete(&self->iterators);
-  }
-
-  stack_node_release(self->base_node, pool, self->subtree_pool);
-  for (uint32_t v = 0; v < self->heads.size; v++) {
-    StackHead *head = &self->heads.contents[v];
-    stack_head_delete(head, pool, self->subtree_pool);
-  }
-  array_clear(&self->heads);
-
-  if (pool->contents) {
-    for (uint32_t n = 0; n < pool->size; n++) {
-      ts_free(pool->contents[n]);
-    }
-    array_delete(pool);
-  }
-
-  array_delete(&self->heads);
-  ts_free(self);
-}
-
-// Number of versions (heads) currently held by the stack.
-uint32_t ts_stack_version_count(const Stack *self) {
-  uint32_t head_count = self->heads.size;
-  return head_count;
-}
-
-// State stored in the top node of `version`.
-TSStateId ts_stack_state(const Stack *self, StackVersion version) {
-  const StackHead *head = array_get(&self->heads, version);
-  return head->node->state;
-}
-
-// Position stored in the top node of `version`.
-Length ts_stack_position(const Stack *self, StackVersion version) {
-  const StackHead *head = array_get(&self->heads, version);
-  return head->node->position;
-}
-
-// Last external token recorded on the head of `version`. No reference is
-// taken on behalf of the caller.
-Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
-  const StackHead *head = array_get(&self->heads, version);
-  return head->last_external_token;
-}
-
-// Replace the last external token of `version` with `token`.
-//
-// The new token is retained before the old one is released, so passing the
-// token that is already stored does not drop its last reference.
-void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
-  StackHead *head = array_get(&self->heads, version);
-  Subtree previous = head->last_external_token;
-
-  if (token.ptr) {
-    ts_subtree_retain(token);
-  }
-  if (previous.ptr) {
-    ts_subtree_release(self->subtree_pool, previous);
-  }
-  head->last_external_token = token;
-}
-
-unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
- StackHead *head = array_get(&self->heads, version);
- unsigned result = head->node->error_cost;
- if (
- head->status == StackStatusPaused ||
- (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr)) {
- result += ERROR_COST_PER_RECOVERY;
- }
- return result;
-}
-
-unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
- StackHead *head = array_get(&self->heads, version);
- if (head->node->node_count < head->node_count_at_last_error) {
- head->node_count_at_last_error = head->node->node_count;
- }
- return head->node->node_count - head->node_count_at_last_error;
-}
-
-// Push `subtree` onto `version`, moving its head to a new node in `state`.
-//
-// The new node links back to the previous top node. When `subtree` is null,
-// the head's error baseline is reset to the new node's node count. No
-// retain or release is performed here.
-void ts_stack_push(Stack *self, StackVersion version, Subtree subtree,
-                   bool pending, TSStateId state) {
-  StackHead *head = array_get(&self->heads, version);
-
-  head->node = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
-  if (subtree.ptr == NULL) {
-    head->node_count_at_last_error = head->node->node_count;
-  }
-}
-
-// Adapter used by ts_stack_iterate(): forwards the iterator's state and
-// subtree count to the user callback and never pops or stops.
-inline StackAction iterate_callback(void *payload, const StackIterator *iterator) {
-  StackIterateSession *session = payload;
-  TSStateId state = iterator->node->state;
-  uint32_t subtree_count = iterator->subtree_count;
-
-  session->callback(session->payload, state, subtree_count);
-  return StackActionNone;
-}
-
-// Visit every path reachable from the head of `version`, calling `callback`
-// with `payload`. Subtrees are not collected (goal count of -1).
-void ts_stack_iterate(Stack *self, StackVersion version,
-                      StackIterateCallback callback, void *payload) {
-  StackIterateSession session = {
-    .payload = payload,
-    .callback = callback,
-  };
-  stack__iter(self, version, iterate_callback, &session, -1);
-}
-
-// Pop and stop once the iterator has gathered exactly the number of
-// subtrees pointed to by `payload`; otherwise keep walking.
-inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
-  const unsigned *goal = payload;
-  bool reached_goal = iterator->subtree_count == *goal;
-  return reached_goal ? (StackActionPop | StackActionStop) : StackActionNone;
-}
-
-// Pop `count` subtrees from `version`, returning one slice per distinct path.
-StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
-  StackSliceArray slices = stack__iter(self, version, pop_count_callback, &count, count);
-  return slices;
-}
-
-// Stop as soon as one subtree has been counted; pop it only if the iterator
-// is still marked pending.
-inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
-  (void)payload;
-  if (iterator->subtree_count < 1) return StackActionNone;
-  return iterator->is_pending
-    ? (StackActionPop | StackActionStop)
-    : StackActionStop;
-}
-
-// Pop the top subtree of `version` if it is pending.
-//
-// When a slice was produced, the version created for it is renumbered back
-// to `version`, and the first slice is updated to refer to `version`.
-StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
-  StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0);
-  if (pop.size == 0) return pop;
-
-  StackSlice *first = &pop.contents[0];
-  ts_stack_renumber_version(self, first->version, version);
-  first->version = version;
-  return pop;
-}
-
-// Callback for ts_stack_pop_error(). Once an iterator has collected a
-// subtree it always stops; it additionally pops when that first subtree is
-// an error and no error has been found yet (tracked through `payload`).
-inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
-  if (iterator->subtrees.size == 0) return StackActionNone;
-
-  bool *found_error = payload;
-  if (*found_error || !ts_subtree_is_error(iterator->subtrees.contents[0])) {
-    return StackActionStop;
-  }
-
-  *found_error = true;
-  return StackActionPop | StackActionStop;
-}
-
-SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
- StackNode *node = array_get(&self->heads, version)->node;
- for (unsigned i = 0; i < node->link_count; i++) {
- if (node->links[i].subtree.ptr && ts_subtree_is_error(node->links[i].subtree)) {
- bool found_error = false;
- StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
- if (pop.size > 0) {
- assert(pop.size == 1);
- ts_stack_renumber_version(self, pop.contents[0].version, version);
- return pop.contents[0].subtrees;
- }
- break;
- }
- }
- return (SubtreeArray){.size = 0};
-}
-
-inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
- (void)payload;
- return iterator->node->link_count == 0 ? StackActionPop : StackActionNone;
-}
-
-StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
- return stack__iter(self, version, pop_all_callback, NULL, 0);
-}
-
-typedef struct {
- StackSummary *summary;
- unsigned max_depth;
-} SummarizeStackSession;
-
-inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
- SummarizeStackSession *session = payload;
- TSStateId state = iterator->node->state;
- unsigned depth = iterator->subtree_count;
- if (depth > session->max_depth) return StackActionStop;
- for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) {
- StackSummaryEntry entry = session->summary->contents[i];
- if (entry.depth < depth) break;
- if (entry.depth == depth && entry.state == state) return StackActionNone;
- }
- array_push(session->summary, ((StackSummaryEntry){
- .position = iterator->node->position,
- .depth = depth,
- .state = state,
- }));
- return StackActionNone;
-}
-
-void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
- SummarizeStackSession session = {
- .summary = ts_malloc(sizeof(StackSummary)),
- .max_depth = max_depth
- };
- array_init(session.summary);
- stack__iter(self, version, summarize_stack_callback, &session, -1);
- StackHead *head = &self->heads.contents[version];
- if (head->summary) {
- array_delete(head->summary);
- ts_free(head->summary);
- }
- head->summary = session.summary;
-}
-
-StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
- return array_get(&self->heads, version)->summary;
-}
-
-int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
- return array_get(&self->heads, version)->node->dynamic_precedence;
-}
-
-bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
- const StackHead *head = array_get(&self->heads, version);
- const StackNode *node = head->node;
- if (node->error_cost == 0) return true;
- while (node) {
- if (node->link_count > 0) {
- Subtree subtree = node->links[0].subtree;
- if (subtree.ptr) {
- if (ts_subtree_total_bytes(subtree) > 0) {
- return true;
- } else if (
- node->node_count > head->node_count_at_last_error &&
- ts_subtree_error_cost(subtree) == 0
- ) {
- node = node->links[0].node;
- continue;
- }
- }
- }
- break;
- }
- return false;
-}
-
-void ts_stack_remove_version(Stack *self, StackVersion version) {
- stack_head_delete(array_get(&self->heads, version), &self->node_pool, self->subtree_pool);
- array_erase(&self->heads, version);
-}
-
-void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
- if (v1 == v2) return;
- assert(v2 < v1);
- assert((uint32_t)v1 < self->heads.size);
- StackHead *source_head = &self->heads.contents[v1];
- StackHead *target_head = &self->heads.contents[v2];
- if (target_head->summary && !source_head->summary) {
- source_head->summary = target_head->summary;
- target_head->summary = NULL;
- }
- stack_head_delete(target_head, &self->node_pool, self->subtree_pool);
- *target_head = *source_head;
- array_erase(&self->heads, v1);
-}
-
-void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
- StackHead temporary_head = self->heads.contents[v1];
- self->heads.contents[v1] = self->heads.contents[v2];
- self->heads.contents[v2] = temporary_head;
-}
-
-StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
- assert(version < self->heads.size);
- array_push(&self->heads, self->heads.contents[version]);
- StackHead *head = array_back(&self->heads);
- stack_node_retain(head->node);
- if (head->last_external_token.ptr) ts_subtree_retain(head->last_external_token);
- head->summary = NULL;
- return self->heads.size - 1;
-}
-
-bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
- if (!ts_stack_can_merge(self, version1, version2)) return false;
- StackHead *head1 = &self->heads.contents[version1];
- StackHead *head2 = &self->heads.contents[version2];
- for (uint32_t i = 0; i < head2->node->link_count; i++) {
- stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool);
- }
- if (head1->node->state == ERROR_STATE) {
- head1->node_count_at_last_error = head1->node->node_count;
- }
- ts_stack_remove_version(self, version2);
- return true;
-}
-
-bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
- StackHead *head1 = &self->heads.contents[version1];
- StackHead *head2 = &self->heads.contents[version2];
- return
- head1->status == StackStatusActive &&
- head2->status == StackStatusActive &&
- head1->node->state == head2->node->state &&
- head1->node->position.bytes == head2->node->position.bytes &&
- head1->node->error_cost == head2->node->error_cost &&
- ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token);
-}
-
-void ts_stack_halt(Stack *self, StackVersion version) {
- array_get(&self->heads, version)->status = StackStatusHalted;
-}
-
-void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) {
- StackHead *head = array_get(&self->heads, version);
- head->status = StackStatusPaused;
- head->lookahead_when_paused = lookahead;
- head->node_count_at_last_error = head->node->node_count;
-}
-
-bool ts_stack_is_active(const Stack *self, StackVersion version) {
- return array_get(&self->heads, version)->status == StackStatusActive;
-}
-
-bool ts_stack_is_halted(const Stack *self, StackVersion version) {
- return array_get(&self->heads, version)->status == StackStatusHalted;
-}
-
-bool ts_stack_is_paused(const Stack *self, StackVersion version) {
- return array_get(&self->heads, version)->status == StackStatusPaused;
-}
-
-TSSymbol ts_stack_resume(Stack *self, StackVersion version) {
- StackHead *head = array_get(&self->heads, version);
- assert(head->status == StackStatusPaused);
- TSSymbol result = head->lookahead_when_paused;
- head->status = StackStatusActive;
- head->lookahead_when_paused = 0;
- return result;
-}
-
-void ts_stack_clear(Stack *self) {
- stack_node_retain(self->base_node);
- for (uint32_t i = 0; i < self->heads.size; i++) {
- stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
- }
- array_clear(&self->heads);
- array_push(&self->heads, ((StackHead){
- .node = self->base_node,
- .last_external_token = NULL_SUBTREE,
- .status = StackStatusActive,
- .lookahead_when_paused = 0,
- }));
-}
-
-bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
- array_reserve(&self->iterators, 32);
- bool was_recording_allocations = ts_toggle_allocation_recording(false);
- if (!f) f = stderr;
-
- fprintf(f, "digraph stack {\n");
- fprintf(f, "rankdir=\"RL\";\n");
- fprintf(f, "edge [arrowhead=none]\n");
-
- Array(StackNode *) visited_nodes = array_new();
-
- array_clear(&self->iterators);
- for (uint32_t i = 0; i < self->heads.size; i++) {
- StackHead *head = &self->heads.contents[i];
- if (head->status == StackStatusHalted) continue;
-
- fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
- fprintf(f, "node_head_%u -> node_%p [", i, head->node);
-
- if (head->status == StackStatusPaused) {
- fprintf(f, "color=red ");
- }
- fprintf(f,
- "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
- i,
- ts_stack_node_count_since_error(self, i),
- ts_stack_error_cost(self, i)
- );
-
- if (head->summary) {
- fprintf(f, "\nsummary_size: %u", head->summary->size);
- }
-
- if (head->last_external_token.ptr) {
- const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
- const char *data = ts_external_scanner_state_data(state);
- fprintf(f, "\nexternal_scanner_state:");
- for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]);
- }
-
- fprintf(f, "\"]\n");
- array_push(&self->iterators, ((StackIterator){.node = head->node }));
- }
-
- bool all_iterators_done = false;
- while (!all_iterators_done) {
- all_iterators_done = true;
-
- for (uint32_t i = 0; i < self->iterators.size; i++) {
- StackIterator iterator = self->iterators.contents[i];
- StackNode *node = iterator.node;
-
- for (uint32_t j = 0; j < visited_nodes.size; j++) {
- if (visited_nodes.contents[j] == node) {
- node = NULL;
- break;
- }
- }
-
- if (!node) continue;
- all_iterators_done = false;
-
- fprintf(f, "node_%p [", node);
- if (node->state == ERROR_STATE) {
- fprintf(f, "label=\"?\"");
- } else if (
- node->link_count == 1 &&
- node->links[0].subtree.ptr &&
- ts_subtree_extra(node->links[0].subtree)
- ) {
- fprintf(f, "shape=point margin=0 label=\"\"");
- } else {
- fprintf(f, "label=\"%d\"", node->state);
- }
-
- fprintf(
- f,
- " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
- node->position.extent.row + 1,
- node->position.extent.column,
- node->node_count,
- node->error_cost,
- node->dynamic_precedence
- );
-
- for (int j = 0; j < node->link_count; j++) {
- StackLink link = node->links[j];
- fprintf(f, "node_%p -> node_%p [", node, link.node);
- if (link.is_pending) fprintf(f, "style=dashed ");
- if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");
-
- if (!link.subtree.ptr) {
- fprintf(f, "color=red");
- } else {
- fprintf(f, "label=\"");
- bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
- if (quoted) fprintf(f, "'");
- const char *name = ts_language_symbol_name(language, ts_subtree_symbol(link.subtree));
- for (const char *c = name; *c; c++) {
- if (*c == '\"' || *c == '\\') fprintf(f, "\\");
- fprintf(f, "%c", *c);
- }
- if (quoted) fprintf(f, "'");
- fprintf(f, "\"");
- fprintf(
- f,
- "labeltooltip=\"error_cost: %u\ndynamic_precedence: %u\"",
- ts_subtree_error_cost(link.subtree),
- ts_subtree_dynamic_precedence(link.subtree)
- );
- }
-
- fprintf(f, "];\n");
-
- StackIterator *next_iterator;
- if (j == 0) {
- next_iterator = &self->iterators.contents[i];
- } else {
- array_push(&self->iterators, iterator);
- next_iterator = array_back(&self->iterators);
- }
- next_iterator->node = link.node;
- }
-
- array_push(&visited_nodes, node);
- }
- }
-
- fprintf(f, "}\n");
-
- array_delete(&visited_nodes);
- ts_toggle_allocation_recording(was_recording_allocations);
- return true;
-}
-
-#undef inline
diff --git a/src/tree_sitter/stack.h b/src/tree_sitter/stack.h
deleted file mode 100644
index ec7a69d2b4..0000000000
--- a/src/tree_sitter/stack.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef TREE_SITTER_PARSE_STACK_H_
-#define TREE_SITTER_PARSE_STACK_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "./array.h"
-#include "./subtree.h"
-#include "./error_costs.h"
-#include <stdio.h>
-
-typedef struct Stack Stack;
-
-typedef unsigned StackVersion;
-#define STACK_VERSION_NONE ((StackVersion)-1)
-
-typedef struct {
- SubtreeArray subtrees;
- StackVersion version;
-} StackSlice;
-typedef Array(StackSlice) StackSliceArray;
-
-typedef struct {
- Length position;
- unsigned depth;
- TSStateId state;
-} StackSummaryEntry;
-typedef Array(StackSummaryEntry) StackSummary;
-
-// Create a stack.
-Stack *ts_stack_new(SubtreePool *);
-
-// Release the memory reserved for a given stack.
-void ts_stack_delete(Stack *);
-
-// Get the stack's current number of versions.
-uint32_t ts_stack_version_count(const Stack *);
-
-// Get the state at the top of the given version of the stack. If the stack is
-// empty, this returns the initial state, 0.
-TSStateId ts_stack_state(const Stack *, StackVersion);
-
-// Get the last external token associated with a given version of the stack.
-Subtree ts_stack_last_external_token(const Stack *, StackVersion);
-
-// Set the last external token associated with a given version of the stack.
-void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
-
-// Get the position of the given version of the stack within the document.
-Length ts_stack_position(const Stack *, StackVersion);
-
-// Push a tree and state onto the given version of the stack.
-//
-// This transfers ownership of the tree to the Stack. Callers that
-// need to retain ownership of the tree for their own purposes should
-// first retain the tree.
-void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
-
-// Pop the given number of entries from the given version of the stack. This
-// operation can increase the number of stack versions by revealing multiple
-// versions which had previously been merged. It returns an array that
-// specifies the index of each revealed version and the trees that were
-// removed from that version.
-StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
-
-// Remove an error at the top of the given version of the stack.
-SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
-
-// Remove any pending trees from the top of the given version of the stack.
-StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
-
-// Remove any all trees from the given version of the stack.
-StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
-
-// Get the maximum number of tree nodes reachable from this version of the stack
-// since the last error was detected.
-unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
-
-int ts_stack_dynamic_precedence(Stack *, StackVersion);
-
-bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
-
-// Compute a summary of all the parse states near the top of the given
-// version of the stack and store the summary for later retrieval.
-void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
-
-// Retrieve a summary of all the parse states near the top of the
-// given version of the stack.
-StackSummary *ts_stack_get_summary(Stack *, StackVersion);
-
-// Get the total cost of all errors on the given version of the stack.
-unsigned ts_stack_error_cost(const Stack *, StackVersion version);
-
-// Merge the given two stack versions if possible, returning true
-// if they were successfully merged and false otherwise.
-bool ts_stack_merge(Stack *, StackVersion, StackVersion);
-
-// Determine whether the given two stack versions can be merged.
-bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
-
-TSSymbol ts_stack_resume(Stack *, StackVersion);
-
-void ts_stack_pause(Stack *, StackVersion, TSSymbol);
-
-void ts_stack_halt(Stack *, StackVersion);
-
-bool ts_stack_is_active(const Stack *, StackVersion);
-
-bool ts_stack_is_paused(const Stack *, StackVersion);
-
-bool ts_stack_is_halted(const Stack *, StackVersion);
-
-void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
-
-void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
-
-StackVersion ts_stack_copy_version(Stack *, StackVersion);
-
-// Remove the given version from the stack.
-void ts_stack_remove_version(Stack *, StackVersion);
-
-void ts_stack_clear(Stack *);
-
-bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
-
-typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
-
-void ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_PARSE_STACK_H_
diff --git a/src/tree_sitter/subtree.c b/src/tree_sitter/subtree.c
deleted file mode 100644
index ef92a32fe4..0000000000
--- a/src/tree_sitter/subtree.c
+++ /dev/null
@@ -1,982 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <limits.h>
-#include <stdbool.h>
-#include <string.h>
-#include <stdio.h>
-#include "./alloc.h"
-#include "./atomic.h"
-#include "./subtree.h"
-#include "./length.h"
-#include "./language.h"
-#include "./error_costs.h"
-#include <stddef.h>
-
-typedef struct {
- Length start;
- Length old_end;
- Length new_end;
-} Edit;
-
-#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
-#define TS_MAX_TREE_POOL_SIZE 32
-
-static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0};
-
-// ExternalScannerState
-
-void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
- self->length = length;
- if (length > sizeof(self->short_data)) {
- self->long_data = ts_malloc(length);
- memcpy(self->long_data, data, length);
- } else {
- memcpy(self->short_data, data, length);
- }
-}
-
-ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
- ExternalScannerState result = *self;
- if (self->length > sizeof(self->short_data)) {
- result.long_data = ts_malloc(self->length);
- memcpy(result.long_data, self->long_data, self->length);
- }
- return result;
-}
-
-void ts_external_scanner_state_delete(ExternalScannerState *self) {
- if (self->length > sizeof(self->short_data)) {
- ts_free(self->long_data);
- }
-}
-
-const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
- if (self->length > sizeof(self->short_data)) {
- return self->long_data;
- } else {
- return self->short_data;
- }
-}
-
-bool ts_external_scanner_state_eq(const ExternalScannerState *a, const ExternalScannerState *b) {
- return a == b || (
- a->length == b->length &&
- !memcmp(ts_external_scanner_state_data(a), ts_external_scanner_state_data(b), a->length)
- );
-}
-
-// SubtreeArray
-
-void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
- dest->size = self.size;
- dest->capacity = self.capacity;
- dest->contents = self.contents;
- if (self.capacity > 0) {
- dest->contents = ts_calloc(self.capacity, sizeof(Subtree));
- memcpy(dest->contents, self.contents, self.size * sizeof(Subtree));
- for (uint32_t i = 0; i < self.size; i++) {
- ts_subtree_retain(dest->contents[i]);
- }
- }
-}
-
-void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
- for (uint32_t i = 0; i < self->size; i++) {
- ts_subtree_release(pool, self->contents[i]);
- }
- array_delete(self);
-}
-
-SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
- SubtreeArray result = array_new();
-
- uint32_t i = self->size - 1;
- for (; i + 1 > 0; i--) {
- Subtree child = self->contents[i];
- if (!ts_subtree_extra(child)) break;
- array_push(&result, child);
- }
-
- self->size = i + 1;
- ts_subtree_array_reverse(&result);
- return result;
-}
-
-void ts_subtree_array_reverse(SubtreeArray *self) {
- for (uint32_t i = 0, limit = self->size / 2; i < limit; i++) {
- size_t reverse_index = self->size - 1 - i;
- Subtree swap = self->contents[i];
- self->contents[i] = self->contents[reverse_index];
- self->contents[reverse_index] = swap;
- }
-}
-
-// SubtreePool
-
-SubtreePool ts_subtree_pool_new(uint32_t capacity) {
- SubtreePool self = {array_new(), array_new()};
- array_reserve(&self.free_trees, capacity);
- return self;
-}
-
-void ts_subtree_pool_delete(SubtreePool *self) {
- if (self->free_trees.contents) {
- for (unsigned i = 0; i < self->free_trees.size; i++) {
- ts_free(self->free_trees.contents[i].ptr);
- }
- array_delete(&self->free_trees);
- }
- if (self->tree_stack.contents) array_delete(&self->tree_stack);
-}
-
-static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
- if (self->free_trees.size > 0) {
- return array_pop(&self->free_trees).ptr;
- } else {
- return ts_malloc(sizeof(SubtreeHeapData));
- }
-}
-
-static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
- if (self->free_trees.capacity > 0 && self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE) {
- array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
- } else {
- ts_free(tree);
- }
-}
-
-// Subtree
-
-static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
- return
- padding.bytes < TS_MAX_INLINE_TREE_LENGTH &&
- padding.extent.row < 16 &&
- padding.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
- size.extent.row == 0 &&
- size.extent.column < TS_MAX_INLINE_TREE_LENGTH &&
- lookahead_bytes < 16;
-}
-
-Subtree ts_subtree_new_leaf(
- SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
- uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
- bool is_keyword, const TSLanguage *language
-) {
- TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
- bool extra = symbol == ts_builtin_sym_end;
-
- bool is_inline = (
- symbol <= UINT8_MAX &&
- !has_external_tokens &&
- ts_subtree_can_inline(padding, size, lookahead_bytes)
- );
-
- if (is_inline) {
- return (Subtree) {{
- .parse_state = parse_state,
- .symbol = symbol,
- .padding_bytes = padding.bytes,
- .padding_rows = padding.extent.row,
- .padding_columns = padding.extent.column,
- .size_bytes = size.bytes,
- .lookahead_bytes = lookahead_bytes,
- .visible = metadata.visible,
- .named = metadata.named,
- .extra = extra,
- .has_changes = false,
- .is_missing = false,
- .is_keyword = is_keyword,
- .is_inline = true,
- }};
- } else {
- SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
- *data = (SubtreeHeapData) {
- .ref_count = 1,
- .padding = padding,
- .size = size,
- .lookahead_bytes = lookahead_bytes,
- .error_cost = 0,
- .child_count = 0,
- .symbol = symbol,
- .parse_state = parse_state,
- .visible = metadata.visible,
- .named = metadata.named,
- .extra = extra,
- .fragile_left = false,
- .fragile_right = false,
- .has_changes = false,
- .has_external_tokens = has_external_tokens,
- .is_missing = false,
- .is_keyword = is_keyword,
- {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
- };
- return (Subtree) {.ptr = data};
- }
-}
-
-void ts_subtree_set_symbol(
- MutableSubtree *self,
- TSSymbol symbol,
- const TSLanguage *language
-) {
- TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
- if (self->data.is_inline) {
- assert(symbol < UINT8_MAX);
- self->data.symbol = symbol;
- self->data.named = metadata.named;
- self->data.visible = metadata.visible;
- } else {
- self->ptr->symbol = symbol;
- self->ptr->named = metadata.named;
- self->ptr->visible = metadata.visible;
- }
-}
-
-Subtree ts_subtree_new_error(
- SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
- uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
-) {
- Subtree result = ts_subtree_new_leaf(
- pool, ts_builtin_sym_error, padding, size, bytes_scanned,
- parse_state, false, false, language
- );
- SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
- data->fragile_left = true;
- data->fragile_right = true;
- data->lookahead_char = lookahead_char;
- return result;
-}
-
-MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
- if (self.data.is_inline) return (MutableSubtree) {self.data};
- if (self.ptr->ref_count == 1) return ts_subtree_to_mut_unsafe(self);
-
- SubtreeHeapData *result = ts_subtree_pool_allocate(pool);
- memcpy(result, self.ptr, sizeof(SubtreeHeapData));
- if (result->child_count > 0) {
- result->children = ts_calloc(self.ptr->child_count, sizeof(Subtree));
- memcpy(result->children, self.ptr->children, result->child_count * sizeof(Subtree));
- for (uint32_t i = 0; i < result->child_count; i++) {
- ts_subtree_retain(result->children[i]);
- }
- } else if (result->has_external_tokens) {
- result->external_scanner_state = ts_external_scanner_state_copy(&self.ptr->external_scanner_state);
- }
- result->ref_count = 1;
- ts_subtree_release(pool, self);
- return (MutableSubtree) {.ptr = result};
-}
-
-static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language,
- MutableSubtreeArray *stack) {
- unsigned initial_stack_size = stack->size;
-
- MutableSubtree tree = self;
- TSSymbol symbol = tree.ptr->symbol;
- for (unsigned i = 0; i < count; i++) {
- if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
-
- MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
- if (
- child.data.is_inline ||
- child.ptr->child_count < 2 ||
- child.ptr->ref_count > 1 ||
- child.ptr->symbol != symbol
- ) break;
-
- MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]);
- if (
- grandchild.data.is_inline ||
- grandchild.ptr->child_count < 2 ||
- grandchild.ptr->ref_count > 1 ||
- grandchild.ptr->symbol != symbol
- ) break;
-
- tree.ptr->children[0] = ts_subtree_from_mut(grandchild);
- child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1];
- grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
- array_push(stack, tree);
- tree = grandchild;
- }
-
- while (stack->size > initial_stack_size) {
- tree = array_pop(stack);
- MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
- MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]);
- ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language);
- ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language);
- ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language);
- }
-}
-
-void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
- array_clear(&pool->tree_stack);
-
- if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
- array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
- }
-
- while (pool->tree_stack.size > 0) {
- MutableSubtree tree = array_pop(&pool->tree_stack);
-
- if (tree.ptr->repeat_depth > 0) {
- Subtree child1 = tree.ptr->children[0];
- Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1];
- long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
- if (repeat_delta > 0) {
- unsigned n = repeat_delta;
- for (unsigned i = n / 2; i > 0; i /= 2) {
- ts_subtree__compress(tree, i, language, &pool->tree_stack);
- n -= i;
- }
- }
- }
-
- for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
- Subtree child = tree.ptr->children[i];
- if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
- array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
- }
- }
- }
-}
-
-void ts_subtree_set_children(
- MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language
-) {
- assert(!self.data.is_inline);
-
- if (self.ptr->child_count > 0 && children != self.ptr->children) {
- ts_free(self.ptr->children);
- }
-
- self.ptr->child_count = child_count;
- self.ptr->children = children;
- self.ptr->named_child_count = 0;
- self.ptr->visible_child_count = 0;
- self.ptr->error_cost = 0;
- self.ptr->repeat_depth = 0;
- self.ptr->node_count = 1;
- self.ptr->has_external_tokens = false;
- self.ptr->dynamic_precedence = 0;
-
- uint32_t non_extra_index = 0;
- const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
- uint32_t lookahead_end_byte = 0;
-
- for (uint32_t i = 0; i < self.ptr->child_count; i++) {
- Subtree child = self.ptr->children[i];
-
- if (i == 0) {
- self.ptr->padding = ts_subtree_padding(child);
- self.ptr->size = ts_subtree_size(child);
- } else {
- self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
- }
-
- uint32_t child_lookahead_end_byte =
- self.ptr->padding.bytes +
- self.ptr->size.bytes +
- ts_subtree_lookahead_bytes(child);
- if (child_lookahead_end_byte > lookahead_end_byte) lookahead_end_byte = child_lookahead_end_byte;
-
- if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
- self.ptr->error_cost += ts_subtree_error_cost(child);
- }
-
- self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
- self.ptr->node_count += ts_subtree_node_count(child);
-
- if (alias_sequence && alias_sequence[non_extra_index] != 0 && !ts_subtree_extra(child)) {
- self.ptr->visible_child_count++;
- if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
- self.ptr->named_child_count++;
- }
- } else if (ts_subtree_visible(child)) {
- self.ptr->visible_child_count++;
- if (ts_subtree_named(child)) self.ptr->named_child_count++;
- } else if (ts_subtree_child_count(child) > 0) {
- self.ptr->visible_child_count += child.ptr->visible_child_count;
- self.ptr->named_child_count += child.ptr->named_child_count;
- }
-
- if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
-
- if (ts_subtree_is_error(child)) {
- self.ptr->fragile_left = self.ptr->fragile_right = true;
- self.ptr->parse_state = TS_TREE_STATE_NONE;
- }
-
- if (!ts_subtree_extra(child)) non_extra_index++;
- }
-
- self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
-
- if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) {
- self.ptr->error_cost +=
- ERROR_COST_PER_RECOVERY +
- ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
- ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
- for (uint32_t i = 0; i < self.ptr->child_count; i++) {
- Subtree child = self.ptr->children[i];
- uint32_t grandchild_count = ts_subtree_child_count(child);
- if (ts_subtree_extra(child)) continue;
- if (ts_subtree_is_error(child) && grandchild_count == 0) continue;
- if (ts_subtree_visible(child)) {
- self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
- } else if (grandchild_count > 0) {
- self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
- }
- }
- }
-
- if (self.ptr->child_count > 0) {
- Subtree first_child = self.ptr->children[0];
- Subtree last_child = self.ptr->children[self.ptr->child_count - 1];
-
- self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
- self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
-
- if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
- if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
-
- if (
- self.ptr->child_count >= 2 &&
- !self.ptr->visible &&
- !self.ptr->named &&
- ts_subtree_symbol(first_child) == self.ptr->symbol
- ) {
- if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
- self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
- } else {
- self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
- }
- }
- }
-}
-
-MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
- SubtreeArray *children, unsigned production_id,
- const TSLanguage *language) {
- TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
- bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
- SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
- *data = (SubtreeHeapData) {
- .ref_count = 1,
- .symbol = symbol,
- .visible = metadata.visible,
- .named = metadata.named,
- .has_changes = false,
- .fragile_left = fragile,
- .fragile_right = fragile,
- .is_keyword = false,
- {{
- .node_count = 0,
- .production_id = production_id,
- .first_leaf = {.symbol = 0, .parse_state = 0},
- }}
- };
- MutableSubtree result = {.ptr = data};
- ts_subtree_set_children(result, children->contents, children->size, language);
- return result;
-}
-
-Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
- bool extra, const TSLanguage *language) {
- MutableSubtree result = ts_subtree_new_node(
- pool, ts_builtin_sym_error, children, 0, language
- );
- result.ptr->extra = extra;
- return ts_subtree_from_mut(result);
-}
-
-Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
- const TSLanguage *language) {
- Subtree result = ts_subtree_new_leaf(
- pool, symbol, padding, length_zero(), 0,
- 0, false, false, language
- );
-
- if (result.data.is_inline) {
- result.data.is_missing = true;
- } else {
- ((SubtreeHeapData *)result.ptr)->is_missing = true;
- }
-
- return result;
-}
-
-void ts_subtree_retain(Subtree self) {
- if (self.data.is_inline) return;
- assert(self.ptr->ref_count > 0);
- atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
- assert(self.ptr->ref_count != 0);
-}
-
-void ts_subtree_release(SubtreePool *pool, Subtree self) {
- if (self.data.is_inline) return;
- array_clear(&pool->tree_stack);
-
- assert(self.ptr->ref_count > 0);
- if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
- array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
- }
-
- while (pool->tree_stack.size > 0) {
- MutableSubtree tree = array_pop(&pool->tree_stack);
- if (tree.ptr->child_count > 0) {
- for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
- Subtree child = tree.ptr->children[i];
- if (child.data.is_inline) continue;
- assert(child.ptr->ref_count > 0);
- if (atomic_dec((volatile uint32_t *)&child.ptr->ref_count) == 0) {
- array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
- }
- }
- ts_free(tree.ptr->children);
- } else if (tree.ptr->has_external_tokens) {
- ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
- }
- ts_subtree_pool_free(pool, tree.ptr);
- }
-}
-
-bool ts_subtree_eq(Subtree self, Subtree other) {
- if (self.data.is_inline || other.data.is_inline) {
- return memcmp(&self, &other, sizeof(SubtreeInlineData)) == 0;
- }
-
- if (self.ptr) {
- if (!other.ptr) return false;
- } else {
- return !other.ptr;
- }
-
- if (self.ptr->symbol != other.ptr->symbol) return false;
- if (self.ptr->visible != other.ptr->visible) return false;
- if (self.ptr->named != other.ptr->named) return false;
- if (self.ptr->padding.bytes != other.ptr->padding.bytes) return false;
- if (self.ptr->size.bytes != other.ptr->size.bytes) return false;
- if (self.ptr->symbol == ts_builtin_sym_error) return self.ptr->lookahead_char == other.ptr->lookahead_char;
- if (self.ptr->child_count != other.ptr->child_count) return false;
- if (self.ptr->child_count > 0) {
- if (self.ptr->visible_child_count != other.ptr->visible_child_count) return false;
- if (self.ptr->named_child_count != other.ptr->named_child_count) return false;
-
- for (uint32_t i = 0; i < self.ptr->child_count; i++) {
- if (!ts_subtree_eq(self.ptr->children[i], other.ptr->children[i])) {
- return false;
- }
- }
- }
- return true;
-}
-
-int ts_subtree_compare(Subtree left, Subtree right) {
- if (ts_subtree_symbol(left) < ts_subtree_symbol(right)) return -1;
- if (ts_subtree_symbol(right) < ts_subtree_symbol(left)) return 1;
- if (ts_subtree_child_count(left) < ts_subtree_child_count(right)) return -1;
- if (ts_subtree_child_count(right) < ts_subtree_child_count(left)) return 1;
- for (uint32_t i = 0, n = ts_subtree_child_count(left); i < n; i++) {
- Subtree left_child = left.ptr->children[i];
- Subtree right_child = right.ptr->children[i];
- switch (ts_subtree_compare(left_child, right_child)) {
- case -1: return -1;
- case 1: return 1;
- default: break;
- }
- }
- return 0;
-}
-
-static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
- if (self->data.is_inline) {
- self->data.has_changes = true;
- } else {
- self->ptr->has_changes = true;
- }
-}
-
-Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool) {
- typedef struct {
- Subtree *tree;
- Edit edit;
- } StackEntry;
-
- Array(StackEntry) stack = array_new();
- array_push(&stack, ((StackEntry) {
- .tree = &self,
- .edit = (Edit) {
- .start = {edit->start_byte, edit->start_point},
- .old_end = {edit->old_end_byte, edit->old_end_point},
- .new_end = {edit->new_end_byte, edit->new_end_point},
- },
- }));
-
- while (stack.size) {
- StackEntry entry = array_pop(&stack);
- Edit edit = entry.edit;
- bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
- bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
-
- Length size = ts_subtree_size(*entry.tree);
- Length padding = ts_subtree_padding(*entry.tree);
- uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
- uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes;
- if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
-
- // If the edit is entirely within the space before this subtree, then shift this
- // subtree over according to the edit without changing its size.
- if (edit.old_end.bytes <= padding.bytes) {
- padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
- }
-
- // If the edit starts in the space before this subtree and extends into this subtree,
- // shrink the subtree's content to compensate for the change in the space before it.
- else if (edit.start.bytes < padding.bytes) {
- size = length_sub(size, length_sub(edit.old_end, padding));
- padding = edit.new_end;
- }
-
- // If the edit is a pure insertion right at the start of the subtree,
- // shift the subtree over according to the insertion.
- else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
- padding = edit.new_end;
- }
-
- // If the edit is within this subtree, resize the subtree to reflect the edit.
- else {
- uint32_t total_bytes = padding.bytes + size.bytes;
- if (edit.start.bytes < total_bytes ||
- (edit.start.bytes == total_bytes && is_pure_insertion)) {
- size = length_add(
- length_sub(edit.new_end, padding),
- length_sub(size, length_sub(edit.old_end, padding))
- );
- }
- }
-
- MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
-
- if (result.data.is_inline) {
- if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
- result.data.padding_bytes = padding.bytes;
- result.data.padding_rows = padding.extent.row;
- result.data.padding_columns = padding.extent.column;
- result.data.size_bytes = size.bytes;
- } else {
- SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
- data->ref_count = 1;
- data->padding = padding;
- data->size = size;
- data->lookahead_bytes = lookahead_bytes;
- data->error_cost = 0;
- data->child_count = 0;
- data->symbol = result.data.symbol;
- data->parse_state = result.data.parse_state;
- data->visible = result.data.visible;
- data->named = result.data.named;
- data->extra = result.data.extra;
- data->fragile_left = false;
- data->fragile_right = false;
- data->has_changes = false;
- data->has_external_tokens = false;
- data->is_missing = result.data.is_missing;
- data->is_keyword = result.data.is_keyword;
- result.ptr = data;
- }
- } else {
- result.ptr->padding = padding;
- result.ptr->size = size;
- }
-
- ts_subtree_set_has_changes(&result);
- *entry.tree = ts_subtree_from_mut(result);
-
- Length child_left, child_right = length_zero();
- for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
- Subtree *child = &result.ptr->children[i];
- Length child_size = ts_subtree_total_size(*child);
- child_left = child_right;
- child_right = length_add(child_left, child_size);
-
- // If this child ends before the edit, it is not affected.
- if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
-
- // If this child starts after the edit, then we're done processing children.
- if (child_left.bytes > edit.old_end.bytes ||
- (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
-
- // Transform edit into the child's coordinate space.
- Edit child_edit = {
- .start = length_sub(edit.start, child_left),
- .old_end = length_sub(edit.old_end, child_left),
- .new_end = length_sub(edit.new_end, child_left),
- };
-
- // Clamp child_edit to the child's bounds.
- if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
- if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
- if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
- if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
-
- // Interpret all inserted text as applying to the *first* child that touches the edit.
- // Subsequent children are only never have any text inserted into them; they are only
- // shrunk to compensate for the edit.
- if (child_right.bytes > edit.start.bytes ||
- (child_right.bytes == edit.start.bytes && is_pure_insertion)) {
- edit.new_end = edit.start;
- }
-
- // Children that occur before the edit are not reshaped by the edit.
- else {
- child_edit.old_end = child_edit.start;
- child_edit.new_end = child_edit.start;
- }
-
- // Queue processing of this child's subtree.
- array_push(&stack, ((StackEntry) {
- .tree = child,
- .edit = child_edit,
- }));
- }
- }
-
- array_delete(&stack);
- return self;
-}
-
-Subtree ts_subtree_last_external_token(Subtree tree) {
- if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;
- while (tree.ptr->child_count > 0) {
- for (uint32_t i = tree.ptr->child_count - 1; i + 1 > 0; i--) {
- Subtree child = tree.ptr->children[i];
- if (ts_subtree_has_external_tokens(child)) {
- tree = child;
- break;
- }
- }
- }
- return tree;
-}
-
-static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
- if (c == -1)
- return snprintf(s, n, "INVALID");
- else if (c == '\0')
- return snprintf(s, n, "'\\0'");
- else if (c == '\n')
- return snprintf(s, n, "'\\n'");
- else if (c == '\t')
- return snprintf(s, n, "'\\t'");
- else if (c == '\r')
- return snprintf(s, n, "'\\r'");
- else if (0 < c && c < 128 && isprint(c))
- return snprintf(s, n, "'%c'", c);
- else
- return snprintf(s, n, "%d", c);
-}
-
-static void ts_subtree__write_dot_string(FILE *f, const char *string) {
- for (const char *c = string; *c; c++) {
- if (*c == '"') {
- fputs("\\\"", f);
- } else if (*c == '\n') {
- fputs("\\n", f);
- } else {
- fputc(*c, f);
- }
- }
-}
-
-static const char *ROOT_FIELD = "__ROOT__";
-
-static size_t ts_subtree__write_to_string(
- Subtree self, char *string, size_t limit,
- const TSLanguage *language, bool include_all,
- TSSymbol alias_symbol, bool alias_is_named, const char *field_name
-) {
- if (!self.ptr) return snprintf(string, limit, "(NULL)");
-
- char *cursor = string;
- char **writer = (limit > 0) ? &cursor : &string;
- bool is_root = field_name == ROOT_FIELD;
- bool is_visible =
- include_all ||
- ts_subtree_missing(self) ||
- (
- alias_symbol
- ? alias_is_named
- : ts_subtree_visible(self) && ts_subtree_named(self)
- );
-
- if (is_visible) {
- if (!is_root) {
- cursor += snprintf(*writer, limit, " ");
- if (field_name) {
- cursor += snprintf(*writer, limit, "%s: ", field_name);
- }
- }
-
- if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
- cursor += snprintf(*writer, limit, "(UNEXPECTED ");
- cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
- } else {
- TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
- const char *symbol_name = ts_language_symbol_name(language, symbol);
- if (ts_subtree_missing(self)) {
- cursor += snprintf(*writer, limit, "(MISSING ");
- if (alias_is_named || ts_subtree_named(self)) {
- cursor += snprintf(*writer, limit, "%s", symbol_name);
- } else {
- cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
- }
- } else {
- cursor += snprintf(*writer, limit, "(%s", symbol_name);
- }
- }
- } else if (is_root) {
- TSSymbol symbol = ts_subtree_symbol(self);
- const char *symbol_name = ts_language_symbol_name(language, symbol);
- cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
- }
-
- if (ts_subtree_child_count(self)) {
- const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
- const TSFieldMapEntry *field_map, *field_map_end;
- ts_language_field_map(
- language,
- self.ptr->production_id,
- &field_map,
- &field_map_end
- );
-
- uint32_t structural_child_index = 0;
- for (uint32_t i = 0; i < self.ptr->child_count; i++) {
- Subtree child = self.ptr->children[i];
- if (ts_subtree_extra(child)) {
- cursor += ts_subtree__write_to_string(
- child, *writer, limit,
- language, include_all,
- 0, false, NULL
- );
- } else {
- TSSymbol alias_symbol = alias_sequence
- ? alias_sequence[structural_child_index]
- : 0;
- bool alias_is_named = alias_symbol
- ? ts_language_symbol_metadata(language, alias_symbol).named
- : false;
-
- const char *child_field_name = is_visible ? NULL : field_name;
- for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
- if (!i->inherited && i->child_index == structural_child_index) {
- child_field_name = language->field_names[i->field_id];
- break;
- }
- }
-
- cursor += ts_subtree__write_to_string(
- child, *writer, limit,
- language, include_all,
- alias_symbol, alias_is_named, child_field_name
- );
- structural_child_index++;
- }
- }
- }
-
- if (is_visible) cursor += snprintf(*writer, limit, ")");
-
- return cursor - string;
-}
-
-char *ts_subtree_string(
- Subtree self,
- const TSLanguage *language,
- bool include_all
-) {
- char scratch_string[1];
- size_t size = ts_subtree__write_to_string(
- self, scratch_string, 0,
- language, include_all,
- 0, false, ROOT_FIELD
- ) + 1;
- char *result = malloc(size * sizeof(char));
- ts_subtree__write_to_string(
- self, result, size,
- language, include_all,
- 0, false, ROOT_FIELD
- );
- return result;
-}
-
-void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
- const TSLanguage *language, TSSymbol alias_symbol,
- FILE *f) {
- TSSymbol subtree_symbol = ts_subtree_symbol(*self);
- TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
- uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
- fprintf(f, "tree_%p [label=\"", self);
- ts_subtree__write_dot_string(f, ts_language_symbol_name(language, symbol));
- fprintf(f, "\"");
-
- if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
- if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");
-
- fprintf(f, ", tooltip=\""
- "range: %u - %u\n"
- "state: %d\n"
- "error-cost: %u\n"
- "has-changes: %u\n"
- "repeat-depth: %u\n"
- "lookahead-bytes: %u",
- start_offset, end_offset,
- ts_subtree_parse_state(*self),
- ts_subtree_error_cost(*self),
- ts_subtree_has_changes(*self),
- ts_subtree_repeat_depth(*self),
- ts_subtree_lookahead_bytes(*self)
- );
-
- if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) {
- fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
- }
-
- fprintf(f, "\"]\n");
-
- uint32_t child_start_offset = start_offset;
- uint32_t child_info_offset =
- language->max_alias_sequence_length *
- ts_subtree_production_id(*self);
- for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
- const Subtree *child = &self->ptr->children[i];
- TSSymbol alias_symbol = 0;
- if (!ts_subtree_extra(*child) && child_info_offset) {
- alias_symbol = language->alias_sequences[child_info_offset];
- child_info_offset++;
- }
- ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
- fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i);
- child_start_offset += ts_subtree_total_bytes(*child);
- }
-}
-
-void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
- fprintf(f, "digraph tree {\n");
- fprintf(f, "edge [arrowhead=none]\n");
- ts_subtree__print_dot_graph(&self, 0, language, 0, f);
- fprintf(f, "}\n");
-}
-
-bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
- const ExternalScannerState *state1 = &empty_state;
- const ExternalScannerState *state2 = &empty_state;
- if (self.ptr && ts_subtree_has_external_tokens(self) && !self.ptr->child_count) {
- state1 = &self.ptr->external_scanner_state;
- }
- if (other.ptr && ts_subtree_has_external_tokens(other) && !other.ptr->child_count) {
- state2 = &other.ptr->external_scanner_state;
- }
- return ts_external_scanner_state_eq(state1, state2);
-}
diff --git a/src/tree_sitter/subtree.h b/src/tree_sitter/subtree.h
deleted file mode 100644
index 18c48dcbd0..0000000000
--- a/src/tree_sitter/subtree.h
+++ /dev/null
@@ -1,285 +0,0 @@
-#ifndef TREE_SITTER_SUBTREE_H_
-#define TREE_SITTER_SUBTREE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <limits.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include "./length.h"
-#include "./array.h"
-#include "./error_costs.h"
-#include "tree_sitter/api.h"
-#include "tree_sitter/parser.h"
-
-static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
-#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
-
-typedef union Subtree Subtree;
-typedef union MutableSubtree MutableSubtree;
-
-typedef struct {
- union {
- char *long_data;
- char short_data[24];
- };
- uint32_t length;
-} ExternalScannerState;
-
-typedef struct {
- bool is_inline : 1;
- bool visible : 1;
- bool named : 1;
- bool extra : 1;
- bool has_changes : 1;
- bool is_missing : 1;
- bool is_keyword : 1;
- uint8_t symbol;
- uint8_t padding_bytes;
- uint8_t size_bytes;
- uint8_t padding_columns;
- uint8_t padding_rows : 4;
- uint8_t lookahead_bytes : 4;
- uint16_t parse_state;
-} SubtreeInlineData;
-
-typedef struct {
- volatile uint32_t ref_count;
- Length padding;
- Length size;
- uint32_t lookahead_bytes;
- uint32_t error_cost;
- uint32_t child_count;
- TSSymbol symbol;
- TSStateId parse_state;
-
- bool visible : 1;
- bool named : 1;
- bool extra : 1;
- bool fragile_left : 1;
- bool fragile_right : 1;
- bool has_changes : 1;
- bool has_external_tokens : 1;
- bool is_missing : 1;
- bool is_keyword : 1;
-
- union {
- // Non-terminal subtrees (`child_count > 0`)
- struct {
- Subtree *children;
- uint32_t visible_child_count;
- uint32_t named_child_count;
- uint32_t node_count;
- uint32_t repeat_depth;
- int32_t dynamic_precedence;
- uint16_t production_id;
- struct {
- TSSymbol symbol;
- TSStateId parse_state;
- } first_leaf;
- };
-
- // External terminal subtrees (`child_count == 0 && has_external_tokens`)
- ExternalScannerState external_scanner_state;
-
- // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
- int32_t lookahead_char;
- };
-} SubtreeHeapData;
-
-union Subtree {
- SubtreeInlineData data;
- const SubtreeHeapData *ptr;
-};
-
-union MutableSubtree {
- SubtreeInlineData data;
- SubtreeHeapData *ptr;
-};
-
-typedef Array(Subtree) SubtreeArray;
-typedef Array(MutableSubtree) MutableSubtreeArray;
-
-typedef struct {
- MutableSubtreeArray free_trees;
- MutableSubtreeArray tree_stack;
-} SubtreePool;
-
-void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
-const char *ts_external_scanner_state_data(const ExternalScannerState *);
-
-void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
-void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
-SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
-void ts_subtree_array_reverse(SubtreeArray *);
-
-SubtreePool ts_subtree_pool_new(uint32_t capacity);
-void ts_subtree_pool_delete(SubtreePool *);
-
-Subtree ts_subtree_new_leaf(
- SubtreePool *, TSSymbol, Length, Length, uint32_t,
- TSStateId, bool, bool, const TSLanguage *
-);
-Subtree ts_subtree_new_error(
- SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
-);
-MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
-Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *);
-Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
-MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
-void ts_subtree_retain(Subtree);
-void ts_subtree_release(SubtreePool *, Subtree);
-bool ts_subtree_eq(Subtree, Subtree);
-int ts_subtree_compare(Subtree, Subtree);
-void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
-void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *);
-void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
-Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
-char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
-void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
-Subtree ts_subtree_last_external_token(Subtree);
-bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
-
-#define SUBTREE_GET(self, name) (self.data.is_inline ? self.data.name : self.ptr->name)
-
-static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
-static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
-static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
-static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
-static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
-static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
-static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
-static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
-static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
-
-#undef SUBTREE_GET
-
-static inline void ts_subtree_set_extra(MutableSubtree *self) {
- if (self->data.is_inline) {
- self->data.extra = true;
- } else {
- self->ptr->extra = true;
- }
-}
-
-static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
- if (self.data.is_inline) return self.data.symbol;
- if (self.ptr->child_count == 0) return self.ptr->symbol;
- return self.ptr->first_leaf.symbol;
-}
-
-static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
- if (self.data.is_inline) return self.data.parse_state;
- if (self.ptr->child_count == 0) return self.ptr->parse_state;
- return self.ptr->first_leaf.parse_state;
-}
-
-static inline Length ts_subtree_padding(Subtree self) {
- if (self.data.is_inline) {
- Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
- return result;
- } else {
- return self.ptr->padding;
- }
-}
-
-static inline Length ts_subtree_size(Subtree self) {
- if (self.data.is_inline) {
- Length result = {self.data.size_bytes, {0, self.data.size_bytes}};
- return result;
- } else {
- return self.ptr->size;
- }
-}
-
-static inline Length ts_subtree_total_size(Subtree self) {
- return length_add(ts_subtree_padding(self), ts_subtree_size(self));
-}
-
-static inline uint32_t ts_subtree_total_bytes(Subtree self) {
- return ts_subtree_total_size(self).bytes;
-}
-
-static inline uint32_t ts_subtree_child_count(Subtree self) {
- return self.data.is_inline ? 0 : self.ptr->child_count;
-}
-
-static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
- return self.data.is_inline ? 0 : self.ptr->repeat_depth;
-}
-
-static inline uint32_t ts_subtree_node_count(Subtree self) {
- return (self.data.is_inline || self.ptr->child_count == 0) ? 1 : self.ptr->node_count;
-}
-
-static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
- if (ts_subtree_child_count(self) > 0) {
- return self.ptr->visible_child_count;
- } else {
- return 0;
- }
-}
-
-static inline uint32_t ts_subtree_error_cost(Subtree self) {
- if (ts_subtree_missing(self)) {
- return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
- } else {
- return self.data.is_inline ? 0 : self.ptr->error_cost;
- }
-}
-
-static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
- return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
-}
-
-static inline uint16_t ts_subtree_production_id(Subtree self) {
- if (ts_subtree_child_count(self) > 0) {
- return self.ptr->production_id;
- } else {
- return 0;
- }
-}
-
-static inline bool ts_subtree_fragile_left(Subtree self) {
- return self.data.is_inline ? false : self.ptr->fragile_left;
-}
-
-static inline bool ts_subtree_fragile_right(Subtree self) {
- return self.data.is_inline ? false : self.ptr->fragile_right;
-}
-
-static inline bool ts_subtree_has_external_tokens(Subtree self) {
- return self.data.is_inline ? false : self.ptr->has_external_tokens;
-}
-
-static inline bool ts_subtree_is_fragile(Subtree self) {
- return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
-}
-
-static inline bool ts_subtree_is_error(Subtree self) {
- return ts_subtree_symbol(self) == ts_builtin_sym_error;
-}
-
-static inline bool ts_subtree_is_eof(Subtree self) {
- return ts_subtree_symbol(self) == ts_builtin_sym_end;
-}
-
-static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
- Subtree result;
- result.data = self.data;
- return result;
-}
-
-static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
- MutableSubtree result;
- result.data = self.data;
- return result;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_SUBTREE_H_
diff --git a/src/tree_sitter/tree.c b/src/tree_sitter/tree.c
deleted file mode 100644
index 391fa7f592..0000000000
--- a/src/tree_sitter/tree.c
+++ /dev/null
@@ -1,148 +0,0 @@
-#include "tree_sitter/api.h"
-#include "./array.h"
-#include "./get_changed_ranges.h"
-#include "./subtree.h"
-#include "./tree_cursor.h"
-#include "./tree.h"
-
-static const unsigned PARENT_CACHE_CAPACITY = 32;
-
-TSTree *ts_tree_new(
- Subtree root, const TSLanguage *language,
- const TSRange *included_ranges, unsigned included_range_count
-) {
- TSTree *result = ts_malloc(sizeof(TSTree));
- result->root = root;
- result->language = language;
- result->parent_cache = NULL;
- result->parent_cache_start = 0;
- result->parent_cache_size = 0;
- result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
- memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
- result->included_range_count = included_range_count;
- return result;
-}
-
-TSTree *ts_tree_copy(const TSTree *self) {
- ts_subtree_retain(self->root);
- return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
-}
-
-void ts_tree_delete(TSTree *self) {
- if (!self) return;
-
- SubtreePool pool = ts_subtree_pool_new(0);
- ts_subtree_release(&pool, self->root);
- ts_subtree_pool_delete(&pool);
- ts_free(self->included_ranges);
- if (self->parent_cache) ts_free(self->parent_cache);
- ts_free(self);
-}
-
-TSNode ts_tree_root_node(const TSTree *self) {
- return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
-}
-
-const TSLanguage *ts_tree_language(const TSTree *self) {
- return self->language;
-}
-
-void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
- for (unsigned i = 0; i < self->included_range_count; i++) {
- TSRange *range = &self->included_ranges[i];
- if (range->end_byte >= edit->old_end_byte) {
- if (range->end_byte != UINT32_MAX) {
- range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
- range->end_point = point_add(
- edit->new_end_point,
- point_sub(range->end_point, edit->old_end_point)
- );
- if (range->end_byte < edit->new_end_byte) {
- range->end_byte = UINT32_MAX;
- range->end_point = POINT_MAX;
- }
- }
- if (range->start_byte >= edit->old_end_byte) {
- range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
- range->start_point = point_add(
- edit->new_end_point,
- point_sub(range->start_point, edit->old_end_point)
- );
- if (range->start_byte < edit->new_end_byte) {
- range->start_byte = UINT32_MAX;
- range->start_point = POINT_MAX;
- }
- }
- }
- }
-
- SubtreePool pool = ts_subtree_pool_new(0);
- self->root = ts_subtree_edit(self->root, edit, &pool);
- self->parent_cache_start = 0;
- self->parent_cache_size = 0;
- ts_subtree_pool_delete(&pool);
-}
-
-TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
- TreeCursor cursor1 = {NULL, array_new()};
- TreeCursor cursor2 = {NULL, array_new()};
- ts_tree_cursor_init(&cursor1, ts_tree_root_node(self));
- ts_tree_cursor_init(&cursor2, ts_tree_root_node(other));
-
- TSRangeArray included_range_differences = array_new();
- ts_range_array_get_changed_ranges(
- self->included_ranges, self->included_range_count,
- other->included_ranges, other->included_range_count,
- &included_range_differences
- );
-
- TSRange *result;
- *count = ts_subtree_get_changed_ranges(
- &self->root, &other->root, &cursor1, &cursor2,
- self->language, &included_range_differences, &result
- );
-
- array_delete(&included_range_differences);
- array_delete(&cursor1.stack);
- array_delete(&cursor2.stack);
- return result;
-}
-
-void ts_tree_print_dot_graph(const TSTree *self, FILE *file) {
- ts_subtree_print_dot_graph(self->root, self->language, file);
-}
-
-TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) {
- for (uint32_t i = 0; i < self->parent_cache_size; i++) {
- uint32_t index = (self->parent_cache_start + i) % PARENT_CACHE_CAPACITY;
- ParentCacheEntry *entry = &self->parent_cache[index];
- if (entry->child == node->id) {
- return ts_node_new(self, entry->parent, entry->position, entry->alias_symbol);
- }
- }
- return ts_node_new(NULL, NULL, length_zero(), 0);
-}
-
-void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) {
- TSTree *self = (TSTree *)_self;
- if (!self->parent_cache) {
- self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry));
- }
-
- uint32_t index = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY;
- self->parent_cache[index] = (ParentCacheEntry) {
- .child = node->id,
- .parent = (const Subtree *)parent->id,
- .position = {
- parent->context[0],
- {parent->context[1], parent->context[2]}
- },
- .alias_symbol = parent->context[3],
- };
-
- if (self->parent_cache_size == PARENT_CACHE_CAPACITY) {
- self->parent_cache_start++;
- } else {
- self->parent_cache_size++;
- }
-}
diff --git a/src/tree_sitter/tree.h b/src/tree_sitter/tree.h
deleted file mode 100644
index 92a7e64179..0000000000
--- a/src/tree_sitter/tree.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef TREE_SITTER_TREE_H_
-#define TREE_SITTER_TREE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
- const Subtree *child;
- const Subtree *parent;
- Length position;
- TSSymbol alias_symbol;
-} ParentCacheEntry;
-
-struct TSTree {
- Subtree root;
- const TSLanguage *language;
- ParentCacheEntry *parent_cache;
- uint32_t parent_cache_start;
- uint32_t parent_cache_size;
- TSRange *included_ranges;
- unsigned included_range_count;
-};
-
-TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
-TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
-TSNode ts_tree_get_cached_parent(const TSTree *, const TSNode *);
-void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_TREE_H_
diff --git a/src/tree_sitter/tree_cursor.c b/src/tree_sitter/tree_cursor.c
deleted file mode 100644
index 00b9679d73..0000000000
--- a/src/tree_sitter/tree_cursor.c
+++ /dev/null
@@ -1,367 +0,0 @@
-#include "tree_sitter/api.h"
-#include "./alloc.h"
-#include "./tree_cursor.h"
-#include "./language.h"
-#include "./tree.h"
-
-typedef struct {
- Subtree parent;
- const TSTree *tree;
- Length position;
- uint32_t child_index;
- uint32_t structural_child_index;
- const TSSymbol *alias_sequence;
-} CursorChildIterator;
-
-// CursorChildIterator
-
-static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
- TreeCursorEntry *last_entry = array_back(&self->stack);
- if (ts_subtree_child_count(*last_entry->subtree) == 0) {
- return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL};
- }
- const TSSymbol *alias_sequence = ts_language_alias_sequence(
- self->tree->language,
- last_entry->subtree->ptr->production_id
- );
- return (CursorChildIterator) {
- .tree = self->tree,
- .parent = *last_entry->subtree,
- .position = last_entry->position,
- .child_index = 0,
- .structural_child_index = 0,
- .alias_sequence = alias_sequence,
- };
-}
-
-static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
- TreeCursorEntry *result,
- bool *visible) {
- if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
- const Subtree *child = &self->parent.ptr->children[self->child_index];
- *result = (TreeCursorEntry) {
- .subtree = child,
- .position = self->position,
- .child_index = self->child_index,
- .structural_child_index = self->structural_child_index,
- };
- *visible = ts_subtree_visible(*child);
- bool extra = ts_subtree_extra(*child);
- if (!extra && self->alias_sequence) {
- *visible |= self->alias_sequence[self->structural_child_index];
- self->structural_child_index++;
- }
-
- self->position = length_add(self->position, ts_subtree_size(*child));
- self->child_index++;
-
- if (self->child_index < self->parent.ptr->child_count) {
- Subtree next_child = self->parent.ptr->children[self->child_index];
- self->position = length_add(self->position, ts_subtree_padding(next_child));
- }
-
- return true;
-}
-
-// TSTreeCursor - lifecycle
-
-TSTreeCursor ts_tree_cursor_new(TSNode node) {
- TSTreeCursor self = {NULL, NULL, {0, 0}};
- ts_tree_cursor_init((TreeCursor *)&self, node);
- return self;
-}
-
-void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
- ts_tree_cursor_init((TreeCursor *)_self, node);
-}
-
-void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
- self->tree = node.tree;
- array_clear(&self->stack);
- array_push(&self->stack, ((TreeCursorEntry) {
- .subtree = (const Subtree *)node.id,
- .position = {
- ts_node_start_byte(node),
- ts_node_start_point(node)
- },
- .child_index = 0,
- .structural_child_index = 0,
- }));
-}
-
-void ts_tree_cursor_delete(TSTreeCursor *_self) {
- TreeCursor *self = (TreeCursor *)_self;
- array_delete(&self->stack);
-}
-
-// TSTreeCursor - walking the tree
-
-bool ts_tree_cursor_goto_first_child(TSTreeCursor *_self) {
- TreeCursor *self = (TreeCursor *)_self;
-
- bool did_descend;
- do {
- did_descend = false;
-
- bool visible;
- TreeCursorEntry entry;
- CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
- while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
- if (visible) {
- array_push(&self->stack, entry);
- return true;
- }
-
- if (ts_subtree_visible_child_count(*entry.subtree) > 0) {
- array_push(&self->stack, entry);
- did_descend = true;
- break;
- }
- }
- } while (did_descend);
-
- return false;
-}
-
-int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t goal_byte) {
- TreeCursor *self = (TreeCursor *)_self;
- uint32_t initial_size = self->stack.size;
- uint32_t visible_child_index = 0;
-
- bool did_descend;
- do {
- did_descend = false;
-
- bool visible;
- TreeCursorEntry entry;
- CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
- while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
- uint32_t end_byte = entry.position.bytes + ts_subtree_size(*entry.subtree).bytes;
- bool at_goal = end_byte > goal_byte;
- uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
-
- if (at_goal) {
- if (visible) {
- array_push(&self->stack, entry);
- return visible_child_index;
- }
-
- if (visible_child_count > 0) {
- array_push(&self->stack, entry);
- did_descend = true;
- break;
- }
- } else if (visible) {
- visible_child_index++;
- } else {
- visible_child_index += visible_child_count;
- }
- }
- } while (did_descend);
-
- if (self->stack.size > initial_size &&
- ts_tree_cursor_goto_next_sibling((TSTreeCursor *)self)) {
- return visible_child_index;
- }
-
- self->stack.size = initial_size;
- return -1;
-}
-
-bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) {
- TreeCursor *self = (TreeCursor *)_self;
- uint32_t initial_size = self->stack.size;
-
- while (self->stack.size > 1) {
- TreeCursorEntry entry = array_pop(&self->stack);
- CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
- iterator.child_index = entry.child_index;
- iterator.structural_child_index = entry.structural_child_index;
- iterator.position = entry.position;
-
- bool visible = false;
- ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible);
- if (visible && self->stack.size + 1 < initial_size) break;
-
- while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
- if (visible) {
- array_push(&self->stack, entry);
- return true;
- }
-
- if (ts_subtree_visible_child_count(*entry.subtree)) {
- array_push(&self->stack, entry);
- ts_tree_cursor_goto_first_child(_self);
- return true;
- }
- }
- }
-
- self->stack.size = initial_size;
- return false;
-}
-
-bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
- TreeCursor *self = (TreeCursor *)_self;
- for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
- TreeCursorEntry *entry = &self->stack.contents[i];
- bool is_aliased = false;
- if (i > 0) {
- TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
- const TSSymbol *alias_sequence = ts_language_alias_sequence(
- self->tree->language,
- parent_entry->subtree->ptr->production_id
- );
- is_aliased = alias_sequence && alias_sequence[entry->structural_child_index];
- }
- if (ts_subtree_visible(*entry->subtree) || is_aliased) {
- self->stack.size = i + 1;
- return true;
- }
- }
- return false;
-}
-
-TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
- const TreeCursor *self = (const TreeCursor *)_self;
- TreeCursorEntry *last_entry = array_back(&self->stack);
- TSSymbol alias_symbol = 0;
- if (self->stack.size > 1) {
- TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
- const TSSymbol *alias_sequence = ts_language_alias_sequence(
- self->tree->language,
- parent_entry->subtree->ptr->production_id
- );
- if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) {
- alias_symbol = alias_sequence[last_entry->structural_child_index];
- }
- }
- return ts_node_new(
- self->tree,
- last_entry->subtree,
- last_entry->position,
- alias_symbol
- );
-}
-
-TSFieldId ts_tree_cursor_current_status(
- const TSTreeCursor *_self,
- bool *can_have_later_siblings,
- bool *can_have_later_siblings_with_this_field
-) {
- const TreeCursor *self = (const TreeCursor *)_self;
- TSFieldId result = 0;
- *can_have_later_siblings = false;
- *can_have_later_siblings_with_this_field = false;
-
- // Walk up the tree, visiting the current node and its invisible ancestors,
- // because fields can refer to nodes through invisible *wrapper* nodes,
- for (unsigned i = self->stack.size - 1; i > 0; i--) {
- TreeCursorEntry *entry = &self->stack.contents[i];
- TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
-
- // Stop walking up when a visible ancestor is found.
- if (i != self->stack.size - 1) {
- if (ts_subtree_visible(*entry->subtree)) break;
- const TSSymbol *alias_sequence = ts_language_alias_sequence(
- self->tree->language,
- parent_entry->subtree->ptr->production_id
- );
- if (alias_sequence && alias_sequence[entry->structural_child_index]) {
- break;
- }
- }
-
- if (ts_subtree_child_count(*parent_entry->subtree) > entry->child_index + 1) {
- *can_have_later_siblings = true;
- }
-
- if (ts_subtree_extra(*entry->subtree)) break;
-
- const TSFieldMapEntry *field_map, *field_map_end;
- ts_language_field_map(
- self->tree->language,
- parent_entry->subtree->ptr->production_id,
- &field_map, &field_map_end
- );
-
- // Look for a field name associated with the current node.
- if (!result) {
- for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
- if (!i->inherited && i->child_index == entry->structural_child_index) {
- result = i->field_id;
- *can_have_later_siblings_with_this_field = false;
- break;
- }
- }
- }
-
- // Determine if there other later siblings with the same field name.
- if (result) {
- for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
- if (i->field_id == result && i->child_index > entry->structural_child_index) {
- *can_have_later_siblings_with_this_field = true;
- break;
- }
- }
- }
- }
-
- return result;
-}
-
-TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
- const TreeCursor *self = (const TreeCursor *)_self;
-
- // Walk up the tree, visiting the current node and its invisible ancestors.
- for (unsigned i = self->stack.size - 1; i > 0; i--) {
- TreeCursorEntry *entry = &self->stack.contents[i];
- TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
-
- // Stop walking up when another visible node is found.
- if (i != self->stack.size - 1) {
- if (ts_subtree_visible(*entry->subtree)) break;
- const TSSymbol *alias_sequence = ts_language_alias_sequence(
- self->tree->language,
- parent_entry->subtree->ptr->production_id
- );
- if (alias_sequence && alias_sequence[entry->structural_child_index]) {
- break;
- }
- }
-
- if (ts_subtree_extra(*entry->subtree)) break;
-
- const TSFieldMapEntry *field_map, *field_map_end;
- ts_language_field_map(
- self->tree->language,
- parent_entry->subtree->ptr->production_id,
- &field_map, &field_map_end
- );
- for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
- if (!i->inherited && i->child_index == entry->structural_child_index) {
- return i->field_id;
- }
- }
- }
- return 0;
-}
-
-const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
- TSFieldId id = ts_tree_cursor_current_field_id(_self);
- if (id) {
- const TreeCursor *self = (const TreeCursor *)_self;
- return self->tree->language->field_names[id];
- } else {
- return NULL;
- }
-}
-
-TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
- const TreeCursor *cursor = (const TreeCursor *)_cursor;
- TSTreeCursor res = {NULL, NULL, {0, 0}};
- TreeCursor *copy = (TreeCursor *)&res;
- copy->tree = cursor->tree;
- array_push_all(&copy->stack, &cursor->stack);
- return res;
-}
diff --git a/src/tree_sitter/tree_cursor.h b/src/tree_sitter/tree_cursor.h
deleted file mode 100644
index 5a39dd278c..0000000000
--- a/src/tree_sitter/tree_cursor.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef TREE_SITTER_TREE_CURSOR_H_
-#define TREE_SITTER_TREE_CURSOR_H_
-
-#include "./subtree.h"
-
-typedef struct {
- const Subtree *subtree;
- Length position;
- uint32_t child_index;
- uint32_t structural_child_index;
-} TreeCursorEntry;
-
-typedef struct {
- const TSTree *tree;
- Array(TreeCursorEntry) stack;
-} TreeCursor;
-
-void ts_tree_cursor_init(TreeCursor *, TSNode);
-TSFieldId ts_tree_cursor_current_status(const TSTreeCursor *, bool *, bool *);
-
-#endif // TREE_SITTER_TREE_CURSOR_H_
diff --git a/src/tree_sitter/treesitter_commit_hash.txt b/src/tree_sitter/treesitter_commit_hash.txt
deleted file mode 100644
index 322cdd24a6..0000000000
--- a/src/tree_sitter/treesitter_commit_hash.txt
+++ /dev/null
@@ -1 +0,0 @@
-87df53a99b51bce0d1e901cd6838f24e1c7a4073
diff --git a/src/tree_sitter/unicode.h b/src/tree_sitter/unicode.h
deleted file mode 100644
index 2ab51c2a3a..0000000000
--- a/src/tree_sitter/unicode.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef TREE_SITTER_UNICODE_H_
-#define TREE_SITTER_UNICODE_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <limits.h>
-#include <stdint.h>
-
-#define U_EXPORT
-#define U_EXPORT2
-#include "./unicode/utf8.h"
-#include "./unicode/utf16.h"
-
-static const int32_t TS_DECODE_ERROR = U_SENTINEL;
-
-// These functions read one unicode code point from the given string,
-// returning the number of bytes consumed.
-typedef uint32_t (*UnicodeDecodeFunction)(
- const uint8_t *string,
- uint32_t length,
- int32_t *code_point
-);
-
-static inline uint32_t ts_decode_utf8(
- const uint8_t *string,
- uint32_t length,
- int32_t *code_point
-) {
- uint32_t i = 0;
- U8_NEXT(string, i, length, *code_point);
- return i;
-}
-
-static inline uint32_t ts_decode_utf16(
- const uint8_t *string,
- uint32_t length,
- int32_t *code_point
-) {
- uint32_t i = 0;
- U16_NEXT(((uint16_t *)string), i, length, *code_point);
- return i * 2;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // TREE_SITTER_UNICODE_H_
diff --git a/src/tree_sitter/unicode/ICU_SHA b/src/tree_sitter/unicode/ICU_SHA
deleted file mode 100644
index 3622283ba3..0000000000
--- a/src/tree_sitter/unicode/ICU_SHA
+++ /dev/null
@@ -1 +0,0 @@
-552b01f61127d30d6589aa4bf99468224979b661
diff --git a/src/tree_sitter/unicode/LICENSE b/src/tree_sitter/unicode/LICENSE
deleted file mode 100644
index 2e01e36876..0000000000
--- a/src/tree_sitter/unicode/LICENSE
+++ /dev/null
@@ -1,414 +0,0 @@
-COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
-
-Copyright © 1991-2019 Unicode, Inc. All rights reserved.
-Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of the Unicode data files and any associated documentation
-(the "Data Files") or Unicode software and any associated documentation
-(the "Software") to deal in the Data Files or Software
-without restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, and/or sell copies of
-the Data Files or Software, and to permit persons to whom the Data Files
-or Software are furnished to do so, provided that either
-(a) this copyright and permission notice appear with all copies
-of the Data Files or Software, or
-(b) this copyright and permission notice appear in associated
-Documentation.
-
-THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
-ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
-WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT OF THIRD PARTY RIGHTS.
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
-NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
-DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
-DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-PERFORMANCE OF THE DATA FILES OR SOFTWARE.
-
-Except as contained in this notice, the name of a copyright holder
-shall not be used in advertising or otherwise to promote the sale,
-use or other dealings in these Data Files or Software without prior
-written authorization of the copyright holder.
-
----------------------
-
-Third-Party Software Licenses
-
-This section contains third-party software notices and/or additional
-terms for licensed third-party software components included within ICU
-libraries.
-
-1. ICU License - ICU 1.8.1 to ICU 57.1
-
-COPYRIGHT AND PERMISSION NOTICE
-
-Copyright (c) 1995-2016 International Business Machines Corporation and others
-All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, and/or sell copies of the Software, and to permit persons
-to whom the Software is furnished to do so, provided that the above
-copyright notice(s) and this permission notice appear in all copies of
-the Software and that both the above copyright notice(s) and this
-permission notice appear in supporting documentation.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
-OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
-HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
-SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
-RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
-CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-Except as contained in this notice, the name of a copyright holder
-shall not be used in advertising or otherwise to promote the sale, use
-or other dealings in this Software without prior written authorization
-of the copyright holder.
-
-All trademarks and registered trademarks mentioned herein are the
-property of their respective owners.
-
-2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
-
- # The Google Chrome software developed by Google is licensed under
- # the BSD license. Other software included in this distribution is
- # provided under other licenses, as set forth below.
- #
- # The BSD License
- # http://opensource.org/licenses/bsd-license.php
- # Copyright (C) 2006-2008, Google Inc.
- #
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are met:
- #
- # Redistributions of source code must retain the above copyright notice,
- # this list of conditions and the following disclaimer.
- # Redistributions in binary form must reproduce the above
- # copyright notice, this list of conditions and the following
- # disclaimer in the documentation and/or other materials provided with
- # the distribution.
- # Neither the name of Google Inc. nor the names of its
- # contributors may be used to endorse or promote products derived from
- # this software without specific prior written permission.
- #
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- #
- # The word list in cjdict.txt are generated by combining three word lists
- # listed below with further processing for compound word breaking. The
- # frequency is generated with an iterative training against Google web
- # corpora.
- #
- # * Libtabe (Chinese)
- # - https://sourceforge.net/project/?group_id=1519
- # - Its license terms and conditions are shown below.
- #
- # * IPADIC (Japanese)
- # - http://chasen.aist-nara.ac.jp/chasen/distribution.html
- # - Its license terms and conditions are shown below.
- #
- # ---------COPYING.libtabe ---- BEGIN--------------------
- #
- # /*
- # * Copyright (c) 1999 TaBE Project.
- # * Copyright (c) 1999 Pai-Hsiang Hsiao.
- # * All rights reserved.
- # *
- # * Redistribution and use in source and binary forms, with or without
- # * modification, are permitted provided that the following conditions
- # * are met:
- # *
- # * . Redistributions of source code must retain the above copyright
- # * notice, this list of conditions and the following disclaimer.
- # * . Redistributions in binary form must reproduce the above copyright
- # * notice, this list of conditions and the following disclaimer in
- # * the documentation and/or other materials provided with the
- # * distribution.
- # * . Neither the name of the TaBE Project nor the names of its
- # * contributors may be used to endorse or promote products derived
- # * from this software without specific prior written permission.
- # *
- # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- # * OF THE POSSIBILITY OF SUCH DAMAGE.
- # */
- #
- # /*
- # * Copyright (c) 1999 Computer Systems and Communication Lab,
- # * Institute of Information Science, Academia
- # * Sinica. All rights reserved.
- # *
- # * Redistribution and use in source and binary forms, with or without
- # * modification, are permitted provided that the following conditions
- # * are met:
- # *
- # * . Redistributions of source code must retain the above copyright
- # * notice, this list of conditions and the following disclaimer.
- # * . Redistributions in binary form must reproduce the above copyright
- # * notice, this list of conditions and the following disclaimer in
- # * the documentation and/or other materials provided with the
- # * distribution.
- # * . Neither the name of the Computer Systems and Communication Lab
- # * nor the names of its contributors may be used to endorse or
- # * promote products derived from this software without specific
- # * prior written permission.
- # *
- # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- # * OF THE POSSIBILITY OF SUCH DAMAGE.
- # */
- #
- # Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
- # University of Illinois
- # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
- #
- # ---------------COPYING.libtabe-----END--------------------------------
- #
- #
- # ---------------COPYING.ipadic-----BEGIN-------------------------------
- #
- # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
- # and Technology. All Rights Reserved.
- #
- # Use, reproduction, and distribution of this software is permitted.
- # Any copy of this software, whether in its original form or modified,
- # must include both the above copyright notice and the following
- # paragraphs.
- #
- # Nara Institute of Science and Technology (NAIST),
- # the copyright holders, disclaims all warranties with regard to this
- # software, including all implied warranties of merchantability and
- # fitness, in no event shall NAIST be liable for
- # any special, indirect or consequential damages or any damages
- # whatsoever resulting from loss of use, data or profits, whether in an
- # action of contract, negligence or other tortuous action, arising out
- # of or in connection with the use or performance of this software.
- #
- # A large portion of the dictionary entries
- # originate from ICOT Free Software. The following conditions for ICOT
- # Free Software applies to the current dictionary as well.
- #
- # Each User may also freely distribute the Program, whether in its
- # original form or modified, to any third party or parties, PROVIDED
- # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
- # on, or be attached to, the Program, which is distributed substantially
- # in the same form as set out herein and that such intended
- # distribution, if actually made, will neither violate or otherwise
- # contravene any of the laws and regulations of the countries having
- # jurisdiction over the User or the intended distribution itself.
- #
- # NO WARRANTY
- #
- # The program was produced on an experimental basis in the course of the
- # research and development conducted during the project and is provided
- # to users as so produced on an experimental basis. Accordingly, the
- # program is provided without any warranty whatsoever, whether express,
- # implied, statutory or otherwise. The term "warranty" used herein
- # includes, but is not limited to, any warranty of the quality,
- # performance, merchantability and fitness for a particular purpose of
- # the program and the nonexistence of any infringement or violation of
- # any right of any third party.
- #
- # Each user of the program will agree and understand, and be deemed to
- # have agreed and understood, that there is no warranty whatsoever for
- # the program and, accordingly, the entire risk arising from or
- # otherwise connected with the program is assumed by the user.
- #
- # Therefore, neither ICOT, the copyright holder, or any other
- # organization that participated in or was otherwise related to the
- # development of the program and their respective officials, directors,
- # officers and other employees shall be held liable for any and all
- # damages, including, without limitation, general, special, incidental
- # and consequential damages, arising out of or otherwise in connection
- # with the use or inability to use the program or any product, material
- # or result produced or otherwise obtained by using the program,
- # regardless of whether they have been advised of, or otherwise had
- # knowledge of, the possibility of such damages at any time during the
- # project or thereafter. Each user will be deemed to have agreed to the
- # foregoing by his or her commencement of use of the program. The term
- # "use" as used herein includes, but is not limited to, the use,
- # modification, copying and distribution of the program and the
- # production of secondary products from the program.
- #
- # In the case where the program, whether in its original form or
- # modified, was distributed or delivered to or received by a user from
- # any person, organization or entity other than ICOT, unless it makes or
- # grants independently of ICOT any specific warranty to the user in
- # writing, such person, organization or entity, will also be exempted
- # from and not be held liable to the user for any such damages as noted
- # above as far as the program is concerned.
- #
- # ---------------COPYING.ipadic-----END----------------------------------
-
-3. Lao Word Break Dictionary Data (laodict.txt)
-
- # Copyright (c) 2013 International Business Machines Corporation
- # and others. All Rights Reserved.
- #
- # Project: http://code.google.com/p/lao-dictionary/
- # Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
- # License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
- # (copied below)
- #
- # This file is derived from the above dictionary, with slight
- # modifications.
- # ----------------------------------------------------------------------
- # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification,
- # are permitted provided that the following conditions are met:
- #
- #
- # Redistributions of source code must retain the above copyright notice, this
- # list of conditions and the following disclaimer. Redistributions in
- # binary form must reproduce the above copyright notice, this list of
- # conditions and the following disclaimer in the documentation and/or
- # other materials provided with the distribution.
- #
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
- # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- # OF THE POSSIBILITY OF SUCH DAMAGE.
- # --------------------------------------------------------------------------
-
-4. Burmese Word Break Dictionary Data (burmesedict.txt)
-
- # Copyright (c) 2014 International Business Machines Corporation
- # and others. All Rights Reserved.
- #
- # This list is part of a project hosted at:
- # github.com/kanyawtech/myanmar-karen-word-lists
- #
- # --------------------------------------------------------------------------
- # Copyright (c) 2013, LeRoy Benjamin Sharon
- # All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met: Redistributions of source code must retain the above
- # copyright notice, this list of conditions and the following
- # disclaimer. Redistributions in binary form must reproduce the
- # above copyright notice, this list of conditions and the following
- # disclaimer in the documentation and/or other materials provided
- # with the distribution.
- #
- # Neither the name Myanmar Karen Word Lists, nor the names of its
- # contributors may be used to endorse or promote products derived
- # from this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
- # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
- # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
- # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
- # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
- # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- # SUCH DAMAGE.
- # --------------------------------------------------------------------------
-
-5. Time Zone Database
-
- ICU uses the public domain data and code derived from Time Zone
-Database for its time zone support. The ownership of the TZ database
-is explained in BCP 175: Procedure for Maintaining the Time Zone
-Database section 7.
-
- # 7. Database Ownership
- #
- # The TZ database itself is not an IETF Contribution or an IETF
- # document. Rather it is a pre-existing and regularly updated work
- # that is in the public domain, and is intended to remain in the
- # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
- # not apply to the TZ Database or contributions that individuals make
- # to it. Should any claims be made and substantiated against the TZ
- # Database, the organization that is providing the IANA
- # Considerations defined in this RFC, under the memorandum of
- # understanding with the IETF, currently ICANN, may act in accordance
- # with all competent court orders. No ownership claims will be made
- # by ICANN or the IETF Trust on the database or the code. Any person
- # making a contribution to the database or code waives all rights to
- # future claims in that contribution or in the TZ Database.
-
-6. Google double-conversion
-
-Copyright 2006-2011, the V8 project authors. All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials provided
- with the distribution.
- * Neither the name of Google Inc. nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/tree_sitter/unicode/README.md b/src/tree_sitter/unicode/README.md
deleted file mode 100644
index 623b8e3843..0000000000
--- a/src/tree_sitter/unicode/README.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# ICU Parts
-
-This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).
-
-### License
-
-The license for these files is contained in the `LICENSE` file within this directory.
-
-### Contents
-
-* Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
- * `utf8.h`
- * `utf16.h`
- * `umachine.h`
-* Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed:
- * `ptypes.h`
- * `urename.h`
- * `utf.h`
-* `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
-* `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
-* `README.md` - This text file.
-
-### Updating ICU
-
-To incorporate changes from the upstream `icu` repository:
-
-* Update `ICU_SHA` with the new Git SHA.
-* Update `LICENSE` with the license text from the directory mentioned above.
-* Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.
diff --git a/src/tree_sitter/unicode/ptypes.h b/src/tree_sitter/unicode/ptypes.h
deleted file mode 100644
index ac79ad0f98..0000000000
--- a/src/tree_sitter/unicode/ptypes.h
+++ /dev/null
@@ -1 +0,0 @@
-// This file must exist in order for `utf8.h` and `utf16.h` to be used.
diff --git a/src/tree_sitter/unicode/umachine.h b/src/tree_sitter/unicode/umachine.h
deleted file mode 100644
index bbf6ef9c8b..0000000000
--- a/src/tree_sitter/unicode/umachine.h
+++ /dev/null
@@ -1,448 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: umachine.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999sep13
-* created by: Markus W. Scherer
-*
-* This file defines basic types and constants for ICU to be
-* platform-independent. umachine.h and utf.h are included into
-* utypes.h to provide all the general definitions for ICU.
-* All of these definitions used to be in utypes.h before
-* the UTF-handling macros made this unmaintainable.
-*/
-
-#ifndef __UMACHINE_H__
-#define __UMACHINE_H__
-
-
-/**
- * \file
- * \brief Basic types and constants for UTF
- *
- * <h2> Basic types and constants for UTF </h2>
- * This file defines basic types and constants for utf.h to be
- * platform-independent. umachine.h and utf.h are included into
- * utypes.h to provide all the general definitions for ICU.
- * All of these definitions used to be in utypes.h before
- * the UTF-handling macros made this unmaintainable.
- *
- */
-/*==========================================================================*/
-/* Include platform-dependent definitions */
-/* which are contained in the platform-specific file platform.h */
-/*==========================================================================*/
-
-#include "./ptypes.h" /* platform.h is included in ptypes.h */
-
-/*
- * ANSI C headers:
- * stddef.h defines wchar_t
- */
-#include <stddef.h>
-
-/*==========================================================================*/
-/* For C wrappers, we use the symbol U_STABLE. */
-/* This works properly if the includer is C or C++. */
-/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
-/*==========================================================================*/
-
-/**
- * \def U_CFUNC
- * This is used in a declaration of a library private ICU C function.
- * @stable ICU 2.4
- */
-
-/**
- * \def U_CDECL_BEGIN
- * This is used to begin a declaration of a library private ICU C API.
- * @stable ICU 2.4
- */
-
-/**
- * \def U_CDECL_END
- * This is used to end a declaration of a library private ICU C API
- * @stable ICU 2.4
- */
-
-#ifdef __cplusplus
-# define U_CFUNC extern "C"
-# define U_CDECL_BEGIN extern "C" {
-# define U_CDECL_END }
-#else
-# define U_CFUNC extern
-# define U_CDECL_BEGIN
-# define U_CDECL_END
-#endif
-
-#ifndef U_ATTRIBUTE_DEPRECATED
-/**
- * \def U_ATTRIBUTE_DEPRECATED
- * This is used for GCC specific attributes
- * @internal
- */
-#if U_GCC_MAJOR_MINOR >= 302
-# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
-/**
- * \def U_ATTRIBUTE_DEPRECATED
- * This is used for Visual C++ specific attributes
- * @internal
- */
-#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
-# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
-#else
-# define U_ATTRIBUTE_DEPRECATED
-#endif
-#endif
-
-/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
-#define U_CAPI U_CFUNC U_EXPORT
-/** This is used to declare a function as a stable public ICU C API*/
-#define U_STABLE U_CAPI
-/** This is used to declare a function as a draft public ICU C API */
-#define U_DRAFT U_CAPI
-/** This is used to declare a function as a deprecated public ICU C API */
-#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
-/** This is used to declare a function as an obsolete public ICU C API */
-#define U_OBSOLETE U_CAPI
-/** This is used to declare a function as an internal ICU C API */
-#define U_INTERNAL U_CAPI
-
-/**
- * \def U_OVERRIDE
- * Defined to the C++11 "override" keyword if available.
- * Denotes a class or member which is an override of the base class.
- * May result in an error if it applied to something not an override.
- * @internal
- */
-#ifndef U_OVERRIDE
-#define U_OVERRIDE override
-#endif
-
-/**
- * \def U_FINAL
- * Defined to the C++11 "final" keyword if available.
- * Denotes a class or member which may not be overridden in subclasses.
- * May result in an error if subclasses attempt to override.
- * @internal
- */
-#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
-#define U_FINAL final
-#endif
-
-// Before ICU 65, function-like, multi-statement ICU macros were just defined as
-// series of statements wrapped in { } blocks and the caller could choose to
-// either treat them as if they were actual functions and end the invocation
-// with a trailing ; creating an empty statement after the block or else omit
-// this trailing ; using the knowledge that the macro would expand to { }.
-//
-// But doing so doesn't work well with macros that look like functions and
-// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
-// switches to the standard solution of wrapping such macros in do { } while.
-//
-// This will however break existing code that depends on being able to invoke
-// these macros without a trailing ; so to be able to remain compatible with
-// such code the wrapper is itself defined as macros so that it's possible to
-// build ICU 65 and later with the old macro behaviour, like this:
-//
-// CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
-// runConfigureICU ...
-
-/**
- * \def UPRV_BLOCK_MACRO_BEGIN
- * Defined as the "do" keyword by default.
- * @internal
- */
-#ifndef UPRV_BLOCK_MACRO_BEGIN
-#define UPRV_BLOCK_MACRO_BEGIN do
-#endif
-
-/**
- * \def UPRV_BLOCK_MACRO_END
- * Defined as "while (FALSE)" by default.
- * @internal
- */
-#ifndef UPRV_BLOCK_MACRO_END
-#define UPRV_BLOCK_MACRO_END while (FALSE)
-#endif
-
-/*==========================================================================*/
-/* limits for int32_t etc., like in POSIX inttypes.h */
-/*==========================================================================*/
-
-#ifndef INT8_MIN
-/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
-# define INT8_MIN ((int8_t)(-128))
-#endif
-#ifndef INT16_MIN
-/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
-# define INT16_MIN ((int16_t)(-32767-1))
-#endif
-#ifndef INT32_MIN
-/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
-# define INT32_MIN ((int32_t)(-2147483647-1))
-#endif
-
-#ifndef INT8_MAX
-/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
-# define INT8_MAX ((int8_t)(127))
-#endif
-#ifndef INT16_MAX
-/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
-# define INT16_MAX ((int16_t)(32767))
-#endif
-#ifndef INT32_MAX
-/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
-# define INT32_MAX ((int32_t)(2147483647))
-#endif
-
-#ifndef UINT8_MAX
-/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
-# define UINT8_MAX ((uint8_t)(255U))
-#endif
-#ifndef UINT16_MAX
-/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
-# define UINT16_MAX ((uint16_t)(65535U))
-#endif
-#ifndef UINT32_MAX
-/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
-# define UINT32_MAX ((uint32_t)(4294967295U))
-#endif
-
-#if defined(U_INT64_T_UNAVAILABLE)
-# error int64_t is required for decimal format and rule-based number format.
-#else
-# ifndef INT64_C
-/**
- * Provides a platform independent way to specify a signed 64-bit integer constant.
- * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
- * @stable ICU 2.8
- */
-# define INT64_C(c) c ## LL
-# endif
-# ifndef UINT64_C
-/**
- * Provides a platform independent way to specify an unsigned 64-bit integer constant.
- * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
- * @stable ICU 2.8
- */
-# define UINT64_C(c) c ## ULL
-# endif
-# ifndef U_INT64_MIN
-/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
-# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
-# endif
-# ifndef U_INT64_MAX
-/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
-# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
-# endif
-# ifndef U_UINT64_MAX
-/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
-# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
-# endif
-#endif
-
-/*==========================================================================*/
-/* Boolean data type */
-/*==========================================================================*/
-
-/** The ICU boolean type @stable ICU 2.0 */
-typedef int8_t UBool;
-
-#ifndef TRUE
-/** The TRUE value of a UBool @stable ICU 2.0 */
-# define TRUE 1
-#endif
-#ifndef FALSE
-/** The FALSE value of a UBool @stable ICU 2.0 */
-# define FALSE 0
-#endif
-
-
-/*==========================================================================*/
-/* Unicode data types */
-/*==========================================================================*/
-
-/* wchar_t-related definitions -------------------------------------------- */
-
-/*
- * \def U_WCHAR_IS_UTF16
- * Defined if wchar_t uses UTF-16.
- *
- * @stable ICU 2.0
- */
-/*
- * \def U_WCHAR_IS_UTF32
- * Defined if wchar_t uses UTF-32.
- *
- * @stable ICU 2.0
- */
-#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
-# ifdef __STDC_ISO_10646__
-# if (U_SIZEOF_WCHAR_T==2)
-# define U_WCHAR_IS_UTF16
-# elif (U_SIZEOF_WCHAR_T==4)
-# define U_WCHAR_IS_UTF32
-# endif
-# elif defined __UCS2__
-# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
-# define U_WCHAR_IS_UTF16
-# endif
-# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
-# if (U_SIZEOF_WCHAR_T==4)
-# define U_WCHAR_IS_UTF32
-# endif
-# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
-# define U_WCHAR_IS_UTF32
-# elif U_PLATFORM_HAS_WIN32_API
-# define U_WCHAR_IS_UTF16
-# endif
-#endif
-
-/* UChar and UChar32 definitions -------------------------------------------- */
-
-/** Number of bytes in a UChar. @stable ICU 2.0 */
-#define U_SIZEOF_UCHAR 2
-
-/**
- * \def U_CHAR16_IS_TYPEDEF
- * If 1, then char16_t is a typedef and not a real type (yet)
- * @internal
- */
-#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
-// for AIX, uchar.h needs to be included
-# include <uchar.h>
-# define U_CHAR16_IS_TYPEDEF 1
-#elif defined(_MSC_VER) && (_MSC_VER < 1900)
-// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
-// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
-# define U_CHAR16_IS_TYPEDEF 1
-#else
-# define U_CHAR16_IS_TYPEDEF 0
-#endif
-
-
-/**
- * \var UChar
- *
- * The base type for UTF-16 code units and pointers.
- * Unsigned 16-bit integer.
- * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
- *
- * UChar is configurable by defining the macro UCHAR_TYPE
- * on the preprocessor or compiler command line:
- * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
- * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
- * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
- *
- * The default is UChar=char16_t.
- *
- * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
- *
- * In C, char16_t is a simple typedef of uint_least16_t.
- * ICU requires uint_least16_t=uint16_t for data memory mapping.
- * On macOS, char16_t is not available because the uchar.h standard header is missing.
- *
- * @stable ICU 4.4
- */
-
-#if 1
- // #if 1 is normal. UChar defaults to char16_t in C++.
- // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
- // The intltest Makefile #defines UCHAR_TYPE=char16_t,
- // so we only #define it to uint16_t if it is undefined so far.
-#elif !defined(UCHAR_TYPE)
-# define UCHAR_TYPE uint16_t
-#endif
-
-#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
- defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
- // Inside the ICU library code, never configurable.
- typedef char16_t UChar;
-#elif defined(UCHAR_TYPE)
- typedef UCHAR_TYPE UChar;
-#elif defined(__cplusplus)
- typedef char16_t UChar;
-#else
- typedef uint16_t UChar;
-#endif
-
-/**
- * \var OldUChar
- * Default ICU 58 definition of UChar.
- * A base type for UTF-16 code units and pointers.
- * Unsigned 16-bit integer.
- *
- * Define OldUChar to be wchar_t if that is 16 bits wide.
- * If wchar_t is not 16 bits wide, then define UChar to be uint16_t.
- *
- * This makes the definition of OldUChar platform-dependent
- * but allows direct string type compatibility with platforms with
- * 16-bit wchar_t types.
- *
- * This is how UChar was defined in ICU 58, for transition convenience.
- * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
- * The current UChar responds to UCHAR_TYPE but OldUChar does not.
- *
- * @stable ICU 59
- */
-#if U_SIZEOF_WCHAR_T==2
- typedef wchar_t OldUChar;
-#elif defined(__CHAR16_TYPE__)
- typedef __CHAR16_TYPE__ OldUChar;
-#else
- typedef uint16_t OldUChar;
-#endif
-
-/**
- * Define UChar32 as a type for single Unicode code points.
- * UChar32 is a signed 32-bit integer (same as int32_t).
- *
- * The Unicode code point range is 0..0x10ffff.
- * All other values (negative or >=0x110000) are illegal as Unicode code points.
- * They may be used as sentinel values to indicate "done", "error"
- * or similar non-code point conditions.
- *
- * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
- * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
- * or else to be uint32_t.
- * That is, the definition of UChar32 was platform-dependent.
- *
- * @see U_SENTINEL
- * @stable ICU 2.4
- */
-typedef int32_t UChar32;
-
-/**
- * This value is intended for sentinel values for APIs that
- * (take or) return single code points (UChar32).
- * It is outside of the Unicode code point range 0..0x10ffff.
- *
- * For example, a "done" or "error" value in a new API
- * could be indicated with U_SENTINEL.
- *
- * ICU APIs designed before ICU 2.4 usually define service-specific "done"
- * values, mostly 0xffff.
- * Those may need to be distinguished from
- * actual U+ffff text contents by calling functions like
- * CharacterIterator::hasNext() or UnicodeString::length().
- *
- * @return -1
- * @see UChar32
- * @stable ICU 2.4
- */
-#define U_SENTINEL (-1)
-
-#include "./urename.h"
-
-#endif
diff --git a/src/tree_sitter/unicode/urename.h b/src/tree_sitter/unicode/urename.h
deleted file mode 100644
index ac79ad0f98..0000000000
--- a/src/tree_sitter/unicode/urename.h
+++ /dev/null
@@ -1 +0,0 @@
-// This file must exist in order for `utf8.h` and `utf16.h` to be used.
diff --git a/src/tree_sitter/unicode/utf.h b/src/tree_sitter/unicode/utf.h
deleted file mode 100644
index ac79ad0f98..0000000000
--- a/src/tree_sitter/unicode/utf.h
+++ /dev/null
@@ -1 +0,0 @@
-// This file must exist in order for `utf8.h` and `utf16.h` to be used.
diff --git a/src/tree_sitter/unicode/utf16.h b/src/tree_sitter/unicode/utf16.h
deleted file mode 100644
index b547922441..0000000000
--- a/src/tree_sitter/unicode/utf16.h
+++ /dev/null
@@ -1,733 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2012, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: utf16.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999sep09
-* created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: 16-bit Unicode handling macros
- *
- * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
- *
- * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://userguide.icu-project.org/strings).
- *
- * <em>Usage:</em>
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used for if-else-while...
- * bodies and all macro statements should be terminated with semicolon.
- */
-
-#ifndef __UTF16_H__
-#define __UTF16_H__
-
-#include "./umachine.h"
-#ifndef __UTF_H__
-# include "./utf.h"
-#endif
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * Does this code unit alone encode a code point (BMP, not a surrogate)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
-
-/**
- * Is this code unit a lead surrogate (U+d800..U+dbff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
-
-/**
- * Is this code unit a trail surrogate (U+dc00..U+dfff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
-
-/**
- * Is this code unit a surrogate (U+d800..U+dfff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
-
-/**
- * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
- * is it a lead surrogate?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
-
-/**
- * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
- * is it a trail surrogate?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 4.2
- */
-#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
-
-/**
- * Helper constant for U16_GET_SUPPLEMENTARY.
- * @internal
- */
-#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
-
-/**
- * Get a supplementary code point value (U+10000..U+10ffff)
- * from its lead and trail surrogates.
- * The result is undefined if the input values are not
- * lead and trail surrogates.
- *
- * @param lead lead surrogate (U+d800..U+dbff)
- * @param trail trail surrogate (U+dc00..U+dfff)
- * @return supplementary code point (U+10000..U+10ffff)
- * @stable ICU 2.4
- */
-#define U16_GET_SUPPLEMENTARY(lead, trail) \
- (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
-
-
-/**
- * Get the lead surrogate (0xd800..0xdbff) for a
- * supplementary code point (0x10000..0x10ffff).
- * @param supplementary 32-bit code point (U+10000..U+10ffff)
- * @return lead surrogate (U+d800..U+dbff) for supplementary
- * @stable ICU 2.4
- */
-#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
-
-/**
- * Get the trail surrogate (0xdc00..0xdfff) for a
- * supplementary code point (0x10000..0x10ffff).
- * @param supplementary 32-bit code point (U+10000..U+10ffff)
- * @return trail surrogate (U+dc00..U+dfff) for supplementary
- * @stable ICU 2.4
- */
-#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
-
-/**
- * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
- * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
- * @param c 32-bit code point
- * @return 1 or 2
- * @stable ICU 2.4
- */
-#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
-
-/**
- * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
- * @return 2
- * @stable ICU 2.4
- */
-#define U16_MAX_LENGTH 2
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- * The result is undefined if the offset points to a single, unpaired surrogate.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_GET
- * @stable ICU 2.4
- */
-#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[i]; \
- if(U16_IS_SURROGATE(c)) { \
- if(U16_IS_SURROGATE_LEAD(c)) { \
- (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
- } else { \
- (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * If the offset points to a single, unpaired surrogate, then
- * c is set to that unpaired surrogate.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_GET_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[i]; \
- if(U16_IS_SURROGATE(c)) { \
- uint16_t __c2; \
- if(U16_IS_SURROGATE_LEAD(c)) { \
- if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
- (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
- } \
- } else { \
- if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
- (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
- } \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * If the offset points to a single, unpaired surrogate, then
- * c is set to U+FFFD.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_GET_UNSAFE
- * @stable ICU 60
- */
-#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[i]; \
- if(U16_IS_SURROGATE(c)) { \
- uint16_t __c2; \
- if(U16_IS_SURROGATE_LEAD(c)) { \
- if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
- (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
- } else { \
- (c)=0xfffd; \
- } \
- } else { \
- if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
- (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
- } else { \
- (c)=0xfffd; \
- } \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* definitions with forward iteration --------------------------------------- */
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset points to a single, unpaired lead surrogate.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_NEXT
- * @stable ICU 2.4
- */
-#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[(i)++]; \
- if(U16_IS_LEAD(c)) { \
- (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
- *
- * @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_NEXT_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[(i)++]; \
- if(U16_IS_LEAD(c)) { \
- uint16_t __c2; \
- if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
- ++(i); \
- (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then c is set to U+FFFD.
- *
- * @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_NEXT_UNSAFE
- * @stable ICU 60
- */
-#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[(i)++]; \
- if(U16_IS_SURROGATE(c)) { \
- uint16_t __c2; \
- if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
- ++(i); \
- (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
- } else { \
- (c)=0xfffd; \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Append a code point to a string, overwriting 1 or 2 code units.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
- * Otherwise, the result is undefined.
- *
- * @param s const UChar * string buffer
- * @param i string offset
- * @param c code point to append
- * @see U16_APPEND
- * @stable ICU 2.4
- */
-#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- if((uint32_t)(c)<=0xffff) { \
- (s)[(i)++]=(uint16_t)(c); \
- } else { \
- (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
- (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Append a code point to a string, overwriting 1 or 2 code units.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Safe" macro, checks for a valid code point.
- * If a surrogate pair is written, checks for sufficient space in the string.
- * If the code point is not valid or a trail surrogate does not fit,
- * then isError is set to TRUE.
- *
- * @param s const UChar * string buffer
- * @param i string offset, must be i<capacity
- * @param capacity size of the string buffer
- * @param c code point to append
- * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
- * @see U16_APPEND_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
- if((uint32_t)(c)<=0xffff) { \
- (s)[(i)++]=(uint16_t)(c); \
- } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
- (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
- (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
- } else /* c>0x10ffff or not enough space */ { \
- (isError)=TRUE; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_FWD_1
- * @stable ICU 2.4
- */
-#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U16_IS_LEAD((s)[(i)++])) { \
- ++(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
- * @see U16_FWD_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
- if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
- ++(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U16_FWD_N
- * @stable ICU 2.4
- */
-#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0) { \
- U16_FWD_1_UNSAFE(s, i); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const UChar * string
- * @param i int32_t string offset, must be i<length
- * @param length int32_t string length
- * @param n number of code points to skip
- * @see U16_FWD_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
- U16_FWD_1(s, i, length); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to the trail surrogate of a surrogate pair,
- * then the offset is decremented.
- * Otherwise, it is not modified.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_SET_CP_START
- * @stable ICU 2.4
- */
-#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U16_IS_TRAIL((s)[i])) { \
- --(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to the trail surrogate of a surrogate pair,
- * then the offset is decremented.
- * Otherwise, it is not modified.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i
- * @see U16_SET_CP_START_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
- --(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* definitions with backward iteration -------------------------------------- */
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset is behind a single, unpaired trail surrogate.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_PREV
- * @stable ICU 2.4
- */
-#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[--(i)]; \
- if(U16_IS_TRAIL(c)) { \
- (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then c is set to that unpaired surrogate.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
- * @param c output UChar32 variable
- * @see U16_PREV_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[--(i)]; \
- if(U16_IS_TRAIL(c)) { \
- uint16_t __c2; \
- if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
- --(i); \
- (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then c is set to U+FFFD.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
- * @param c output UChar32 variable
- * @see U16_PREV_UNSAFE
- * @stable ICU 60
- */
-#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(s)[--(i)]; \
- if(U16_IS_SURROGATE(c)) { \
- uint16_t __c2; \
- if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
- --(i); \
- (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
- } else { \
- (c)=0xfffd; \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_BACK_1
- * @stable ICU 2.4
- */
-#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U16_IS_TRAIL((s)[--(i)])) { \
- --(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
- * @see U16_BACK_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
- --(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U16_BACK_N
- * @stable ICU 2.4
- */
-#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0) { \
- U16_BACK_1_UNSAFE(s, i); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start start of string
- * @param i string offset, must be start<i
- * @param n number of code points to skip
- * @see U16_BACK_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0 && (i)>(start)) { \
- U16_BACK_1(s, start, i); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind the lead surrogate of a surrogate pair,
- * then the offset is incremented.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_SET_CP_LIMIT
- * @stable ICU 2.4
- */
-#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U16_IS_LEAD((s)[(i)-1])) { \
- ++(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind the lead surrogate of a surrogate pair,
- * then the offset is incremented.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const UChar * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, start<=i<=length
- * @param length int32_t string length
- * @see U16_SET_CP_LIMIT_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
- if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
- ++(i); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#endif
diff --git a/src/tree_sitter/unicode/utf8.h b/src/tree_sitter/unicode/utf8.h
deleted file mode 100644
index 3b37873e37..0000000000
--- a/src/tree_sitter/unicode/utf8.h
+++ /dev/null
@@ -1,881 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-* Copyright (C) 1999-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-*******************************************************************************
-* file name: utf8.h
-* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 1999sep13
-* created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: 8-bit Unicode handling macros
- *
- * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
- *
- * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://userguide.icu-project.org/strings).
- *
- * <em>Usage:</em>
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used for if-else-while...
- * bodies and all macro statements should be terminated with semicolon.
- */
-
-#ifndef __UTF8_H__
-#define __UTF8_H__
-
-#include "./umachine.h"
-#ifndef __UTF_H__
-# include "./utf.h"
-#endif
-
-/* internal definitions ----------------------------------------------------- */
-
-/**
- * Counts the trail bytes for a UTF-8 lead byte.
- * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
- * leadByte might be evaluated multiple times.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is called by public macros in this file and thus must remain stable.
- *
- * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
- * @internal
- */
-#define U8_COUNT_TRAIL_BYTES(leadByte) \
- (U8_IS_LEAD(leadByte) ? \
- ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
-
-/**
- * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
- * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
- * leadByte might be evaluated multiple times.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is called by public macros in this file and thus must remain stable.
- *
- * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
- * @internal
- */
-#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
- (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
-
-/**
- * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is called by public macros in this file and thus must remain stable.
- * @internal
- */
-#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
-
-/**
- * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
- * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
- * Lead byte E0..EF bits 3..0 are used as byte index,
- * first trail byte bits 7..5 are used as bit index into that byte.
- * @see U8_IS_VALID_LEAD3_AND_T1
- * @internal
- */
-#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
-
-/**
- * Internal 3-byte UTF-8 validity check.
- * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
- * @internal
- */
-#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
-
-/**
- * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
- * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
- * First trail byte bits 7..4 are used as byte index,
- * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
- * @see U8_IS_VALID_LEAD4_AND_T1
- * @internal
- */
-#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
-
-/**
- * Internal 4-byte UTF-8 validity check.
- * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
- * @internal
- */
-#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
-
-/**
- * Function for handling "next code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
-
-/**
- * Function for handling "append code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE int32_t U_EXPORT2
-utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
-
-/**
- * Function for handling "previous code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE UChar32 U_EXPORT2
-utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
-
-/**
- * Function for handling "skip backward one code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE int32_t U_EXPORT2
-utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_SINGLE(c) (((c)&0x80)==0)
-
-/**
- * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
-// 0x32=0xf4-0xc2
-
-/**
- * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
-
-/**
- * How many code units (bytes) are used for the UTF-8 encoding
- * of this Unicode code point?
- * @param c 32-bit code point
- * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
- * @stable ICU 2.4
- */
-#define U8_LENGTH(c) \
- ((uint32_t)(c)<=0x7f ? 1 : \
- ((uint32_t)(c)<=0x7ff ? 2 : \
- ((uint32_t)(c)<=0xd7ff ? 3 : \
- ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
- ((uint32_t)(c)<=0xffff ? 3 : 4)\
- ) \
- ) \
- ) \
- )
-
-/**
- * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
- * @return 4
- * @stable ICU 2.4
- */
-#define U8_MAX_LENGTH 4
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * The offset may point to either the lead byte or one of the trail bytes
- * for a code point, in which case the macro will read all of the bytes
- * for the code point.
- * The result is undefined if the offset points to an illegal UTF-8
- * byte sequence.
- * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_GET
- * @stable ICU 2.4
- */
-#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t _u8_get_unsafe_index=(int32_t)(i); \
- U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
- U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * The offset may point to either the lead byte or one of the trail bytes
- * for a code point, in which case the macro will read all of the bytes
- * for the code point.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * If the offset points to an illegal UTF-8 byte sequence, then
- * c is set to a negative value.
- * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset
- * @param i int32_t string offset, must be start<=i<length
- * @param length int32_t string length
- * @param c output UChar32 variable, set to <0 in case of an error
- * @see U8_GET_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t _u8_get_index=(i); \
- U8_SET_CP_START(s, start, _u8_get_index); \
- U8_NEXT(s, _u8_get_index, length, c); \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * The offset may point to either the lead byte or one of the trail bytes
- * for a code point, in which case the macro will read all of the bytes
- * for the code point.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * If the offset points to an illegal UTF-8 byte sequence, then
- * c is set to U+FFFD.
- * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
- *
- * This macro does not distinguish between a real U+FFFD in the text
- * and U+FFFD returned for an ill-formed sequence.
- * Use U8_GET() if that distinction is important.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset
- * @param i int32_t string offset, must be start<=i<length
- * @param length int32_t string length
- * @param c output UChar32 variable, set to U+FFFD in case of an error
- * @see U8_GET
- * @stable ICU 51
- */
-#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t _u8_get_index=(i); \
- U8_SET_CP_START(s, start, _u8_get_index); \
- U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
-} UPRV_BLOCK_MACRO_END
-
-/* definitions with forward iteration --------------------------------------- */
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * The offset may point to the lead byte of a multi-byte sequence,
- * in which case the macro will read the whole sequence.
- * The result is undefined if the offset points to a trail byte
- * or an illegal UTF-8 sequence.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_NEXT
- * @stable ICU 2.4
- */
-#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(uint8_t)(s)[(i)++]; \
- if(!U8_IS_SINGLE(c)) { \
- if((c)<0xe0) { \
- (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
- } else if((c)<0xf0) { \
- /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
- (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
- (i)+=2; \
- } else { \
- (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
- (i)+=3; \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * The offset may point to the lead byte of a multi-byte sequence,
- * in which case the macro will read the whole sequence.
- * If the offset points to a trail byte or an illegal UTF-8 sequence, then
- * c is set to a negative value.
- *
- * @param s const uint8_t * string
- * @param i int32_t string offset, must be i<length
- * @param length int32_t string length
- * @param c output UChar32 variable, set to <0 in case of an error
- * @see U8_NEXT_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * The offset may point to the lead byte of a multi-byte sequence,
- * in which case the macro will read the whole sequence.
- * If the offset points to a trail byte or an illegal UTF-8 sequence, then
- * c is set to U+FFFD.
- *
- * This macro does not distinguish between a real U+FFFD in the text
- * and U+FFFD returned for an ill-formed sequence.
- * Use U8_NEXT() if that distinction is important.
- *
- * @param s const uint8_t * string
- * @param i int32_t string offset, must be i<length
- * @param length int32_t string length
- * @param c output UChar32 variable, set to U+FFFD in case of an error
- * @see U8_NEXT
- * @stable ICU 51
- */
-#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
-
-/** @internal */
-#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(uint8_t)(s)[(i)++]; \
- if(!U8_IS_SINGLE(c)) { \
- uint8_t __t = 0; \
- if((i)!=(length) && \
- /* fetch/validate/assemble all but last trail byte */ \
- ((c)>=0xe0 ? \
- ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
- U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
- (__t&=0x3f, 1) \
- : /* U+10000..U+10FFFF */ \
- ((c)-=0xf0)<=4 && \
- U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
- ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
- (__t=(s)[i]-0x80)<=0x3f) && \
- /* valid second-to-last trail byte */ \
- ((c)=((c)<<6)|__t, ++(i)!=(length)) \
- : /* U+0080..U+07FF */ \
- (c)>=0xc2 && ((c)&=0x1f, 1)) && \
- /* last trail byte */ \
- (__t=(s)[i]-0x80)<=0x3f && \
- ((c)=((c)<<6)|__t, ++(i), 1)) { \
- } else { \
- (c)=(sub); /* ill-formed*/ \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Append a code point to a string, overwriting 1 to 4 bytes.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
- * Otherwise, the result is undefined.
- *
- * @param s const uint8_t * string buffer
- * @param i string offset
- * @param c code point to append
- * @see U8_APPEND
- * @stable ICU 2.4
- */
-#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- uint32_t __uc=(c); \
- if(__uc<=0x7f) { \
- (s)[(i)++]=(uint8_t)__uc; \
- } else { \
- if(__uc<=0x7ff) { \
- (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
- } else { \
- if(__uc<=0xffff) { \
- (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
- } else { \
- (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
- (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
- } \
- (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
- } \
- (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Append a code point to a string, overwriting 1 to 4 bytes.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Safe" macro, checks for a valid code point.
- * If a non-ASCII code point is written, checks for sufficient space in the string.
- * If the code point is not valid or trail bytes do not fit,
- * then isError is set to TRUE.
- *
- * @param s const uint8_t * string buffer
- * @param i int32_t string offset, must be i<capacity
- * @param capacity int32_t size of the string buffer
- * @param c UChar32 code point to append
- * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
- * @see U8_APPEND_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
- uint32_t __uc=(c); \
- if(__uc<=0x7f) { \
- (s)[(i)++]=(uint8_t)__uc; \
- } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
- (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
- (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
- } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
- (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
- (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
- (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
- } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
- (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
- (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
- (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
- (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
- } else { \
- (isError)=TRUE; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_FWD_1
- * @stable ICU 2.4
- */
-#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const uint8_t * string
- * @param i int32_t string offset, must be i<length
- * @param length int32_t string length
- * @see U8_FWD_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
- uint8_t __b=(s)[(i)++]; \
- if(U8_IS_LEAD(__b) && (i)!=(length)) { \
- uint8_t __t1=(s)[i]; \
- if((0xe0<=__b && __b<0xf0)) { \
- if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
- ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
- ++(i); \
- } \
- } else if(__b<0xe0) { \
- if(U8_IS_TRAIL(__t1)) { \
- ++(i); \
- } \
- } else /* c>=0xf0 */ { \
- if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
- ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
- ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
- ++(i); \
- } \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U8_FWD_N
- * @stable ICU 2.4
- */
-#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0) { \
- U8_FWD_1_UNSAFE(s, i); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const uint8_t * string
- * @param i int32_t string offset, must be i<length
- * @param length int32_t string length
- * @param n number of code points to skip
- * @see U8_FWD_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
- U8_FWD_1(s, i, length); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to a UTF-8 trail byte,
- * then the offset is moved backward to the corresponding lead byte.
- * Otherwise, it is not modified.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_SET_CP_START
- * @stable ICU 2.4
- */
-#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- while(U8_IS_TRAIL((s)[i])) { --(i); } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to a UTF-8 trail byte,
- * then the offset is moved backward to the corresponding lead byte.
- * Otherwise, it is not modified.
- *
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start<=i
- * @see U8_SET_CP_START_UNSAFE
- * @see U8_TRUNCATE_IF_INCOMPLETE
- * @stable ICU 2.4
- */
-#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U8_IS_TRAIL((s)[(i)])) { \
- (i)=utf8_back1SafeBody(s, start, (i)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * If the string ends with a UTF-8 byte sequence that is valid so far
- * but incomplete, then reduce the length of the string to end before
- * the lead byte of that incomplete sequence.
- * For example, if the string ends with E1 80, the length is reduced by 2.
- *
- * In all other cases (the string ends with a complete sequence, or it is not
- * possible for any further trail byte to extend the trailing sequence)
- * the length remains unchanged.
- *
- * Useful for processing text split across multiple buffers
- * (save the incomplete sequence for later)
- * and for optimizing iteration
- * (check for string length only once per character).
- *
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- * Unlike U8_SET_CP_START(), this macro never reads s[length].
- *
- * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param length int32_t string length (usually start<=length)
- * @see U8_SET_CP_START
- * @stable ICU 61
- */
-#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
- if((length)>(start)) { \
- uint8_t __b1=s[(length)-1]; \
- if(U8_IS_SINGLE(__b1)) { \
- /* common ASCII character */ \
- } else if(U8_IS_LEAD(__b1)) { \
- --(length); \
- } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
- uint8_t __b2=s[(length)-2]; \
- if(0xe0<=__b2 && __b2<=0xf4) { \
- if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
- U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
- (length)-=2; \
- } \
- } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
- uint8_t __b3=s[(length)-3]; \
- if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
- (length)-=3; \
- } \
- } \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/* definitions with backward iteration -------------------------------------- */
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a multi-byte sequence, then the macro will read
- * the whole sequence.
- * If the offset is behind a lead byte, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset is behind an illegal UTF-8 sequence.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_PREV
- * @stable ICU 2.4
- */
-#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(uint8_t)(s)[--(i)]; \
- if(U8_IS_TRAIL(c)) { \
- uint8_t __b, __count=1, __shift=6; \
-\
- /* c is a trail byte */ \
- (c)&=0x3f; \
- for(;;) { \
- __b=(s)[--(i)]; \
- if(__b>=0xc0) { \
- U8_MASK_LEAD_BYTE(__b, __count); \
- (c)|=(UChar32)__b<<__shift; \
- break; \
- } else { \
- (c)|=(UChar32)(__b&0x3f)<<__shift; \
- ++__count; \
- __shift+=6; \
- } \
- } \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a multi-byte sequence, then the macro will read
- * the whole sequence.
- * If the offset is behind a lead byte, then that itself
- * will be returned as the code point.
- * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start<i
- * @param c output UChar32 variable, set to <0 in case of an error
- * @see U8_PREV_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(uint8_t)(s)[--(i)]; \
- if(!U8_IS_SINGLE(c)) { \
- (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a multi-byte sequence, then the macro will read
- * the whole sequence.
- * If the offset is behind a lead byte, then that itself
- * will be returned as the code point.
- * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
- *
- * This macro does not distinguish between a real U+FFFD in the text
- * and U+FFFD returned for an ill-formed sequence.
- * Use U8_PREV() if that distinction is important.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start<i
- * @param c output UChar32 variable, set to U+FFFD in case of an error
- * @see U8_PREV
- * @stable ICU 51
- */
-#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
- (c)=(uint8_t)(s)[--(i)]; \
- if(!U8_IS_SINGLE(c)) { \
- (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_BACK_1
- * @stable ICU 2.4
- */
-#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- while(U8_IS_TRAIL((s)[--(i)])) {} \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start<i
- * @see U8_BACK_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
- if(U8_IS_TRAIL((s)[--(i)])) { \
- (i)=utf8_back1SafeBody(s, start, (i)); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U8_BACK_N
- * @stable ICU 2.4
- */
-#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0) { \
- U8_BACK_1_UNSAFE(s, i); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param start int32_t index of the start of the string
- * @param i int32_t string offset, must be start<i
- * @param n number of code points to skip
- * @see U8_BACK_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
- int32_t __N=(n); \
- while(__N>0 && (i)>(start)) { \
- U8_BACK_1(s, start, i); \
- --__N; \
- } \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind a partial multi-byte sequence,
- * then the offset is incremented to behind the whole sequence.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_SET_CP_LIMIT
- * @stable ICU 2.4
- */
-#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
- U8_BACK_1_UNSAFE(s, i); \
- U8_FWD_1_UNSAFE(s, i); \
-} UPRV_BLOCK_MACRO_END
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind a partial multi-byte sequence,
- * then the offset is incremented to behind the whole sequence.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start<=i<=length
- * @param length int32_t string length
- * @see U8_SET_CP_LIMIT_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
- if((start)<(i) && ((i)<(length) || (length)<0)) { \
- U8_BACK_1(s, start, i); \
- U8_FWD_1(s, i, length); \
- } \
-} UPRV_BLOCK_MACRO_END
-
-#endif
diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt
index 493d7aacdd..27fa05660a 100644
--- a/third-party/CMakeLists.txt
+++ b/third-party/CMakeLists.txt
@@ -42,6 +42,7 @@ option(USE_BUNDLED_LUV "Use the bundled version of luv." ${USE_BUNDLED})
# build it unless explicitly requested
option(USE_BUNDLED_LUA "Use the bundled version of lua." OFF)
option(USE_BUNDLED_TS_PARSERS "Use the bundled treesitter parsers." ${USE_BUNDLED})
+option(USE_BUNDLED_TS "Use the bundled treesitter runtime." ${USE_BUNDLED})
if(USE_BUNDLED AND MSVC)
option(USE_BUNDLED_GETTEXT "Use the bundled version of gettext." ON)
@@ -198,6 +199,9 @@ set(LIBICONV_SHA256 ccf536620a45458d26ba83887a983b96827001e92a13847b45e4925cc891
set(TREESITTER_C_URL https://github.com/tree-sitter/tree-sitter-c/archive/6002fcd.tar.gz)
set(TREESITTER_C_SHA256 46f8d44fa886d9ddb92571bb6fa8b175992c8758eca749cb1217464e512b6e97)
+set(TREESITTER_URL https://github.com/tree-sitter/tree-sitter/archive/0.16.9.zip)
+set(TREESITTER_SHA256 63ef1f0cfde0f37f4f15803e9412863a397c5276dbc680e8fc917c9f6851ea9b)
+
if(USE_BUNDLED_UNIBILIUM)
include(BuildUnibilium)
endif()
@@ -253,6 +257,10 @@ if(USE_BUNDLED_TS_PARSERS)
include(BuildTreesitterParsers)
endif()
+if(USE_BUNDLED_TS)
+ include(BuildTreesitter)
+endif()
+
if(WIN32)
include(GetBinaryDeps)
diff --git a/third-party/cmake/BuildTreesitter.cmake b/third-party/cmake/BuildTreesitter.cmake
new file mode 100644
index 0000000000..3212d6ea08
--- /dev/null
+++ b/third-party/cmake/BuildTreesitter.cmake
@@ -0,0 +1,22 @@
+# Build and install the bundled tree-sitter runtime by driving its own build with ${MAKE_PRG}.
+set(TS_CFLAGS "-O3 -Wall -Wextra")
+ExternalProject_Add(tree-sitter
+  PREFIX ${DEPS_BUILD_DIR}
+  INSTALL_DIR ${DEPS_INSTALL_DIR}
+  BUILD_IN_SOURCE 1  # run the build and install commands inside the extracted source directory
+  URL ${TREESITTER_URL}
+  DOWNLOAD_DIR ${DEPS_DOWNLOAD_DIR}/tree-sitter
+  # Download through DownloadAndExtractFile.cmake, handing it the URL and the expected SHA256.
+  DOWNLOAD_COMMAND ${CMAKE_COMMAND}
+    -DURL=${TREESITTER_URL}
+    -DEXPECTED_SHA256=${TREESITTER_SHA256}
+    -DDOWNLOAD_DIR=${DEPS_DOWNLOAD_DIR}/tree-sitter
+    -DPREFIX=${DEPS_BUILD_DIR}
+    -DTARGET=tree-sitter
+    -DUSE_EXISTING_SRC_DIR=${USE_EXISTING_SRC_DIR}
+    -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/DownloadAndExtractFile.cmake
+  PATCH_COMMAND ""  # no patch step
+  CONFIGURE_COMMAND ""  # no configure step; the build command is invoked directly
+  BUILD_COMMAND ${MAKE_PRG} CC=${DEPS_C_COMPILER} "CFLAGS=${TS_CFLAGS}"
+  INSTALL_COMMAND ${MAKE_PRG} CC=${DEPS_C_COMPILER} PREFIX=${DEPS_INSTALL_DIR} install)
+list(APPEND THIRD_PARTY_DEPS tree-sitter)