aboutsummaryrefslogtreecommitdiff
path: root/src/tree_sitter/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/tree_sitter/parser.c')
-rw-r--r--src/tree_sitter/parser.c81
1 files changed, 54 insertions, 27 deletions
diff --git a/src/tree_sitter/parser.c b/src/tree_sitter/parser.c
index dd222cd3c4..79cad797a0 100644
--- a/src/tree_sitter/parser.c
+++ b/src/tree_sitter/parser.c
@@ -292,6 +292,7 @@ static bool ts_parser__better_version_exists(
return true;
case ErrorComparisonPreferRight:
if (ts_stack_can_merge(self->stack, i, version)) return true;
+ break;
default:
break;
}
@@ -355,10 +356,14 @@ static Subtree ts_parser__lex(
StackVersion version,
TSStateId parse_state
) {
+ TSLexMode lex_mode = self->language->lex_modes[parse_state];
+ if (lex_mode.lex_state == (uint16_t)-1) {
+ LOG("no_lookahead_after_non_terminal_extra");
+ return NULL_SUBTREE;
+ }
+
Length start_position = ts_stack_position(self->stack, version);
Subtree external_token = ts_stack_last_external_token(self->stack, version);
- TSLexMode lex_mode = self->language->lex_modes[parse_state];
- if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE;
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_lex_state
@@ -761,20 +766,26 @@ static StackVersion ts_parser__reduce(
int dynamic_precedence,
uint16_t production_id,
bool is_fragile,
- bool is_extra
+ bool end_of_non_terminal_extra
) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
- uint32_t removed_version_count = 0;
- StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
+ // Pop the given number of nodes from the given version of the parse stack.
+ // If stack versions have previously merged, then there may be more than one
+ // path back through the stack. For each path, create a new parent node to
+ // contain the popped children, and push it onto the stack in place of the
+ // children.
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
+ uint32_t removed_version_count = 0;
for (uint32_t i = 0; i < pop.size; i++) {
StackSlice slice = pop.contents[i];
StackVersion slice_version = slice.version - removed_version_count;
- // Error recovery can sometimes cause lots of stack versions to merge,
- // such that a single pop operation can produce a lots of slices.
- // Avoid creating too many stack versions in that situation.
- if (i > 0 && slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
+ // This is where new versions are added to the parse stack. The versions
+ // will all be sorted and truncated at the end of the outer parsing loop.
+ // Allow the maximum version count to be temporarily exceeded, but only
+ // by a limited threshold.
+ if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
ts_stack_remove_version(self->stack, slice_version);
ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
removed_version_count++;
@@ -826,7 +837,9 @@ static StackVersion ts_parser__reduce(
TSStateId state = ts_stack_state(self->stack, slice_version);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
- if (is_extra) parent.ptr->extra = true;
+ if (end_of_non_terminal_extra && next_state == state) {
+ parent.ptr->extra = true;
+ }
if (is_fragile || pop.size > 1 || initial_version_count > 1) {
parent.ptr->fragile_left = true;
parent.ptr->fragile_right = true;
@@ -963,6 +976,7 @@ static bool ts_parser__do_all_potential_reductions(
.dynamic_precedence = action.params.reduce.dynamic_precedence,
.production_id = action.params.reduce.production_id,
});
+ break;
default:
break;
}
@@ -1339,23 +1353,26 @@ static bool ts_parser__advance(
);
}
- // Otherwise, re-run the lexer.
- if (!lookahead.ptr) {
- lookahead = ts_parser__lex(self, version, state);
- if (lookahead.ptr) {
- ts_parser__set_cached_token(self, position, last_external_token, lookahead);
- ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
- }
+ bool needs_lex = !lookahead.ptr;
+ for (;;) {
+ // Otherwise, re-run the lexer.
+ if (needs_lex) {
+ needs_lex = false;
+ lookahead = ts_parser__lex(self, version, state);
+
+ if (lookahead.ptr) {
+ ts_parser__set_cached_token(self, position, last_external_token, lookahead);
+ ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
+ }
- // When parsing a non-terminal extra, a null lookahead indicates the
- // end of the rule. The reduction is stored in the EOF table entry.
- // After the reduction, the lexer needs to be run again.
- else {
- ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
+ // When parsing a non-terminal extra, a null lookahead indicates the
+ // end of the rule. The reduction is stored in the EOF table entry.
+ // After the reduction, the lexer needs to be run again.
+ else {
+ ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
+ }
}
- }
- for (;;) {
// If a cancellation flag or a timeout was provided, then check every
// time a fixed number of parse actions has been processed.
if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) {
@@ -1407,12 +1424,12 @@ static bool ts_parser__advance(
case TSParseActionTypeReduce: {
bool is_fragile = table_entry.action_count > 1;
- bool is_extra = lookahead.ptr == NULL;
+ bool end_of_non_terminal_extra = lookahead.ptr == NULL;
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count);
StackVersion reduction_version = ts_parser__reduce(
self, version, action.params.reduce.symbol, action.params.reduce.child_count,
action.params.reduce.dynamic_precedence, action.params.reduce.production_id,
- is_fragile, is_extra
+ is_fragile, end_of_non_terminal_extra
);
if (reduction_version != STACK_VERSION_NONE) {
last_reduction_version = reduction_version;
@@ -1452,8 +1469,10 @@ static bool ts_parser__advance(
// (and completing the non-terminal extra rule) run the lexer again based
// on the current parse state.
if (!lookahead.ptr) {
- lookahead = ts_parser__lex(self, version, state);
+ needs_lex = true;
+ continue;
}
+
ts_language_table_entry(
self->language,
state,
@@ -1463,6 +1482,11 @@ static bool ts_parser__advance(
continue;
}
+ if (!lookahead.ptr) {
+ ts_stack_pause(self->stack, version, ts_builtin_sym_end);
+ return true;
+ }
+
// If there were no parse actions for the current lookahead token, then
// it is not valid in this state. If the current lookahead token is a
// keyword, then switch to treating it as the normal word token if that
@@ -1500,6 +1524,9 @@ static bool ts_parser__advance(
// push each of its children. Then try again to process the current
// lookahead.
if (ts_parser__breakdown_top_of_stack(self, version)) {
+ state = ts_stack_state(self->stack, version);
+ ts_subtree_release(&self->tree_pool, lookahead);
+ needs_lex = true;
continue;
}