diff options
Diffstat (limited to 'src/tree_sitter/query.c')
-rw-r--r-- | src/tree_sitter/query.c | 218 |
1 files changed, 154 insertions, 64 deletions
diff --git a/src/tree_sitter/query.c b/src/tree_sitter/query.c index 2563325248..053882cf68 100644 --- a/src/tree_sitter/query.c +++ b/src/tree_sitter/query.c @@ -19,6 +19,8 @@ typedef struct { uint8_t next_size; } Stream; +#define MAX_STEP_CAPTURE_COUNT 4 + /* * QueryStep - A step in the process of matching a query. Each node within * a query S-expression maps to one of these steps. An entire pattern is @@ -37,9 +39,11 @@ typedef struct { typedef struct { TSSymbol symbol; TSFieldId field; - uint16_t capture_id; - uint16_t depth: 15; + uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; + uint16_t depth: 13; bool contains_captures: 1; + bool is_immediate: 1; + bool is_last: 1; } QueryStep; /* @@ -62,10 +66,12 @@ typedef struct { } SymbolTable; /* - * PatternEntry - The set of steps needed to match a particular pattern, - * represented as a slice of a shared array. These entries are stored in a - * 'pattern map' - a sorted array that makes it possible to efficiently lookup - * patterns based on the symbol for their first step. + * PatternEntry - Information about the starting point for matching a + * particular pattern, consisting of the index of the pattern within the query, + * and the index of the patter's first step in the shared `steps` array. These + * entries are stored in a 'pattern map' - a sorted array that makes it + * possible to efficiently lookup patterns based on the symbol for their first + * step. */ typedef struct { uint16_t step_index; @@ -140,6 +146,7 @@ static const TSQueryError PARENT_DONE = -1; static const uint8_t PATTERN_DONE_MARKER = UINT8_MAX; static const uint16_t NONE = UINT16_MAX; static const TSSymbol WILDCARD_SYMBOL = 0; +static const TSSymbol NAMED_WILDCARD_SYMBOL = UINT16_MAX - 1; static const uint16_t MAX_STATE_COUNT = 32; // #define LOG(...) fprintf(stderr, __VA_ARGS__) @@ -324,6 +331,49 @@ static uint16_t symbol_table_insert_name( return self->slices.size - 1; } +/************ + * QueryStep + ************/ + +static QueryStep query_step__new( + TSSymbol symbol, + uint16_t depth, + bool is_immediate +) { + return (QueryStep) { + .symbol = symbol, + .depth = depth, + .field = 0, + .capture_ids = {NONE, NONE, NONE, NONE}, + .contains_captures = false, + .is_immediate = is_immediate, + }; +} + +static void query_step__add_capture(QueryStep *self, uint16_t capture_id) { + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + if (self->capture_ids[i] == NONE) { + self->capture_ids[i] = capture_id; + break; + } + } +} + +static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) { + for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) { + if (self->capture_ids[i] == capture_id) { + self->capture_ids[i] = NONE; + while (i + 1 < MAX_STEP_CAPTURE_COUNT) { + if (self->capture_ids[i + 1] == NONE) break; + self->capture_ids[i] = self->capture_ids[i + 1]; + self->capture_ids[i + 1] = NONE; + i++; + } + break; + } + } +} + /********* * Query *********/ @@ -333,7 +383,7 @@ static uint16_t symbol_table_insert_name( // to quickly find the starting steps of all of the patterns whose root matches // that node. Each entry has two fields: a `pattern_index`, which identifies one // of the patterns in the query, and a `step_index`, which indicates the start -// offset of that pattern's steps pattern within the `steps` array. +// offset of that pattern's steps within the `steps` array. // // The entries are sorted by the patterns' root symbols, and lookups use a // binary search. This ensures that the cost of this initial lookup step @@ -399,14 +449,14 @@ static void ts_query__finalize_steps(TSQuery *self) { for (unsigned i = 0; i < self->steps.size; i++) { QueryStep *step = &self->steps.contents[i]; uint32_t depth = step->depth; - if (step->capture_id != NONE) { + if (step->capture_ids[0] != NONE) { step->contains_captures = true; } else { step->contains_captures = false; for (unsigned j = i + 1; j < self->steps.size; j++) { QueryStep *s = &self->steps.contents[j]; if (s->depth == PATTERN_DONE_MARKER || s->depth <= depth) break; - if (s->capture_id != NONE) step->contains_captures = true; + if (s->capture_ids[0] != NONE) step->contains_captures = true; } } } @@ -533,7 +583,8 @@ static TSQueryError ts_query__parse_pattern( TSQuery *self, Stream *stream, uint32_t depth, - uint32_t *capture_count + uint32_t *capture_count, + bool is_immediate ) { uint16_t starting_step_index = self->steps.size; @@ -552,7 +603,7 @@ static TSQueryError ts_query__parse_pattern( // Parse a nested list, which represents a pattern followed by // zero-or-more predicates. if (stream->next == '(' && depth == 0) { - TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count); + TSQueryError e = ts_query__parse_pattern(self, stream, 0, capture_count, is_immediate); if (e) return e; // Parse the predicates. @@ -573,7 +624,7 @@ static TSQueryError ts_query__parse_pattern( // Parse the wildcard symbol if (stream->next == '*') { - symbol = WILDCARD_SYMBOL; + symbol = NAMED_WILDCARD_SYMBOL; stream_advance(stream); } @@ -597,24 +648,37 @@ static TSQueryError ts_query__parse_pattern( } // Add a step for the node. - array_push(&self->steps, ((QueryStep) { - .depth = depth, - .symbol = symbol, - .field = 0, - .capture_id = NONE, - .contains_captures = false, - })); + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); // Parse the child patterns stream_skip_whitespace(stream); + bool child_is_immediate = false; + uint16_t child_start_step_index = self->steps.size; for (;;) { - TSQueryError e = ts_query__parse_pattern(self, stream, depth + 1, capture_count); + if (stream->next == '.') { + child_is_immediate = true; + stream_advance(stream); + stream_skip_whitespace(stream); + } + + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth + 1, + capture_count, + child_is_immediate + ); if (e == PARENT_DONE) { + if (child_is_immediate) { + self->steps.contents[child_start_step_index].is_last = true; + } stream_advance(stream); break; } else if (e) { return e; } + + child_is_immediate = false; } } @@ -643,13 +707,7 @@ static TSQueryError ts_query__parse_pattern( stream_reset(stream, string_content); return TSQueryErrorNodeType; } - array_push(&self->steps, ((QueryStep) { - .depth = depth, - .symbol = symbol, - .field = 0, - .capture_id = NONE, - .contains_captures = false, - })); + array_push(&self->steps, query_step__new(symbol, depth, is_immediate)); if (stream->next != '"') return TSQueryErrorSyntax; stream_advance(stream); @@ -672,7 +730,13 @@ static TSQueryError ts_query__parse_pattern( // Parse the pattern uint32_t step_index = self->steps.size; - TSQueryError e = ts_query__parse_pattern(self, stream, depth, capture_count); + TSQueryError e = ts_query__parse_pattern( + self, + stream, + depth, + capture_count, + is_immediate + ); if (e == PARENT_DONE) return TSQueryErrorSyntax; if (e) return e; @@ -695,12 +759,7 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Add a step that matches any kind of node - array_push(&self->steps, ((QueryStep) { - .depth = depth, - .symbol = WILDCARD_SYMBOL, - .field = 0, - .contains_captures = false, - })); + array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate)); } else { @@ -710,7 +769,7 @@ static TSQueryError ts_query__parse_pattern( stream_skip_whitespace(stream); // Parse an '@'-prefixed capture pattern - if (stream->next == '@') { + while (stream->next == '@') { stream_advance(stream); // Parse the capture name @@ -725,7 +784,8 @@ static TSQueryError ts_query__parse_pattern( capture_name, length ); - self->steps.contents[starting_step_index].capture_id = capture_id; + QueryStep *step = &self->steps.contents[starting_step_index]; + query_step__add_capture(step, capture_id); (*capture_count)++; stream_skip_whitespace(stream); @@ -794,8 +854,8 @@ TSQuery *ts_query_new( .offset = self->predicate_steps.size, .length = 0, })); - *error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count); - array_push(&self->steps, ((QueryStep) { .depth = PATTERN_DONE_MARKER })); + *error_type = ts_query__parse_pattern(self, &stream, 0, &capture_count, false); + array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false)); // If any pattern could not be parsed, then report the error information // and terminate. @@ -891,16 +951,31 @@ void ts_query_disable_capture( const char *name, uint32_t length ) { + // Remove capture information for any pattern step that previously + // captured with the given name. int id = symbol_table_id_for_name(&self->captures, name, length); if (id != -1) { for (unsigned i = 0; i < self->steps.size; i++) { QueryStep *step = &self->steps.contents[i]; - if (step->capture_id == id) { - step->capture_id = NONE; - } + query_step__remove_capture(step, id); + } + ts_query__finalize_steps(self); + } +} + +void ts_query_disable_pattern( + TSQuery *self, + uint32_t pattern_index +) { + // Remove the given pattern from the pattern map. Its steps will still + // be in the `steps` array, but they will never be read. + for (unsigned i = 0; i < self->pattern_map.size; i++) { + PatternEntry *pattern = &self->pattern_map.contents[i]; + if (pattern->pattern_index == pattern_index) { + array_erase(&self->pattern_map, i); + i--; } } - ts_query__finalize_steps(self); } /*************** @@ -1010,7 +1085,7 @@ static bool ts_query_cursor__first_in_progress_capture( static bool ts_query__cursor_add_state( TSQueryCursor *self, - const PatternEntry *slice + const PatternEntry *pattern ) { uint32_t list_id = capture_list_pool_acquire(&self->capture_list_pool); @@ -1037,11 +1112,11 @@ static bool ts_query__cursor_add_state( } } - LOG(" start state. pattern:%u\n", slice->pattern_index); + LOG(" start state. pattern:%u\n", pattern->pattern_index); array_push(&self->states, ((QueryState) { .capture_list_id = list_id, - .step_index = slice->step_index, - .pattern_index = slice->pattern_index, + .step_index = pattern->step_index, + .pattern_index = pattern->pattern_index, .start_depth = self->depth, .capture_count = 0, .consumed_capture_count = 0, @@ -1113,15 +1188,16 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { return self->finished_states.size > 0; } } else { - bool can_have_later_siblings; + bool has_later_siblings; bool can_have_later_siblings_with_this_field; TSFieldId field_id = ts_tree_cursor_current_status( &self->cursor, - &can_have_later_siblings, + &has_later_siblings, &can_have_later_siblings_with_this_field ); TSNode node = ts_tree_cursor_current_node(&self->cursor); TSSymbol symbol = ts_node_symbol(node); + bool is_named = ts_node_is_named(node); if (symbol != ts_builtin_sym_error && self->query->symbol_map) { symbol = self->query->symbol_map[symbol]; } @@ -1145,43 +1221,46 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { ) return false; LOG( - "enter node. type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u, can_have_later_siblings:%d, can_have_later_siblings_with_this_field:%d\n", + "enter node. " + "type:%s, field:%s, row:%u state_count:%u, " + "finished_state_count:%u, has_later_siblings:%d, " + "can_have_later_siblings_with_this_field:%d\n", ts_node_type(node), ts_language_field_name_for_id(self->query->language, field_id), ts_node_start_point(node).row, self->states.size, self->finished_states.size, - can_have_later_siblings, + has_later_siblings, can_have_later_siblings_with_this_field ); // Add new states for any patterns whose root node is a wildcard. for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { - PatternEntry *slice = &self->query->pattern_map.contents[i]; - QueryStep *step = &self->query->steps.contents[slice->step_index]; + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[pattern->step_index]; // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query__cursor_add_state(self, slice)) break; + if (!ts_query__cursor_add_state(self, pattern)) break; } // Add new states for any patterns whose root node matches this node. unsigned i; if (ts_query__pattern_map_search(self->query, symbol, &i)) { - PatternEntry *slice = &self->query->pattern_map.contents[i]; - QueryStep *step = &self->query->steps.contents[slice->step_index]; + PatternEntry *pattern = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[pattern->step_index]; do { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. if (step->field && field_id != step->field) continue; - if (!ts_query__cursor_add_state(self, slice)) break; + if (!ts_query__cursor_add_state(self, pattern)) break; // Advance to the next pattern whose root node matches this node. i++; if (i == self->query->pattern_map.size) break; - slice = &self->query->pattern_map.contents[i]; - step = &self->query->steps.contents[slice->step_index]; + pattern = &self->query->pattern_map.contents[i]; + step = &self->query->steps.contents[pattern->step_index]; } while (step->symbol == symbol); } @@ -1191,14 +1270,23 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { QueryStep *step = &self->query->steps.contents[state->step_index]; // Check that the node matches all of the criteria for the next - // step of the pattern.if ( + // step of the pattern. if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue; // Determine if this node matches this step of the pattern, and also // if this node can have later siblings that match this step of the // pattern. - bool node_does_match = !step->symbol || step->symbol == symbol; - bool later_sibling_can_match = can_have_later_siblings; + bool node_does_match = + step->symbol == symbol || + step->symbol == WILDCARD_SYMBOL || + (step->symbol == NAMED_WILDCARD_SYMBOL && is_named); + bool later_sibling_can_match = has_later_siblings; + if (step->is_immediate && is_named) { + later_sibling_can_match = false; + } + if (step->is_last && has_later_siblings) { + node_does_match = false; + } if (step->field) { if (step->field == field_id) { if (!can_have_later_siblings_with_this_field) { @@ -1261,11 +1349,13 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { // If the current node is captured in this pattern, add it to the // capture list. - if (step->capture_id != NONE) { + for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) { + uint16_t capture_id = step->capture_ids[j]; + if (step->capture_ids[j] == NONE) break; LOG( " capture node. pattern:%u, capture_id:%u\n", next_state->pattern_index, - step->capture_id + capture_id ); TSQueryCapture *capture_list = capture_list_pool_get( &self->capture_list_pool, @@ -1273,7 +1363,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) { ); capture_list[next_state->capture_count++] = (TSQueryCapture) { node, - step->capture_id + capture_id }; } |