aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/viml/parser/expressions.h
blob: cf0907850a246d611a00e387eadfcb5ebcae861d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#ifndef NVIM_VIML_PARSER_EXPRESSIONS_H
#define NVIM_VIML_PARSER_EXPRESSIONS_H

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#include "nvim/types.h"
#include "nvim/viml/parser/parser.h"

// Defines whether to ignore case:
//    ==   kCCStrategyUseOption
//    ==#  kCCStrategyMatchCase
//    ==?  kCCStrategyIgnoreCase
typedef enum {
  kCCStrategyUseOption = 0,  // 0 for xcalloc
  kCCStrategyMatchCase = '#',
  kCCStrategyIgnoreCase = '?',
} CaseCompareStrategy;

/// Lexer token type
typedef enum {
  kExprLexInvalid = 0,  ///< Invalid token, indicaten an error.
  kExprLexMissing,  ///< Missing token, for use in parser.
  kExprLexSpacing,  ///< Spaces, tabs, newlines, etc.
  kExprLexEOC,  ///< End of command character: NL, |, just end of stream.

  kExprLexQuestion,  ///< Question mark, for use in ternary.
  kExprLexColon,  ///< Colon, for use in ternary.
  kExprLexOr,  ///< Logical or operator.
  kExprLexAnd,  ///< Logical and operator.
  kExprLexComparison,  ///< One of the comparison operators.
  kExprLexPlus,  ///< Plus sign.
  kExprLexMinus,  ///< Minus sign.
  kExprLexDot,  ///< Dot: either concat or subscript, also part of the float.
  kExprLexMultiplication,  ///< Multiplication, division or modulo operator.

  kExprLexNot,  ///< Not: !.

  kExprLexNumber,  ///< Integer number literal, or part of a float.
  kExprLexSingleQuotedString,  ///< Single quoted string literal.
  kExprLexDoubleQuotedString,  ///< Double quoted string literal.
  kExprLexOption,  ///< &optionname option value.
  kExprLexRegister,  ///< @r register value.
  kExprLexEnv,  ///< Environment $variable value.
  kExprLexPlainIdentifier,  ///< Identifier without scope: `abc`, `foo#bar`.

  kExprLexBracket,  ///< Bracket, either opening or closing.
  kExprLexFigureBrace,  ///< Figure brace, either opening or closing.
  kExprLexParenthesis,  ///< Parenthesis, either opening or closing.
  kExprLexComma,  ///< Comma.
  kExprLexArrow,  ///< Arrow, like from lambda expressions.
} LexExprTokenType;

/// Lexer token
typedef struct {
  ParserPosition start;
  size_t len;
  LexExprTokenType type;
  union {
    struct {
      enum {
        kExprLexCmpEqual,  ///< Equality, unequality.
        kExprLexCmpMatches,  ///< Matches regex, not matches regex.
        kExprLexCmpGreater,  ///< `>` or `<=`
        kExprLexCmpGreaterOrEqual,  ///< `>=` or `<`.
        kExprLexCmpIdentical,  ///< `is` or `isnot`
      } type;  ///< Comparison type.
      CaseCompareStrategy ccs;  ///< Case comparison strategy.
      bool inv;  ///< True if comparison is to be inverted.
    } cmp;  ///< For kExprLexComparison.

    struct {
      enum {
        kExprLexMulMul,  ///< Real multiplication.
        kExprLexMulDiv,  ///< Division.
        kExprLexMulMod,  ///< Modulo.
      } type;  ///< Multiplication type.
    } mul;  ///< For kExprLexMultiplication.

    struct {
      bool closing;  ///< True if bracket/etc is a closing one.
    } brc;  ///< For brackets/braces/parenthesis.

    struct {
      int name;  ///< Register name, may be -1 if name not present.
    } reg;  ///< For kExprLexRegister.

    struct {
      bool closed;  ///< True if quote was closed.
    } str;  ///< For kExprLexSingleQuotedString and kExprLexDoubleQuotedString.

    struct {
      const char *name;  ///< Option name start.
      size_t len;  ///< Option name length.
      enum {
        kExprLexOptUnspecified = 0,
        kExprLexOptGlobal = 1,
        kExprLexOptLocal = 2,
      } scope;  ///< Option scope: &l:, &g: or not specified.
    } opt;  ///< Option properties.

    struct {
      int scope;  ///< Scope character or 0 if not present.
      bool autoload;  ///< Has autoload characters.
    } var;  ///< For kExprLexPlainIdentifier

    struct {
      LexExprTokenType type;  ///< Suggested type for parsing incorrect code.
      const char *msg;  ///< Error message.
    } err;  ///< For kExprLexInvalid

    struct {
      bool is_float;  ///< True if number is a floating-point.
    } num;  ///< For kExprLexNumber
  } data;  ///< Additional data, if needed.
} LexExprToken;

typedef enum {
  /// If set, “pointer” to the current byte in pstate will not be shifted
  kELFlagPeek = (1 << 0),
  /// Determines whether scope is allowed to come before the identifier
  kELFlagForbidScope = (1 << 1),
  /// Determines whether floating-point numbers are allowed
  ///
  /// I.e. whether dot is a decimal point separator or is not a part of
  /// a number at all.
  kELFlagAllowFloat = (1 << 2),
  /// Determines whether `is` and `isnot` are seen as comparison operators
  ///
  /// If set they are supposed to be just regular identifiers.
  kELFlagIsNotCmp = (1 << 3),
  /// Determines whether EOC tokens are allowed
  ///
  /// If set then it will yield Invalid token with E15 in place of EOC one if
  /// “EOC” is something like "|". It is fine with emitting EOC at the end of
  /// string still, with or without this flag set.
  kELFlagForbidEOC = (1 << 4),
  // WARNING: whenever you add a new flag, alter klee_assume() statement in
  // viml_expressions_lexer.c.
} LexExprFlags;

/// Expression AST node type
typedef enum {
  kExprNodeMissing = 'X',
  kExprNodeOpMissing = '_',
  kExprNodeTernary = '?',  ///< Ternary operator.
  kExprNodeTernaryValue = 'C',  ///< Ternary operator, colon.
  kExprNodeRegister = '@',  ///< Register.
  kExprNodeSubscript = 's',  ///< Subscript.
  kExprNodeListLiteral = 'l',  ///< List literal.
  kExprNodeUnaryPlus = 'p',
  kExprNodeBinaryPlus = '+',
  kExprNodeNested = 'e',  ///< Nested parenthesised expression.
  kExprNodeCall = 'c',  ///< Function call.
  /// Plain identifier: simple variable/function name
  ///
  /// Looks like "string", "g:Foo", etc: consists from a single 
  /// kExprLexPlainIdentifier token.
  kExprNodePlainIdentifier = 'i',
  /// Complex identifier: variable/function name with curly braces
  kExprNodeComplexIdentifier = 'I',
  /// Figure brace expression which is not yet known
  ///
  /// May resolve to any of kExprNodeDictLiteral, kExprNodeLambda or
  /// kExprNodeCurlyBracesIdentifier.
  kExprNodeUnknownFigure = '{',
  kExprNodeLambda = '\\',  ///< Lambda.
  kExprNodeDictLiteral = 'd',  ///< Dictionary literal.
  kExprNodeCurlyBracesIdentifier= '}',  ///< Part of the curly braces name.
  kExprNodeComma = ',',  ///< Comma “operator”.
  kExprNodeColon = ':',  ///< Colon “operator”.
  kExprNodeArrow = '>',  ///< Arrow “operator”.
} ExprASTNodeType;

typedef struct expr_ast_node ExprASTNode;

/// Structure representing one AST node
struct expr_ast_node {
  ExprASTNodeType type;  ///< Node type.
  /// Node children: e.g. for 1 + 2 nodes 1 and 2 will be children of +.
  ExprASTNode *children;
  /// Next node: e.g. for 1 + 2 child nodes 1 and 2 are put into a single-linked
  /// list: `(+)->children` references only node 1, node 2 is in
  /// `(+)->children->next`.
  ExprASTNode *next;
  ParserPosition start;
  size_t len;
  union {
    struct {
      int name;  ///< Register name, may be -1 if name not present.
    } reg;  ///< For kExprNodeRegister.
    struct {
      /// Which nodes UnknownFigure can’t possibly represent.
      struct {
        /// True if UnknownFigure may actually represent dictionary literal.
        bool allow_dict;
        /// True if UnknownFigure may actually represent lambda.
        bool allow_lambda;
        /// True if UnknownFigure may actually be part of curly braces name.
        bool allow_ident;
      } type_guesses;
      /// Highlight chunk index, used for rehighlighting if needed
      size_t opening_hl_idx;
    } fig;  ///< For kExprNodeUnknownFigure.
    struct {
      int scope;  ///< Scope character or 0 if not present.
      /// Actual identifier without scope.
      ///
      /// Points to inside parser reader state.
      const char *ident;
      size_t ident_len;  ///< Actual identifier length.
    } var;  ///< For kExprNodePlainIdentifier.
    struct {
      bool got_colon;  ///< True if colon was seen.
    } ter;  ///< For kExprNodeTernaryValue.
  } data;
};

enum {
  /// Allow multiple expressions in a row: e.g. for :echo
  ///
  /// Parser will still parse only one of them though.
  kExprFlagsMulti = (1 << 0),
  /// Allow NL, NUL and bar to be EOC
  ///
  /// When parsing expressions input by user bar is assumed to be a binary
  /// operator and other two are spacings.
  kExprFlagsDisallowEOC = (1 << 1),
  /// Print errors when encountered
  ///
  /// Without the flag they are only taken into account when parsing.
  kExprFlagsPrintError = (1 << 2),
  // WARNING: whenever you add a new flag, alter klee_assume() statement in
  // viml_expressions_parser.c.
} ExprParserFlags;

/// Structure representing complety AST for one expression
typedef struct {
  /// True if represented AST is correct and can be executed. Incorrect ones may
  /// still be used for completion, or in linters.
  bool correct;
  /// When AST is not correct this message will be printed.
  ///
  /// Uses `emsgf(msg, arg_len, arg);`, `msg` is assumed to contain only `%.*s`.
  struct {
    const char *msg;
    int arg_len;
    const char *arg;
  } err;
  /// Root node of the AST.
  ExprASTNode *root;
} ExprAST;

#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "viml/parser/expressions.h.generated.h"
#endif

#endif  // NVIM_VIML_PARSER_EXPRESSIONS_H