aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/vterm/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvim/vterm/parser.c')
-rw-r--r--src/nvim/vterm/parser.c411
1 files changed, 411 insertions, 0 deletions
diff --git a/src/nvim/vterm/parser.c b/src/nvim/vterm/parser.c
new file mode 100644
index 0000000000..79d348f2c1
--- /dev/null
+++ b/src/nvim/vterm/parser.c
@@ -0,0 +1,411 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "nvim/vterm/parser.h"
+#include "nvim/vterm/vterm.h"
+#include "nvim/vterm/vterm_internal_defs.h"
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "vterm/parser.c.generated.h"
+#endif
+
+#undef DEBUG_PARSER
+
+// Reports whether `c` is an intermediate byte of an escape or CSI sequence,
+// i.e. lies in the inclusive range 0x20..0x2f.
+static bool is_intermed(uint8_t c)
+{
+  // Exactly the sixteen bytes 0x20..0x2f have a high nibble of 2.
+  return (c & 0xf0) == 0x20;
+}
+
+// Passes a single control byte to the parser's `control` callback.
+//
+// The byte counts as handled only when a callback table with a `control`
+// entry is installed and that entry returns non-zero; otherwise it is
+// reported through DEBUG_LOG.
+static void do_control(VTerm *vt, uint8_t control)
+{
+  bool handled = vt->parser.callbacks
+                 && vt->parser.callbacks->control
+                 && (*vt->parser.callbacks->control)(control, vt->parser.cbdata);
+
+  if (!handled) {
+    DEBUG_LOG("libvterm: Unhandled control 0x%02x\n", control);
+  }
+}
+
+// Delivers a completed CSI sequence to the parser's `csi` callback.
+//
+// The callback receives the leader string (NULL when no leader bytes were
+// collected), the argument array with its element count, the intermediate
+// string (NULL when empty), the final byte `command`, and the user data
+// pointer. When no callback is installed, or it returns zero, the sequence is
+// reported through DEBUG_LOG.
+//
+// @param vt       terminal whose parser state holds the collected CSI fields
+// @param command  final byte of the CSI sequence
+static void do_csi(VTerm *vt, char command)
+{
+#ifdef DEBUG_PARSER
+  // Diagnostic dump, compiled only when DEBUG_PARSER is defined.
+  printf("Parsed CSI args as:\n");
+  printf(" leader: %s\n", vt->parser.v.csi.leader);
+  for (int argi = 0; argi < vt->parser.v.csi.argi; argi++) {
+    printf(" %lu", (unsigned long)CSI_ARG(vt->parser.v.csi.args[argi]));
+    // Sub-arguments stay on one line; the line ends with the last of a group.
+    if (!CSI_ARG_HAS_MORE(vt->parser.v.csi.args[argi])) {
+      printf("\n");
+    }
+  }
+  // The intermediates belong to the whole sequence, so print them once.
+  printf(" intermed: %s\n", vt->parser.intermed);
+#endif
+
+  if (vt->parser.callbacks && vt->parser.callbacks->csi) {
+    if ((*vt->parser.callbacks->csi)(vt->parser.v.csi.leaderlen ? vt->parser.v.csi.leader : NULL,
+                                     vt->parser.v.csi.args,
+                                     vt->parser.v.csi.argi,
+                                     vt->parser.intermedlen ? vt->parser.intermed : NULL,
+                                     command,
+                                     vt->parser.cbdata)) {
+      return;
+    }
+  }
+
+  DEBUG_LOG("libvterm: Unhandled CSI %c\n", command);
+}
+
+// Delivers a completed escape sequence to the parser's `escape` callback.
+//
+// The callback receives a NUL-terminated copy of the collected intermediate
+// bytes followed by the final byte `command`, together with that string's
+// length (terminator excluded). When no callback is installed, or it returns
+// zero, the sequence is reported through DEBUG_LOG.
+static void do_escape(VTerm *vt, char command)
+{
+  size_t n_intermed = (size_t)vt->parser.intermedlen;
+  char seq[INTERMED_MAX + 1];
+
+  // Intermediates first, then the final byte, then the terminator.
+  strncpy(seq, vt->parser.intermed, n_intermed);  // NOLINT(runtime/printf)
+  seq[n_intermed] = command;
+  size_t seqlen = n_intermed + 1;
+  seq[seqlen] = 0;
+
+  bool handled = vt->parser.callbacks
+                 && vt->parser.callbacks->escape
+                 && (*vt->parser.callbacks->escape)(seq, seqlen, vt->parser.cbdata);
+
+  if (!handled) {
+    DEBUG_LOG("libvterm: Unhandled escape ESC 0x%02x\n", command);
+  }
+}
+
+// Forwards one piece of a string-type sequence to the callback that matches
+// the current parser state (OSC, DCS, APC, PM or SOS).
+//
+// The fragment is flagged as initial according to `parser.string_initial`
+// and as final according to `final`. After the call `parser.string_initial`
+// is cleared, whether or not a callback ran.
+//
+// @param vt     terminal owning the parser state
+// @param str    first byte of the fragment
+// @param len    number of bytes in the fragment
+// @param final  true when this fragment ends the string
+static void string_fragment(VTerm *vt, const char *str, size_t len, bool final)
+{
+  VTermStringFragment frag = {
+    .str = str,
+    .len = len,
+    .initial = vt->parser.string_initial,
+    .final = final,
+  };
+
+  // Nothing can be dispatched without a callback table.
+  if (vt->parser.callbacks) {
+    switch (vt->parser.state) {
+    case OSC:
+      if (vt->parser.callbacks->osc) {
+        vt->parser.callbacks->osc(vt->parser.v.osc.command, frag, vt->parser.cbdata);
+      }
+      break;
+
+    case DCS_VTERM:
+      if (vt->parser.callbacks->dcs) {
+        vt->parser.callbacks->dcs(vt->parser.v.dcs.command,
+                                  (size_t)vt->parser.v.dcs.commandlen,
+                                  frag,
+                                  vt->parser.cbdata);
+      }
+      break;
+
+    case APC:
+      if (vt->parser.callbacks->apc) {
+        vt->parser.callbacks->apc(frag, vt->parser.cbdata);
+      }
+      break;
+
+    case PM:
+      if (vt->parser.callbacks->pm) {
+        vt->parser.callbacks->pm(frag, vt->parser.cbdata);
+      }
+      break;
+
+    case SOS:
+      if (vt->parser.callbacks->sos) {
+        vt->parser.callbacks->sos(frag, vt->parser.cbdata);
+      }
+      break;
+
+    // The remaining states carry no string payload.
+    case NORMAL:
+    case CSI_LEADER:
+    case CSI_ARGS:
+    case CSI_INTERMED:
+    case OSC_COMMAND:
+    case DCS_COMMAND:
+      break;
+    }
+  }
+
+  vt->parser.string_initial = false;
+}
+
+// Runs `len` bytes of input through the parser state machine.
+//
+// Completed controls, escape sequences and CSI sequences are handed to
+// do_control(), do_escape() and do_csi(); plain text goes to the `text`
+// callback; OSC/DCS/APC/PM/SOS payloads are delivered through
+// string_fragment(). The parser state lives in `vt->parser`, so a sequence
+// may be split across calls: a string payload still open at the end of the
+// buffer is flushed as a non-final fragment and continues in the next call.
+//
+// @param vt     terminal whose parser state is advanced
+// @param bytes  input buffer
+// @param len    number of bytes in `bytes`
+// @return always `len`
+size_t vterm_input_write(VTerm *vt, const char *bytes, size_t len)
+{
+  size_t pos = 0;
+  const char *string_start;
+  // Number of slots in the CSI argument array; bounds `argi` in CSI_ARGS.
+  // NOTE(review): relies on `args` being an array member (not a pointer) --
+  // confirm against the parser struct definition.
+  const size_t csi_args_cap = sizeof(vt->parser.v.csi.args) / sizeof(vt->parser.v.csi.args[0]);
+
+  // If the previous call ended inside a string payload, this buffer continues
+  // it from its first byte.
+  switch (vt->parser.state) {
+  case NORMAL:
+  case CSI_LEADER:
+  case CSI_ARGS:
+  case CSI_INTERMED:
+  case OSC_COMMAND:
+  case DCS_COMMAND:
+    string_start = NULL;
+    break;
+  case OSC:
+  case DCS_VTERM:
+  case APC:
+  case PM:
+  case SOS:
+    string_start = bytes;
+    break;
+  }
+
+#define ENTER_STATE(st) do { vt->parser.state = st; string_start = NULL; } while (0)
+#define ENTER_NORMAL_STATE() ENTER_STATE(NORMAL)
+
+#define IS_STRING_STATE() (vt->parser.state >= OSC_COMMAND)
+
+  for (; pos < len; pos++) {
+    uint8_t c = (uint8_t)bytes[pos];
+    // Raw C1 bytes act as controls only outside UTF-8 mode; an ESC-introduced
+    // C1 (hoisted below) is always accepted.
+    bool c1_allowed = !vt->mode.utf8;
+
+    if (c == 0x00 || c == 0x7f) {  // NUL, DEL
+      if (IS_STRING_STATE()) {
+        // Flush what precedes the byte and resume the payload after it.
+        string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false);
+        string_start = bytes + pos + 1;
+      }
+      if (vt->parser.emit_nul) {
+        do_control(vt, c);
+      }
+      continue;
+    }
+    if (c == 0x18 || c == 0x1a) {  // CAN, SUB
+      // Abort whatever sequence is in progress.
+      vt->parser.in_esc = false;
+      ENTER_NORMAL_STATE();
+      if (vt->parser.emit_nul) {
+        do_control(vt, c);
+      }
+      continue;
+    } else if (c == 0x1b) {  // ESC
+      vt->parser.intermedlen = 0;
+      if (!IS_STRING_STATE()) {
+        vt->parser.state = NORMAL;
+      }
+      vt->parser.in_esc = true;
+      continue;
+    } else if (c == 0x07 && IS_STRING_STATE()) {
+      // BEL, can stand for ST in OSC or DCS state; handled by the string
+      // states in the switch below.
+    } else if (c < 0x20) {  // other C0
+      if (vt->parser.state == SOS) {
+        continue;  // All other C0s permitted in SOS
+      }
+      if (IS_STRING_STATE()) {
+        string_fragment(vt, string_start, (size_t)(bytes + pos - string_start), false);
+      }
+      do_control(vt, c);
+      if (IS_STRING_STATE()) {
+        string_start = bytes + pos + 1;
+      }
+      continue;
+    }
+
+    size_t string_len = (size_t)(bytes + pos - string_start);
+
+    if (vt->parser.in_esc) {
+      // Hoist an ESC letter into a C1 if we're not in a string mode
+      // Always accept ESC \ == ST even in string mode
+      if (!vt->parser.intermedlen
+          && c >= 0x40 && c < 0x60
+          && ((!IS_STRING_STATE() || c == 0x5c))) {
+        c += 0x40;
+        c1_allowed = true;
+        if (string_len) {
+          // Exclude the ESC byte itself from the string payload.
+          assert(string_len > 0);
+          string_len -= 1;
+        }
+        vt->parser.in_esc = false;
+      } else {
+        string_start = NULL;
+        vt->parser.state = NORMAL;
+      }
+    }
+
+    switch (vt->parser.state) {
+    case CSI_LEADER:
+      // Extract leader bytes 0x3c to 0x3f
+      if (c >= 0x3c && c <= 0x3f) {
+        if (vt->parser.v.csi.leaderlen < CSI_LEADER_MAX - 1) {
+          vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen++] = (char)c;
+        }
+        break;
+      }
+
+      vt->parser.v.csi.leader[vt->parser.v.csi.leaderlen] = 0;
+
+      vt->parser.v.csi.argi = 0;
+      vt->parser.v.csi.args[0] = CSI_ARG_MISSING;
+      vt->parser.state = CSI_ARGS;
+
+      FALLTHROUGH;
+    case CSI_ARGS:
+      // Numerical value of argument
+      if (c >= '0' && c <= '9') {
+        if (vt->parser.v.csi.args[vt->parser.v.csi.argi] == CSI_ARG_MISSING) {
+          vt->parser.v.csi.args[vt->parser.v.csi.argi] = 0;
+        }
+        vt->parser.v.csi.args[vt->parser.v.csi.argi] *= 10;
+        vt->parser.v.csi.args[vt->parser.v.csi.argi] += c - '0';
+        break;
+      }
+      if (c == ':') {
+        vt->parser.v.csi.args[vt->parser.v.csi.argi] |= CSI_ARG_FLAG_MORE;
+        c = ';';
+      }
+      if (c == ';') {
+        // Input is untrusted: never step past the last slot of args[]. Once
+        // the array is full, further separators reuse the final slot, so
+        // surplus arguments overwrite it instead of memory behind the array.
+        if ((size_t)vt->parser.v.csi.argi + 1 < csi_args_cap) {
+          vt->parser.v.csi.argi++;
+        }
+        vt->parser.v.csi.args[vt->parser.v.csi.argi] = CSI_ARG_MISSING;
+        break;
+      }
+
+      // From here on argi is the argument count, not the current index.
+      vt->parser.v.csi.argi++;
+      vt->parser.intermedlen = 0;
+      vt->parser.state = CSI_INTERMED;
+      FALLTHROUGH;
+    case CSI_INTERMED:
+      if (is_intermed(c)) {
+        if (vt->parser.intermedlen < INTERMED_MAX - 1) {
+          vt->parser.intermed[vt->parser.intermedlen++] = (char)c;
+        }
+        break;
+      } else if (c == 0x1b) {
+        // ESC in CSI cancels
+      } else if (c >= 0x40 && c <= 0x7e) {
+        vt->parser.intermed[vt->parser.intermedlen] = 0;
+        do_csi(vt, (char)c);
+      }
+      // else was invalid CSI
+
+      ENTER_NORMAL_STATE();
+      break;
+
+    case OSC_COMMAND:
+      // Numerical value of command
+      if (c >= '0' && c <= '9') {
+        if (vt->parser.v.osc.command == -1) {
+          vt->parser.v.osc.command = 0;
+        } else {
+          vt->parser.v.osc.command *= 10;
+        }
+        vt->parser.v.osc.command += c - '0';
+        break;
+      }
+      if (c == ';') {
+        vt->parser.state = OSC;
+        string_start = bytes + pos + 1;
+        break;
+      }
+
+      // Any other byte already belongs to the OSC payload.
+      string_start = bytes + pos;
+      string_len = 0;
+      vt->parser.state = OSC;
+      goto string_state;
+
+    case DCS_COMMAND:
+      if (vt->parser.v.dcs.commandlen < CSI_LEADER_MAX) {
+        vt->parser.v.dcs.command[vt->parser.v.dcs.commandlen++] = (char)c;
+      }
+
+      if (c >= 0x40 && c <= 0x7e) {
+        string_start = bytes + pos + 1;
+        vt->parser.state = DCS_VTERM;
+      }
+      break;
+
+string_state:
+    case OSC:
+    case DCS_VTERM:
+    case APC:
+    case PM:
+    case SOS:
+      // BEL or ST (0x9c) terminates the string; other bytes stay in the
+      // pending fragment.
+      if (c == 0x07 || (c1_allowed && c == 0x9c)) {
+        string_fragment(vt, string_start, string_len, true);
+        ENTER_NORMAL_STATE();
+      }
+      break;
+
+    case NORMAL:
+      if (vt->parser.in_esc) {
+        if (is_intermed(c)) {
+          if (vt->parser.intermedlen < INTERMED_MAX - 1) {
+            vt->parser.intermed[vt->parser.intermedlen++] = (char)c;
+          }
+        } else if (c >= 0x30 && c < 0x7f) {
+          do_escape(vt, (char)c);
+          vt->parser.in_esc = 0;
+          ENTER_NORMAL_STATE();
+        } else {
+          DEBUG_LOG("TODO: Unhandled byte %02x in Escape\n", c);
+        }
+        break;
+      }
+      if (c1_allowed && c >= 0x80 && c < 0xa0) {
+        switch (c) {
+        case 0x90:  // DCS
+          vt->parser.string_initial = true;
+          vt->parser.v.dcs.commandlen = 0;
+          ENTER_STATE(DCS_COMMAND);
+          break;
+        case 0x98:  // SOS
+          vt->parser.string_initial = true;
+          ENTER_STATE(SOS);
+          string_start = bytes + pos + 1;
+          break;
+        case 0x9b:  // CSI
+          vt->parser.v.csi.leaderlen = 0;
+          ENTER_STATE(CSI_LEADER);
+          break;
+        case 0x9d:  // OSC
+          vt->parser.v.osc.command = -1;
+          vt->parser.string_initial = true;
+          ENTER_STATE(OSC_COMMAND);
+          break;
+        case 0x9e:  // PM
+          vt->parser.string_initial = true;
+          ENTER_STATE(PM);
+          string_start = bytes + pos + 1;
+          break;
+        case 0x9f:  // APC
+          vt->parser.string_initial = true;
+          ENTER_STATE(APC);
+          string_start = bytes + pos + 1;
+          break;
+        default:
+          do_control(vt, c);
+          break;
+        }
+      } else {
+        size_t eaten = 0;
+        if (vt->parser.callbacks && vt->parser.callbacks->text) {
+          eaten = (size_t)(*vt->parser.callbacks->text)(bytes + pos, len - pos, vt->parser.cbdata);
+        }
+
+        if (!eaten) {
+          DEBUG_LOG("libvterm: Text callback did not consume any input\n");
+          // force it to make progress
+          eaten = 1;
+        }
+
+        pos += (eaten - 1);  // we'll ++ it again in a moment
+      }
+      break;
+    }
+  }
+
+  // Flush the unterminated tail of a string payload as a non-final fragment.
+  if (string_start) {
+    size_t string_len = (size_t)(bytes + pos - string_start);
+    if (string_len > 0) {
+      if (vt->parser.in_esc) {
+        // A trailing ESC may begin the terminator; keep it out of the payload.
+        string_len -= 1;
+      }
+      string_fragment(vt, string_start, string_len, false);
+    }
+  }
+
+  return len;
+}
+
+// Installs the parser callback table and the user pointer that is passed back
+// to every callback as its final argument.
+//
+// @param vt         terminal whose parser is configured
+// @param callbacks  callback table to store; stored as given, not copied
+// @param user       opaque pointer stored as the parser's callback data
+void vterm_parser_set_callbacks(VTerm *vt, const VTermParserCallbacks *callbacks, void *user)
+{
+  vt->parser.cbdata = user;
+  vt->parser.callbacks = callbacks;
+}