]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/libsystemd-terminal/term-parser.c
terminal: add parser state-machine
[thirdparty/systemd.git] / src / libsystemd-terminal / term-parser.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com>
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 /*
23 * Terminal Parser
24 * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The
25 * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST
26 * control sequences and generic escape sequences.
27 * The parser itself does not perform any actions but lets the caller react to
28 * detected sequences.
29 */
30
31 #include <stdbool.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include "macro.h"
35 #include "term-internal.h"
36 #include "util.h"
37
/**
 * term_utf8_encode() - Encode a single UCS-4 character as UTF-8
 * @out_utf8: output buffer of at least 4 bytes, or NULL
 * @g: UCS-4 character to encode
 *
 * Writes the UTF-8 representation of @g into @out_utf8. No terminating zero
 * byte is written. If @out_utf8 is NULL, nothing is written and only the
 * length is computed. Values of (1 << 21) and above do not fit into four
 * bytes; for those nothing is written and 0 is returned.
 *
 * Returns: The number of bytes the UTF-8 representation does or would
 *          occupy, or 0 if @g is out of range.
 */
size_t term_utf8_encode(char *out_utf8, uint32_t g) {
        size_t n_bytes;

        /* first figure out how many bytes the character needs */
        if (g < 0x80)
                n_bytes = 1;
        else if (g < 0x800)
                n_bytes = 2;
        else if (g < 0x10000)
                n_bytes = 3;
        else if (g < 0x200000)
                n_bytes = 4;
        else
                return 0;

        /* caller only asked for the length */
        if (!out_utf8)
                return n_bytes;

        switch (n_bytes) {
        case 1:
                out_utf8[0] = g & 0x7f;
                break;
        case 2:
                out_utf8[0] = 0xc0 | ((g >> 6) & 0x1f);
                out_utf8[1] = 0x80 | (g & 0x3f);
                break;
        case 3:
                out_utf8[0] = 0xe0 | ((g >> 12) & 0x0f);
                out_utf8[1] = 0x80 | ((g >> 6) & 0x3f);
                out_utf8[2] = 0x80 | (g & 0x3f);
                break;
        case 4:
                out_utf8[0] = 0xf0 | ((g >> 18) & 0x07);
                out_utf8[1] = 0x80 | ((g >> 12) & 0x3f);
                out_utf8[2] = 0x80 | ((g >> 6) & 0x3f);
                out_utf8[3] = 0x80 | (g & 0x3f);
                break;
        }

        return n_bytes;
}
80
81 /**
82 * term_utf8_decode() - Try decoding the next UCS-4 character
83 * @p: decoder object to operate on or NULL
84 * @out_len: output buffer for length of decoded UCS-4 string or NULL
85 * @c: next char to push into decoder
86 *
87 * This decodes a UTF-8 stream. It must be called for each input-byte of the
88 * UTF-8 stream and returns a UCS-4 stream. The length of the returned UCS-4
89 * string (number of parsed characters) is stored in @out_len if non-NULL. A
90 * pointer to the string is returned (or NULL if none was parsed). The string
91 * is not zero-terminated! Furthermore, the string is only valid until the next
92 * invokation of this function. It is also bound to the parser-state @p.
93 *
94 * This function is highly optimized to work with terminal-emulators. Instead
95 * of being strict about UTF-8 validity, this tries to perform a fallback to
96 * ISO-8859-1 in case a wrong series was detected. Therefore, this function
97 * might return multiple UCS-4 characters by parsing just a single UTF-8 byte.
98 *
99 * The parser state @p should be allocated and managed by the caller. There're
100 * no helpers to do that for you. To initialize it, simply reset it to all
101 * zero. You can reset or free the object at any point in time.
102 *
103 * Returns: Pointer to the UCS-4 string or NULL.
104 */
105 const uint32_t *term_utf8_decode(term_utf8 *p, size_t *out_len, char c) {
106 uint32_t t, *res = NULL;
107 uint8_t byte;
108 size_t len = 0;
109
110 if (!p)
111 goto out;
112
113 byte = c;
114
115 if (!p->valid || p->i_bytes >= p->n_bytes) {
116 /*
117 * If the previous sequence was invalid or fully parsed, start
118 * parsing a fresh new sequence.
119 */
120
121 if ((byte & 0xE0) == 0xC0) {
122 /* start of two byte sequence */
123 t = byte & 0x1F;
124 p->n_bytes = 2;
125 p->i_bytes = 1;
126 p->valid = 1;
127 } else if ((byte & 0xF0) == 0xE0) {
128 /* start of three byte sequence */
129 t = byte & 0x0F;
130 p->n_bytes = 3;
131 p->i_bytes = 1;
132 p->valid = 1;
133 } else if ((byte & 0xF8) == 0xF0) {
134 /* start of four byte sequence */
135 t = byte & 0x07;
136 p->n_bytes = 4;
137 p->i_bytes = 1;
138 p->valid = 1;
139 } else {
140 /* Either of:
141 * - single ASCII 7-bit char
142 * - out-of-sync continuation byte
143 * - overlong encoding
144 * All of them are treated as single byte ISO-8859-1 */
145 t = byte;
146 p->n_bytes = 1;
147 p->i_bytes = 1;
148 p->valid = 0;
149 }
150
151 p->chars[0] = byte;
152 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
153 } else {
154 /*
155 * ..otherwise, try to continue the previous sequence..
156 */
157
158 if ((byte & 0xC0) == 0x80) {
159 /*
160 * Valid continuation byte. Append to sequence and
161 * update the ucs4 cache accordingly.
162 */
163
164 t = byte & 0x3F;
165 p->chars[p->i_bytes++] = byte;
166 p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes));
167 } else {
168 /*
169 * Invalid continuation? Treat cached sequence as
170 * ISO-8859-1, but parse the new char as valid new
171 * starting character. If it's a new single-byte UTF-8
172 * sequence, we immediately return it in the same run,
173 * otherwise, we might suffer from starvation.
174 */
175
176 if ((byte & 0xE0) == 0xC0 ||
177 (byte & 0xF0) == 0xE0 ||
178 (byte & 0xF8) == 0xF0) {
179 /*
180 * New multi-byte sequence. Move to-be-returned
181 * data at the end and start new sequence. Only
182 * return the old sequence.
183 */
184
185 memmove(p->chars + 1,
186 p->chars,
187 sizeof(*p->chars) * p->i_bytes);
188 res = p->chars + 1;
189 len = p->i_bytes;
190
191 if ((byte & 0xE0) == 0xC0) {
192 /* start of two byte sequence */
193 t = byte & 0x1F;
194 p->n_bytes = 2;
195 p->i_bytes = 1;
196 p->valid = 1;
197 } else if ((byte & 0xF0) == 0xE0) {
198 /* start of three byte sequence */
199 t = byte & 0x0F;
200 p->n_bytes = 3;
201 p->i_bytes = 1;
202 p->valid = 1;
203 } else if ((byte & 0xF8) == 0xF0) {
204 /* start of four byte sequence */
205 t = byte & 0x07;
206 p->n_bytes = 4;
207 p->i_bytes = 1;
208 p->valid = 1;
209 }
210
211 p->chars[0] = byte;
212 p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes));
213
214 goto out;
215 } else {
216 /*
217 * New single byte sequence, append to output
218 * and return combined sequence.
219 */
220
221 p->chars[p->i_bytes++] = byte;
222 p->valid = 0;
223 }
224 }
225 }
226
227 /*
228 * Check whether a full sequence (valid or invalid) has been parsed and
229 * then return it. Otherwise, return nothing.
230 */
231 if (p->valid) {
232 /* still parsing? then bail out */
233 if (p->i_bytes < p->n_bytes)
234 goto out;
235
236 res = &p->ucs4;
237 len = 1;
238 } else {
239 res = p->chars;
240 len = p->i_bytes;
241 }
242
243 p->valid = 0;
244 p->i_bytes = 0;
245 p->n_bytes = 0;
246
247 out:
248 if (out_len)
249 *out_len = len;
250 return len > 0 ? res : NULL;
251 }
252
253 /*
254 * Command Parser
255 * The ctl-seq parser "term_parser" only detects whole sequences, it does not
256 * detect the specific command. Once a sequence is parsed, the command-parsers
257 * are used to figure out their meaning. Note that this depends on whether we
258 * run on the host or terminal side.
259 */
260
261 static unsigned int term_parse_host_control(const term_seq *seq) {
262 assert_return(seq, TERM_CMD_NONE);
263
264 switch (seq->terminator) {
265 case 0x00: /* NUL */
266 return TERM_CMD_NULL;
267 case 0x05: /* ENQ */
268 return TERM_CMD_ENQ;
269 case 0x07: /* BEL */
270 return TERM_CMD_BEL;
271 case 0x08: /* BS */
272 return TERM_CMD_BS;
273 case 0x09: /* HT */
274 return TERM_CMD_HT;
275 case 0x0a: /* LF */
276 return TERM_CMD_LF;
277 case 0x0b: /* VT */
278 return TERM_CMD_VT;
279 case 0x0c: /* FF */
280 return TERM_CMD_FF;
281 case 0x0d: /* CR */
282 return TERM_CMD_CR;
283 case 0x0e: /* SO */
284 return TERM_CMD_SO;
285 case 0x0f: /* SI */
286 return TERM_CMD_SI;
287 case 0x11: /* DC1 */
288 return TERM_CMD_DC1;
289 case 0x13: /* DC3 */
290 return TERM_CMD_DC3;
291 case 0x18: /* CAN */
292 /* this is already handled by the state-machine */
293 break;
294 case 0x1a: /* SUB */
295 return TERM_CMD_SUB;
296 case 0x1b: /* ESC */
297 /* this is already handled by the state-machine */
298 break;
299 case 0x1f: /* DEL */
300 /* this is already handled by the state-machine */
301 break;
302 case 0x84: /* IND */
303 return TERM_CMD_IND;
304 case 0x85: /* NEL */
305 return TERM_CMD_NEL;
306 case 0x88: /* HTS */
307 return TERM_CMD_HTS;
308 case 0x8d: /* RI */
309 return TERM_CMD_RI;
310 case 0x8e: /* SS2 */
311 return TERM_CMD_SS2;
312 case 0x8f: /* SS3 */
313 return TERM_CMD_SS3;
314 case 0x90: /* DCS */
315 /* this is already handled by the state-machine */
316 break;
317 case 0x96: /* SPA */
318 return TERM_CMD_SPA;
319 case 0x97: /* EPA */
320 return TERM_CMD_EPA;
321 case 0x98: /* SOS */
322 /* this is already handled by the state-machine */
323 break;
324 case 0x9a: /* DECID */
325 return TERM_CMD_DECID;
326 case 0x9b: /* CSI */
327 /* this is already handled by the state-machine */
328 break;
329 case 0x9c: /* ST */
330 return TERM_CMD_ST;
331 case 0x9d: /* OSC */
332 /* this is already handled by the state-machine */
333 break;
334 case 0x9e: /* PM */
335 /* this is already handled by the state-machine */
336 break;
337 case 0x9f: /* APC */
338 /* this is already handled by the state-machine */
339 break;
340 }
341
342 return TERM_CMD_NONE;
343 }
344
345 static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) {
346 static const struct {
347 uint32_t raw;
348 unsigned int flags;
349 } charset_cmds[] = {
350 /* 96-compat charsets */
351 [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 },
352 [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 },
353 [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 },
354 [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 },
355 [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 },
356 [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 },
357
358 /* 94-compat charsets */
359 [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 },
360 [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT },
361 [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 },
362 [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND },
363 [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 },
364 [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 },
365 [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 },
366 [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 },
367 [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 },
368 [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE },
369 [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE },
370 [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE },
371 [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT },
372 [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 },
373 [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 },
374 [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT },
375 [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND },
376 [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT },
377 [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 },
378 [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 },
379 [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 },
380 [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT },
381 [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT },
382
383 /* special charsets */
384 [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 },
385
386 /* secondary choices */
387 [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 },
388 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 },
389 [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 },
390 [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 },
391 [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */
392
393 /* tertiary choices */
394 [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 },
395 };
396 size_t i, cs;
397
398 /*
399 * Secondary choice on SWEDISH_NRCS and primary choice on
400 * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0.
401 * We always choose the ISO 96-compat set, which is what VT510 does.
402 */
403
404 for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) {
405 if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) {
406 cs = i;
407 while (cs >= TERM_CHARSET_CNT)
408 cs -= TERM_CHARSET_CNT;
409
410 if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT)
411 return cs;
412 }
413 }
414
415 return -ENOENT;
416 }
417
/*
 * Returns true if exactly one bit in @value is set.
 *
 * Clearing the lowest set bit (value & (value - 1)) leaves zero only when
 * there was a single bit to begin with; zero itself is excluded explicitly.
 */
static inline bool exactly_one_bit_set(unsigned int value) {
        if (value == 0)
                return false;

        return (value & (value - 1)) == 0;
}
422
423 static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) {
424 unsigned int t, flags;
425 int cs;
426
427 assert_return(seq, TERM_CMD_NONE);
428
429 flags = seq->intermediates;
430 t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT |
431 TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT |
432 TERM_SEQ_FLAG_SLASH;
433
434 if (exactly_one_bit_set(flags & t)) {
435 switch (flags & t) {
436 case TERM_SEQ_FLAG_POPEN:
437 case TERM_SEQ_FLAG_PCLOSE:
438 case TERM_SEQ_FLAG_MULT:
439 case TERM_SEQ_FLAG_PLUS:
440 cs = charset_from_cmd(seq->terminator, flags & ~t, false);
441 break;
442 case TERM_SEQ_FLAG_MINUS:
443 case TERM_SEQ_FLAG_DOT:
444 case TERM_SEQ_FLAG_SLASH:
445 cs = charset_from_cmd(seq->terminator, flags & ~t, true);
446 break;
447 default:
448 cs = -ENOENT;
449 break;
450 }
451
452 if (cs >= 0) {
453 if (cs_out)
454 *cs_out = cs;
455 return TERM_CMD_SCS;
456 }
457
458 /* looked like a charset-cmd but wasn't; continue */
459 }
460
461 switch (seq->terminator) {
462 case '3':
463 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */
464 return TERM_CMD_DECDHL_TH;
465 break;
466 case '4':
467 if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */
468 return TERM_CMD_DECDHL_BH;
469 break;
470 case '5':
471 if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */
472 return TERM_CMD_DECSWL;
473 break;
474 case '6':
475 if (flags == 0) /* DECBI */
476 return TERM_CMD_DECBI;
477 else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */
478 return TERM_CMD_DECDWL;
479 break;
480 case '7':
481 if (flags == 0) /* DECSC */
482 return TERM_CMD_DECSC;
483 break;
484 case '8':
485 if (flags == 0) /* DECRC */
486 return TERM_CMD_DECRC;
487 else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */
488 return TERM_CMD_DECALN;
489 break;
490 case '9':
491 if (flags == 0) /* DECFI */
492 return TERM_CMD_DECFI;
493 break;
494 case '<':
495 if (flags == 0) /* DECANM */
496 return TERM_CMD_DECANM;
497 break;
498 case '=':
499 if (flags == 0) /* DECKPAM */
500 return TERM_CMD_DECKPAM;
501 break;
502 case '>':
503 if (flags == 0) /* DECKPNM */
504 return TERM_CMD_DECKPNM;
505 break;
506 case '@':
507 if (flags == TERM_SEQ_FLAG_PERCENT) {
508 /* Select default character set */
509 return TERM_CMD_XTERM_SDCS;
510 }
511 break;
512 case 'D':
513 if (flags == 0) /* IND */
514 return TERM_CMD_IND;
515 break;
516 case 'E':
517 if (flags == 0) /* NEL */
518 return TERM_CMD_NEL;
519 break;
520 case 'F':
521 if (flags == 0) /* Cursor to lower-left corner of screen */
522 return TERM_CMD_XTERM_CLLHP;
523 else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */
524 return TERM_CMD_S7C1T;
525 break;
526 case 'G':
527 if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */
528 return TERM_CMD_S8C1T;
529 } else if (flags == TERM_SEQ_FLAG_PERCENT) {
530 /* Select UTF-8 character set */
531 return TERM_CMD_XTERM_SUCS;
532 }
533 break;
534 case 'H':
535 if (flags == 0) /* HTS */
536 return TERM_CMD_HTS;
537 break;
538 case 'L':
539 if (flags == TERM_SEQ_FLAG_SPACE) {
540 /* Set ANSI conformance level 1 */
541 return TERM_CMD_XTERM_SACL1;
542 }
543 break;
544 case 'M':
545 if (flags == 0) { /* RI */
546 return TERM_CMD_RI;
547 } else if (flags == TERM_SEQ_FLAG_SPACE) {
548 /* Set ANSI conformance level 2 */
549 return TERM_CMD_XTERM_SACL2;
550 }
551 break;
552 case 'N':
553 if (flags == 0) { /* SS2 */
554 return TERM_CMD_SS2;
555 } else if (flags == TERM_SEQ_FLAG_SPACE) {
556 /* Set ANSI conformance level 3 */
557 return TERM_CMD_XTERM_SACL3;
558 }
559 break;
560 case 'O':
561 if (flags == 0) /* SS3 */
562 return TERM_CMD_SS3;
563 break;
564 case 'P':
565 if (flags == 0) /* DCS: this is already handled by the state-machine */
566 return 0;
567 break;
568 case 'V':
569 if (flags == 0) /* SPA */
570 return TERM_CMD_SPA;
571 break;
572 case 'W':
573 if (flags == 0) /* EPA */
574 return TERM_CMD_EPA;
575 break;
576 case 'X':
577 if (flags == 0) { /* SOS */
578 /* this is already handled by the state-machine */
579 break;
580 }
581 break;
582 case 'Z':
583 if (flags == 0) /* DECID */
584 return TERM_CMD_DECID;
585 break;
586 case '[':
587 if (flags == 0) { /* CSI */
588 /* this is already handled by the state-machine */
589 break;
590 }
591 break;
592 case '\\':
593 if (flags == 0) /* ST */
594 return TERM_CMD_ST;
595 break;
596 case ']':
597 if (flags == 0) { /* OSC */
598 /* this is already handled by the state-machine */
599 break;
600 }
601 break;
602 case '^':
603 if (flags == 0) { /* PM */
604 /* this is already handled by the state-machine */
605 break;
606 }
607 break;
608 case '_':
609 if (flags == 0) { /* APC */
610 /* this is already handled by the state-machine */
611 break;
612 }
613 break;
614 case 'c':
615 if (flags == 0) /* RIS */
616 return TERM_CMD_RIS;
617 break;
618 case 'l':
619 if (flags == 0) /* Memory lock */
620 return TERM_CMD_XTERM_MLHP;
621 break;
622 case 'm':
623 if (flags == 0) /* Memory unlock */
624 return TERM_CMD_XTERM_MUHP;
625 break;
626 case 'n':
627 if (flags == 0) /* LS2 */
628 return TERM_CMD_LS2;
629 break;
630 case 'o':
631 if (flags == 0) /* LS3 */
632 return TERM_CMD_LS3;
633 break;
634 case '|':
635 if (flags == 0) /* LS3R */
636 return TERM_CMD_LS3R;
637 break;
638 case '}':
639 if (flags == 0) /* LS2R */
640 return TERM_CMD_LS2R;
641 break;
642 case '~':
643 if (flags == 0) /* LS1R */
644 return TERM_CMD_LS1R;
645 break;
646 }
647
648 return TERM_CMD_NONE;
649 }
650
/*
 * Figure out which host-side command a CSI sequence represents.
 *
 * The final character (seq->terminator) selects the case; within a case the
 * set of intermediates (seq->intermediates, a combination of TERM_SEQ_FLAG_*)
 * must match exactly. A few sequences are ambiguous on terminator and
 * intermediates alone and are told apart by seq->n_args or seq->args[0].
 *
 * Returns the TERM_CMD_* value of the command, or TERM_CMD_NONE if @seq is
 * NULL (via assert_return) or no command matches.
 */
651 static unsigned int term_parse_host_csi(const term_seq *seq) {
652 unsigned int flags;
653
654 assert_return(seq, TERM_CMD_NONE);
655
656 flags = seq->intermediates;
657
658 switch (seq->terminator) {
659 case 'A':
660 if (flags == 0) /* CUU */
661 return TERM_CMD_CUU;
662 break;
663 case 'a':
664 if (flags == 0) /* HPR */
665 return TERM_CMD_HPR;
666 break;
667 case 'B':
668 if (flags == 0) /* CUD */
669 return TERM_CMD_CUD;
670 break;
671 case 'b':
672 if (flags == 0) /* REP */
673 return TERM_CMD_REP;
674 break;
675 case 'C':
676 if (flags == 0) /* CUF */
677 return TERM_CMD_CUF;
678 break;
679 case 'c':
680 if (flags == 0) /* DA1 */
681 return TERM_CMD_DA1;
682 else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */
683 return TERM_CMD_DA2;
684 else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */
685 return TERM_CMD_DA3;
686 break;
687 case 'D':
688 if (flags == 0) /* CUB */
689 return TERM_CMD_CUB;
690 break;
691 case 'd':
692 if (flags == 0) /* VPA */
693 return TERM_CMD_VPA;
694 break;
695 case 'E':
696 if (flags == 0) /* CNL */
697 return TERM_CMD_CNL;
698 break;
699 case 'e':
700 if (flags == 0) /* VPR */
701 return TERM_CMD_VPR;
702 break;
703 case 'F':
704 if (flags == 0) /* CPL */
705 return TERM_CMD_CPL;
706 break;
707 case 'f':
708 if (flags == 0) /* HVP */
709 return TERM_CMD_HVP;
710 break;
711 case 'G':
712 if (flags == 0) /* CHA */
713 return TERM_CMD_CHA;
714 break;
715 case 'g':
716 if (flags == 0) /* TBC */
717 return TERM_CMD_TBC;
718 else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */
719 return TERM_CMD_DECLFKC;
720 break;
721 case 'H':
722 if (flags == 0) /* CUP */
723 return TERM_CMD_CUP;
724 break;
725 case 'h':
726 if (flags == 0) /* SM ANSI */
727 return TERM_CMD_SM_ANSI;
728 else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */
729 return TERM_CMD_SM_DEC;
730 break;
731 case 'I':
732 if (flags == 0) /* CHT */
733 return TERM_CMD_CHT;
734 break;
735 case 'i':
736 if (flags == 0) /* MC ANSI */
737 return TERM_CMD_MC_ANSI;
738 else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */
739 return TERM_CMD_MC_DEC;
740 break;
741 case 'J':
742 if (flags == 0) /* ED */
743 return TERM_CMD_ED;
744 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */
745 return TERM_CMD_DECSED;
746 break;
747 case 'K':
748 if (flags == 0) /* EL */
749 return TERM_CMD_EL;
750 else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */
751 return TERM_CMD_DECSEL;
752 break;
753 case 'L':
754 if (flags == 0) /* IL */
755 return TERM_CMD_IL;
756 break;
757 case 'l':
758 if (flags == 0) /* RM ANSI */
759 return TERM_CMD_RM_ANSI;
760 else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */
761 return TERM_CMD_RM_DEC;
762 break;
763 case 'M':
764 if (flags == 0) /* DL */
765 return TERM_CMD_DL;
766 break;
767 case 'm':
768 if (flags == 0) /* SGR */
769 return TERM_CMD_SGR;
770 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */
771 return TERM_CMD_XTERM_SRV;
772 break;
773 case 'n':
774 if (flags == 0) /* DSR ANSI */
775 return TERM_CMD_DSR_ANSI;
776 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */
777 return TERM_CMD_XTERM_RRV;
778 else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */
779 return TERM_CMD_DSR_DEC;
780 break;
781 case 'P':
782 if (flags == 0) /* DCH */
783 return TERM_CMD_DCH;
784 else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */
785 return TERM_CMD_PPA;
786 break;
787 case 'p':
788 if (flags == 0) /* DECSSL */
789 return TERM_CMD_DECSSL;
790 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */
791 return TERM_CMD_DECSSCLS;
792 else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */
793 return TERM_CMD_DECSTR;
794 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */
795 return TERM_CMD_DECSCL;
796 else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */
797 return TERM_CMD_DECRQM_ANSI;
798 else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */
799 return TERM_CMD_DECRQM_DEC;
800 else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */
801 return TERM_CMD_DECSDPT;
802 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */
803 return TERM_CMD_DECSPPCS;
804 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */
805 return TERM_CMD_DECSR;
806 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */
807 return TERM_CMD_DECLTOD;
808 else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */
/* NOTE(review): TERM_CMD_XTERM_SPM is also returned for 's' with '?' below; confirm both are meant to map to the same command. */
809 return TERM_CMD_XTERM_SPM;
810 break;
811 case 'Q':
812 if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */
813 return TERM_CMD_PPR;
814 break;
815 case 'q':
816 if (flags == 0) /* DECLL */
817 return TERM_CMD_DECLL;
818 else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */
819 return TERM_CMD_DECSCUSR;
820 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */
821 return TERM_CMD_DECSCA;
822 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */
823 return TERM_CMD_DECSDDT;
824 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */
/* NOTE(review): labelled DECSRC but returns TERM_CMD_DECSR, the same value as 'p' with '+' above; looks like a typo, confirm against the TERM_CMD_* enum. */
825 return TERM_CMD_DECSR;
826 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */
827 return TERM_CMD_DECELF;
828 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */
829 return TERM_CMD_DECTID;
830 break;
831 case 'R':
832 if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */
833 return TERM_CMD_PPB;
834 break;
835 case 'r':
836 if (flags == 0) {
837 /* DECSTBM */
838 return TERM_CMD_DECSTBM;
839 } else if (flags == TERM_SEQ_FLAG_SPACE) {
840 /* DECSKCV */
841 return TERM_CMD_DECSKCV;
842 } else if (flags == TERM_SEQ_FLAG_CASH) {
843 /* DECCARA */
844 return TERM_CMD_DECCARA;
845 } else if (flags == TERM_SEQ_FLAG_MULT) {
846 /* DECSCS */
847 return TERM_CMD_DECSCS;
848 } else if (flags == TERM_SEQ_FLAG_PLUS) {
849 /* DECSMKR */
850 return TERM_CMD_DECSMKR;
851 } else if (flags == TERM_SEQ_FLAG_WHAT) {
852 /*
853 * There's a conflict between DECPCTERM and XTERM-RPM.
854 * XTERM-RPM takes a single argument, DECPCTERM takes 2.
855 * Split both up and forward the call to the closer
856 * match.
857 */
858 if (seq->n_args <= 1) /* XTERM RPM */
859 return TERM_CMD_XTERM_RPM;
860 else if (seq->n_args >= 2) /* DECPCTERM */
861 return TERM_CMD_DECPCTERM;
862 }
863 break;
864 case 'S':
865 if (flags == 0) /* SU */
866 return TERM_CMD_SU;
867 else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */
868 return TERM_CMD_XTERM_SGFX;
869 break;
870 case 's':
871 if (flags == 0) {
872 /*
873 * There's a conflict between DECSLRM and SC-ANSI which
874 * cannot be resolved without knowing the state of
875 * DECLRMM. We leave that decision up to the caller.
876 */
877 return TERM_CMD_DECSLRM_OR_SC;
878 } else if (flags == TERM_SEQ_FLAG_CASH) {
879 /* DECSPRTT */
880 return TERM_CMD_DECSPRTT;
881 } else if (flags == TERM_SEQ_FLAG_MULT) {
882 /* DECSFC */
883 return TERM_CMD_DECSFC;
884 } else if (flags == TERM_SEQ_FLAG_WHAT) {
885 /* XTERM SPM */
886 return TERM_CMD_XTERM_SPM;
887 }
888 break;
889 case 'T':
890 if (flags == 0) {
891 /*
892 * Awesome: There's a conflict between SD and XTERM IHMT
893 * that we have to resolve by checking the parameter
894 * count.. XTERM_IHMT needs exactly 5 arguments, SD
895 * takes 0 or 1. We're conservative here and give both
896 * a wider range to allow unused arguments (compat...).
897 */
898 if (seq->n_args >= 5) {
899 /* XTERM IHMT */
900 return TERM_CMD_XTERM_IHMT;
901 } else if (seq->n_args < 5) {
902 /* SD */
903 return TERM_CMD_SD;
904 }
905 } else if (flags == TERM_SEQ_FLAG_GT) {
906 /* XTERM RTM */
907 return TERM_CMD_XTERM_RTM;
908 }
909 break;
910 case 't':
911 if (flags == 0) {
/* XTERM WM and DECSLPP share this sequence; a first argument below 24 selects XTERM WM, anything else (including no argument) DECSLPP. */
912 if (seq->n_args > 0 && seq->args[0] < 24) {
913 /* XTERM WM */
914 return TERM_CMD_XTERM_WM;
915 } else {
916 /* DECSLPP */
917 return TERM_CMD_DECSLPP;
918 }
919 } else if (flags == TERM_SEQ_FLAG_SPACE) {
920 /* DECSWBV */
921 return TERM_CMD_DECSWBV;
922 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
923 /* DECSRFR */
924 return TERM_CMD_DECSRFR;
925 } else if (flags == TERM_SEQ_FLAG_CASH) {
926 /* DECRARA */
927 return TERM_CMD_DECRARA;
928 } else if (flags == TERM_SEQ_FLAG_GT) {
929 /* XTERM STM */
930 return TERM_CMD_XTERM_STM;
931 }
932 break;
933 case 'U':
934 if (flags == 0) /* NP */
935 return TERM_CMD_NP;
936 break;
937 case 'u':
938 if (flags == 0) {
939 /* RC */
940 return TERM_CMD_RC;
941 } else if (flags == TERM_SEQ_FLAG_SPACE) {
942 /* DECSMBV */
943 return TERM_CMD_DECSMBV;
944 } else if (flags == TERM_SEQ_FLAG_DQUOTE) {
945 /* DECSTRL */
946 return TERM_CMD_DECSTRL;
947 } else if (flags == TERM_SEQ_FLAG_WHAT) {
948 /* DECRQUPSS */
949 return TERM_CMD_DECRQUPSS;
/* NOTE(review): args[0] is read without checking n_args, unlike the 't' case; presumably unset argument slots hold a value that never equals 1, confirm against the argument reset code. */
950 } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) {
951 /* DECRQTSR */
952 return TERM_CMD_DECRQTSR;
953 } else if (flags == TERM_SEQ_FLAG_MULT) {
954 /* DECSCP */
955 return TERM_CMD_DECSCP;
956 } else if (flags == TERM_SEQ_FLAG_COMMA) {
957 /* DECRQKT */
958 return TERM_CMD_DECRQKT;
959 }
960 break;
961 case 'V':
962 if (flags == 0) /* PP */
963 return TERM_CMD_PP;
964 break;
965 case 'v':
966 if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */
967 return TERM_CMD_DECSLCK;
968 else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */
969 return TERM_CMD_DECRQDE;
970 else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */
971 return TERM_CMD_DECCRA;
972 else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */
973 return TERM_CMD_DECRPKT;
974 break;
975 case 'W':
/* NOTE(review): args[0] is read without checking n_args here as well; see the 'u' case. */
976 if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) {
977 /* DECST8C */
978 return TERM_CMD_DECST8C;
979 }
980 break;
981 case 'w':
982 if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */
983 return TERM_CMD_DECRQPSR;
984 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */
985 return TERM_CMD_DECEFR;
986 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */
987 return TERM_CMD_DECSPP;
988 break;
989 case 'X':
990 if (flags == 0) /* ECH */
991 return TERM_CMD_ECH;
992 break;
993 case 'x':
994 if (flags == 0) /* DECREQTPARM */
995 return TERM_CMD_DECREQTPARM;
996 else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */
997 return TERM_CMD_DECFRA;
998 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */
999 return TERM_CMD_DECSACE;
1000 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */
1001 return TERM_CMD_DECRQPKFM;
1002 break;
1003 case 'y':
1004 if (flags == 0) /* DECTST */
1005 return TERM_CMD_DECTST;
1006 else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */
1007 return TERM_CMD_DECRQCRA;
1008 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */
1009 return TERM_CMD_DECPKFMR;
1010 break;
1011 case 'Z':
1012 if (flags == 0) /* CBT */
1013 return TERM_CMD_CBT;
1014 break;
1015 case 'z':
1016 if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */
1017 return TERM_CMD_DECERA;
1018 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */
1019 return TERM_CMD_DECELR;
1020 else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */
1021 return TERM_CMD_DECINVM;
1022 else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */
1023 return TERM_CMD_DECPKA;
1024 break;
1025 case '@':
1026 if (flags == 0) /* ICH */
1027 return TERM_CMD_ICH;
1028 break;
1029 case '`':
1030 if (flags == 0) /* HPA */
1031 return TERM_CMD_HPA;
1032 break;
1033 case '{':
1034 if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */
1035 return TERM_CMD_DECSERA;
1036 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */
1037 return TERM_CMD_DECSLE;
1038 break;
1039 case '|':
1040 if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */
1041 return TERM_CMD_DECSCPP;
1042 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */
1043 return TERM_CMD_DECRQLP;
1044 else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */
1045 return TERM_CMD_DECSNLS;
1046 break;
1047 case '}':
1048 if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */
1049 return TERM_CMD_DECKBD;
1050 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */
1051 return TERM_CMD_DECSASD;
1052 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */
1053 return TERM_CMD_DECIC;
1054 break;
1055 case '~':
1056 if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */
1057 return TERM_CMD_DECTME;
1058 else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */
1059 return TERM_CMD_DECSSDT;
1060 else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */
1061 return TERM_CMD_DECDC;
1062 break;
1063 }
1064
/* terminator unknown, or known but not with this set of intermediates */
1065 return TERM_CMD_NONE;
1066 }
1067
1068 /*
1069 * State Machine
1070 * This parser controls the parser-state and returns any detected sequence to
1071 * the caller. The parser is based on this state-diagram from Paul Williams:
1072 * http://vt100.net/emu/
1073 * It was written from scratch and extended where needed.
1074 * This parser is fully compatible up to the vt500 series. We expect UCS-4 as
1075 * input. It's the caller's responsibility to do any UTF-8 parsing.
1076 */
1077
/*
 * States of the control-sequence state-machine. STATE_NONE is a placeholder
 * and not a real state; STATE_NUM is the last enumerator and thus equals the
 * number of entries preceding it.
 */
1078 enum parser_state {
1079 STATE_NONE, /* placeholder */
1080 STATE_GROUND, /* initial state and ground */
1081 STATE_ESC, /* ESC sequence was started */
1082 STATE_ESC_INT, /* intermediate escape characters */
1083 STATE_CSI_ENTRY, /* starting CSI sequence */
1084 STATE_CSI_PARAM, /* CSI parameters */
1085 STATE_CSI_INT, /* intermediate CSI characters */
1086 STATE_CSI_IGNORE, /* CSI error; ignore this CSI sequence */
1087 STATE_DCS_ENTRY, /* starting DCS sequence */
1088 STATE_DCS_PARAM, /* DCS parameters */
1089 STATE_DCS_INT, /* intermediate DCS characters */
1090 STATE_DCS_PASS, /* DCS data passthrough */
1091 STATE_DCS_IGNORE, /* DCS error; ignore this DCS sequence */
1092 STATE_OSC_STRING, /* parsing OSC sequence */
1093 STATE_ST_IGNORE, /* unimplemented seq; ignore until ST */
1094 STATE_NUM
1095 };
1096
/*
 * Actions the state machine can attach to a transition. The numeric values
 * follow the declaration order, starting at zero.
 */
enum parser_action {
        ACTION_NONE = 0,        /* placeholder; nothing to do */
        ACTION_CLEAR,           /* reset the collected sequence data */
        ACTION_IGNORE,          /* ignore the character entirely */
        ACTION_PRINT,           /* print the character on the console */
        ACTION_EXECUTE,         /* execute a single control character (C0/C1) */
        ACTION_COLLECT,         /* collect an intermediate character */
        ACTION_PARAM,           /* collect a parameter character */
        ACTION_ESC_DISPATCH,    /* dispatch an escape sequence */
        ACTION_CSI_DISPATCH,    /* dispatch a CSI sequence */
        ACTION_DCS_START,       /* start of DCS data */
        ACTION_DCS_COLLECT,     /* collect DCS data */
        ACTION_DCS_CONSUME,     /* consume DCS terminator */
        ACTION_DCS_DISPATCH,    /* dispatch a DCS sequence */
        ACTION_OSC_START,       /* start of OSC data */
        ACTION_OSC_COLLECT,     /* collect OSC data */
        ACTION_OSC_CONSUME,     /* consume OSC terminator */
        ACTION_OSC_DISPATCH,    /* dispatch an OSC sequence */
        ACTION_NUM              /* number of actions; not an action itself */
};
1117
1118 int term_parser_new(term_parser **out, bool host) {
1119 _term_parser_free_ term_parser *parser = NULL;
1120
1121 assert_return(out, -EINVAL);
1122
1123 parser = new0(term_parser, 1);
1124 if (!parser)
1125 return -ENOMEM;
1126
1127 parser->is_host = host;
1128 parser->st_alloc = 64;
1129 parser->seq.st = new0(char, parser->st_alloc + 1);
1130 if (!parser->seq.st)
1131 return -ENOMEM;
1132
1133 *out = parser;
1134 parser = NULL;
1135 return 0;
1136 }
1137
1138 term_parser *term_parser_free(term_parser *parser) {
1139 if (!parser)
1140 return NULL;
1141
1142 free(parser->seq.st);
1143 free(parser);
1144 return NULL;
1145 }
1146
1147 static inline void parser_clear(term_parser *parser) {
1148 unsigned int i;
1149
1150 parser->seq.command = TERM_CMD_NONE;
1151 parser->seq.terminator = 0;
1152 parser->seq.intermediates = 0;
1153 parser->seq.charset = TERM_CHARSET_NONE;
1154 parser->seq.n_args = 0;
1155 for (i = 0; i < TERM_PARSER_ARG_MAX; ++i)
1156 parser->seq.args[i] = -1;
1157
1158 parser->seq.n_st = 0;
1159 parser->seq.st[0] = 0;
1160 }
1161
1162 static int parser_ignore(term_parser *parser, uint32_t raw) {
1163 parser_clear(parser);
1164 parser->seq.type = TERM_SEQ_IGNORE;
1165 parser->seq.command = TERM_CMD_NONE;
1166 parser->seq.terminator = raw;
1167 parser->seq.charset = TERM_CHARSET_NONE;
1168
1169 return parser->seq.type;
1170 }
1171
1172 static int parser_print(term_parser *parser, uint32_t raw) {
1173 parser_clear(parser);
1174 parser->seq.type = TERM_SEQ_GRAPHIC;
1175 parser->seq.command = TERM_CMD_GRAPHIC;
1176 parser->seq.terminator = raw;
1177 parser->seq.charset = TERM_CHARSET_NONE;
1178
1179 return parser->seq.type;
1180 }
1181
1182 static int parser_execute(term_parser *parser, uint32_t raw) {
1183 parser_clear(parser);
1184 parser->seq.type = TERM_SEQ_CONTROL;
1185 parser->seq.command = TERM_CMD_GRAPHIC;
1186 parser->seq.terminator = raw;
1187 parser->seq.charset = TERM_CHARSET_NONE;
1188 if (!parser->is_host)
1189 parser->seq.command = term_parse_host_control(&parser->seq);
1190
1191 return parser->seq.type;
1192 }
1193
1194 static void parser_collect(term_parser *parser, uint32_t raw) {
1195 /*
1196 * Usually, characters from 0x30 to 0x3f are only allowed as leading
1197 * markers (or as part of the parameters), characters from 0x20 to 0x2f
1198 * are only allowed as trailing markers. However, our state-machine
1199 * already verifies those restrictions so we can handle them the same
1200 * way here. Note that we safely allow markers to be specified multiple
1201 * times.
1202 */
1203
1204 if (raw >= 0x20 && raw <= 0x3f)
1205 parser->seq.intermediates |= 1 << (raw - 0x20);
1206 }
1207
1208 static void parser_param(term_parser *parser, uint32_t raw) {
1209 int new;
1210
1211 if (raw == ';') {
1212 if (parser->seq.n_args < TERM_PARSER_ARG_MAX)
1213 ++parser->seq.n_args;
1214
1215 return;
1216 }
1217
1218 if (parser->seq.n_args >= TERM_PARSER_ARG_MAX)
1219 return;
1220
1221 if (raw >= '0' && raw <= '9') {
1222 new = parser->seq.args[parser->seq.n_args];
1223 if (new < 0)
1224 new = 0;
1225 new = new * 10 + raw - '0';
1226
1227 /* VT510 tells us to clamp all values to [0, 9999], however, it
1228 * also allows commands with values up to 2^15-1. We simply use
1229 * 2^16 as maximum here to be compatible to all commands, but
1230 * avoid overflows in any calculations. */
1231 if (new > 0xffff)
1232 new = 0xffff;
1233
1234 parser->seq.args[parser->seq.n_args] = new;
1235 }
1236 }
1237
1238 static int parser_esc(term_parser *parser, uint32_t raw) {
1239 parser->seq.type = TERM_SEQ_ESCAPE;
1240 parser->seq.command = TERM_CMD_NONE;
1241 parser->seq.terminator = raw;
1242 parser->seq.charset = TERM_CHARSET_NONE;
1243 if (!parser->is_host)
1244 parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset);
1245
1246 return parser->seq.type;
1247 }
1248
1249 static int parser_csi(term_parser *parser, uint32_t raw) {
1250 /* parser->seq is cleared during CSI-ENTER state, thus there's no need
1251 * to clear invalid fields here. */
1252
1253 if (parser->seq.n_args < TERM_PARSER_ARG_MAX) {
1254 if (parser->seq.n_args > 0 ||
1255 parser->seq.args[parser->seq.n_args] >= 0)
1256 ++parser->seq.n_args;
1257 }
1258
1259 parser->seq.type = TERM_SEQ_CSI;
1260 parser->seq.command = TERM_CMD_NONE;
1261 parser->seq.terminator = raw;
1262 parser->seq.charset = TERM_CHARSET_NONE;
1263 if (!parser->is_host)
1264 parser->seq.command = term_parse_host_csi(&parser->seq);
1265
1266 return parser->seq.type;
1267 }
1268
1269 /* perform state transition and dispatch related actions */
1270 static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) {
1271 if (state != STATE_NONE)
1272 parser->state = state;
1273
1274 switch (action) {
1275 case ACTION_NONE:
1276 return TERM_SEQ_NONE;
1277 case ACTION_CLEAR:
1278 parser_clear(parser);
1279 return TERM_SEQ_NONE;
1280 case ACTION_IGNORE:
1281 return parser_ignore(parser, raw);
1282 case ACTION_PRINT:
1283 return parser_print(parser, raw);
1284 case ACTION_EXECUTE:
1285 return parser_execute(parser, raw);
1286 case ACTION_COLLECT:
1287 parser_collect(parser, raw);
1288 return TERM_SEQ_NONE;
1289 case ACTION_PARAM:
1290 parser_param(parser, raw);
1291 return TERM_SEQ_NONE;
1292 case ACTION_ESC_DISPATCH:
1293 return parser_esc(parser, raw);
1294 case ACTION_CSI_DISPATCH:
1295 return parser_csi(parser, raw);
1296 case ACTION_DCS_START:
1297 /* not implemented */
1298 return TERM_SEQ_NONE;
1299 case ACTION_DCS_COLLECT:
1300 /* not implemented */
1301 return TERM_SEQ_NONE;
1302 case ACTION_DCS_CONSUME:
1303 /* not implemented */
1304 return TERM_SEQ_NONE;
1305 case ACTION_DCS_DISPATCH:
1306 /* not implemented */
1307 return TERM_SEQ_NONE;
1308 case ACTION_OSC_START:
1309 /* not implemented */
1310 return TERM_SEQ_NONE;
1311 case ACTION_OSC_COLLECT:
1312 /* not implemented */
1313 return TERM_SEQ_NONE;
1314 case ACTION_OSC_CONSUME:
1315 /* not implemented */
1316 return TERM_SEQ_NONE;
1317 case ACTION_OSC_DISPATCH:
1318 /* not implemented */
1319 return TERM_SEQ_NONE;
1320 default:
1321 assert_not_reached("invalid vte-parser action");
1322 return TERM_SEQ_NONE;
1323 }
1324 }
1325
1326 static int parser_feed_to_state(term_parser *parser, uint32_t raw) {
1327 switch (parser->state) {
1328 case STATE_NONE:
1329 /*
1330 * During initialization, parser->state is cleared. Treat this
1331 * as STATE_GROUND. We will then never get to STATE_NONE again.
1332 */
1333 case STATE_GROUND:
1334 switch (raw) {
1335 case 0x00 ... 0x1f: /* C0 */
1336 case 0x80 ... 0x9b: /* C1 \ { ST } */
1337 case 0x9d ... 0x9f:
1338 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1339 case 0x9c: /* ST */
1340 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1341 }
1342
1343 return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT);
1344 case STATE_ESC:
1345 switch (raw) {
1346 case 0x00 ... 0x1f: /* C0 */
1347 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1348 case 0x20 ... 0x2f: /* [' ' - '\'] */
1349 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1350 case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */
1351 case 0x51 ... 0x57:
1352 case 0x59 ... 0x5a:
1353 case 0x5c:
1354 case 0x60 ... 0x7e:
1355 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1356 case 0x50: /* 'P' */
1357 return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1358 case 0x5b: /* '[' */
1359 return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1360 case 0x5d: /* ']' */
1361 return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1362 case 0x58: /* 'X' */
1363 case 0x5e: /* '^' */
1364 case 0x5f: /* '_' */
1365 return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1366 case 0x7f: /* DEL */
1367 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1368 case 0x9c: /* ST */
1369 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1370 }
1371
1372 return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT);
1373 case STATE_ESC_INT:
1374 switch (raw) {
1375 case 0x00 ... 0x1f: /* C0 */
1376 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1377 case 0x20 ... 0x2f: /* [' ' - '\'] */
1378 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1379 case 0x30 ... 0x7e: /* ['0' - '~'] */
1380 return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH);
1381 case 0x7f: /* DEL */
1382 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1383 case 0x9c: /* ST */
1384 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1385 }
1386
1387 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1388 case STATE_CSI_ENTRY:
1389 switch (raw) {
1390 case 0x00 ... 0x1f: /* C0 */
1391 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1392 case 0x20 ... 0x2f: /* [' ' - '\'] */
1393 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1394 case 0x3a: /* ':' */
1395 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1396 case 0x30 ... 0x39: /* ['0' - '9'] */
1397 case 0x3b: /* ';' */
1398 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM);
1399 case 0x3c ... 0x3f: /* ['<' - '?'] */
1400 return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT);
1401 case 0x40 ... 0x7e: /* ['@' - '~'] */
1402 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1403 case 0x7f: /* DEL */
1404 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1405 case 0x9c: /* ST */
1406 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1407 }
1408
1409 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1410 case STATE_CSI_PARAM:
1411 switch (raw) {
1412 case 0x00 ... 0x1f: /* C0 */
1413 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1414 case 0x20 ... 0x2f: /* [' ' - '\'] */
1415 return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT);
1416 case 0x30 ... 0x39: /* ['0' - '9'] */
1417 case 0x3b: /* ';' */
1418 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1419 case 0x3a: /* ':' */
1420 case 0x3c ... 0x3f: /* ['<' - '?'] */
1421 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1422 case 0x40 ... 0x7e: /* ['@' - '~'] */
1423 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1424 case 0x7f: /* DEL */
1425 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1426 case 0x9c: /* ST */
1427 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1428 }
1429
1430 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1431 case STATE_CSI_INT:
1432 switch (raw) {
1433 case 0x00 ... 0x1f: /* C0 */
1434 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1435 case 0x20 ... 0x2f: /* [' ' - '\'] */
1436 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1437 case 0x30 ... 0x3f: /* ['0' - '?'] */
1438 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1439 case 0x40 ... 0x7e: /* ['@' - '~'] */
1440 return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH);
1441 case 0x7f: /* DEL */
1442 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1443 case 0x9c: /* ST */
1444 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1445 }
1446
1447 return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE);
1448 case STATE_CSI_IGNORE:
1449 switch (raw) {
1450 case 0x00 ... 0x1f: /* C0 */
1451 return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE);
1452 case 0x20 ... 0x3f: /* [' ' - '?'] */
1453 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1454 case 0x40 ... 0x7e: /* ['@' - '~'] */
1455 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1456 case 0x7f: /* DEL */
1457 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1458 case 0x9c: /* ST */
1459 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1460 }
1461
1462 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1463 case STATE_DCS_ENTRY:
1464 switch (raw) {
1465 case 0x00 ... 0x1f: /* C0 */
1466 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1467 case 0x20 ... 0x2f: /* [' ' - '\'] */
1468 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1469 case 0x3a: /* ':' */
1470 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1471 case 0x30 ... 0x39: /* ['0' - '9'] */
1472 case 0x3b: /* ';' */
1473 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM);
1474 case 0x3c ... 0x3f: /* ['<' - '?'] */
1475 return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT);
1476 case 0x40 ... 0x7e: /* ['@' - '~'] */
1477 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1478 case 0x7f: /* DEL */
1479 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1480 case 0x9c: /* ST */
1481 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1482 }
1483
1484 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1485 case STATE_DCS_PARAM:
1486 switch (raw) {
1487 case 0x00 ... 0x1f: /* C0 */
1488 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1489 case 0x20 ... 0x2f: /* [' ' - '\'] */
1490 return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT);
1491 case 0x30 ... 0x39: /* ['0' - '9'] */
1492 case 0x3b: /* ';' */
1493 return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM);
1494 case 0x3a: /* ':' */
1495 case 0x3c ... 0x3f: /* ['<' - '?'] */
1496 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1497 case 0x40 ... 0x7e: /* ['@' - '~'] */
1498 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1499 case 0x7f: /* DEL */
1500 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1501 case 0x9c: /* ST */
1502 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1503 }
1504
1505 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1506 case STATE_DCS_INT:
1507 switch (raw) {
1508 case 0x00 ... 0x1f: /* C0 */
1509 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1510 case 0x20 ... 0x2f: /* [' ' - '\'] */
1511 return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT);
1512 case 0x30 ... 0x3f: /* ['0' - '?'] */
1513 return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE);
1514 case 0x40 ... 0x7e: /* ['@' - '~'] */
1515 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1516 case 0x7f: /* DEL */
1517 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1518 case 0x9c: /* ST */
1519 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1520 }
1521
1522 return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME);
1523 case STATE_DCS_PASS:
1524 switch (raw) {
1525 case 0x00 ... 0x7e: /* ASCII \ { DEL } */
1526 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
1527 case 0x7f: /* DEL */
1528 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1529 case 0x9c: /* ST */
1530 return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH);
1531 }
1532
1533 return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT);
1534 case STATE_DCS_IGNORE:
1535 switch (raw) {
1536 case 0x00 ... 0x7f: /* ASCII */
1537 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1538 case 0x9c: /* ST */
1539 return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE);
1540 }
1541
1542 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1543 case STATE_OSC_STRING:
1544 switch (raw) {
1545 case 0x00 ... 0x06: /* C0 \ { BEL } */
1546 case 0x08 ... 0x1f:
1547 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1548 case 0x20 ... 0x7f: /* [' ' - DEL] */
1549 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
1550 case 0x07: /* BEL */
1551 case 0x9c: /* ST */
1552 return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH);
1553 }
1554
1555 return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT);
1556 case STATE_ST_IGNORE:
1557 switch (raw) {
1558 case 0x00 ... 0x7f: /* ASCII */
1559 return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE);
1560 case 0x9c: /* ST */
1561 return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1562 }
1563
1564 return parser_transition(parser, raw, STATE_NONE, ACTION_NONE);
1565 }
1566
1567 assert_not_reached("bad vte-parser state");
1568 return -EINVAL;
1569 }
1570
1571 int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) {
1572 int r;
1573
1574 assert_return(parser, -EINVAL);
1575 assert_return(seq_out, -EINVAL);
1576
1577 /*
1578 * Notes:
1579 * * DEC treats GR codes as GL. We don't do that as we require UTF-8
1580 * as charset and, thus, it doesn't make sense to treat GR special.
1581 * * During control sequences, unexpected C1 codes cancel the sequence
1582 * and immediately start a new one. C0 codes, however, may or may not
1583 * be ignored/executed depending on the sequence.
1584 */
1585
1586 switch (raw) {
1587 case 0x18: /* CAN */
1588 r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE);
1589 break;
1590 case 0x1a: /* SUB */
1591 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1592 break;
1593 case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */
1594 case 0x91 ... 0x97:
1595 case 0x99 ... 0x9a:
1596 r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE);
1597 break;
1598 case 0x1b: /* ESC */
1599 r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR);
1600 break;
1601 case 0x98: /* SOS */
1602 case 0x9e: /* PM */
1603 case 0x9f: /* APC */
1604 r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE);
1605 break;
1606 case 0x90: /* DCS */
1607 r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR);
1608 break;
1609 case 0x9d: /* OSC */
1610 r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR);
1611 break;
1612 case 0x9b: /* CSI */
1613 r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR);
1614 break;
1615 default:
1616 r = parser_feed_to_state(parser, raw);
1617 break;
1618 }
1619
1620 if (r <= 0)
1621 *seq_out = NULL;
1622 else
1623 *seq_out = &parser->seq;
1624
1625 return r;
1626 }