]>
Commit | Line | Data |
---|---|---|
1c9633d6 DH |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright (C) 2014 David Herrmann <dh.herrmann@gmail.com> | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU Lesser General Public License as published by | |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | Lesser General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU Lesser General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | /* | |
23 | * Terminal Parser | |
24 | * This file contains a bunch of UTF-8 helpers and the main ctlseq-parser. The | |
25 | * parser is a simple state-machine that correctly parses all CSI, DCS, OSC, ST | |
26 | * control sequences and generic escape sequences. | |
27 | * The parser itself does not perform any actions but lets the caller react to | |
28 | * detected sequences. | |
29 | */ | |
30 | ||
31 | #include <stdbool.h> | |
32 | #include <stdint.h> | |
33 | #include <stdlib.h> | |
34 | #include "macro.h" | |
35 | #include "term-internal.h" | |
36 | #include "util.h" | |
37 | ||
56dec05d DH |
/*
 * Default color palette, one { red, green, blue } triple per entry.
 * Entries 0-7 are the regular colors and entries 8-15 their light variants.
 * term_color_to_argb32() reads entry 16 for the default foreground color and
 * entry 17 for the default background color.
 */
static const uint8_t default_palette[18][3] = {
        {   0,   0,   0 }, /* black */
        { 205,   0,   0 }, /* red */
        {   0, 205,   0 }, /* green */
        { 205, 205,   0 }, /* yellow */
        {   0,   0, 238 }, /* blue */
        { 205,   0, 205 }, /* magenta */
        {   0, 205, 205 }, /* cyan */
        { 229, 229, 229 }, /* light grey */
        { 127, 127, 127 }, /* dark grey */
        { 255,   0,   0 }, /* light red */
        {   0, 255,   0 }, /* light green */
        { 255, 255,   0 }, /* light yellow */
        {  92,  92, 255 }, /* light blue */
        { 255,   0, 255 }, /* light magenta */
        {   0, 255, 255 }, /* light cyan */
        { 255, 255, 255 }, /* white */

        { 229, 229, 229 }, /* light grey (default foreground) */
        {   0,   0,   0 }, /* black (default background) */
};
59 | ||
60 | static uint32_t term_color_to_argb32(const term_color *color, const term_attr *attr, const uint8_t *palette) { | |
61 | static const uint8_t bval[] = { | |
62 | 0x00, 0x5f, 0x87, | |
63 | 0xaf, 0xd7, 0xff, | |
64 | }; | |
65 | uint8_t r, g, b, t; | |
66 | ||
67 | assert(color); | |
68 | ||
69 | if (!palette) | |
70 | palette = (void*)default_palette; | |
71 | ||
72 | switch (color->ccode) { | |
73 | case TERM_CCODE_RGB: | |
74 | r = color->red; | |
75 | g = color->green; | |
76 | b = color->blue; | |
77 | ||
78 | break; | |
79 | case TERM_CCODE_256: | |
80 | t = color->c256; | |
81 | if (t < 16) { | |
82 | r = palette[t * 3 + 0]; | |
83 | g = palette[t * 3 + 1]; | |
84 | b = palette[t * 3 + 2]; | |
85 | } else if (t < 232) { | |
86 | t -= 16; | |
87 | b = bval[t % 6]; | |
88 | t /= 6; | |
89 | g = bval[t % 6]; | |
90 | t /= 6; | |
91 | r = bval[t % 6]; | |
92 | } else { | |
93 | t = (t - 232) * 10 + 8; | |
94 | r = t; | |
95 | g = t; | |
96 | b = t; | |
97 | } | |
98 | ||
99 | break; | |
100 | case TERM_CCODE_BLACK ... TERM_CCODE_LIGHT_WHITE: | |
101 | t = color->ccode - TERM_CCODE_BLACK; | |
102 | ||
34dbefce DH |
103 | /* bold causes light colors (only for foreground colors) */ |
104 | if (t < 8 && attr->bold && color == &attr->fg) | |
56dec05d DH |
105 | t += 8; |
106 | ||
107 | r = palette[t * 3 + 0]; | |
108 | g = palette[t * 3 + 1]; | |
109 | b = palette[t * 3 + 2]; | |
110 | break; | |
111 | case TERM_CCODE_DEFAULT: | |
112 | /* fallthrough */ | |
113 | default: | |
114 | t = 16 + !(color == &attr->fg); | |
115 | r = palette[t * 3 + 0]; | |
116 | g = palette[t * 3 + 1]; | |
117 | b = palette[t * 3 + 2]; | |
118 | break; | |
119 | } | |
120 | ||
121 | return (0xff << 24) | (r << 16) | (g << 8) | b; | |
122 | } | |
123 | ||
124 | /** | |
125 | * term_attr_to_argb32() - Encode terminal colors as native ARGB32 value | |
126 | * @color: Terminal attributes to work on | |
127 | * @fg: Storage for foreground color (or NULL) | |
128 | * @bg: Storage for background color (or NULL) | |
129 | * @palette: The color palette to use (or NULL for default) | |
130 | * | |
131 | * This encodes the colors attr->fg and attr->bg as native-endian ARGB32 values | |
132 | * and returns them. Any color conversions are automatically applied. | |
133 | */ | |
134 | void term_attr_to_argb32(const term_attr *attr, uint32_t *fg, uint32_t *bg, const uint8_t *palette) { | |
135 | uint32_t f, b, t; | |
136 | ||
137 | assert(attr); | |
138 | ||
139 | f = term_color_to_argb32(&attr->fg, attr, palette); | |
140 | b = term_color_to_argb32(&attr->bg, attr, palette); | |
141 | ||
142 | if (attr->inverse) { | |
143 | t = f; | |
144 | f = b; | |
145 | b = t; | |
146 | } | |
147 | ||
148 | if (fg) | |
149 | *fg = f; | |
150 | if (bg) | |
151 | *bg = b; | |
152 | } | |
153 | ||
1c9633d6 DH |
/**
 * term_utf8_encode() - Encode single UCS-4 character as UTF-8
 * @out_utf8: output buffer of at least 4 bytes or NULL
 * @g: UCS-4 character to encode
 *
 * Writes the UTF-8 representation of @g into @out_utf8 without appending a
 * terminating zero. If @out_utf8 is NULL, nothing is written and only the
 * length is computed.
 *
 * Returns: The number of bytes the UTF-8 representation occupies (1 to 4), or
 *          0 if @g needs more than 21 bits and cannot be encoded.
 */
size_t term_utf8_encode(char *out_utf8, uint32_t g) {
        size_t n_bytes, i;
        uint8_t lead;

        /* pick sequence length and the marker bits of the first byte */
        if (g < 0x80) {
                n_bytes = 1;
                lead = 0x00;
        } else if (g < 0x800) {
                n_bytes = 2;
                lead = 0xc0;
        } else if (g < 0x10000) {
                n_bytes = 3;
                lead = 0xe0;
        } else if (g < 0x200000) {
                n_bytes = 4;
                lead = 0xf0;
        } else {
                return 0;
        }

        if (out_utf8) {
                /* emit continuation bytes back to front, 6 payload bits each */
                for (i = n_bytes - 1; i > 0; --i) {
                        out_utf8[i] = 0x80 | (g & 0x3f);
                        g >>= 6;
                }

                /* whatever is left of @g fits into the first byte */
                out_utf8[0] = lead | g;
        }

        return n_bytes;
}
196 | ||
197 | /** | |
198 | * term_utf8_decode() - Try decoding the next UCS-4 character | |
199 | * @p: decoder object to operate on or NULL | |
f1f5b2a3 | 200 | * @out_len: output storage for pointer to decoded UCS-4 string or NULL |
1c9633d6 DH |
201 | * @c: next char to push into decoder |
202 | * | |
203 | * This decodes a UTF-8 stream. It must be called for each input-byte of the | |
f1f5b2a3 DH |
204 | * UTF-8 stream and returns a UCS-4 stream. A pointer to the parsed UCS-4 |
205 | * string is stored in @out_buf if non-NULL. The length of this string (number | |
206 | * of parsed UCS4 characters) is returned as result. The string is not | |
207 | * zero-terminated! Furthermore, the string is only valid until the next | |
208 | * invocation of this function. It is also bound to the parser state @p and | |
209 | * must not be freed nor written to by the caller. | |
1c9633d6 DH |
210 | * |
211 | * This function is highly optimized to work with terminal-emulators. Instead | |
212 | * of being strict about UTF-8 validity, this tries to perform a fallback to | |
213 | * ISO-8859-1 in case a wrong series was detected. Therefore, this function | |
214 | * might return multiple UCS-4 characters by parsing just a single UTF-8 byte. | |
215 | * | |
216 | * The parser state @p should be allocated and managed by the caller. There're | |
217 | * no helpers to do that for you. To initialize it, simply reset it to all | |
218 | * zero. You can reset or free the object at any point in time. | |
219 | * | |
f1f5b2a3 | 220 | * Returns: Number of parsed UCS4 characters |
1c9633d6 | 221 | */ |
f1f5b2a3 DH |
222 | size_t term_utf8_decode(term_utf8 *p, uint32_t **out_buf, char c) { |
223 | static uint32_t ucs4_null = 0; | |
1c9633d6 DH |
224 | uint32_t t, *res = NULL; |
225 | uint8_t byte; | |
226 | size_t len = 0; | |
227 | ||
228 | if (!p) | |
229 | goto out; | |
230 | ||
231 | byte = c; | |
232 | ||
233 | if (!p->valid || p->i_bytes >= p->n_bytes) { | |
234 | /* | |
235 | * If the previous sequence was invalid or fully parsed, start | |
236 | * parsing a fresh new sequence. | |
237 | */ | |
238 | ||
239 | if ((byte & 0xE0) == 0xC0) { | |
240 | /* start of two byte sequence */ | |
241 | t = byte & 0x1F; | |
242 | p->n_bytes = 2; | |
243 | p->i_bytes = 1; | |
244 | p->valid = 1; | |
245 | } else if ((byte & 0xF0) == 0xE0) { | |
246 | /* start of three byte sequence */ | |
247 | t = byte & 0x0F; | |
248 | p->n_bytes = 3; | |
249 | p->i_bytes = 1; | |
250 | p->valid = 1; | |
251 | } else if ((byte & 0xF8) == 0xF0) { | |
252 | /* start of four byte sequence */ | |
253 | t = byte & 0x07; | |
254 | p->n_bytes = 4; | |
255 | p->i_bytes = 1; | |
256 | p->valid = 1; | |
257 | } else { | |
258 | /* Either of: | |
259 | * - single ASCII 7-bit char | |
260 | * - out-of-sync continuation byte | |
261 | * - overlong encoding | |
262 | * All of them are treated as single byte ISO-8859-1 */ | |
263 | t = byte; | |
264 | p->n_bytes = 1; | |
265 | p->i_bytes = 1; | |
266 | p->valid = 0; | |
267 | } | |
268 | ||
269 | p->chars[0] = byte; | |
270 | p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes)); | |
271 | } else { | |
272 | /* | |
273 | * ..otherwise, try to continue the previous sequence.. | |
274 | */ | |
275 | ||
276 | if ((byte & 0xC0) == 0x80) { | |
277 | /* | |
278 | * Valid continuation byte. Append to sequence and | |
279 | * update the ucs4 cache accordingly. | |
280 | */ | |
281 | ||
282 | t = byte & 0x3F; | |
283 | p->chars[p->i_bytes++] = byte; | |
284 | p->ucs4 |= t << (6 * (p->n_bytes - p->i_bytes)); | |
285 | } else { | |
286 | /* | |
287 | * Invalid continuation? Treat cached sequence as | |
288 | * ISO-8859-1, but parse the new char as valid new | |
289 | * starting character. If it's a new single-byte UTF-8 | |
290 | * sequence, we immediately return it in the same run, | |
291 | * otherwise, we might suffer from starvation. | |
292 | */ | |
293 | ||
294 | if ((byte & 0xE0) == 0xC0 || | |
295 | (byte & 0xF0) == 0xE0 || | |
296 | (byte & 0xF8) == 0xF0) { | |
297 | /* | |
298 | * New multi-byte sequence. Move to-be-returned | |
299 | * data at the end and start new sequence. Only | |
300 | * return the old sequence. | |
301 | */ | |
302 | ||
303 | memmove(p->chars + 1, | |
304 | p->chars, | |
305 | sizeof(*p->chars) * p->i_bytes); | |
306 | res = p->chars + 1; | |
307 | len = p->i_bytes; | |
308 | ||
309 | if ((byte & 0xE0) == 0xC0) { | |
310 | /* start of two byte sequence */ | |
311 | t = byte & 0x1F; | |
312 | p->n_bytes = 2; | |
313 | p->i_bytes = 1; | |
314 | p->valid = 1; | |
315 | } else if ((byte & 0xF0) == 0xE0) { | |
316 | /* start of three byte sequence */ | |
317 | t = byte & 0x0F; | |
318 | p->n_bytes = 3; | |
319 | p->i_bytes = 1; | |
320 | p->valid = 1; | |
321 | } else if ((byte & 0xF8) == 0xF0) { | |
322 | /* start of four byte sequence */ | |
323 | t = byte & 0x07; | |
324 | p->n_bytes = 4; | |
325 | p->i_bytes = 1; | |
326 | p->valid = 1; | |
41cb81ea TA |
327 | } else |
328 | assert_not_reached("Should not happen"); | |
1c9633d6 DH |
329 | |
330 | p->chars[0] = byte; | |
331 | p->ucs4 = t << (6 * (p->n_bytes - p->i_bytes)); | |
332 | ||
333 | goto out; | |
334 | } else { | |
335 | /* | |
336 | * New single byte sequence, append to output | |
337 | * and return combined sequence. | |
338 | */ | |
339 | ||
340 | p->chars[p->i_bytes++] = byte; | |
341 | p->valid = 0; | |
342 | } | |
343 | } | |
344 | } | |
345 | ||
346 | /* | |
347 | * Check whether a full sequence (valid or invalid) has been parsed and | |
348 | * then return it. Otherwise, return nothing. | |
349 | */ | |
350 | if (p->valid) { | |
351 | /* still parsing? then bail out */ | |
352 | if (p->i_bytes < p->n_bytes) | |
353 | goto out; | |
354 | ||
355 | res = &p->ucs4; | |
356 | len = 1; | |
357 | } else { | |
358 | res = p->chars; | |
359 | len = p->i_bytes; | |
360 | } | |
361 | ||
362 | p->valid = 0; | |
363 | p->i_bytes = 0; | |
364 | p->n_bytes = 0; | |
365 | ||
366 | out: | |
f1f5b2a3 DH |
367 | if (out_buf) |
368 | *out_buf = res ? : &ucs4_null; | |
369 | return len; | |
1c9633d6 DH |
370 | } |
371 | ||
372 | /* | |
373 | * Command Parser | |
374 | * The ctl-seq parser "term_parser" only detects whole sequences, it does not | |
375 | * detect the specific command. Once a sequence is parsed, the command-parsers | |
376 | * are used to figure out their meaning. Note that this depends on whether we | |
377 | * run on the host or terminal side. | |
378 | */ | |
379 | ||
380 | static unsigned int term_parse_host_control(const term_seq *seq) { | |
381 | assert_return(seq, TERM_CMD_NONE); | |
382 | ||
383 | switch (seq->terminator) { | |
384 | case 0x00: /* NUL */ | |
385 | return TERM_CMD_NULL; | |
386 | case 0x05: /* ENQ */ | |
387 | return TERM_CMD_ENQ; | |
388 | case 0x07: /* BEL */ | |
389 | return TERM_CMD_BEL; | |
390 | case 0x08: /* BS */ | |
391 | return TERM_CMD_BS; | |
392 | case 0x09: /* HT */ | |
393 | return TERM_CMD_HT; | |
394 | case 0x0a: /* LF */ | |
395 | return TERM_CMD_LF; | |
396 | case 0x0b: /* VT */ | |
397 | return TERM_CMD_VT; | |
398 | case 0x0c: /* FF */ | |
399 | return TERM_CMD_FF; | |
400 | case 0x0d: /* CR */ | |
401 | return TERM_CMD_CR; | |
402 | case 0x0e: /* SO */ | |
403 | return TERM_CMD_SO; | |
404 | case 0x0f: /* SI */ | |
405 | return TERM_CMD_SI; | |
406 | case 0x11: /* DC1 */ | |
407 | return TERM_CMD_DC1; | |
408 | case 0x13: /* DC3 */ | |
409 | return TERM_CMD_DC3; | |
410 | case 0x18: /* CAN */ | |
411 | /* this is already handled by the state-machine */ | |
412 | break; | |
413 | case 0x1a: /* SUB */ | |
414 | return TERM_CMD_SUB; | |
415 | case 0x1b: /* ESC */ | |
416 | /* this is already handled by the state-machine */ | |
417 | break; | |
418 | case 0x1f: /* DEL */ | |
419 | /* this is already handled by the state-machine */ | |
420 | break; | |
421 | case 0x84: /* IND */ | |
422 | return TERM_CMD_IND; | |
423 | case 0x85: /* NEL */ | |
424 | return TERM_CMD_NEL; | |
425 | case 0x88: /* HTS */ | |
426 | return TERM_CMD_HTS; | |
427 | case 0x8d: /* RI */ | |
428 | return TERM_CMD_RI; | |
429 | case 0x8e: /* SS2 */ | |
430 | return TERM_CMD_SS2; | |
431 | case 0x8f: /* SS3 */ | |
432 | return TERM_CMD_SS3; | |
433 | case 0x90: /* DCS */ | |
434 | /* this is already handled by the state-machine */ | |
435 | break; | |
436 | case 0x96: /* SPA */ | |
437 | return TERM_CMD_SPA; | |
438 | case 0x97: /* EPA */ | |
439 | return TERM_CMD_EPA; | |
440 | case 0x98: /* SOS */ | |
441 | /* this is already handled by the state-machine */ | |
442 | break; | |
443 | case 0x9a: /* DECID */ | |
444 | return TERM_CMD_DECID; | |
445 | case 0x9b: /* CSI */ | |
446 | /* this is already handled by the state-machine */ | |
447 | break; | |
448 | case 0x9c: /* ST */ | |
449 | return TERM_CMD_ST; | |
450 | case 0x9d: /* OSC */ | |
451 | /* this is already handled by the state-machine */ | |
452 | break; | |
453 | case 0x9e: /* PM */ | |
454 | /* this is already handled by the state-machine */ | |
455 | break; | |
456 | case 0x9f: /* APC */ | |
457 | /* this is already handled by the state-machine */ | |
458 | break; | |
459 | } | |
460 | ||
461 | return TERM_CMD_NONE; | |
462 | } | |
463 | ||
464 | static inline int charset_from_cmd(uint32_t raw, unsigned int flags, bool require_96) { | |
465 | static const struct { | |
466 | uint32_t raw; | |
467 | unsigned int flags; | |
468 | } charset_cmds[] = { | |
469 | /* 96-compat charsets */ | |
470 | [TERM_CHARSET_ISO_LATIN1_SUPPLEMENTAL] = { .raw = 'A', .flags = 0 }, | |
471 | [TERM_CHARSET_ISO_LATIN2_SUPPLEMENTAL] = { .raw = 'B', .flags = 0 }, | |
472 | [TERM_CHARSET_ISO_LATIN5_SUPPLEMENTAL] = { .raw = 'M', .flags = 0 }, | |
473 | [TERM_CHARSET_ISO_GREEK_SUPPLEMENTAL] = { .raw = 'F', .flags = 0 }, | |
474 | [TERM_CHARSET_ISO_HEBREW_SUPPLEMENTAL] = { .raw = 'H', .flags = 0 }, | |
475 | [TERM_CHARSET_ISO_LATIN_CYRILLIC] = { .raw = 'L', .flags = 0 }, | |
476 | ||
477 | /* 94-compat charsets */ | |
478 | [TERM_CHARSET_DEC_SPECIAL_GRAPHIC] = { .raw = '0', .flags = 0 }, | |
479 | [TERM_CHARSET_DEC_SUPPLEMENTAL] = { .raw = '5', .flags = TERM_SEQ_FLAG_PERCENT }, | |
480 | [TERM_CHARSET_DEC_TECHNICAL] = { .raw = '>', .flags = 0 }, | |
481 | [TERM_CHARSET_CYRILLIC_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_AND }, | |
482 | [TERM_CHARSET_DUTCH_NRCS] = { .raw = '4', .flags = 0 }, | |
483 | [TERM_CHARSET_FINNISH_NRCS] = { .raw = '5', .flags = 0 }, | |
484 | [TERM_CHARSET_FRENCH_NRCS] = { .raw = 'R', .flags = 0 }, | |
485 | [TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = '9', .flags = 0 }, | |
486 | [TERM_CHARSET_GERMAN_NRCS] = { .raw = 'K', .flags = 0 }, | |
487 | [TERM_CHARSET_GREEK_DEC] = { .raw = '?', .flags = TERM_SEQ_FLAG_DQUOTE }, | |
488 | [TERM_CHARSET_GREEK_NRCS] = { .raw = '>', .flags = TERM_SEQ_FLAG_DQUOTE }, | |
489 | [TERM_CHARSET_HEBREW_DEC] = { .raw = '4', .flags = TERM_SEQ_FLAG_DQUOTE }, | |
490 | [TERM_CHARSET_HEBREW_NRCS] = { .raw = '=', .flags = TERM_SEQ_FLAG_PERCENT }, | |
491 | [TERM_CHARSET_ITALIAN_NRCS] = { .raw = 'Y', .flags = 0 }, | |
492 | [TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '`', .flags = 0 }, | |
493 | [TERM_CHARSET_PORTUGUESE_NRCS] = { .raw = '6', .flags = TERM_SEQ_FLAG_PERCENT }, | |
494 | [TERM_CHARSET_RUSSIAN_NRCS] = { .raw = '5', .flags = TERM_SEQ_FLAG_AND }, | |
495 | [TERM_CHARSET_SCS_NRCS] = { .raw = '3', .flags = TERM_SEQ_FLAG_PERCENT }, | |
496 | [TERM_CHARSET_SPANISH_NRCS] = { .raw = 'Z', .flags = 0 }, | |
497 | [TERM_CHARSET_SWEDISH_NRCS] = { .raw = '7', .flags = 0 }, | |
498 | [TERM_CHARSET_SWISS_NRCS] = { .raw = '=', .flags = 0 }, | |
499 | [TERM_CHARSET_TURKISH_DEC] = { .raw = '0', .flags = TERM_SEQ_FLAG_PERCENT }, | |
500 | [TERM_CHARSET_TURKISH_NRCS] = { .raw = '2', .flags = TERM_SEQ_FLAG_PERCENT }, | |
501 | ||
502 | /* special charsets */ | |
503 | [TERM_CHARSET_USERPREF_SUPPLEMENTAL] = { .raw = '<', .flags = 0 }, | |
504 | ||
505 | /* secondary choices */ | |
506 | [TERM_CHARSET_CNT + TERM_CHARSET_FINNISH_NRCS] = { .raw = 'C', .flags = 0 }, | |
507 | [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_NRCS] = { .raw = 'f', .flags = 0 }, | |
508 | [TERM_CHARSET_CNT + TERM_CHARSET_FRENCH_CANADIAN_NRCS] = { .raw = 'Q', .flags = 0 }, | |
509 | [TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = 'E', .flags = 0 }, | |
510 | [TERM_CHARSET_CNT + TERM_CHARSET_SWEDISH_NRCS] = { .raw = 'H', .flags = 0 }, /* unused; conflicts with ISO_HEBREW */ | |
511 | ||
512 | /* tertiary choices */ | |
513 | [TERM_CHARSET_CNT + TERM_CHARSET_CNT + TERM_CHARSET_NORWEGIAN_DANISH_NRCS] = { .raw = '6', .flags = 0 }, | |
514 | }; | |
515 | size_t i, cs; | |
516 | ||
517 | /* | |
518 | * Secondary choice on SWEDISH_NRCS and primary choice on | |
519 | * ISO_HEBREW_SUPPLEMENTAL have a conflict: raw=="H", flags==0. | |
520 | * We always choose the ISO 96-compat set, which is what VT510 does. | |
521 | */ | |
522 | ||
523 | for (i = 0; i < ELEMENTSOF(charset_cmds); ++i) { | |
524 | if (charset_cmds[i].raw == raw && charset_cmds[i].flags == flags) { | |
525 | cs = i; | |
526 | while (cs >= TERM_CHARSET_CNT) | |
527 | cs -= TERM_CHARSET_CNT; | |
528 | ||
529 | if (!require_96 || cs < TERM_CHARSET_96_CNT || cs >= TERM_CHARSET_94_CNT) | |
530 | return cs; | |
531 | } | |
532 | } | |
533 | ||
534 | return -ENOENT; | |
535 | } | |
536 | ||
/* true if @value is a power of two, i.e. has exactly one bit set */
static inline bool exactly_one_bit_set(unsigned int value) {
        /* clearing the lowest set bit must leave nothing behind */
        return value != 0 && (value & (value - 1)) == 0;
}
541 | ||
542 | static unsigned int term_parse_host_escape(const term_seq *seq, unsigned int *cs_out) { | |
543 | unsigned int t, flags; | |
544 | int cs; | |
545 | ||
546 | assert_return(seq, TERM_CMD_NONE); | |
547 | ||
548 | flags = seq->intermediates; | |
549 | t = TERM_SEQ_FLAG_POPEN | TERM_SEQ_FLAG_PCLOSE | TERM_SEQ_FLAG_MULT | | |
550 | TERM_SEQ_FLAG_PLUS | TERM_SEQ_FLAG_MINUS | TERM_SEQ_FLAG_DOT | | |
551 | TERM_SEQ_FLAG_SLASH; | |
552 | ||
553 | if (exactly_one_bit_set(flags & t)) { | |
554 | switch (flags & t) { | |
555 | case TERM_SEQ_FLAG_POPEN: | |
556 | case TERM_SEQ_FLAG_PCLOSE: | |
557 | case TERM_SEQ_FLAG_MULT: | |
558 | case TERM_SEQ_FLAG_PLUS: | |
559 | cs = charset_from_cmd(seq->terminator, flags & ~t, false); | |
560 | break; | |
561 | case TERM_SEQ_FLAG_MINUS: | |
562 | case TERM_SEQ_FLAG_DOT: | |
563 | case TERM_SEQ_FLAG_SLASH: | |
564 | cs = charset_from_cmd(seq->terminator, flags & ~t, true); | |
565 | break; | |
566 | default: | |
567 | cs = -ENOENT; | |
568 | break; | |
569 | } | |
570 | ||
571 | if (cs >= 0) { | |
572 | if (cs_out) | |
573 | *cs_out = cs; | |
574 | return TERM_CMD_SCS; | |
575 | } | |
576 | ||
577 | /* looked like a charset-cmd but wasn't; continue */ | |
578 | } | |
579 | ||
580 | switch (seq->terminator) { | |
581 | case '3': | |
582 | if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL top-half */ | |
583 | return TERM_CMD_DECDHL_TH; | |
584 | break; | |
585 | case '4': | |
586 | if (flags == TERM_SEQ_FLAG_HASH) /* DECDHL bottom-half */ | |
587 | return TERM_CMD_DECDHL_BH; | |
588 | break; | |
589 | case '5': | |
590 | if (flags == TERM_SEQ_FLAG_HASH) /* DECSWL */ | |
591 | return TERM_CMD_DECSWL; | |
592 | break; | |
593 | case '6': | |
594 | if (flags == 0) /* DECBI */ | |
595 | return TERM_CMD_DECBI; | |
596 | else if (flags == TERM_SEQ_FLAG_HASH) /* DECDWL */ | |
597 | return TERM_CMD_DECDWL; | |
598 | break; | |
599 | case '7': | |
600 | if (flags == 0) /* DECSC */ | |
601 | return TERM_CMD_DECSC; | |
602 | break; | |
603 | case '8': | |
604 | if (flags == 0) /* DECRC */ | |
605 | return TERM_CMD_DECRC; | |
606 | else if (flags == TERM_SEQ_FLAG_HASH) /* DECALN */ | |
607 | return TERM_CMD_DECALN; | |
608 | break; | |
609 | case '9': | |
610 | if (flags == 0) /* DECFI */ | |
611 | return TERM_CMD_DECFI; | |
612 | break; | |
613 | case '<': | |
614 | if (flags == 0) /* DECANM */ | |
615 | return TERM_CMD_DECANM; | |
616 | break; | |
617 | case '=': | |
618 | if (flags == 0) /* DECKPAM */ | |
619 | return TERM_CMD_DECKPAM; | |
620 | break; | |
621 | case '>': | |
622 | if (flags == 0) /* DECKPNM */ | |
623 | return TERM_CMD_DECKPNM; | |
624 | break; | |
625 | case '@': | |
626 | if (flags == TERM_SEQ_FLAG_PERCENT) { | |
627 | /* Select default character set */ | |
628 | return TERM_CMD_XTERM_SDCS; | |
629 | } | |
630 | break; | |
631 | case 'D': | |
632 | if (flags == 0) /* IND */ | |
633 | return TERM_CMD_IND; | |
634 | break; | |
635 | case 'E': | |
636 | if (flags == 0) /* NEL */ | |
637 | return TERM_CMD_NEL; | |
638 | break; | |
639 | case 'F': | |
640 | if (flags == 0) /* Cursor to lower-left corner of screen */ | |
641 | return TERM_CMD_XTERM_CLLHP; | |
642 | else if (flags == TERM_SEQ_FLAG_SPACE) /* S7C1T */ | |
643 | return TERM_CMD_S7C1T; | |
644 | break; | |
645 | case 'G': | |
646 | if (flags == TERM_SEQ_FLAG_SPACE) { /* S8C1T */ | |
647 | return TERM_CMD_S8C1T; | |
648 | } else if (flags == TERM_SEQ_FLAG_PERCENT) { | |
649 | /* Select UTF-8 character set */ | |
650 | return TERM_CMD_XTERM_SUCS; | |
651 | } | |
652 | break; | |
653 | case 'H': | |
654 | if (flags == 0) /* HTS */ | |
655 | return TERM_CMD_HTS; | |
656 | break; | |
657 | case 'L': | |
658 | if (flags == TERM_SEQ_FLAG_SPACE) { | |
659 | /* Set ANSI conformance level 1 */ | |
660 | return TERM_CMD_XTERM_SACL1; | |
661 | } | |
662 | break; | |
663 | case 'M': | |
664 | if (flags == 0) { /* RI */ | |
665 | return TERM_CMD_RI; | |
666 | } else if (flags == TERM_SEQ_FLAG_SPACE) { | |
667 | /* Set ANSI conformance level 2 */ | |
668 | return TERM_CMD_XTERM_SACL2; | |
669 | } | |
670 | break; | |
671 | case 'N': | |
672 | if (flags == 0) { /* SS2 */ | |
673 | return TERM_CMD_SS2; | |
674 | } else if (flags == TERM_SEQ_FLAG_SPACE) { | |
675 | /* Set ANSI conformance level 3 */ | |
676 | return TERM_CMD_XTERM_SACL3; | |
677 | } | |
678 | break; | |
679 | case 'O': | |
680 | if (flags == 0) /* SS3 */ | |
681 | return TERM_CMD_SS3; | |
682 | break; | |
683 | case 'P': | |
684 | if (flags == 0) /* DCS: this is already handled by the state-machine */ | |
685 | return 0; | |
686 | break; | |
687 | case 'V': | |
688 | if (flags == 0) /* SPA */ | |
689 | return TERM_CMD_SPA; | |
690 | break; | |
691 | case 'W': | |
692 | if (flags == 0) /* EPA */ | |
693 | return TERM_CMD_EPA; | |
694 | break; | |
695 | case 'X': | |
696 | if (flags == 0) { /* SOS */ | |
697 | /* this is already handled by the state-machine */ | |
698 | break; | |
699 | } | |
700 | break; | |
701 | case 'Z': | |
702 | if (flags == 0) /* DECID */ | |
703 | return TERM_CMD_DECID; | |
704 | break; | |
705 | case '[': | |
706 | if (flags == 0) { /* CSI */ | |
707 | /* this is already handled by the state-machine */ | |
708 | break; | |
709 | } | |
710 | break; | |
711 | case '\\': | |
712 | if (flags == 0) /* ST */ | |
713 | return TERM_CMD_ST; | |
714 | break; | |
715 | case ']': | |
716 | if (flags == 0) { /* OSC */ | |
717 | /* this is already handled by the state-machine */ | |
718 | break; | |
719 | } | |
720 | break; | |
721 | case '^': | |
722 | if (flags == 0) { /* PM */ | |
723 | /* this is already handled by the state-machine */ | |
724 | break; | |
725 | } | |
726 | break; | |
727 | case '_': | |
728 | if (flags == 0) { /* APC */ | |
729 | /* this is already handled by the state-machine */ | |
730 | break; | |
731 | } | |
732 | break; | |
733 | case 'c': | |
734 | if (flags == 0) /* RIS */ | |
735 | return TERM_CMD_RIS; | |
736 | break; | |
737 | case 'l': | |
738 | if (flags == 0) /* Memory lock */ | |
739 | return TERM_CMD_XTERM_MLHP; | |
740 | break; | |
741 | case 'm': | |
742 | if (flags == 0) /* Memory unlock */ | |
743 | return TERM_CMD_XTERM_MUHP; | |
744 | break; | |
745 | case 'n': | |
746 | if (flags == 0) /* LS2 */ | |
747 | return TERM_CMD_LS2; | |
748 | break; | |
749 | case 'o': | |
750 | if (flags == 0) /* LS3 */ | |
751 | return TERM_CMD_LS3; | |
752 | break; | |
753 | case '|': | |
754 | if (flags == 0) /* LS3R */ | |
755 | return TERM_CMD_LS3R; | |
756 | break; | |
757 | case '}': | |
758 | if (flags == 0) /* LS2R */ | |
759 | return TERM_CMD_LS2R; | |
760 | break; | |
761 | case '~': | |
762 | if (flags == 0) /* LS1R */ | |
763 | return TERM_CMD_LS1R; | |
764 | break; | |
765 | } | |
766 | ||
767 | return TERM_CMD_NONE; | |
768 | } | |
769 | ||
770 | static unsigned int term_parse_host_csi(const term_seq *seq) { | |
771 | unsigned int flags; | |
772 | ||
773 | assert_return(seq, TERM_CMD_NONE); | |
774 | ||
775 | flags = seq->intermediates; | |
776 | ||
777 | switch (seq->terminator) { | |
778 | case 'A': | |
779 | if (flags == 0) /* CUU */ | |
780 | return TERM_CMD_CUU; | |
781 | break; | |
782 | case 'a': | |
783 | if (flags == 0) /* HPR */ | |
784 | return TERM_CMD_HPR; | |
785 | break; | |
786 | case 'B': | |
787 | if (flags == 0) /* CUD */ | |
788 | return TERM_CMD_CUD; | |
789 | break; | |
790 | case 'b': | |
791 | if (flags == 0) /* REP */ | |
792 | return TERM_CMD_REP; | |
793 | break; | |
794 | case 'C': | |
795 | if (flags == 0) /* CUF */ | |
796 | return TERM_CMD_CUF; | |
797 | break; | |
798 | case 'c': | |
799 | if (flags == 0) /* DA1 */ | |
800 | return TERM_CMD_DA1; | |
801 | else if (flags == TERM_SEQ_FLAG_GT) /* DA2 */ | |
802 | return TERM_CMD_DA2; | |
803 | else if (flags == TERM_SEQ_FLAG_EQUAL) /* DA3 */ | |
804 | return TERM_CMD_DA3; | |
805 | break; | |
806 | case 'D': | |
807 | if (flags == 0) /* CUB */ | |
808 | return TERM_CMD_CUB; | |
809 | break; | |
810 | case 'd': | |
811 | if (flags == 0) /* VPA */ | |
812 | return TERM_CMD_VPA; | |
813 | break; | |
814 | case 'E': | |
815 | if (flags == 0) /* CNL */ | |
816 | return TERM_CMD_CNL; | |
817 | break; | |
818 | case 'e': | |
819 | if (flags == 0) /* VPR */ | |
820 | return TERM_CMD_VPR; | |
821 | break; | |
822 | case 'F': | |
823 | if (flags == 0) /* CPL */ | |
824 | return TERM_CMD_CPL; | |
825 | break; | |
826 | case 'f': | |
827 | if (flags == 0) /* HVP */ | |
828 | return TERM_CMD_HVP; | |
829 | break; | |
830 | case 'G': | |
831 | if (flags == 0) /* CHA */ | |
832 | return TERM_CMD_CHA; | |
833 | break; | |
834 | case 'g': | |
835 | if (flags == 0) /* TBC */ | |
836 | return TERM_CMD_TBC; | |
837 | else if (flags == TERM_SEQ_FLAG_MULT) /* DECLFKC */ | |
838 | return TERM_CMD_DECLFKC; | |
839 | break; | |
840 | case 'H': | |
841 | if (flags == 0) /* CUP */ | |
842 | return TERM_CMD_CUP; | |
843 | break; | |
844 | case 'h': | |
845 | if (flags == 0) /* SM ANSI */ | |
846 | return TERM_CMD_SM_ANSI; | |
847 | else if (flags == TERM_SEQ_FLAG_WHAT) /* SM DEC */ | |
848 | return TERM_CMD_SM_DEC; | |
849 | break; | |
850 | case 'I': | |
851 | if (flags == 0) /* CHT */ | |
852 | return TERM_CMD_CHT; | |
853 | break; | |
854 | case 'i': | |
855 | if (flags == 0) /* MC ANSI */ | |
856 | return TERM_CMD_MC_ANSI; | |
857 | else if (flags == TERM_SEQ_FLAG_WHAT) /* MC DEC */ | |
858 | return TERM_CMD_MC_DEC; | |
859 | break; | |
860 | case 'J': | |
861 | if (flags == 0) /* ED */ | |
862 | return TERM_CMD_ED; | |
863 | else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSED */ | |
864 | return TERM_CMD_DECSED; | |
865 | break; | |
866 | case 'K': | |
867 | if (flags == 0) /* EL */ | |
868 | return TERM_CMD_EL; | |
869 | else if (flags == TERM_SEQ_FLAG_WHAT) /* DECSEL */ | |
870 | return TERM_CMD_DECSEL; | |
871 | break; | |
872 | case 'L': | |
873 | if (flags == 0) /* IL */ | |
874 | return TERM_CMD_IL; | |
875 | break; | |
876 | case 'l': | |
877 | if (flags == 0) /* RM ANSI */ | |
878 | return TERM_CMD_RM_ANSI; | |
879 | else if (flags == TERM_SEQ_FLAG_WHAT) /* RM DEC */ | |
880 | return TERM_CMD_RM_DEC; | |
881 | break; | |
882 | case 'M': | |
883 | if (flags == 0) /* DL */ | |
884 | return TERM_CMD_DL; | |
885 | break; | |
886 | case 'm': | |
887 | if (flags == 0) /* SGR */ | |
888 | return TERM_CMD_SGR; | |
889 | else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SMR */ | |
890 | return TERM_CMD_XTERM_SRV; | |
891 | break; | |
892 | case 'n': | |
893 | if (flags == 0) /* DSR ANSI */ | |
894 | return TERM_CMD_DSR_ANSI; | |
895 | else if (flags == TERM_SEQ_FLAG_GT) /* XTERM RMR */ | |
896 | return TERM_CMD_XTERM_RRV; | |
897 | else if (flags == TERM_SEQ_FLAG_WHAT) /* DSR DEC */ | |
898 | return TERM_CMD_DSR_DEC; | |
899 | break; | |
900 | case 'P': | |
901 | if (flags == 0) /* DCH */ | |
902 | return TERM_CMD_DCH; | |
903 | else if (flags == TERM_SEQ_FLAG_SPACE) /* PPA */ | |
904 | return TERM_CMD_PPA; | |
905 | break; | |
906 | case 'p': | |
907 | if (flags == 0) /* DECSSL */ | |
908 | return TERM_CMD_DECSSL; | |
909 | else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSSCLS */ | |
910 | return TERM_CMD_DECSSCLS; | |
911 | else if (flags == TERM_SEQ_FLAG_BANG) /* DECSTR */ | |
912 | return TERM_CMD_DECSTR; | |
913 | else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCL */ | |
914 | return TERM_CMD_DECSCL; | |
915 | else if (flags == TERM_SEQ_FLAG_CASH) /* DECRQM-ANSI */ | |
916 | return TERM_CMD_DECRQM_ANSI; | |
917 | else if (flags == (TERM_SEQ_FLAG_CASH | TERM_SEQ_FLAG_WHAT)) /* DECRQM-DEC */ | |
918 | return TERM_CMD_DECRQM_DEC; | |
919 | else if (flags == TERM_SEQ_FLAG_PCLOSE) /* DECSDPT */ | |
920 | return TERM_CMD_DECSDPT; | |
921 | else if (flags == TERM_SEQ_FLAG_MULT) /* DECSPPCS */ | |
922 | return TERM_CMD_DECSPPCS; | |
923 | else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSR */ | |
924 | return TERM_CMD_DECSR; | |
925 | else if (flags == TERM_SEQ_FLAG_COMMA) /* DECLTOD */ | |
926 | return TERM_CMD_DECLTOD; | |
927 | else if (flags == TERM_SEQ_FLAG_GT) /* XTERM SPM */ | |
928 | return TERM_CMD_XTERM_SPM; | |
929 | break; | |
930 | case 'Q': | |
931 | if (flags == TERM_SEQ_FLAG_SPACE) /* PPR */ | |
932 | return TERM_CMD_PPR; | |
933 | break; | |
934 | case 'q': | |
935 | if (flags == 0) /* DECLL */ | |
936 | return TERM_CMD_DECLL; | |
937 | else if (flags == TERM_SEQ_FLAG_SPACE) /* DECSCUSR */ | |
938 | return TERM_CMD_DECSCUSR; | |
939 | else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECSCA */ | |
940 | return TERM_CMD_DECSCA; | |
941 | else if (flags == TERM_SEQ_FLAG_CASH) /* DECSDDT */ | |
942 | return TERM_CMD_DECSDDT; | |
943 | else if (flags == TERM_SEQ_FLAG_MULT) /* DECSRC */ | |
944 | return TERM_CMD_DECSR; | |
945 | else if (flags == TERM_SEQ_FLAG_PLUS) /* DECELF */ | |
946 | return TERM_CMD_DECELF; | |
947 | else if (flags == TERM_SEQ_FLAG_COMMA) /* DECTID */ | |
948 | return TERM_CMD_DECTID; | |
949 | break; | |
950 | case 'R': | |
951 | if (flags == TERM_SEQ_FLAG_SPACE) /* PPB */ | |
952 | return TERM_CMD_PPB; | |
953 | break; | |
954 | case 'r': | |
955 | if (flags == 0) { | |
956 | /* DECSTBM */ | |
957 | return TERM_CMD_DECSTBM; | |
958 | } else if (flags == TERM_SEQ_FLAG_SPACE) { | |
959 | /* DECSKCV */ | |
960 | return TERM_CMD_DECSKCV; | |
961 | } else if (flags == TERM_SEQ_FLAG_CASH) { | |
962 | /* DECCARA */ | |
963 | return TERM_CMD_DECCARA; | |
964 | } else if (flags == TERM_SEQ_FLAG_MULT) { | |
965 | /* DECSCS */ | |
966 | return TERM_CMD_DECSCS; | |
967 | } else if (flags == TERM_SEQ_FLAG_PLUS) { | |
968 | /* DECSMKR */ | |
969 | return TERM_CMD_DECSMKR; | |
970 | } else if (flags == TERM_SEQ_FLAG_WHAT) { | |
971 | /* | |
972 | * There's a conflict between DECPCTERM and XTERM-RPM. | |
973 | * XTERM-RPM takes a single argument, DECPCTERM takes 2. | |
974 | * Split both up and forward the call to the closer | |
975 | * match. | |
976 | */ | |
977 | if (seq->n_args <= 1) /* XTERM RPM */ | |
978 | return TERM_CMD_XTERM_RPM; | |
979 | else if (seq->n_args >= 2) /* DECPCTERM */ | |
980 | return TERM_CMD_DECPCTERM; | |
981 | } | |
982 | break; | |
983 | case 'S': | |
984 | if (flags == 0) /* SU */ | |
985 | return TERM_CMD_SU; | |
986 | else if (flags == TERM_SEQ_FLAG_WHAT) /* XTERM SGFX */ | |
987 | return TERM_CMD_XTERM_SGFX; | |
988 | break; | |
989 | case 's': | |
990 | if (flags == 0) { | |
991 | /* | |
992 | * There's a conflict between DECSLRM and SC-ANSI which | |
993 | * cannot be resolved without knowing the state of | |
994 | * DECLRMM. We leave that decision up to the caller. | |
995 | */ | |
996 | return TERM_CMD_DECSLRM_OR_SC; | |
997 | } else if (flags == TERM_SEQ_FLAG_CASH) { | |
998 | /* DECSPRTT */ | |
999 | return TERM_CMD_DECSPRTT; | |
1000 | } else if (flags == TERM_SEQ_FLAG_MULT) { | |
1001 | /* DECSFC */ | |
1002 | return TERM_CMD_DECSFC; | |
1003 | } else if (flags == TERM_SEQ_FLAG_WHAT) { | |
1004 | /* XTERM SPM */ | |
1005 | return TERM_CMD_XTERM_SPM; | |
1006 | } | |
1007 | break; | |
1008 | case 'T': | |
1009 | if (flags == 0) { | |
1010 | /* | |
1011 | * Awesome: There's a conflict between SD and XTERM IHMT | |
1012 | * that we have to resolve by checking the parameter | |
1013 | * count.. XTERM_IHMT needs exactly 5 arguments, SD | |
1014 | * takes 0 or 1. We're conservative here and give both | |
1015 | * a wider range to allow unused arguments (compat...). | |
1016 | */ | |
1017 | if (seq->n_args >= 5) { | |
1018 | /* XTERM IHMT */ | |
1019 | return TERM_CMD_XTERM_IHMT; | |
1020 | } else if (seq->n_args < 5) { | |
1021 | /* SD */ | |
1022 | return TERM_CMD_SD; | |
1023 | } | |
1024 | } else if (flags == TERM_SEQ_FLAG_GT) { | |
1025 | /* XTERM RTM */ | |
1026 | return TERM_CMD_XTERM_RTM; | |
1027 | } | |
1028 | break; | |
1029 | case 't': | |
1030 | if (flags == 0) { | |
1031 | if (seq->n_args > 0 && seq->args[0] < 24) { | |
1032 | /* XTERM WM */ | |
1033 | return TERM_CMD_XTERM_WM; | |
1034 | } else { | |
1035 | /* DECSLPP */ | |
1036 | return TERM_CMD_DECSLPP; | |
1037 | } | |
1038 | } else if (flags == TERM_SEQ_FLAG_SPACE) { | |
1039 | /* DECSWBV */ | |
1040 | return TERM_CMD_DECSWBV; | |
1041 | } else if (flags == TERM_SEQ_FLAG_DQUOTE) { | |
1042 | /* DECSRFR */ | |
1043 | return TERM_CMD_DECSRFR; | |
1044 | } else if (flags == TERM_SEQ_FLAG_CASH) { | |
1045 | /* DECRARA */ | |
1046 | return TERM_CMD_DECRARA; | |
1047 | } else if (flags == TERM_SEQ_FLAG_GT) { | |
1048 | /* XTERM STM */ | |
1049 | return TERM_CMD_XTERM_STM; | |
1050 | } | |
1051 | break; | |
1052 | case 'U': | |
1053 | if (flags == 0) /* NP */ | |
1054 | return TERM_CMD_NP; | |
1055 | break; | |
1056 | case 'u': | |
1057 | if (flags == 0) { | |
1058 | /* RC */ | |
1059 | return TERM_CMD_RC; | |
1060 | } else if (flags == TERM_SEQ_FLAG_SPACE) { | |
1061 | /* DECSMBV */ | |
1062 | return TERM_CMD_DECSMBV; | |
1063 | } else if (flags == TERM_SEQ_FLAG_DQUOTE) { | |
1064 | /* DECSTRL */ | |
1065 | return TERM_CMD_DECSTRL; | |
1066 | } else if (flags == TERM_SEQ_FLAG_WHAT) { | |
1067 | /* DECRQUPSS */ | |
1068 | return TERM_CMD_DECRQUPSS; | |
1069 | } else if (seq->args[0] == 1 && flags == TERM_SEQ_FLAG_CASH) { | |
1070 | /* DECRQTSR */ | |
1071 | return TERM_CMD_DECRQTSR; | |
1072 | } else if (flags == TERM_SEQ_FLAG_MULT) { | |
1073 | /* DECSCP */ | |
1074 | return TERM_CMD_DECSCP; | |
1075 | } else if (flags == TERM_SEQ_FLAG_COMMA) { | |
1076 | /* DECRQKT */ | |
1077 | return TERM_CMD_DECRQKT; | |
1078 | } | |
1079 | break; | |
1080 | case 'V': | |
1081 | if (flags == 0) /* PP */ | |
1082 | return TERM_CMD_PP; | |
1083 | break; | |
1084 | case 'v': | |
1085 | if (flags == TERM_SEQ_FLAG_SPACE) /* DECSLCK */ | |
1086 | return TERM_CMD_DECSLCK; | |
1087 | else if (flags == TERM_SEQ_FLAG_DQUOTE) /* DECRQDE */ | |
1088 | return TERM_CMD_DECRQDE; | |
1089 | else if (flags == TERM_SEQ_FLAG_CASH) /* DECCRA */ | |
1090 | return TERM_CMD_DECCRA; | |
1091 | else if (flags == TERM_SEQ_FLAG_COMMA) /* DECRPKT */ | |
1092 | return TERM_CMD_DECRPKT; | |
1093 | break; | |
1094 | case 'W': | |
1095 | if (seq->args[0] == 5 && flags == TERM_SEQ_FLAG_WHAT) { | |
1096 | /* DECST8C */ | |
1097 | return TERM_CMD_DECST8C; | |
1098 | } | |
1099 | break; | |
1100 | case 'w': | |
1101 | if (flags == TERM_SEQ_FLAG_CASH) /* DECRQPSR */ | |
1102 | return TERM_CMD_DECRQPSR; | |
1103 | else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECEFR */ | |
1104 | return TERM_CMD_DECEFR; | |
1105 | else if (flags == TERM_SEQ_FLAG_PLUS) /* DECSPP */ | |
1106 | return TERM_CMD_DECSPP; | |
1107 | break; | |
1108 | case 'X': | |
1109 | if (flags == 0) /* ECH */ | |
1110 | return TERM_CMD_ECH; | |
1111 | break; | |
1112 | case 'x': | |
1113 | if (flags == 0) /* DECREQTPARM */ | |
1114 | return TERM_CMD_DECREQTPARM; | |
1115 | else if (flags == TERM_SEQ_FLAG_CASH) /* DECFRA */ | |
1116 | return TERM_CMD_DECFRA; | |
1117 | else if (flags == TERM_SEQ_FLAG_MULT) /* DECSACE */ | |
1118 | return TERM_CMD_DECSACE; | |
1119 | else if (flags == TERM_SEQ_FLAG_PLUS) /* DECRQPKFM */ | |
1120 | return TERM_CMD_DECRQPKFM; | |
1121 | break; | |
1122 | case 'y': | |
1123 | if (flags == 0) /* DECTST */ | |
1124 | return TERM_CMD_DECTST; | |
1125 | else if (flags == TERM_SEQ_FLAG_MULT) /* DECRQCRA */ | |
1126 | return TERM_CMD_DECRQCRA; | |
1127 | else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKFMR */ | |
1128 | return TERM_CMD_DECPKFMR; | |
1129 | break; | |
1130 | case 'Z': | |
1131 | if (flags == 0) /* CBT */ | |
1132 | return TERM_CMD_CBT; | |
1133 | break; | |
1134 | case 'z': | |
1135 | if (flags == TERM_SEQ_FLAG_CASH) /* DECERA */ | |
1136 | return TERM_CMD_DECERA; | |
1137 | else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECELR */ | |
1138 | return TERM_CMD_DECELR; | |
1139 | else if (flags == TERM_SEQ_FLAG_MULT) /* DECINVM */ | |
1140 | return TERM_CMD_DECINVM; | |
1141 | else if (flags == TERM_SEQ_FLAG_PLUS) /* DECPKA */ | |
1142 | return TERM_CMD_DECPKA; | |
1143 | break; | |
1144 | case '@': | |
1145 | if (flags == 0) /* ICH */ | |
1146 | return TERM_CMD_ICH; | |
1147 | break; | |
1148 | case '`': | |
1149 | if (flags == 0) /* HPA */ | |
1150 | return TERM_CMD_HPA; | |
1151 | break; | |
1152 | case '{': | |
1153 | if (flags == TERM_SEQ_FLAG_CASH) /* DECSERA */ | |
1154 | return TERM_CMD_DECSERA; | |
1155 | else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECSLE */ | |
1156 | return TERM_CMD_DECSLE; | |
1157 | break; | |
1158 | case '|': | |
1159 | if (flags == TERM_SEQ_FLAG_CASH) /* DECSCPP */ | |
1160 | return TERM_CMD_DECSCPP; | |
1161 | else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECRQLP */ | |
1162 | return TERM_CMD_DECRQLP; | |
1163 | else if (flags == TERM_SEQ_FLAG_MULT) /* DECSNLS */ | |
1164 | return TERM_CMD_DECSNLS; | |
1165 | break; | |
1166 | case '}': | |
1167 | if (flags == TERM_SEQ_FLAG_SPACE) /* DECKBD */ | |
1168 | return TERM_CMD_DECKBD; | |
1169 | else if (flags == TERM_SEQ_FLAG_CASH) /* DECSASD */ | |
1170 | return TERM_CMD_DECSASD; | |
1171 | else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECIC */ | |
1172 | return TERM_CMD_DECIC; | |
1173 | break; | |
1174 | case '~': | |
1175 | if (flags == TERM_SEQ_FLAG_SPACE) /* DECTME */ | |
1176 | return TERM_CMD_DECTME; | |
1177 | else if (flags == TERM_SEQ_FLAG_CASH) /* DECSSDT */ | |
1178 | return TERM_CMD_DECSSDT; | |
1179 | else if (flags == TERM_SEQ_FLAG_SQUOTE) /* DECDC */ | |
1180 | return TERM_CMD_DECDC; | |
1181 | break; | |
1182 | } | |
1183 | ||
1184 | return TERM_CMD_NONE; | |
1185 | } | |
1186 | ||
1187 | /* | |
1188 | * State Machine | |
1189 | * This parser controls the parser-state and returns any detected sequence to | |
1190 | * the caller. The parser is based on this state-diagram from Paul Williams: | |
1191 | * http://vt100.net/emu/ | |
1192 | * It was written from scratch and extended where needed. | |
1193 | * This parser is fully compatible up to the vt500 series. We expect UCS-4 as | |
1194 | * input. It's the caller's responsibility to do any UTF-8 parsing. | |
1195 | */ | |
1196 | ||
/*
 * States of the control-sequence state machine.
 *
 * STATE_NONE must stay 0: a zero-initialized parser starts out in it and it
 * is handled exactly like STATE_GROUND. When passed as transition target it
 * means "keep the current state".
 */
enum parser_state {
        STATE_NONE = 0,         /* placeholder / "no state change" */
        STATE_GROUND,           /* initial state and ground */
        STATE_ESC,              /* an ESC sequence was started */
        STATE_ESC_INT,          /* collecting intermediates of an ESC sequence */
        STATE_CSI_ENTRY,        /* a CSI sequence was started */
        STATE_CSI_PARAM,        /* collecting CSI parameters */
        STATE_CSI_INT,          /* collecting CSI intermediates */
        STATE_CSI_IGNORE,       /* malformed CSI; skip until it terminates */
        STATE_DCS_ENTRY,        /* a DCS sequence was started */
        STATE_DCS_PARAM,        /* collecting DCS parameters */
        STATE_DCS_INT,          /* collecting DCS intermediates */
        STATE_DCS_PASS,         /* passing through DCS payload data */
        STATE_DCS_IGNORE,       /* malformed DCS; skip until it terminates */
        STATE_OSC_STRING,       /* inside an OSC string */
        STATE_ST_IGNORE,        /* unimplemented sequence; skip until ST */
        STATE_NUM               /* number of states, not a state itself */
};
1215 | ||
/*
 * Actions that can be attached to a state transition. They are carried out
 * by parser_transition(); the DCS and OSC actions are accepted there but do
 * nothing yet.
 */
enum parser_action {
        ACTION_NONE = 0,        /* placeholder; nothing to do */
        ACTION_CLEAR,           /* reset the collected sequence data */
        ACTION_IGNORE,          /* drop the character entirely */
        ACTION_PRINT,           /* print the character on the console */
        ACTION_EXECUTE,         /* execute a single control character (C0/C1) */
        ACTION_COLLECT,         /* remember an intermediate character */
        ACTION_PARAM,           /* consume a parameter character */
        ACTION_ESC_DISPATCH,    /* an escape sequence is complete */
        ACTION_CSI_DISPATCH,    /* a CSI sequence is complete */
        ACTION_DCS_START,       /* DCS data begins */
        ACTION_DCS_COLLECT,     /* remember DCS data */
        ACTION_DCS_CONSUME,     /* consume the DCS terminator */
        ACTION_DCS_DISPATCH,    /* a DCS sequence is complete */
        ACTION_OSC_START,       /* OSC data begins */
        ACTION_OSC_COLLECT,     /* remember OSC data */
        ACTION_OSC_CONSUME,     /* consume the OSC terminator */
        ACTION_OSC_DISPATCH,    /* an OSC sequence is complete */
        ACTION_NUM              /* number of actions, not an action itself */
};
1236 | ||
1237 | int term_parser_new(term_parser **out, bool host) { | |
1238 | _term_parser_free_ term_parser *parser = NULL; | |
1239 | ||
1240 | assert_return(out, -EINVAL); | |
1241 | ||
1242 | parser = new0(term_parser, 1); | |
1243 | if (!parser) | |
1244 | return -ENOMEM; | |
1245 | ||
1246 | parser->is_host = host; | |
1247 | parser->st_alloc = 64; | |
1248 | parser->seq.st = new0(char, parser->st_alloc + 1); | |
1249 | if (!parser->seq.st) | |
1250 | return -ENOMEM; | |
1251 | ||
1252 | *out = parser; | |
1253 | parser = NULL; | |
1254 | return 0; | |
1255 | } | |
1256 | ||
1257 | term_parser *term_parser_free(term_parser *parser) { | |
1258 | if (!parser) | |
1259 | return NULL; | |
1260 | ||
1261 | free(parser->seq.st); | |
1262 | free(parser); | |
1263 | return NULL; | |
1264 | } | |
1265 | ||
1266 | static inline void parser_clear(term_parser *parser) { | |
1267 | unsigned int i; | |
1268 | ||
1269 | parser->seq.command = TERM_CMD_NONE; | |
1270 | parser->seq.terminator = 0; | |
1271 | parser->seq.intermediates = 0; | |
1272 | parser->seq.charset = TERM_CHARSET_NONE; | |
1273 | parser->seq.n_args = 0; | |
1274 | for (i = 0; i < TERM_PARSER_ARG_MAX; ++i) | |
1275 | parser->seq.args[i] = -1; | |
1276 | ||
1277 | parser->seq.n_st = 0; | |
1278 | parser->seq.st[0] = 0; | |
1279 | } | |
1280 | ||
1281 | static int parser_ignore(term_parser *parser, uint32_t raw) { | |
1282 | parser_clear(parser); | |
1283 | parser->seq.type = TERM_SEQ_IGNORE; | |
1284 | parser->seq.command = TERM_CMD_NONE; | |
1285 | parser->seq.terminator = raw; | |
1286 | parser->seq.charset = TERM_CHARSET_NONE; | |
1287 | ||
1288 | return parser->seq.type; | |
1289 | } | |
1290 | ||
1291 | static int parser_print(term_parser *parser, uint32_t raw) { | |
1292 | parser_clear(parser); | |
1293 | parser->seq.type = TERM_SEQ_GRAPHIC; | |
1294 | parser->seq.command = TERM_CMD_GRAPHIC; | |
1295 | parser->seq.terminator = raw; | |
1296 | parser->seq.charset = TERM_CHARSET_NONE; | |
1297 | ||
1298 | return parser->seq.type; | |
1299 | } | |
1300 | ||
1301 | static int parser_execute(term_parser *parser, uint32_t raw) { | |
1302 | parser_clear(parser); | |
1303 | parser->seq.type = TERM_SEQ_CONTROL; | |
1304 | parser->seq.command = TERM_CMD_GRAPHIC; | |
1305 | parser->seq.terminator = raw; | |
1306 | parser->seq.charset = TERM_CHARSET_NONE; | |
1307 | if (!parser->is_host) | |
1308 | parser->seq.command = term_parse_host_control(&parser->seq); | |
1309 | ||
1310 | return parser->seq.type; | |
1311 | } | |
1312 | ||
1313 | static void parser_collect(term_parser *parser, uint32_t raw) { | |
1314 | /* | |
1315 | * Usually, characters from 0x30 to 0x3f are only allowed as leading | |
1316 | * markers (or as part of the parameters), characters from 0x20 to 0x2f | |
1317 | * are only allowed as trailing markers. However, our state-machine | |
1318 | * already verifies those restrictions so we can handle them the same | |
1319 | * way here. Note that we safely allow markers to be specified multiple | |
1320 | * times. | |
1321 | */ | |
1322 | ||
1323 | if (raw >= 0x20 && raw <= 0x3f) | |
1324 | parser->seq.intermediates |= 1 << (raw - 0x20); | |
1325 | } | |
1326 | ||
1327 | static void parser_param(term_parser *parser, uint32_t raw) { | |
1328 | int new; | |
1329 | ||
1330 | if (raw == ';') { | |
1331 | if (parser->seq.n_args < TERM_PARSER_ARG_MAX) | |
1332 | ++parser->seq.n_args; | |
1333 | ||
1334 | return; | |
1335 | } | |
1336 | ||
1337 | if (parser->seq.n_args >= TERM_PARSER_ARG_MAX) | |
1338 | return; | |
1339 | ||
1340 | if (raw >= '0' && raw <= '9') { | |
1341 | new = parser->seq.args[parser->seq.n_args]; | |
1342 | if (new < 0) | |
1343 | new = 0; | |
1344 | new = new * 10 + raw - '0'; | |
1345 | ||
1346 | /* VT510 tells us to clamp all values to [0, 9999], however, it | |
1347 | * also allows commands with values up to 2^15-1. We simply use | |
1348 | * 2^16 as maximum here to be compatible to all commands, but | |
1349 | * avoid overflows in any calculations. */ | |
1350 | if (new > 0xffff) | |
1351 | new = 0xffff; | |
1352 | ||
1353 | parser->seq.args[parser->seq.n_args] = new; | |
1354 | } | |
1355 | } | |
1356 | ||
1357 | static int parser_esc(term_parser *parser, uint32_t raw) { | |
1358 | parser->seq.type = TERM_SEQ_ESCAPE; | |
1359 | parser->seq.command = TERM_CMD_NONE; | |
1360 | parser->seq.terminator = raw; | |
1361 | parser->seq.charset = TERM_CHARSET_NONE; | |
1362 | if (!parser->is_host) | |
1363 | parser->seq.command = term_parse_host_escape(&parser->seq, &parser->seq.charset); | |
1364 | ||
1365 | return parser->seq.type; | |
1366 | } | |
1367 | ||
1368 | static int parser_csi(term_parser *parser, uint32_t raw) { | |
1369 | /* parser->seq is cleared during CSI-ENTER state, thus there's no need | |
1370 | * to clear invalid fields here. */ | |
1371 | ||
1372 | if (parser->seq.n_args < TERM_PARSER_ARG_MAX) { | |
1373 | if (parser->seq.n_args > 0 || | |
1374 | parser->seq.args[parser->seq.n_args] >= 0) | |
1375 | ++parser->seq.n_args; | |
1376 | } | |
1377 | ||
1378 | parser->seq.type = TERM_SEQ_CSI; | |
1379 | parser->seq.command = TERM_CMD_NONE; | |
1380 | parser->seq.terminator = raw; | |
1381 | parser->seq.charset = TERM_CHARSET_NONE; | |
1382 | if (!parser->is_host) | |
1383 | parser->seq.command = term_parse_host_csi(&parser->seq); | |
1384 | ||
1385 | return parser->seq.type; | |
1386 | } | |
1387 | ||
1388 | /* perform state transition and dispatch related actions */ | |
1389 | static int parser_transition(term_parser *parser, uint32_t raw, unsigned int state, unsigned int action) { | |
1390 | if (state != STATE_NONE) | |
1391 | parser->state = state; | |
1392 | ||
1393 | switch (action) { | |
1394 | case ACTION_NONE: | |
1395 | return TERM_SEQ_NONE; | |
1396 | case ACTION_CLEAR: | |
1397 | parser_clear(parser); | |
1398 | return TERM_SEQ_NONE; | |
1399 | case ACTION_IGNORE: | |
1400 | return parser_ignore(parser, raw); | |
1401 | case ACTION_PRINT: | |
1402 | return parser_print(parser, raw); | |
1403 | case ACTION_EXECUTE: | |
1404 | return parser_execute(parser, raw); | |
1405 | case ACTION_COLLECT: | |
1406 | parser_collect(parser, raw); | |
1407 | return TERM_SEQ_NONE; | |
1408 | case ACTION_PARAM: | |
1409 | parser_param(parser, raw); | |
1410 | return TERM_SEQ_NONE; | |
1411 | case ACTION_ESC_DISPATCH: | |
1412 | return parser_esc(parser, raw); | |
1413 | case ACTION_CSI_DISPATCH: | |
1414 | return parser_csi(parser, raw); | |
1415 | case ACTION_DCS_START: | |
1416 | /* not implemented */ | |
1417 | return TERM_SEQ_NONE; | |
1418 | case ACTION_DCS_COLLECT: | |
1419 | /* not implemented */ | |
1420 | return TERM_SEQ_NONE; | |
1421 | case ACTION_DCS_CONSUME: | |
1422 | /* not implemented */ | |
1423 | return TERM_SEQ_NONE; | |
1424 | case ACTION_DCS_DISPATCH: | |
1425 | /* not implemented */ | |
1426 | return TERM_SEQ_NONE; | |
1427 | case ACTION_OSC_START: | |
1428 | /* not implemented */ | |
1429 | return TERM_SEQ_NONE; | |
1430 | case ACTION_OSC_COLLECT: | |
1431 | /* not implemented */ | |
1432 | return TERM_SEQ_NONE; | |
1433 | case ACTION_OSC_CONSUME: | |
1434 | /* not implemented */ | |
1435 | return TERM_SEQ_NONE; | |
1436 | case ACTION_OSC_DISPATCH: | |
1437 | /* not implemented */ | |
1438 | return TERM_SEQ_NONE; | |
1439 | default: | |
1440 | assert_not_reached("invalid vte-parser action"); | |
1441 | return TERM_SEQ_NONE; | |
1442 | } | |
1443 | } | |
1444 | ||
1445 | static int parser_feed_to_state(term_parser *parser, uint32_t raw) { | |
1446 | switch (parser->state) { | |
1447 | case STATE_NONE: | |
1448 | /* | |
1449 | * During initialization, parser->state is cleared. Treat this | |
1450 | * as STATE_GROUND. We will then never get to STATE_NONE again. | |
1451 | */ | |
1452 | case STATE_GROUND: | |
1453 | switch (raw) { | |
1454 | case 0x00 ... 0x1f: /* C0 */ | |
1455 | case 0x80 ... 0x9b: /* C1 \ { ST } */ | |
1456 | case 0x9d ... 0x9f: | |
1457 | return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); | |
1458 | case 0x9c: /* ST */ | |
1459 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1460 | } | |
1461 | ||
1462 | return parser_transition(parser, raw, STATE_NONE, ACTION_PRINT); | |
1463 | case STATE_ESC: | |
1464 | switch (raw) { | |
1465 | case 0x00 ... 0x1f: /* C0 */ | |
1466 | return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); | |
1467 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1468 | return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT); | |
1469 | case 0x30 ... 0x4f: /* ['0' - '~'] \ { 'P', 'X', '[', ']', '^', '_' } */ | |
1470 | case 0x51 ... 0x57: | |
1471 | case 0x59 ... 0x5a: | |
1472 | case 0x5c: | |
1473 | case 0x60 ... 0x7e: | |
1474 | return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH); | |
1475 | case 0x50: /* 'P' */ | |
1476 | return parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR); | |
1477 | case 0x5b: /* '[' */ | |
1478 | return parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR); | |
1479 | case 0x5d: /* ']' */ | |
1480 | return parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR); | |
1481 | case 0x58: /* 'X' */ | |
1482 | case 0x5e: /* '^' */ | |
1483 | case 0x5f: /* '_' */ | |
1484 | return parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE); | |
1485 | case 0x7f: /* DEL */ | |
1486 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1487 | case 0x9c: /* ST */ | |
1488 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1489 | } | |
1490 | ||
1491 | return parser_transition(parser, raw, STATE_ESC_INT, ACTION_COLLECT); | |
1492 | case STATE_ESC_INT: | |
1493 | switch (raw) { | |
1494 | case 0x00 ... 0x1f: /* C0 */ | |
1495 | return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); | |
1496 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1497 | return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); | |
1498 | case 0x30 ... 0x7e: /* ['0' - '~'] */ | |
1499 | return parser_transition(parser, raw, STATE_GROUND, ACTION_ESC_DISPATCH); | |
1500 | case 0x7f: /* DEL */ | |
1501 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1502 | case 0x9c: /* ST */ | |
1503 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1504 | } | |
1505 | ||
1506 | return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); | |
1507 | case STATE_CSI_ENTRY: | |
1508 | switch (raw) { | |
1509 | case 0x00 ... 0x1f: /* C0 */ | |
1510 | return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); | |
1511 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1512 | return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT); | |
1513 | case 0x3a: /* ':' */ | |
1514 | return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); | |
1515 | case 0x30 ... 0x39: /* ['0' - '9'] */ | |
1516 | case 0x3b: /* ';' */ | |
1517 | return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_PARAM); | |
1518 | case 0x3c ... 0x3f: /* ['<' - '?'] */ | |
1519 | return parser_transition(parser, raw, STATE_CSI_PARAM, ACTION_COLLECT); | |
1520 | case 0x40 ... 0x7e: /* ['@' - '~'] */ | |
1521 | return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH); | |
1522 | case 0x7f: /* DEL */ | |
1523 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1524 | case 0x9c: /* ST */ | |
1525 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1526 | } | |
1527 | ||
1528 | return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); | |
1529 | case STATE_CSI_PARAM: | |
1530 | switch (raw) { | |
1531 | case 0x00 ... 0x1f: /* C0 */ | |
1532 | return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); | |
1533 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1534 | return parser_transition(parser, raw, STATE_CSI_INT, ACTION_COLLECT); | |
1535 | case 0x30 ... 0x39: /* ['0' - '9'] */ | |
1536 | case 0x3b: /* ';' */ | |
1537 | return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM); | |
1538 | case 0x3a: /* ':' */ | |
1539 | case 0x3c ... 0x3f: /* ['<' - '?'] */ | |
1540 | return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); | |
1541 | case 0x40 ... 0x7e: /* ['@' - '~'] */ | |
1542 | return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH); | |
1543 | case 0x7f: /* DEL */ | |
1544 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1545 | case 0x9c: /* ST */ | |
1546 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1547 | } | |
1548 | ||
1549 | return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); | |
1550 | case STATE_CSI_INT: | |
1551 | switch (raw) { | |
1552 | case 0x00 ... 0x1f: /* C0 */ | |
1553 | return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); | |
1554 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1555 | return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); | |
1556 | case 0x30 ... 0x3f: /* ['0' - '?'] */ | |
1557 | return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); | |
1558 | case 0x40 ... 0x7e: /* ['@' - '~'] */ | |
1559 | return parser_transition(parser, raw, STATE_GROUND, ACTION_CSI_DISPATCH); | |
1560 | case 0x7f: /* DEL */ | |
1561 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1562 | case 0x9c: /* ST */ | |
1563 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1564 | } | |
1565 | ||
1566 | return parser_transition(parser, raw, STATE_CSI_IGNORE, ACTION_NONE); | |
1567 | case STATE_CSI_IGNORE: | |
1568 | switch (raw) { | |
1569 | case 0x00 ... 0x1f: /* C0 */ | |
1570 | return parser_transition(parser, raw, STATE_NONE, ACTION_EXECUTE); | |
1571 | case 0x20 ... 0x3f: /* [' ' - '?'] */ | |
1572 | return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); | |
1573 | case 0x40 ... 0x7e: /* ['@' - '~'] */ | |
1574 | return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE); | |
1575 | case 0x7f: /* DEL */ | |
1576 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1577 | case 0x9c: /* ST */ | |
1578 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1579 | } | |
1580 | ||
1581 | return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); | |
1582 | case STATE_DCS_ENTRY: | |
1583 | switch (raw) { | |
1584 | case 0x00 ... 0x1f: /* C0 */ | |
1585 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1586 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1587 | return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT); | |
1588 | case 0x3a: /* ':' */ | |
1589 | return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE); | |
1590 | case 0x30 ... 0x39: /* ['0' - '9'] */ | |
1591 | case 0x3b: /* ';' */ | |
1592 | return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_PARAM); | |
1593 | case 0x3c ... 0x3f: /* ['<' - '?'] */ | |
1594 | return parser_transition(parser, raw, STATE_DCS_PARAM, ACTION_COLLECT); | |
1595 | case 0x40 ... 0x7e: /* ['@' - '~'] */ | |
1596 | return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); | |
1597 | case 0x7f: /* DEL */ | |
1598 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1599 | case 0x9c: /* ST */ | |
1600 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1601 | } | |
1602 | ||
1603 | return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); | |
1604 | case STATE_DCS_PARAM: | |
1605 | switch (raw) { | |
1606 | case 0x00 ... 0x1f: /* C0 */ | |
1607 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1608 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1609 | return parser_transition(parser, raw, STATE_DCS_INT, ACTION_COLLECT); | |
1610 | case 0x30 ... 0x39: /* ['0' - '9'] */ | |
1611 | case 0x3b: /* ';' */ | |
1612 | return parser_transition(parser, raw, STATE_NONE, ACTION_PARAM); | |
1613 | case 0x3a: /* ':' */ | |
1614 | case 0x3c ... 0x3f: /* ['<' - '?'] */ | |
1615 | return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE); | |
1616 | case 0x40 ... 0x7e: /* ['@' - '~'] */ | |
1617 | return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); | |
1618 | case 0x7f: /* DEL */ | |
1619 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1620 | case 0x9c: /* ST */ | |
1621 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1622 | } | |
1623 | ||
1624 | return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); | |
1625 | case STATE_DCS_INT: | |
1626 | switch (raw) { | |
1627 | case 0x00 ... 0x1f: /* C0 */ | |
1628 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1629 | case 0x20 ... 0x2f: /* [' ' - '\'] */ | |
1630 | return parser_transition(parser, raw, STATE_NONE, ACTION_COLLECT); | |
1631 | case 0x30 ... 0x3f: /* ['0' - '?'] */ | |
1632 | return parser_transition(parser, raw, STATE_DCS_IGNORE, ACTION_NONE); | |
1633 | case 0x40 ... 0x7e: /* ['@' - '~'] */ | |
1634 | return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); | |
1635 | case 0x7f: /* DEL */ | |
1636 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1637 | case 0x9c: /* ST */ | |
1638 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1639 | } | |
1640 | ||
1641 | return parser_transition(parser, raw, STATE_DCS_PASS, ACTION_DCS_CONSUME); | |
1642 | case STATE_DCS_PASS: | |
1643 | switch (raw) { | |
1644 | case 0x00 ... 0x7e: /* ASCII \ { DEL } */ | |
1645 | return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT); | |
1646 | case 0x7f: /* DEL */ | |
1647 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1648 | case 0x9c: /* ST */ | |
1649 | return parser_transition(parser, raw, STATE_GROUND, ACTION_DCS_DISPATCH); | |
1650 | } | |
1651 | ||
1652 | return parser_transition(parser, raw, STATE_NONE, ACTION_DCS_COLLECT); | |
1653 | case STATE_DCS_IGNORE: | |
1654 | switch (raw) { | |
1655 | case 0x00 ... 0x7f: /* ASCII */ | |
1656 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1657 | case 0x9c: /* ST */ | |
1658 | return parser_transition(parser, raw, STATE_GROUND, ACTION_NONE); | |
1659 | } | |
1660 | ||
1661 | return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); | |
1662 | case STATE_OSC_STRING: | |
1663 | switch (raw) { | |
1664 | case 0x00 ... 0x06: /* C0 \ { BEL } */ | |
1665 | case 0x08 ... 0x1f: | |
1666 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1667 | case 0x20 ... 0x7f: /* [' ' - DEL] */ | |
1668 | return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT); | |
1669 | case 0x07: /* BEL */ | |
1670 | case 0x9c: /* ST */ | |
1671 | return parser_transition(parser, raw, STATE_GROUND, ACTION_OSC_DISPATCH); | |
1672 | } | |
1673 | ||
1674 | return parser_transition(parser, raw, STATE_NONE, ACTION_OSC_COLLECT); | |
1675 | case STATE_ST_IGNORE: | |
1676 | switch (raw) { | |
1677 | case 0x00 ... 0x7f: /* ASCII */ | |
1678 | return parser_transition(parser, raw, STATE_NONE, ACTION_IGNORE); | |
1679 | case 0x9c: /* ST */ | |
1680 | return parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1681 | } | |
1682 | ||
1683 | return parser_transition(parser, raw, STATE_NONE, ACTION_NONE); | |
1684 | } | |
1685 | ||
1686 | assert_not_reached("bad vte-parser state"); | |
1687 | return -EINVAL; | |
1688 | } | |
1689 | ||
1690 | int term_parser_feed(term_parser *parser, const term_seq **seq_out, uint32_t raw) { | |
1691 | int r; | |
1692 | ||
1693 | assert_return(parser, -EINVAL); | |
1694 | assert_return(seq_out, -EINVAL); | |
1695 | ||
1696 | /* | |
1697 | * Notes: | |
1698 | * * DEC treats GR codes as GL. We don't do that as we require UTF-8 | |
1699 | * as charset and, thus, it doesn't make sense to treat GR special. | |
1700 | * * During control sequences, unexpected C1 codes cancel the sequence | |
1701 | * and immediately start a new one. C0 codes, however, may or may not | |
1702 | * be ignored/executed depending on the sequence. | |
1703 | */ | |
1704 | ||
1705 | switch (raw) { | |
1706 | case 0x18: /* CAN */ | |
1707 | r = parser_transition(parser, raw, STATE_GROUND, ACTION_IGNORE); | |
1708 | break; | |
1709 | case 0x1a: /* SUB */ | |
1710 | r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE); | |
1711 | break; | |
1712 | case 0x80 ... 0x8f: /* C1 \ {DCS, SOS, CSI, ST, OSC, PM, APC} */ | |
1713 | case 0x91 ... 0x97: | |
1714 | case 0x99 ... 0x9a: | |
1715 | r = parser_transition(parser, raw, STATE_GROUND, ACTION_EXECUTE); | |
1716 | break; | |
1717 | case 0x1b: /* ESC */ | |
1718 | r = parser_transition(parser, raw, STATE_ESC, ACTION_CLEAR); | |
1719 | break; | |
1720 | case 0x98: /* SOS */ | |
1721 | case 0x9e: /* PM */ | |
1722 | case 0x9f: /* APC */ | |
1723 | r = parser_transition(parser, raw, STATE_ST_IGNORE, ACTION_NONE); | |
1724 | break; | |
1725 | case 0x90: /* DCS */ | |
1726 | r = parser_transition(parser, raw, STATE_DCS_ENTRY, ACTION_CLEAR); | |
1727 | break; | |
1728 | case 0x9d: /* OSC */ | |
1729 | r = parser_transition(parser, raw, STATE_OSC_STRING, ACTION_CLEAR); | |
1730 | break; | |
1731 | case 0x9b: /* CSI */ | |
1732 | r = parser_transition(parser, raw, STATE_CSI_ENTRY, ACTION_CLEAR); | |
1733 | break; | |
1734 | default: | |
1735 | r = parser_feed_to_state(parser, raw); | |
1736 | break; | |
1737 | } | |
1738 | ||
1739 | if (r <= 0) | |
1740 | *seq_out = NULL; | |
1741 | else | |
1742 | *seq_out = &parser->seq; | |
1743 | ||
1744 | return r; | |
1745 | } |