From: Lennart Poettering Date: Thu, 31 Oct 2024 10:22:39 +0000 (+0100) Subject: string-util: also check for 0x1b 0x5c ST when stripping ANSI from strings X-Git-Tag: v257-rc1~61^2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F34964%2Fhead;p=thirdparty%2Fsystemd.git string-util: also check for 0x1b 0x5c ST when stripping ANSI from strings --- diff --git a/src/basic/string-util.c b/src/basic/string-util.c index 9526443e50a..171a3680594 100644 --- a/src/basic/string-util.c +++ b/src/basic/string-util.c @@ -699,6 +699,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { STATE_ESCAPE, STATE_CSI, STATE_OSC, + STATE_OSC_CLOSING, } state = STATE_OTHER; _cleanup_(memstream_done) MemStream m = {}; size_t isz, shift[2] = {}, n_carriage_returns = 0; @@ -711,7 +712,7 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { * * 1. Replaces TABs by 8 spaces * 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' … 'm' sequences - * 3. Strips ANSI operating system sequences (OSC), i.e. ESC ']' … BEL sequences + * 3. Strips ANSI operating system sequences (OSC), i.e. ESC ']' … ST sequences * 4. Strip trailing \r characters (since they would "move the cursor", but have no * other effect). * @@ -796,14 +797,32 @@ char* strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) { case STATE_OSC: assert(n_carriage_returns == 0); + /* There are three kinds of OSC terminators: \x07, \x1b\x5c or \x9c. We only support + * the first two, because the last one is a valid UTF-8 codepoint and hence creates + * an ambiguity (many Terminal emulators refuse to support it as well). */ if (i >= *ibuf + isz || /* EOT … */ - (*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */ + (!IN_SET(*i, '\x07', '\x1b') && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */ fputc('\x1B', f); fputc(']', f); advance_offsets(i - *ibuf, highlight, shift, 2); state = STATE_OTHER; i = begin-1; - } else if (*i == '\a') + } else if (*i == '\x07') /* Single character ST */ + state = STATE_OTHER; + else if (*i == '\x1B') + state = STATE_OSC_CLOSING; + + break; + + case STATE_OSC_CLOSING: + if (i >= *ibuf + isz || /* EOT … */ + *i != '\x5c') { /* … or incomplete two-byte ST in sequence */ + fputc('\x1B', f); + fputc(']', f); + advance_offsets(i - *ibuf, highlight, shift, 2); + state = STATE_OTHER; + i = begin-1; + } else if (*i == '\x5c') state = STATE_OTHER; break; diff --git a/src/test/test-strip-tab-ansi.c b/src/test/test-strip-tab-ansi.c index 3ad0fcd90e6..00bd7cdaf11 100644 --- a/src/test/test-strip-tab-ansi.c +++ b/src/test/test-strip-tab-ansi.c @@ -67,6 +67,15 @@ TEST(strip_tab_ansi) { assert_se(strip_tab_ansi(&q, NULL, NULL)); ASSERT_STREQ(q, qq); } + + /* Test that both kinds of ST are recognized after OSC */ + assert_se(p = strdup("before" ANSI_OSC "inside1" ANSI_ST + "between1" ANSI_OSC "inside2\a" + "between2" ANSI_OSC "inside3\x1b\x5c" + "after")); + assert_se(strip_tab_ansi(&p, NULL, NULL)); + ASSERT_STREQ(p, "beforebetween1between2after"); + free(p); } DEFINE_TEST_MAIN(LOG_INFO);