From 9131a310e4081fba942cee23a42599805169ddc4 Mon Sep 17 00:00:00 2001 From: Juarez Rudsatz Date: Sun, 19 Jan 2025 17:26:54 -0300 Subject: [PATCH] column: handle OSC 8 hyperlink escape sequences --- text-utils/column.c | 70 +++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/text-utils/column.c b/text-utils/column.c index a0a94867f..2524c9848 100644 --- a/text-utils/column.c +++ b/text-utils/column.c @@ -112,18 +112,19 @@ typedef enum { ANSI_ESC = 0x1b, ANSI_SGR = '[', ANSI_OSC = ']', - ANSI_APC = '_', - ANSI_BSL = '\\' + ANSI_LNK = '8', + ANSI_LBL = 0x7, + ANSI_END = '\\' } ansi_esc_states; /** * Count how many characters are non-printable due to ANSI X3.41 escape codes. * - * It detects and count only Fe Escape sequences. These sequences contains characters - * that normally are printable, but due to being part of a escape sequence are ignored - * when displayed in console terminals. + * It detects and counts Fe Escape and OSC 8 link sequences. These sequences contain + * characters that are normally printable, but due to being part of an escape sequence + * are ignored when displayed in console terminals. 
*/ -static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const wchar_t *str) +static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const wchar_t *str, int chw) { switch (*state) { case ANSI_CHR: @@ -144,7 +145,7 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const case '_': // APC - Application Program Command case 'P': // DCS - Device Control String case '^': // PM - Privacy Message - *state = ANSI_APC; + *state = ANSI_END; break; default: *state = ANSI_CHR; @@ -153,7 +154,7 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const *found = 1; return 0; case ANSI_SGR: - *found += 1; + *found += chw; // Fe escape sequences allows the range 0x30-0x3f // However SGR (Select Graphic Rendition) only uses: 0-9 ';' ':' if (*str >= '0' && *str <= '?') @@ -162,25 +163,45 @@ static inline size_t ansi_esc_width(ansi_esc_states *state, size_t *found, const if (*str <= '@' && *str >= '~') *found = 0; break; - case ANSI_APC: case ANSI_OSC: - *found += 1; -#ifdef HAVE_WIDECHAR - if (*str == 0x9c || *str == 0x7) // ends with ST (String Terminator) or BEL (\a) + *found += chw; + if (*str == ANSI_LNK) // OSC8-Link + *state = ANSI_LNK; + else + *state = ANSI_END; // other command sequences are ignored + return 0; + case ANSI_LNK: // OSC8 Terminal Hyperlink Sequence + switch (*str) { + case 0x7: // Separated by BEL + *state = ANSI_LBL; //# \e]8;;LINK\aTEXT\e]8;;\a # break; -#else - if (((unsigned char)*str) == 0x9c || *str == 0x7) + case 0x1b: // OSC8-Link separated by ESC BACKSLASH + *found += 2; + *state = ANSI_LBL; //# \e]8;;LINK\e\\TEXT\e]8;;\e\\ # break; -#endif - else if (*str == 0x1b) // ends with ESC BACKSLASH - *state = ANSI_BSL; + default: + *found += 1; + } + return 0; // ignore link width + case ANSI_LBL: + if (*str == 0x1b) { // Link label goes until ESC BACKSLASH + *found += chw; + *state = ANSI_END; + } return 0; - case ANSI_BSL: - if (*str == '\\') // ends with 
BACKSLASH + case ANSI_END: + switch (*str) { + case 0x1b: // APC/OSC8-Links ends with ESC BACKSLASH + *found += chw; break; - *found = 0; + case 0x7: // APC/OSC/OSC8-Links ends with BEL + case 0x9c: // APC/DCS/PM ends with ST (String Terminator) + break; + default: + *found += chw; return 0; } + } size_t res = *found; *state = ANSI_CHR; *found = 0; @@ -196,13 +217,12 @@ static size_t width(const wchar_t *str) for (; *str != '\0'; str++) { #ifdef HAVE_WIDECHAR int x = wcwidth(*str); /* don't use wcswidth(), need to ignore non-printable */ - if (x > 0) - count += x; #else - if (isprint(*str)) - count++; + int x = isprint(*str) ? 1 : 0; #endif - count -= ansi_esc_width(&state, &found, str); + int chw = x > 0 ? x : 0; + size_t nonpr = ansi_esc_width(&state, &found, str, chw); + count += chw - nonpr; } return count; } -- 2.47.2