From: Eli Zaretskii Date: Thu, 22 Jan 2026 11:03:13 +0000 (+0200) Subject: gdb: Support UTF-8 output on MS-Windows terminal X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=43c658ff0cf2bf0b407a563b0de2a36d55e2d5df;p=thirdparty%2Fbinutils-gdb.git gdb: Support UTF-8 output on MS-Windows terminal This detects when the Windows Terminal uses codepage 65001 (a.k.a. "UTF-8") for output, and sets the default host charset to UTF-8 in that case. It also enables Emoji styling, as the Windows terminal supports it in that case. * gdb/charset.c (INIT_GDB_FILE) [USE_WIN32API]: If the Windows console uses codepage 65001, set default host charset to UTF-8, and switch to the "C" locale, to prevent Windows from interpreting UTF-8 sequences written to the console. * gdb/mingw-hdep.c (windows_initialize_console): Don't disable Emoji here... * gdb/charset.c (INIT_GDB_FILE) [USE_WIN32API]: ...disable them here instead, and only if the console doesn't use UTF-8. --- diff --git a/gdb/charset.c b/gdb/charset.c index 7c6cd938c32..eaddfb57098 100644 --- a/gdb/charset.c +++ b/gdb/charset.c @@ -19,6 +19,7 @@ #include "charset.h" #include "cli/cli-cmds.h" +#include "cli/cli-style.h" #include "gdbsupport/gdb_obstack.h" #include "gdbsupport/gdb_wait.h" #include "charset-list.h" @@ -1009,11 +1010,28 @@ INIT_GDB_FILE (charset) { /* "CP" + x<=5 digits + paranoia. */ static char w32_host_default_charset[16]; - - snprintf (w32_host_default_charset, sizeof w32_host_default_charset, - "CP%d", GetACP()); + unsigned codepage = mingw_get_codeset (); + + /* The rest of the code expects a literal "UTF-8" and doesn't know + anything about codepage 65001. */ + if (codepage == 65001) + { + strcpy (w32_host_default_charset, "UTF-8"); + /* This is needed to force Windows CRT output functions to treat + output as a simple stream of bytes, instead of trying to + interpret it as encoded non-ASCII text, which will fail if + the system locale's codeset is NOT UTF-8. 
*/ + setlocale (LC_CTYPE, "C"); + } + else + snprintf (w32_host_default_charset, sizeof w32_host_default_charset, + "CP%u", codepage); auto_host_charset_name = w32_host_default_charset; auto_target_charset_name = auto_host_charset_name; + + /* Windows Terminal supports Emoji when using UTF-8 output. */ + if (strcmp (w32_host_default_charset, "UTF-8") != 0) + no_emojis (); } #endif #endif diff --git a/gdb/charset.h b/gdb/charset.h index 6cfab83b896..431590f5000 100644 --- a/gdb/charset.h +++ b/gdb/charset.h @@ -165,4 +165,8 @@ char host_letter_to_control_character (char c); #define HOST_UTF32 "UTF-32LE" #endif +#ifdef __MINGW32__ + unsigned int mingw_get_codeset (); +#endif + #endif /* GDB_CHARSET_H */ diff --git a/gdb/mingw-hdep.c b/gdb/mingw-hdep.c index 1fad256bc42..efad50c4cbe 100644 --- a/gdb/mingw-hdep.c +++ b/gdb/mingw-hdep.c @@ -24,10 +24,10 @@ #include "gdbsupport/event-loop.h" #include "gdbsupport/gdb_select.h" #include "inferior.h" -#include "cli/cli-style.h" #include "command.h" #include "cli/cli-cmds.h" #include "terminal.h" +#include "charset.h" #include #include @@ -258,9 +258,6 @@ windows_initialize_console () } else if (hstdout != INVALID_HANDLE_VALUE) mingw_use_console_color_apis = -1; /* valid, but not a console device */ - - if (mingw_use_console_color_apis > 0) - no_emojis (); } void @@ -440,6 +437,27 @@ gdb_console_fputs (const char *linebuf, FILE *fstream) return 1; } +unsigned int +mingw_get_codeset () +{ + unsigned int default_codepage = GetACP (); + unsigned int output_codepage = GetConsoleOutputCP (); + + /* Multibyte writes will not work correctly if written one byte at a + time, which is what gdb_console_fputs above does. So if they set + the console's output to use UTF-8, only use that if we have + successfully set up the terminal for Virtual Terminal Sequences, + and are using 'fputs' directly. 
*/ + if (output_codepage == 0 /* GetConsoleOutputCP failed */ + || (output_codepage == 65001 + && output_codepage != default_codepage + && !(mingw_use_console_color_apis < 0 + && hstdout != INVALID_HANDLE_VALUE + && orig_console_mode != 0))) + return default_codepage; + return output_codepage; +} + /* See inferior.h. */ tribool