From: larrybr Date: Mon, 30 Oct 2023 13:56:50 +0000 (+0000) Subject: Predicate Windows CLI UTF-8 console I/O on a runtime capability check rather than... X-Git-Tag: version-3.44.0~11^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8a95e92af95e9ab247ba72d2b22ac2515b317ef6;p=thirdparty%2Fsqlite.git Predicate Windows CLI UTF-8 console I/O on a runtime capability check rather than an OS version check. FossilOrigin-Name: f89d062f8890fffc957a354e966784031d561d0f8f5c174c1ccdcf77e66c32bd --- diff --git a/manifest b/manifest index 3dddd587af..280b948327 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Simplify\scode\sslightly.\sImprove\scomments\son\sadded\scode\sand\sits\suse. -D 2023-10-29T19:55:22.809 +C Predicate\sWindows\sCLI\sUTF-8\sconsole\sI/O\son\sa\sruntime\scapability\scheck\srather\sthan\san\sOS\sversion\scheck. +D 2023-10-30T13:56:50.918 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -722,7 +722,7 @@ F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c F src/resolve.c 31229276a8eb5b5de1428cd2d80f6f1cf8ffc5248be25e47cf575df12f1b8f23 F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 F src/select.c 64c9bc7494f3d220a27498137551762c25458282388ea9ac0a710dd6d5dc1510 -F src/shell.c.in 9e234ec61ce462b63ce37b29d875d0a716878f049ad50f13f55b859d01c919ad +F src/shell.c.in 3826827be22545318865b0c387a8b9208b0a0f5d849d68440a63488ea9c275c8 F src/sqlite.h.in ef0e41e83ad1ac0dcc9ec9939bf541a44b1c5de821bee2d6c61754c3252f3276 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 2f30b2671f4c03cd27a43f039e11251391066c97d11385f5f963bb40b03038ac @@ -2139,8 +2139,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P dc91eb91725f3db65c73725f1fbcf18a711cafb65b4fea3277aa0905a24df353 -R 21d5e98541aed894e5aa957e7e737a78 +P 046c84296627382ee416f64b02b77a937b368e30b32e6b800de5a854810766f6 +R 3cdce76f5cb17ac317716fdc83d39d50 U larrybr -Z 84338a34602261108f59c503d896ebcf +Z 39db1e0ea2a35c1c7711150de7a00b37 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 219a9cc027..194f81ce32 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -046c84296627382ee416f64b02b77a937b368e30b32e6b800de5a854810766f6 \ No newline at end of file +f89d062f8890fffc957a354e966784031d561d0f8f5c174c1ccdcf77e66c32bd \ No newline at end of file diff --git a/src/shell.c.in b/src/shell.c.in index 3d9b46db0b..cf8e4f0611 100644 --- a/src/shell.c.in +++ b/src/shell.c.in @@ -450,16 +450,18 @@ static int bail_on_error = 0; static int stdin_is_interactive = 1; /* -** If build is for Windows, without 3rd-party line editing, Console -** input and output may be done in a UTF-8 compatible way. This is -** determined by invocation option and OS installed capability. +** If build is for non-RT Windows, without 3rd-party line editing, +** console input and output may be done in a UTF-8 compatible way, +** if the OS is capable of it and the --no-utf8 option is not seen. */ #if (defined(_WIN32) || defined(WIN32)) && SHELL_USE_LOCAL_GETLINE \ - && !defined(SHELL_OMIT_WIN_UTF8) + && !defined(SHELL_OMIT_WIN_UTF8) && !SQLITE_OS_WINRT # define SHELL_WIN_UTF8_OPT 1 +/* Record whether to do UTF-8 console I/O translation per stream. */ static int console_utf8_in = 0; static int console_utf8_out = 0; - static int mbcs_opted = 0; +/* Record whether can do UTF-8 or --no-utf8 seen in invocation. */ + static int mbcs_opted = 1; /* Assume cannot do until shown otherwise. */ #else # define console_utf8_in 0 # define console_utf8_out 0 @@ -599,7 +601,7 @@ static char *dynamicContinuePrompt(void){ #endif /* !defined(SQLITE_OMIT_DYNAPROMPT) */ #if SHELL_WIN_UTF8_OPT -/* Following struct is used for UTF-8 operation. */ +/* Following struct is used for UTF-8 console I/O. */ static struct ConsoleState { int stdinEof; /* EOF has been seen on console input */ int infsMode; /* Input file stream mode upon shell start */ @@ -613,106 +615,102 @@ static struct ConsoleState { # define _O_U16TEXT 0x20000 #endif - -#if !SQLITE_OS_WINRT /* -** Check Windows major version against given value, returning -** 1 if the OS major version is no less than the argument. -** This check uses very late binding to the registry access -** API so that it can operate gracefully on OS versions that -** do not have that API. The Windows NT registry, for versions -** through Windows 11 (at least, as of October 2023), keeps -** the actual major version number at registry key/value -** HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\CurrentMajorVersionNumber -** where it can be read more reliably than allowed by various -** version info APIs which "process" the result in a manner -** incompatible with the purpose of the CLI's version check. -** -** If the registry API is unavailable, or the location of -** the above registry value changes, or the OS major version -** is less than the argument, this function returns 0. -*/ -static int CheckAtLeastWinX(DWORD major_version){ - typedef LONG (WINAPI *REG_OPEN)(HKEY,LPCSTR,DWORD,REGSAM,PHKEY); - typedef LSTATUS (WINAPI *REG_READ)(HKEY,LPCSTR,LPCSTR,DWORD, - LPDWORD,PVOID,LPDWORD); - typedef LSTATUS (WINAPI *REG_CLOSE)(HKEY); - int rv = 0; - HINSTANCE hLib = LoadLibrary(TEXT("Advapi32.dll")); - if( NULL != hLib ){ - REG_OPEN rkOpen = (REG_OPEN)GetProcAddress(hLib, "RegOpenKeyExA"); - REG_READ rkRead = (REG_READ)GetProcAddress(hLib, "RegGetValueA"); - REG_CLOSE rkFree = (REG_CLOSE)GetProcAddress(hLib, "RegCloseKey"); - if( rkOpen != NULL && rkRead != NULL && rkFree != NULL ){ - HKEY hk; - const char *zsk = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion"; - if( ERROR_SUCCESS == rkOpen(HKEY_LOCAL_MACHINE, zsk, 0, KEY_READ, &hk) ){ - DWORD kv = 0, kvsize = sizeof(kv); - if( ERROR_SUCCESS == rkRead(hk, 0, "CurrentMajorVersionNumber", - RRF_RT_REG_DWORD, 0, &kv, &kvsize) ){ - rv = (kv >= major_version); - } - rkFree(hk); - } - } - FreeLibrary(hLib); +** If given stream number is a console, return 1 and get some attributes, +** else return 0 and set the output attributes to invalid values. +*/ +static short console_attrs(unsigned stnum, HANDLE *pH, DWORD *pConsMode){ + static int stid[3] = { STD_INPUT_HANDLE,STD_OUTPUT_HANDLE,STD_ERROR_HANDLE }; + HANDLE h; + *pH = INVALID_HANDLE_VALUE; + *pConsMode = 0; + if( stnum > 2 ) return 0; + h = GetStdHandle(stid[stnum]); + if( h!=*pH && GetFileType(h)==FILE_TYPE_CHAR && GetConsoleMode(h,pConsMode) ){ + *pH = h; + return 1; } - return rv; + return 0; } -# define IS_WIN10_OR_LATER() CheckAtLeastWinX(10) -#else /* defined(SQLITE_OS_WINRT) */ -# define IS_WIN10_OR_LATER() 0 -#endif /* -** Prepare console, (if known to be a WIN32 console), for UTF-8 input -** (from either typing or suitable paste operations) and/or for UTF-8 -** output rendering. This may "fail" with a message to stderr, where -** the preparation is not done and common "code page" issues occur. +** Perform a runtime test of Windows console to determine if it can +** do char-stream I/O correctly when the code page is set to CP_UTF8. +** Returns are: 1 => yes it can, 0 => no it cannot +** +** The console's output code page is momentarily set, then restored. +** So this should only be run when the process is given use of the +** console for either input or output. +*/ +static short ConsoleDoesUTF8(void){ + UINT ocp = GetConsoleOutputCP(); + CONSOLE_SCREEN_BUFFER_INFO csbInfo = {0}; + /* Create an inactive screen buffer with which to do the experiment. */ + HANDLE hCSB = CreateConsoleScreenBuffer(GENERIC_READ|GENERIC_WRITE, 0, 0, + CONSOLE_TEXTMODE_BUFFER, NULL); + if( hCSB!=INVALID_HANDLE_VALUE ){ + const char TrialUtf8[] = { '\xC8', '\xAB' }; /* "È«" or 2 MBCS characters */ + COORD cpos = {0,0}; + SetConsoleCursorPosition(hCSB, cpos); + SetConsoleOutputCP(CP_UTF8); + /* Write 2 chars which are a single character in UTF-8 but more in MBCS. */ + WriteConsoleA(hCSB, TrialUtf8, sizeof(TrialUtf8), NULL, NULL); + GetConsoleScreenBufferInfo(hCSB, &csbInfo); + SetConsoleOutputCP(ocp); + CloseHandle(hCSB); + } + /* Return 1 if cursor advanced by 1 position, else 0. */ + return (short)(csbInfo.dwCursorPosition.X == 1); +} + +static short in_console = 0; +static short out_console = 0; + +/* +** Determine whether either normal I/O stream is the console, +** and whether it can do UTF-8 translation, setting globals +** in_console, out_console and mbcs_opted accordingly. +*/ +static void probe_console(void){ + HANDLE h; + DWORD cMode; + in_console = console_attrs(0, &h, &cMode); + out_console = console_attrs(1, &h, &cMode); + if( in_console || out_console ) mbcs_opted = !ConsoleDoesUTF8(); +} + +/* +** If console is used for normal I/O, absent a --no-utf8 option, +** prepare console for UTF-8 input (from either typing or suitable +** paste operations) and/or for UTF-8 output rendering. ** ** The console state upon entry is preserved, in conState, so that ** console_restore() can later restore the same console state. ** ** The globals console_utf8_in and console_utf8_out are set, for ** later use in selecting UTF-8 or MBCS console I/O translations. +** This routine depends upon globals set by probe_console(). */ static void console_prepare_utf8(void){ - HANDLE hCI = GetStdHandle(STD_INPUT_HANDLE); - HANDLE hCO = GetStdHandle(STD_OUTPUT_HANDLE); - HANDLE hCC = INVALID_HANDLE_VALUE; - DWORD consoleMode = 0; - u8 conI = 0, conO = 0; struct ConsoleState csWork = { 0, 0, 0, 0, INVALID_HANDLE_VALUE, 0 }; console_utf8_in = console_utf8_out = 0; - if( isatty(0) && GetFileType(hCI)==FILE_TYPE_CHAR ) conI = 1; - if( isatty(1) && GetFileType(hCO)==FILE_TYPE_CHAR ) conO = 1; - if( (!conI && !conO) || mbcs_opted ) return; - if( conI ) hCC = hCI; - else hCC = hCO; - if( !IsValidCodePage(CP_UTF8) || !GetConsoleMode( hCC, &consoleMode) ){ - bail: - fprintf(stderr, "Cannot use UTF-8 code page.\n"); - return; - } - csWork.hConsole = hCC; - csWork.consoleMode = consoleMode; - csWork.inCodePage = GetConsoleCP(); - csWork.outCodePage = GetConsoleOutputCP(); - if( conI ){ - if( !SetConsoleCP(CP_UTF8) ) goto bail; - consoleMode |= ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT; - SetConsoleMode(conState.hConsole, consoleMode); - csWork.infsMode = _setmode(_fileno(stdin), _O_U16TEXT); + if( (!in_console && !out_console) || mbcs_opted ) return; + console_attrs((in_console)? 0 : 1, &conState.hConsole, &conState.consoleMode); + conState.inCodePage = GetConsoleCP(); + conState.outCodePage = GetConsoleOutputCP(); + if( in_console ){ + SetConsoleCP(CP_UTF8); + DWORD newConsoleMode = conState.consoleMode + | ENABLE_LINE_INPUT | ENABLE_PROCESSED_INPUT; + SetConsoleMode(conState.hConsole, newConsoleMode); + conState.infsMode = _setmode(_fileno(stdin), _O_U16TEXT); + console_utf8_in = 1; } - if( conO ){ - /* Here, it is assumed that if conI is true, this call will also - ** succeed, so there is no need to undo above setup upon failure. */ - if( !SetConsoleOutputCP(CP_UTF8) ) goto bail; + if( out_console ){ + SetConsoleOutputCP(CP_UTF8); + console_utf8_out = 1; } - console_utf8_in = conI; - console_utf8_out = conO; - conState = csWork; } /* @@ -12187,7 +12185,8 @@ int SQLITE_CDECL wmain(int argc, wchar_t **wargv){ stdout_is_console = isatty(1); #endif #if SHELL_WIN_UTF8_OPT - atexit(console_restore); /* Needs revision for CLI as library call */ + probe_console(); /* Check for console I/O and UTF-8 capability. */ + if( !mbcs_opted ) atexit(console_restore); #endif atexit(sayAbnormalExit); #ifdef SQLITE_DEBUG @@ -12272,16 +12271,6 @@ int SQLITE_CDECL wmain(int argc, wchar_t **wargv){ } #endif -#if SHELL_WIN_UTF8_OPT - /* If Windows build and not RT, set default MBCS/UTF-8 translation for - ** console according to detected Windows version. This default may be - ** overridden by a -no-utf8 or (undocumented) -utf8 invocation option. - ** If a runtime check for UTF-8 console I/O capability is devised, - ** that should be preferred over this version check. - */ - mbcs_opted = (IS_WIN10_OR_LATER())? 0 : 1; -#endif - /* Do an initial pass through the command-line argument to locate ** the name of the database file, the name of the initialization file, ** the size of the alternative malloc heap, options affecting commands @@ -12332,8 +12321,8 @@ int SQLITE_CDECL wmain(int argc, wchar_t **wargv){ stdin_is_interactive = 0; }else if( cli_strcmp(z,"-utf8")==0 ){ #if SHELL_WIN_UTF8_OPT - /* Option accepted, but just specifies default UTF-8 console I/O. */ - mbcs_opted = 0; + /* Option accepted, but is ignored except for this diagnostic. */ + if( mbcs_opted ) fprintf(stderr, "Cannot do UTF-8 at this console.\n"); #endif /* SHELL_WIN_UTF8_OPT */ }else if( cli_strcmp(z,"-no-utf8")==0 ){ #if SHELL_WIN_UTF8_OPT @@ -12479,10 +12468,10 @@ int SQLITE_CDECL wmain(int argc, wchar_t **wargv){ } #if SHELL_WIN_UTF8_OPT /* Get indicated Windows console setup done before running invocation commands. */ - if( stdin_is_interactive || stdout_is_console ){ + if( in_console || out_console ){ console_prepare_utf8(); } - if( !stdin_is_interactive ){ + if( !in_console ){ setBinaryMode(stdin, 0); } #endif