From 838dc53bb7bf52039b23af0e9ccffa51cf9ad7d0 Mon Sep 17 00:00:00 2001
From: Viktor Szakats
Date: Sat, 3 May 2025 18:11:29 +0200
Subject: [PATCH] spacecheck.pl: check for non-ASCII chars, fix fallouts

Flag files that contain non-ASCII bytes. An allowlisted set of characters
is removed from the content before checking, and the check is skipped for
an explicit list of files. Replace the non-ASCII characters this found in
CMakeLists.txt, lib/asyn-ares.c and tests/server/util.c.

The allowlist is applied as an alternation of complete UTF-8 byte
sequences, so only the listed characters are removed and not every
character that happens to share one of their bytes.

Reported-by: James Fuller
Assisted-by: Dan Fandrich
Closes #17247
---
 .github/scripts/spacecheck.pl | 32 ++++++++++++++++++++++++++++++++
 CMakeLists.txt                |  4 ++--
 lib/asyn-ares.c               |  2 +-
 tests/server/util.c           |  6 +++---
 4 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/.github/scripts/spacecheck.pl b/.github/scripts/spacecheck.pl
index 52d031dde1..e8b64f2dc7 100755
--- a/.github/scripts/spacecheck.pl
+++ b/.github/scripts/spacecheck.pl
@@ -47,6 +47,31 @@ my @space_at_eol = (
     "^tests/data/test",
 );
 
+my @non_ascii_allowed = (
+    '\xC3\xA1', # UTF-8 for https://codepoints.net/U+00E1 LATIN SMALL LETTER A WITH ACUTE
+    '\xC3\xA5', # UTF-8 for https://codepoints.net/U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
+    '\xC3\xA4', # UTF-8 for https://codepoints.net/U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
+    '\xC3\xB6', # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
+    '\xC2\xB1', # UTF-8 for https://codepoints.net/U+00B1 PLUS-MINUS SIGN
+    '\xC2\xA7', # UTF-8 for https://codepoints.net/U+00A7 SECTION SIGN
+    '\xC3\x9F', # UTF-8 for https://codepoints.net/U+00DF LATIN SMALL LETTER SHARP S
+    '\xF0\x9F\x99\x8F', # UTF-8 for https://codepoints.net/U+1f64f PERSON WITH FOLDED HANDS
+);
+
+my $non_ascii_allowed = '(?:' . join('|', @non_ascii_allowed) . ')';  # whole sequences, not single bytes
+
+my @non_ascii = (
+    ".github/scripts/spellcheck.words",
+    ".mailmap",
+    "RELEASE-NOTES",
+    "docs/BINDINGS.md",
+    "docs/CIPHERS.md",
+    "docs/THANKS",
+    "docs/THANKS-filter",
+    "tests/libtest/lib1560.c",
+    "^tests/data/test",
+);
+
 sub fn_match {
     my ($filename, @masklist) = @_;
 
@@ -134,6 +159,13 @@ while(my $filename = <$git_ls_files>) {
         push @err, "content: has binary contents";
     }
 
+    $content =~ s/$non_ascii_allowed//g;
+
+    if(!fn_match($filename, @non_ascii) &&
+       $content =~ /([\x80-\xff]+)/) {
+        push @err, "content: has non-ASCII: '$1'";
+    }
+
     if(@err) {
         $issues++;
         foreach my $err (@err) {
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 178fd93d92..f82b4d900f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2030,9 +2030,9 @@ function(curl_transform_makefile_inc _input_file _output_file)
   string(REPLACE "$(top_srcdir)"   "\${PROJECT_SOURCE_DIR}" _makefile_inc_text ${_makefile_inc_text})
   string(REPLACE "$(top_builddir)" "\${PROJECT_BINARY_DIR}" _makefile_inc_text ${_makefile_inc_text})
 
-  string(REGEX REPLACE "\\\\\n" "!π!α!" _makefile_inc_text ${_makefile_inc_text})
+  string(REGEX REPLACE "\\\\\n" "!^!^!" _makefile_inc_text ${_makefile_inc_text})
   string(REGEX REPLACE "([a-zA-Z_][a-zA-Z0-9_]*)[\t ]*=[\t ]*([^\n]*)" "set(\\1 \\2)" _makefile_inc_text ${_makefile_inc_text})
-  string(REPLACE "!π!α!" "\n" _makefile_inc_text ${_makefile_inc_text})
+  string(REPLACE "!^!^!" "\n" _makefile_inc_text ${_makefile_inc_text})
 
   # Replace $() with ${}
   string(REGEX REPLACE "\\$\\(([a-zA-Z_][a-zA-Z0-9_]*)\\)" "\${\\1}" _makefile_inc_text ${_makefile_inc_text})
diff --git a/lib/asyn-ares.c b/lib/asyn-ares.c
index 6d70976460..63dca25033 100644
--- a/lib/asyn-ares.c
+++ b/lib/asyn-ares.c
@@ -810,7 +810,7 @@ struct Curl_addrinfo *Curl_async_getaddrinfo(struct Curl_easy *data,
 }
 
 /* Set what DNS server are is to use. This is called in 2 situations:
- * 1. when the application does `CURLOPT_DNS_SERVERS´ and passing NULL
+ * 1. when the application does 'CURLOPT_DNS_SERVERS' and passing NULL
  *    means any previous set value should be unset. Which means
  *    we need to destroy and create the are channel anew, if there is one.
  * 2. When we lazy init the ares channel and NULL means that there
diff --git a/tests/server/util.c b/tests/server/util.c
index 9db8b5895a..fe1315284d 100644
--- a/tests/server/util.c
+++ b/tests/server/util.c
@@ -302,11 +302,11 @@ curl_off_t our_getpid(void)
   pid = (curl_off_t)curlx_getpid();
 #ifdef _WIN32
   /* store pid + MAX_PID to avoid conflict with Cygwin/msys PIDs, see also:
-   * - 2019-01-31: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit; ↵
+   * - 2019-01-31: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit;
    *               h=b5e1003722cb14235c4f166be72c09acdffc62ea
-   * - 2019-02-02: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit; ↵
+   * - 2019-02-02: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit;
    *               h=448cf5aa4b429d5a9cebf92a0da4ab4b5b6d23fe
-   * - 2024-12-19: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit; ↵
+   * - 2024-12-19: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit;
    *               h=363357c023ce01e936bdaedf0f479292a8fa4e0f
    */
   pid += 4194304;
-- 
2.47.3