From: Viktor Szakats Date: Tue, 13 May 2025 09:11:55 +0000 (+0200) Subject: spacecheck.pl: drop more exceptions X-Git-Tag: curl-8_14_0~88 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=828f9984937f2494d3bc44690d11ce9e4b9500ef;p=thirdparty%2Fcurl.git spacecheck.pl: drop more exceptions - replace ß (scharfes S) with links. - replace § (section sign) with links. - replace 🙏 emoji with `:pray:`. Supported by GitHub, Forgejo/Gitea and most likely GitLab. - docs/libcurl/curl_mprintf.md: replace Unicode ± with `{+|-}`. - docs/CIPHERS.md: URL encode Unicode in URLs. - lib1560: use hex encoding in `räksmörgås.se`. - unit1307: use hex encoding in `Lindmätarv`. - drop LATIN SMALL LETTER A WITH ACUTE exception. No longer appears in tests. This leaves the single character exception: `ö` And file exceptions holding contributor names. Follow-up to 9243ed59b387a90940fa4a16ebfd99ad7d6c2f63 #17329 Follow-up to 838dc53bb7bf52039b23af0e9ccffa51cf9ad7d0 #17247 Closes #17335 --- diff --git a/.github/scripts/spacecheck.pl b/.github/scripts/spacecheck.pl index 97ebdee0c0..6e2488d28f 100755 --- a/.github/scripts/spacecheck.pl +++ b/.github/scripts/spacecheck.pl @@ -48,14 +48,7 @@ my @space_at_eol = ( ); my @non_ascii_allowed = ( - '\xC3\xA1', # UTF-8 for https://codepoints.net/U+00E1 LATIN SMALL LETTER A WITH ACUTE - '\xC3\xA5', # UTF-8 for https://codepoints.net/U+00E5 LATIN SMALL LETTER A WITH RING ABOVE - '\xC3\xA4', # UTF-8 for https://codepoints.net/U+00E4 LATIN SMALL LETTER A WITH DIAERESIS - '\xC3\xB6', # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS - '\xC2\xB1', # UTF-8 for https://codepoints.net/U+00B1 PLUS-MINUS SIGN - '\xC2\xA7', # UTF-8 for https://codepoints.net/U+00A7 SECTION SIGN - '\xC3\x9F', # UTF-8 for https://codepoints.net/U+00DF LATIN SMALL LETTER SHARP S - '\xF0\x9F\x99\x8F', # UTF-8 for https://codepoints.net/U+1f64f PERSON WITH FOLDED HANDS + '\xC3\xB6', # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O 
WITH DIAERESIS ); my $non_ascii_allowed = join(', ', @non_ascii_allowed); @@ -65,7 +58,6 @@ my @non_ascii = ( ".mailmap", "RELEASE-NOTES", "docs/BINDINGS.md", - "docs/CIPHERS.md", "docs/THANKS", "docs/THANKS-filter", ); diff --git a/README.md b/README.md index 2d324917a6..3359818fd5 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ distribution terms. ## Backers -Thank you to all our backers 🙏 [Become a backer](https://opencollective.com/curl#section-contribute). +Thank you to all our backers :pray: [Become a backer](https://opencollective.com/curl#section-contribute). ## Sponsors diff --git a/docs/CIPHERS.md b/docs/CIPHERS.md index fb8814302e..8a8bc3d8ea 100644 --- a/docs/CIPHERS.md +++ b/docs/CIPHERS.md @@ -18,9 +18,9 @@ are names for specific combinations of and with TLSv1.3 the [authenticated encryption](https://en.wikipedia.org/wiki/Authenticated_encryption). In addition, there are other parameters that influence the TLS handshake, like -[DHE](https://en.wikipedia.org/wiki/Diffie–Hellman_key_exchange) "groups" and -[ECDHE](https://en.wikipedia.org/wiki/Elliptic-curve_Diffie–Hellman) with its -"curves". +[DHE](https://en.wikipedia.org/wiki/Diffie%e2%80%93Hellman_key_exchange) "groups" +and [ECDHE](https://en.wikipedia.org/wiki/Elliptic-curve_Diffie%e2%80%93Hellman) +with its "curves". ### History diff --git a/docs/KNOWN_BUGS b/docs/KNOWN_BUGS index 0f39401604..191d47a979 100644 --- a/docs/KNOWN_BUGS +++ b/docs/KNOWN_BUGS @@ -46,7 +46,7 @@ problems may have been fixed or changed somewhat since this was written. 6. Authentication 6.2 MIT Kerberos for Windows build 6.3 NTLM in system context uses wrong name - 6.5 NTLM does not support password with § character + 6.5 NTLM does not support password with Unicode 'SECTION SIGN' character 6.6 libcurl can fail to try alternatives with --proxy-any 6.7 Do not clear digest for single realm 6.8 Heimdal memory leaks @@ -318,8 +318,10 @@ problems may have been fixed or changed somewhat since this was written. 
"system context" makes it use wrong(?) username - at least when compared to what winhttp does. See https://curl.se/bug/view.cgi?id=535 -6.5 NTLM does not support password with § character +6.5 NTLM does not support password with Unicode 'SECTION SIGN' character + https://en.wikipedia.org/wiki/Section_sign + https://codepoints.net/U+00A7 SECTION SIGN https://github.com/curl/curl/issues/2120 6.6 libcurl can fail to try alternatives with --proxy-any diff --git a/docs/URL-SYNTAX.md b/docs/URL-SYNTAX.md index 61682f4252..81c69260da 100644 --- a/docs/URL-SYNTAX.md +++ b/docs/URL-SYNTAX.md @@ -194,8 +194,9 @@ handle hostnames using non-ASCII characters. When built with libidn2, curl uses the IDNA 2008 standard. This is equivalent to the WHATWG URL spec, but differs from certain browsers that use IDNA 2003 Transitional Processing. The two standards have a huge overlap but differ -slightly, perhaps most famously in how they deal with the German "double s" -(`ß`). +slightly, perhaps most famously in how they deal with the +[German "double s"](https://en.wikipedia.org/wiki/%c3%9f) +([LATIN SMALL LETTER SHARP S](https://codepoints.net/U+00DF)). When WinIDN is used, curl uses IDNA 2003 Transitional Processing, like the rest of Windows. diff --git a/docs/libcurl/curl_mprintf.md b/docs/libcurl/curl_mprintf.md index 8f1daa8c26..4f5c090579 100644 --- a/docs/libcurl/curl_mprintf.md +++ b/docs/libcurl/curl_mprintf.md @@ -226,7 +226,7 @@ printed with an explicit precision 0, the output is empty. 
## e, E -The double argument is rounded and output in the style **"[-]d.ddde±dd"** +The double argument is rounded and output in the style **"[-]d.ddde{+|-}dd"** ## f, F diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 4b5f8c75e1..c9ae08fe0d 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -202,14 +202,20 @@ static const struct testcase get_parts_list[] ={ "| [16] | [17]", 0, CURLU_URLDECODE, CURLUE_OK }, #ifdef USE_IDN - {"https://räksmörgås.se", + /* + https://sv.wikipedia.org/wiki/R%c3%a4ksm%c3%b6rg%c3%a5s + https://codepoints.net/U+00E4 Latin Small Letter A with Diaeresis + https://codepoints.net/U+00F6 Latin Small Letter O with Diaeresis + https://codepoints.net/U+00E5 Latin Small Letter A with Ring Above + */ + {"https://r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se", "https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | " "[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK}, {"https://xn--rksmrgs-5wao1o.se", - "https | [11] | [12] | [13] | räksmörgås.se | " + "https | [11] | [12] | [13] | r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se | " "[15] | / | [16] | [17]", 0, CURLU_PUNY2IDN, CURLUE_OK}, #else - {"https://räksmörgås.se", + {"https://r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se", "https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK}, #endif @@ -619,7 +625,7 @@ static const struct urltestcase get_url_list[] = { "", 0, 0, CURLUE_BAD_PORT_NUMBER}, #ifdef USE_IDN - {"https://räksmörgås.se/path?q#frag", + {"https://r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se/path?q#frag", "https://xn--rksmrgs-5wao1o.se/path?q#frag", 0, CURLU_PUNYCODE, CURLUE_OK}, #endif /* unsupported schemes with no guessing enabled */ diff --git a/tests/unit/unit1307.c b/tests/unit/unit1307.c index 7237cf5421..5f678cffc9 100644 --- a/tests/unit/unit1307.c +++ b/tests/unit/unit1307.c @@ -243,7 +243,8 @@ static const struct testcase tests[] = { { "?*?*?.?", "abcdef.c", MATCH }, { "?*?*?.?", "abcdef.cd", NOMATCH }, - { "Lindmätarv", "Lindmätarv", 
MATCH }, + /* https://codepoints.net/U+00E4 Latin Small Letter A with Diaeresis */ + { "Lindm\xc3\xa4tarv", "Lindm\xc3\xa4tarv", MATCH }, { "", "", MATCH}, {"**]*[*[\x13]**[*\x13)]*]*[**[*\x13~r-]*]**[.*]*[\xe3\xe3\xe3\xe3\xe3\xe3"