git.ipfire.org Git - thirdparty/curl.git/commitdiff
spacecheck.pl: drop more exceptions
author Viktor Szakats <commit@vsz.me>
Tue, 13 May 2025 09:11:55 +0000 (11:11 +0200)
committer Viktor Szakats <commit@vsz.me>
Tue, 13 May 2025 14:01:07 +0000 (16:01 +0200)
- replace ß (scharfes S) with links.
- replace § (section sign) with links.
- replace 🙏 emoji with `:pray:`.
  Supported by GitHub, Forgejo/Gitea and most likely GitLab.
- docs/libcurl/curl_mprintf.md: replace Unicode ± with `{+|-}`.
- docs/CIPHERS.md: URL encode Unicode in URLs.
- lib1560: use hex encoding in `räksmörgås.se`.
- unit1307: use hex encoding in `Lindmätarv`.
- drop LATIN SMALL LETTER A WITH ACUTE exception.
  No longer appears in tests.

This leaves a single character exception, `ö`, plus the file exceptions
that hold contributor names.

Follow-up to 9243ed59b387a90940fa4a16ebfd99ad7d6c2f63 #17329
Follow-up to 838dc53bb7bf52039b23af0e9ccffa51cf9ad7d0 #17247

Closes #17335

.github/scripts/spacecheck.pl
README.md
docs/CIPHERS.md
docs/KNOWN_BUGS
docs/URL-SYNTAX.md
docs/libcurl/curl_mprintf.md
tests/libtest/lib1560.c
tests/unit/unit1307.c

index 97ebdee0c0cc3ae1396f7e0325b440830489df02..6e2488d28f34f12c6071058e29fb751866fb4607 100755 (executable)
@@ -48,14 +48,7 @@ my @space_at_eol = (
 );
 
 my @non_ascii_allowed = (
-    '\xC3\xA1',          # UTF-8 for https://codepoints.net/U+00E1 LATIN SMALL LETTER A WITH ACUTE
-    '\xC3\xA5',          # UTF-8 for https://codepoints.net/U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
-    '\xC3\xA4',          # UTF-8 for https://codepoints.net/U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
-    '\xC3\xB6',          # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
-    '\xC2\xB1',          # UTF-8 for https://codepoints.net/U+00B1 PLUS-MINUS SIGN
-    '\xC2\xA7',          # UTF-8 for https://codepoints.net/U+00A7 SECTION SIGN
-    '\xC3\x9F',          # UTF-8 for https://codepoints.net/U+00DF LATIN SMALL LETTER SHARP S
-    '\xF0\x9F\x99\x8F',  # UTF-8 for https://codepoints.net/U+1f64f PERSON WITH FOLDED HANDS
+    '\xC3\xB6',  # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
 );
 
 my $non_ascii_allowed = join(', ', @non_ascii_allowed);
@@ -65,7 +58,6 @@ my @non_ascii = (
     ".mailmap",
     "RELEASE-NOTES",
     "docs/BINDINGS.md",
-    "docs/CIPHERS.md",
     "docs/THANKS",
     "docs/THANKS-filter",
 );
index 2d324917a6dcb286ac1ecc07b9c38824216d2362..3359818fd5723d258f0099f228a02b8bcda26f1b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ distribution terms.
 
 ## Backers
 
-Thank you to all our backers 🙏 [Become a backer](https://opencollective.com/curl#section-contribute).
+Thank you to all our backers :pray: [Become a backer](https://opencollective.com/curl#section-contribute).
 
 ## Sponsors
 
index fb8814302e26e8c40e41246315bc9afa8e990dfd..8a8bc3d8eacc0b7f20f59ac566c1f9410d9e5dc5 100644 (file)
@@ -18,9 +18,9 @@ are names for specific combinations of
 and with TLSv1.3 the
 [authenticated encryption](https://en.wikipedia.org/wiki/Authenticated_encryption).
 In addition, there are other parameters that influence the TLS handshake, like
-[DHE](https://en.wikipedia.org/wiki/Diffie–Hellman_key_exchange) "groups" and
-[ECDHE](https://en.wikipedia.org/wiki/Elliptic-curve_Diffie–Hellman) with its
-"curves".
+[DHE](https://en.wikipedia.org/wiki/Diffie%e2%80%93Hellman_key_exchange) "groups"
+and [ECDHE](https://en.wikipedia.org/wiki/Elliptic-curve_Diffie%e2%80%93Hellman)
+with its "curves".
 
 ### History
 
index 0f394016045da4bb282e951cf3596036f38d5987..191d47a979a98b0c75ce82f84b0ceff794c034ea 100644 (file)
@@ -46,7 +46,7 @@ problems may have been fixed or changed somewhat since this was written.
  6. Authentication
  6.2 MIT Kerberos for Windows build
  6.3 NTLM in system context uses wrong name
- 6.5 NTLM does not support password with § character
+ 6.5 NTLM does not support password with Unicode 'SECTION SIGN' character
  6.6 libcurl can fail to try alternatives with --proxy-any
  6.7 Do not clear digest for single realm
  6.8 Heimdal memory leaks
@@ -318,8 +318,10 @@ problems may have been fixed or changed somewhat since this was written.
  "system context" makes it use wrong(?) username - at least when compared to
  what winhttp does. See https://curl.se/bug/view.cgi?id=535
 
-6.5 NTLM does not support password with § character
+6.5 NTLM does not support password with Unicode 'SECTION SIGN' character
 
+ https://en.wikipedia.org/wiki/Section_sign
+ https://codepoints.net/U+00A7 SECTION SIGN
  https://github.com/curl/curl/issues/2120
 
 6.6 libcurl can fail to try alternatives with --proxy-any
index 61682f4252f3e698aea13d240e8d180a622497d4..81c69260dad20126295f415b5f3bf9c6f2f1c4e4 100644 (file)
@@ -194,8 +194,9 @@ handle hostnames using non-ASCII characters.
 When built with libidn2, curl uses the IDNA 2008 standard. This is equivalent
 to the WHATWG URL spec, but differs from certain browsers that use IDNA 2003
 Transitional Processing. The two standards have a huge overlap but differ
-slightly, perhaps most famously in how they deal with the German "double s"
-(`ß`).
+slightly, perhaps most famously in how they deal with the
+[German "double s"](https://en.wikipedia.org/wiki/%c3%9f)
+([LATIN SMALL LETTER SHARP S](https://codepoints.net/U+00DF)).
 
 When WinIDN is used, curl uses IDNA 2003 Transitional Processing, like the rest
 of Windows.
index 8f1daa8c2646e1e5b56de5125b9d9a16b6f86c2a..4f5c090579ba397087fe2bc1c6e59a264a93ea9f 100644 (file)
@@ -226,7 +226,7 @@ printed with an explicit precision 0, the output is empty.
 
 ## e, E
 
-The double argument is rounded and output in the style **"[-]d.ddde±dd"**
+The double argument is rounded and output in the style **"[-]d.ddde{+|-}dd"**
 
 ## f, F
 
index 4b5f8c75e1adb281a16c490d851c82085e140799..c9ae08fe0d00a12688cd2ec5eb72af7d2b85b3cf 100644 (file)
@@ -202,14 +202,20 @@ static const struct testcase get_parts_list[] ={
    "| [16] | [17]",
    0, CURLU_URLDECODE, CURLUE_OK },
 #ifdef USE_IDN
-  {"https://räksmörgås.se",
+  /*
+    https://sv.wikipedia.org/wiki/R%c3%a4ksm%c3%b6rg%c3%a5s
+    https://codepoints.net/U+00E4 Latin Small Letter A with Diaeresis
+    https://codepoints.net/U+00F6 Latin Small Letter O with Diaeresis
+    https://codepoints.net/U+00E5 Latin Small Letter A with Ring Above
+  */
+  {"https://r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se",
    "https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | "
    "[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK},
   {"https://xn--rksmrgs-5wao1o.se",
-   "https | [11] | [12] | [13] | räksmörgås.se | "
+   "https | [11] | [12] | [13] | r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se | "
    "[15] | / | [16] | [17]", 0, CURLU_PUNY2IDN, CURLUE_OK},
 #else
-  {"https://räksmörgås.se",
+  {"https://r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se",
    "https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",
    0, CURLU_PUNYCODE, CURLUE_OK},
 #endif
@@ -619,7 +625,7 @@ static const struct urltestcase get_url_list[] = {
    "",
    0, 0, CURLUE_BAD_PORT_NUMBER},
 #ifdef USE_IDN
-  {"https://räksmörgås.se/path?q#frag",
+  {"https://r\xc3\xa4ksm\xc3\xb6rg\xc3\xa5s.se/path?q#frag",
    "https://xn--rksmrgs-5wao1o.se/path?q#frag", 0, CURLU_PUNYCODE, CURLUE_OK},
 #endif
   /* unsupported schemes with no guessing enabled */
index 7237cf5421fa87aea994dc858165545bb9d8b422..5f678cffc90e6bb886f58c652a1189f030bf10de 100644 (file)
@@ -243,7 +243,8 @@ static const struct testcase tests[] = {
   { "?*?*?.?",                  "abcdef.c",               MATCH },
   { "?*?*?.?",                  "abcdef.cd",              NOMATCH },
 
-  { "Lindmätarv",               "Lindmätarv",             MATCH },
+  /* https://codepoints.net/U+00E4 Latin Small Letter A with Diaeresis */
+  { "Lindm\xc3\xa4tarv",        "Lindm\xc3\xa4tarv",      MATCH },
 
   { "",                         "",                       MATCH},
   {"**]*[*[\x13]**[*\x13)]*]*[**[*\x13~r-]*]**[.*]*[\xe3\xe3\xe3\xe3\xe3\xe3"