mdlinkcheck: detect and check "raw" links

author Daniel Stenberg <daniel@haxx.se>

Fri, 5 Dec 2025 10:45:35 +0000 (11:45 +0100)

committer Daniel Stenberg <daniel@haxx.se>

Fri, 5 Dec 2025 22:41:41 +0000 (23:41 +0100)
author Daniel Stenberg <daniel@haxx.se>
Fri, 5 Dec 2025 10:45:35 +0000 (11:45 +0100)
committer Daniel Stenberg <daniel@haxx.se>
Fri, 5 Dec 2025 22:41:41 +0000 (23:41 +0100)
diff --git a/.github/workflows/checkdocs.yml b/.github/workflows/checkdocs.yml

index 60d42301941e5661fb68b9ea4657ac64e869971d..ba3858f66c465d31e4db7e98e7997fa46e157724 100644 (file)
--- a/.github/workflows/checkdocs.yml
+++ b/.github/workflows/checkdocs.yml
@@ -15,8 +15,7 @@ name: 'Docs'
      paths:
        - '.github/workflows/checkdocs.yml'
        - '.github/scripts/**'
-      - '.github/scripts/mdlinkcheck'
-      - '/scripts/**'
+      - 'scripts/**'
        - '**.md'
        - 'docs/*'
    pull_request:
@@ -25,8 +24,7 @@ name: 'Docs'
      paths:
        - '.github/workflows/checkdocs.yml'
        - '.github/scripts/**'
-      - '.github/scripts/mdlinkcheck'
-      - '/scripts/**'
+      - 'scripts/**'
        - '**.md'
        - 'docs/*'
  
diff --git a/scripts/mdlinkcheck b/scripts/mdlinkcheck

index 6b648786f33e6f4210090eb3e5ea4b5131800952..734617949d2d92fcf1278fc08328ddf2bd87cc16 100755 (executable)
--- a/scripts/mdlinkcheck
+++ b/scripts/mdlinkcheck
@@ -27,7 +27,10 @@ use strict;
  use warnings;
  
  my %whitelist = (
+    'https://curl.se' => 1,
      'https://curl.se/' => 1,
+    'https://curl.se/bug/' => 1,
+    'https://curl.se/bug/view.cgi' => 1,
      'https://curl.se/changes.html' => 1,
      'https://curl.se/dev/advisory.html' => 1,
      'https://curl.se/dev/builds.html' => 1,
@@ -40,19 +43,25 @@ my %whitelist = (
      'https://curl.se/docs/bugbounty.html' => 1,
      'https://curl.se/docs/caextract.html' => 1,
      'https://curl.se/docs/copyright.html' => 1,
+    'https://curl.se/docs/http-cookies.html' => 1,
      'https://curl.se/docs/install.html' => 1,
      'https://curl.se/docs/knownbugs.html' => 1,
      'https://curl.se/docs/manpage.html' => 1,
+    'https://curl.se/docs/releases.html' => 1,
      'https://curl.se/docs/security.html' => 1,
+    'https://curl.se/docs/ssl-ciphers.html' => 1,
+    'https://curl.se/docs/ssl-compared.html' => 1,
      'https://curl.se/docs/sslcerts.html' => 1,
      'https://curl.se/docs/thanks.html' => 1,
      'https://curl.se/docs/todo.html' => 1,
      'https://curl.se/docs/vulnerabilities.html' => 1,
+    'https://curl.se/download.html' => 1,
      'https://curl.se/libcurl/' => 1,
-    'https://curl.se/libcurl/c/CURLOPT_SSLVERSION.html' => 1,
      'https://curl.se/libcurl/c/CURLOPT_SSL_CIPHER_LIST.html' => 1,
+    'https://curl.se/libcurl/c/CURLOPT_SSLVERSION.html' => 1,
      'https://curl.se/libcurl/c/CURLOPT_TLS13_CIPHERS.html' => 1,
      'https://curl.se/libcurl/c/libcurl.html' => 1,
+    'https://curl.se/libcurl/c/threadsafe.html' => 1,
      'https://curl.se/logo/curl-logo.svg' => 1,
      'https://curl.se/mail/' => 1,
      'https://curl.se/mail/etiquette.html' => 1,
@@ -62,14 +71,15 @@ my %whitelist = (
      'https://curl.se/rfc/rfc2255.txt' => 1,
      'https://curl.se/sponsors.html' => 1,
      'https://curl.se/support.html' => 1,
+    'https://curl.se/windows' => 1,
+    'https://curl.se/windows/' => 1,
+
+    'https://testclutch.curl.se/' => 1,
  
-    'https://github.com/curl/curl' => 1,
      'https://github.com/curl/curl-fuzzer' => 1,
      'https://github.com/curl/curl-www' => 1,
-    'https://github.com/curl/curl/discussions' => 1,
-    'https://github.com/curl/curl/issues' => 1,
-    'https://github.com/curl/curl/labels/help%20wanted' => 1,
-    'https://github.com/curl/curl/pulls' => 1,
+    'https://github.com/curl/curl.git' => 1,
+    'https://github.com/curl/curl/wcurl' => 1,
  
      );
  
@@ -77,7 +87,7 @@ my %url;
  my %flink;
  
  # list all .md files in the repo
-my @files=`git ls-files '**.md'`;
+my @files=`git ls-files '**.md' docs/TODO docs/KNOWN_BUGS docs/FAQ`;
  
  sub storelink {
      my ($f, $line, $link) = @_;
@@ -91,7 +101,29 @@ sub storelink {
      $link =~ s:\#.*\z::;
  
      if($link =~ /^(https|http):/) {
-        $url{$link} .= "$f:$line ";
+        if($whitelist{$link}) {
+            #print "-- whitelisted: $link\n";
+        }
+        # example.com is just example
+        elsif($link =~ /^https:\/\/(.*)example.(com|org|net)/) {
+            #print "-- example: $link\n";
+        }
+        # so is using the .example TLD
+        elsif($link =~ /^https:\/\/(.*)\.example(\/|$|:)/) {
+            #print "-- .example: $link\n";
+        }
+        # so is using anything on localhost
+        elsif($link =~ /^http(s|):\/\/localhost/) {
+            #print "-- localhost: $link\n";
+        }
+        # ignore all links to curl's github repo
+        elsif($link =~ /^https:\/\/github.com\/curl\/curl(\/|$)/) {
+            #print "-- curl github repo: $link\n";
+        }
+        else {
+            #print "ADD '$link'\n";
+            $url{$link} .= "$f:$line ";
+        }
          return;
      }
  
@@ -119,11 +151,19 @@ sub findlinks {
          return;
  
      while(<F>) {
+        chomp;
          if(/\]\(([^)]*)/) {
              my $link = $1;
              #print "$f:$line $link\n";
              storelink($f, $line, $link);
          }
+        # ignore trailing: dot, quote, asterisk, hash, comma, question mark,
+        # colon, closing parenthesis, closing angle bracket, whitespace, pipe,
+        # backtick, semicolon
+        elsif(/(https:\/\/[a-z0-9.\/:%_-]+[^."*\#,?:\)> \t|`;])/i) {
+            #print "RAW ";
+            storelink($f, $line, $1);
+        }
          $line++;
      }
      close(F);
@@ -133,11 +173,10 @@ sub checkurl {
      my ($url) = @_;
  
      if($whitelist{$url}) {
-        #print "$url is whitelisted\n";
+        #print STDERR "$url is whitelisted\n";
          return 0;
      }
  
-    print "check $url\n";
      $url =~ s/\+/%2B/g;
      my @content;
      if(open(my $fh, '-|', 'curl', '-ILfsm10', '--retry', '2', '--retry-delay', '5',
@@ -146,9 +185,10 @@ sub checkurl {
          close $fh;
      }
      if(!$content[0]) {
-        print STDERR "FAIL\n";
+        print "FAIL: $url\n";
          return 1; # fail
      }
+    print "OK: $url\n";
      return 0; # ok
  }
  
@@ -157,14 +197,19 @@ for my $f (@files) {
      findlinks($f);
  }
  
-my $error;
+#for my $u (sort keys %url) {
+#    print "$u\n";
+#}
+#exit;
  
+my $error;
+my @errlist;
  for my $u (sort keys %url) {
      my $r = checkurl($u);
  
      if($r) {
          for my $f (split(/ /, $url{$u})) {
-            printf "%s ERROR links to missing URL %s\n", $f, $u;
+            push @errlist, sprintf "%s ERROR links to missing URL %s\n", $f, $u;
              $error++;
          }
      }
@@ -173,10 +218,17 @@ for my $u (sort keys %url) {
  for my $l (sort keys %flink) {
      if(! -r $l) {
          for my $f (split(/ /, $flink{$l})) {
-            printf "%s ERROR links to missing file %s\n", $f, $l;
+            push @errlist, sprintf "%s ERROR links to missing file %s\n", $f, $l;
              $error++;
          }
      }
  }
  
+printf "Checked %d URLs\n", scalar(keys %url);
+if($error) {
+    print "$error URLs had problems:\n";
+    for(@errlist) {
+        print $_;
+    }
+}
  exit 1 if($error);
author	Daniel Stenberg <daniel@haxx.se>
	Fri, 5 Dec 2025 10:45:35 +0000 (11:45 +0100)
committer	Daniel Stenberg <daniel@haxx.se>
	Fri, 5 Dec 2025 22:41:41 +0000 (23:41 +0100)
.github/workflows/checkdocs.yml		patch | blob | blame | history
scripts/mdlinkcheck		patch | blob | blame | history