--- /dev/null
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+set -euo pipefail
+
+# check-docs-urls.sh
+# Extract external URLs from docs/ using git grep, clean them, de-duplicate,
+# and check HTTP status codes with curl. Writes results to a status file.
+
+OUT_LIST=${1:-/tmp/docs-urls.txt}
+OUT_STATUS=${2:-/tmp/docs-url-status.txt}
+
+usage() {
+ cat <<EOF
+Usage: $0 [URL_LIST_OUT] [STATUS_OUT]
+
+Extract external URLs from docs/, dedupe and clean them, then check each URL
+with curl. Defaults:
+ URL_LIST_OUT = /tmp/docs-urls.txt
+ STATUS_OUT = /tmp/docs-url-status.txt
+
+Examples:
+ $0
+ $0 /tmp/my-urls.txt /tmp/my-status.txt
+EOF
+}
+
+if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+ usage
+ exit 0
+fi
+
+command -v curl >/dev/null 2>&1 || { echo "ERROR: curl not found in PATH" >&2; exit 2; }
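+# git grep below needs git and must run from inside the repository's work tree.
+command -v git >/dev/null 2>&1 || { echo "ERROR: git not found in PATH" >&2; exit 2; }
+git rev-parse --is-inside-work-tree >/dev/null 2>&1 || { echo "ERROR: not inside a git work tree" >&2; exit 2; }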
+
+# Extract likely URLs. The first substitution drops everything up to the last
+# "http" on the line (if a line contains several URLs, only the last survives).
+# The second cuts the URL at whitespace, quotes, backticks, parentheses or angle
+# brackets; the third strips trailing punctuation like .,;:
+git grep 'https*://' docs \
+ | sed -e 's|^.*http|http|' -e 's/["`'"'"')<> ].*$//' -e 's/[.,;:]*$//' \
+ | sort -u > "$OUT_LIST"
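+# Illustrative transformation (hypothetical input line):
+#   docs/EXAMPLE.md:see <https://example.com/page>, among others
+#   -> https://example.com/page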
+
+echo "Found $(wc -l < "$OUT_LIST") unique urls (written to $OUT_LIST)"
+
+# Check each URL with curl (follows redirects). Output: HTTP_CODE URL
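+# e.g. "200 https://example.com/some/page"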
+: > "$OUT_STATUS"
+while read -r url; do
+ [[ -z "$url" ]] && continue
+ # Follow redirects, cap each check at 3 seconds. If curl fails outright
+ # (DNS failure, timeout, TLS error), record code 000 instead; note that
+ # appending "|| echo 000" inside the substitution would double-emit 000,
+ # since -w "%{http_code}" already prints 000 on a failed transfer.
+ code=$(curl -sS -L -o /dev/null -w "%{http_code}" --max-time 3 "$url") || code="000"
+ printf "%s %s\n" "$code" "$url" >> "$OUT_STATUS"
+done < "$OUT_LIST"
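+# Note: URLs are checked serially. For large docs trees, a parallel variant
+# along these lines may help (a sketch, assuming xargs supports -P as GNU and
+# BSD implementations do; result order then becomes arbitrary):
+#   xargs -P 8 -n 1 sh -c \
+#     'c=$(curl -sS -L -o /dev/null -w "%{http_code}" --max-time 3 "$1") || c=000; printf "%s %s\n" "$c" "$1"' _ \
+#     < "$OUT_LIST" > "$OUT_STATUS"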
+
+echo "Wrote status results to $OUT_STATUS"
+
+# Show non-2xx/3xx entries. The "|| true" keeps set -e from aborting the
+# script when every URL checked out fine and grep matches nothing.
+echo "Non-OK results (not 2xx/3xx):"
+grep -Ev "^[23]" "$OUT_STATUS" || true
+
+exit 0