From: Yu Watanabe
Date: Wed, 28 Jan 2026 13:01:39 +0000 (+0000)
Subject: tools: add script that checks external links in docs
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=966c29ae591e07e545e19b190b767dbe07364177;p=thirdparty%2Fsystemd.git

tools: add script that checks external links in docs

Generated by GitHub Copilot, and manually adjusted.
---

diff --git a/tools/check-docs-urls.sh b/tools/check-docs-urls.sh
new file mode 100755
index 00000000000..d0efb93dc7a
--- /dev/null
+++ b/tools/check-docs-urls.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+set -euo pipefail
+
+# check-docs-urls.sh
+# Extract external URLs from docs/ using git grep, clean them, de-duplicate,
+# and check HTTP status codes with curl. Writes results to a status file.
+
+OUT_LIST=${1:-/tmp/docs-urls.txt}
+OUT_STATUS=${2:-/tmp/docs-url-status.txt}
+
+usage() {
+    cat <<EOF
+Usage: $0 [OUT_LIST] [OUT_STATUS]
+
+Check external URLs referenced under docs/.
+Defaults: OUT_LIST=/tmp/docs-urls.txt, OUT_STATUS=/tmp/docs-url-status.txt
+EOF
+}
+
+if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+    usage
+    exit 0
+fi
+
+command -v curl >/dev/null 2>&1 || { echo "ERROR: curl not found in PATH" >&2; exit 2; }
+
+# Extract likely URLs. Pattern stops at whitespace, angle bracket or quote/paren to avoid trailing HTML tags.
+# Then strip trailing punctuation like ,.;:)\"' and any accidental trailing angle brackets.
+git grep 'https*://' docs \
+    | sed -e 's|^.*http|http|; s/["`'"'"')< ].*$//' \
+    | sort -u > "$OUT_LIST"
+
+echo "Found $(wc -l < "$OUT_LIST") unique urls (written to $OUT_LIST)"
+
+# Check each URL with curl (follows redirects). Output: HTTP_CODE URL
+: > "$OUT_STATUS"
+while read -r url; do
+    [[ -z "$url" ]] && continue
+    # Use a reasonable timeout and follow redirects
+    code=$(curl -sS -L -o /dev/null -w "%{http_code}" --max-time 3 "$url" || echo "000")
+    printf "%s %s\n" "$code" "$url" >> "$OUT_STATUS"
+done < "$OUT_LIST"
+
+echo "Wrote status results to $OUT_STATUS"
+
+# Show non-2xx/3xx entries
+echo "Non-OK results (not 2xx/3xx):"
+grep -E "^[^23]" "$OUT_STATUS" || true
+
+exit 0
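
A minimal usage sketch (not part of the patch itself; it assumes the script is run from the top of a systemd checkout and that awk is available for filtering the status file):

    # Run with the default output paths under /tmp.
    ./tools/check-docs-urls.sh

    # Or pass explicit paths, then list only entries whose HTTP code is not 2xx/3xx
    # (e.g. "000" for timeouts, "404" for dead links).
    ./tools/check-docs-urls.sh ./docs-urls.txt ./docs-url-status.txt
    awk '$1 !~ /^[23]/' ./docs-url-status.txt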