🛠️ Add `add-permalinks` and `add-permalinks-page` to `scripts/docs.py` (#14033)

author Motov Yurii <109919500+YuriiMotov@users.noreply.github.com>

Fri, 21 Nov 2025 12:49:11 +0000 (13:49 +0100)

committer GitHub <noreply@github.com>

Fri, 21 Nov 2025 12:49:11 +0000 (13:49 +0100)
author Motov Yurii <109919500+YuriiMotov@users.noreply.github.com>
Fri, 21 Nov 2025 12:49:11 +0000 (13:49 +0100)
committer GitHub <noreply@github.com>
Fri, 21 Nov 2025 12:49:11 +0000 (13:49 +0100)
diff --git a/requirements-docs.txt b/requirements-docs.txt

index d60125bbe5f48e7444be96292c44a4825c3582b4..05b47fe92b3bd9bc0c1acc10a74ca2438664f9d1 100644 (file)
--- a/requirements-docs.txt
+++ b/requirements-docs.txt
@@ -18,3 +18,4 @@ griffe-warnings-deprecated==1.1.0
  black==25.1.0
  mkdocs-macros-plugin==1.4.1
  markdown-include-variants==0.0.5
+python-slugify==8.0.4
diff --git a/scripts/docs.py b/scripts/docs.py

index d08a218f8b85f50880badf69e1ee23c03c50ae52..d67ab50f77d564f2bba4ddd13bcbac9d7bc0419d 100644 (file)
--- a/scripts/docs.py
+++ b/scripts/docs.py
@@ -4,6 +4,7 @@ import os
  import re
  import shutil
  import subprocess
+from html.parser import HTMLParser
  from http.server import HTTPServer, SimpleHTTPRequestHandler
  from multiprocessing import Pool
  from pathlib import Path
@@ -14,6 +15,7 @@ import typer
  import yaml
  from jinja2 import Template
  from ruff.__main__ import find_ruff_bin
+from slugify import slugify as py_slugify
  
  logging.basicConfig(level=logging.INFO)
  
@@ -25,8 +27,8 @@ missing_translation_snippet = """
  {!../../docs/missing-translation.md!}
  """
  
-non_translated_sections = [
-    "reference/",
+non_translated_sections = (
+    f"reference{os.sep}",
      "release-notes.md",
      "fastapi-people.md",
      "external-links.md",
@@ -34,7 +36,7 @@ non_translated_sections = [
      "management-tasks.md",
      "management.md",
      "contributing.md",
-]
+)
  
  docs_path = Path("docs")
  en_docs_path = Path("docs/en")
@@ -42,7 +44,39 @@ en_config_path: Path = en_docs_path / mkdocs_name
  site_path = Path("site").absolute()
  build_site_path = Path("site_build").absolute()
  
+# Markdown ATX header line: group 1 is the run of 1-6 "#" markers, group 2 the
+# title text, and group 3 (optional) everything from "#" onward inside a
+# trailing "{ ... }" block. Group 3 is greedy, so it can include trailing spaces.
+header_pattern = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$")
  header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$")
+# Fence lines of code blocks (optionally indented). Note that the three-backtick
+# pattern also matches a four-backtick fence, so callers must test the
+# four-backtick pattern first.
+code_block3_pattern = re.compile(r"^\s*```")
+code_block4_pattern = re.compile(r"^\s*````")
+
+
+class VisibleTextExtractor(HTMLParser):
+    """Extract visible text from a string with HTML tags.
+
+    Only character data is collected; tags themselves are dropped. A single
+    instance can be reused for many strings via `extract_visible_text()`.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        # Text fragments collected by handle_data() for the current input.
+        self.text_parts = []
+
+    def handle_data(self, data: str) -> None:
+        """Collect one fragment of character data reported by the parser."""
+        self.text_parts.append(data)
+
+    def extract_visible_text(self, html: str) -> str:
+        """Return the text content of `html`, stripped of surrounding whitespace."""
+        self.reset()
+        self.text_parts = []
+        self.feed(html)
+        # feed() may hold back trailing text while it waits for more input
+        # (e.g. an unterminated "&" as in "Q&A", or an unfinished "<").
+        # close() forces the parser to flush that buffered data.
+        self.close()
+        return "".join(self.text_parts).strip()
+
+
+def slugify(text: str) -> str:
+    """Return the slug for `text`, passing markup-stripping replacements to python-slugify."""
+    markup_replacements = [
+        ("`", ""),  # `dict`s -> dicts
+        ("'s", "s"),  # it's -> its
+        ("'t", "t"),  # don't -> dont
+        ("**", ""),  # **FastAPI**s -> FastAPIs
+    ]
+    return py_slugify(text, replacements=markup_replacements)
  
  
  def get_en_config() -> Dict[str, Any]:
@@ -426,5 +460,74 @@ def generate_docs_src_versions_for_file(file_path: Path) -> None:
          version_file.write_text(content_format, encoding="utf-8")
  
  
+def _iter_headers_outside_code(lines):
+    """Yield `(index, match)` for each header line that is outside fenced code blocks."""
+    in_code_block3 = False
+    in_code_block4 = False
+    for index, line in enumerate(lines):
+        # Handle codeblocks start and end. The four-backtick pattern is tested
+        # first because the three-backtick pattern matches those lines too.
+        if not (in_code_block3 or in_code_block4):
+            if code_block4_pattern.match(line):
+                in_code_block4 = True
+            elif code_block3_pattern.match(line):
+                in_code_block3 = True
+        elif in_code_block4 and code_block4_pattern.match(line):
+            in_code_block4 = False
+        elif in_code_block3 and code_block3_pattern.match(line):
+            in_code_block3 = False
+
+        # Process Headers only outside codeblocks
+        if in_code_block3 or in_code_block4:
+            continue
+        match = header_pattern.match(line)
+        if match:
+            yield index, match
+
+
+@app.command()
+def add_permalinks_page(path: Path, update_existing: bool = False) -> None:
+    """
+    Add or update header permalinks in specific page of En docs.
+    """
+    # Headers that already carry a `{ #anchor }` are left untouched unless
+    # `update_existing` is set. Raises RuntimeError when `path` is not inside
+    # the En docs sources directory.
+
+    docs_root = en_docs_path / "docs"
+    if not path.is_relative_to(docs_root):
+        # Report the directory that is actually checked, not its parent.
+        raise RuntimeError(f"Path must be inside {docs_root}")
+    rel_path = path.relative_to(docs_root)
+
+    # Skip excluded sections
+    if str(rel_path).startswith(non_translated_sections):
+        return
+
+    with path.open("r", encoding="utf-8") as f:
+        lines = f.readlines()
+
+    headers = list(_iter_headers_outside_code(lines))
+
+    permalinks = set()
+    if not update_existing:
+        # Anchors that are kept as-is must be reserved up front, otherwise a
+        # generated slug could duplicate an anchor already present in the page
+        # (including one that appears further down the file).
+        for _, match in headers:
+            existing = match.group(3)
+            if existing:
+                # Group 3 starts with "#" and may carry trailing whitespace.
+                permalinks.add(existing[1:].strip())
+
+    visible_text_extractor = VisibleTextExtractor()
+    for index, match in headers:
+        hashes, title, existing = match.groups()
+        if existing and not update_existing:
+            continue
+
+        base_slug = slugify(visible_text_extractor.extract_visible_text(title))
+        slug = base_slug
+        count = 1
+        # If the slug is already used, append a number to make it unique
+        while slug in permalinks:
+            slug = f"{base_slug}_{count}"
+            count += 1
+        permalinks.add(slug)
+
+        lines[index] = f"{hashes} {title} {{ #{slug} }}\n"
+
+    with path.open("w", encoding="utf-8") as f:
+        f.writelines(lines)
+
+
+@app.command()
+def add_permalinks(update_existing: bool = False) -> None:
+    """
+    Add or update header permalinks in all pages of En docs.
+    """
+    # Walk only the Markdown sources under `docs/`: add_permalinks_page()
+    # raises RuntimeError for any path outside that directory, which would
+    # otherwise abort the whole run on a stray `.md` file elsewhere in the
+    # En docs tree.
+    for md_file in (en_docs_path / "docs").rglob("*.md"):
+        add_permalinks_page(md_file, update_existing=update_existing)
+
+
  if __name__ == "__main__":
      app()
author	Motov Yurii <109919500+YuriiMotov@users.noreply.github.com>
	Fri, 21 Nov 2025 12:49:11 +0000 (13:49 +0100)
committer	GitHub <noreply@github.com>
	Fri, 21 Nov 2025 12:49:11 +0000 (13:49 +0100)
requirements-docs.txt		patch | blob | blame | history
scripts/docs.py		patch | blob | blame | history