import re
import shutil
import subprocess
+from html.parser import HTMLParser
from http.server import HTTPServer, SimpleHTTPRequestHandler
from multiprocessing import Pool
from pathlib import Path
import yaml
from jinja2 import Template
from ruff.__main__ import find_ruff_bin
+from slugify import slugify as py_slugify
logging.basicConfig(level=logging.INFO)
{!../../docs/missing-translation.md!}
"""
# Path prefixes that add_permalinks_page skips: it compares them against the
# string form of the page path relative to the En docs pages directory using
# str.startswith. The collection is a tuple because str.startswith accepts a
# tuple of prefixes (a list would raise TypeError). The "reference" entry ends
# with os.sep so that it matches as a directory prefix of the platform-native
# path string.
-non_translated_sections = [
- "reference/",
+non_translated_sections = (
+ f"reference{os.sep}",
"release-notes.md",
"fastapi-people.md",
"external-links.md",
"management-tasks.md",
"management.md",
"contributing.md",
-]
+)
# Documentation source directories, relative to the current working directory.
docs_path = Path("docs")
en_docs_path = Path("docs/en")
# Site directories, made absolute against the working directory at the moment
# this module is imported.
site_path = Path("site").absolute()
build_site_path = Path("site_build").absolute()
# Markdown header line: 1-6 "#" characters, one space, the title (matched
# lazily), then an optional "{ #... }" attribute block.
# Groups: (1) the hashes, (2) the title, (3) the text inside the braces starting
# at "#" (may carry trailing whitespace), or None when there is no block.
+header_pattern = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$")
# Same header shape, but the "{ #... }" block is required; group 3 is the whole
# block including its braces and any whitespace that precedes it.
header_with_permalink_pattern = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})\s*$")
# Code fence lines, allowing leading whitespace. The three-backtick pattern is a
# prefix of the four-backtick one, so it also matches four-backtick fences;
# callers that care about the difference must test the four-backtick pattern first.
+code_block3_pattern = re.compile(r"^\s*```")
+code_block4_pattern = re.compile(r"^\s*````")
+
+
class VisibleTextExtractor(HTMLParser):
    """Extract the visible text from a string that may contain HTML tags.

    Only character data is collected; tags themselves are dropped. Character
    references are decoded by the base parser, which is created with its
    default ``convert_charrefs=True``.
    """

    def __init__(self) -> None:
        super().__init__()
        # Text chunks reported by the parser for the string being processed.
        self.text_parts: list[str] = []

    def handle_data(self, data: str) -> None:
        """Collect one chunk of text found outside of tags."""
        self.text_parts.append(data)

    def extract_visible_text(self, html: str) -> str:
        """Return the text content of *html*, stripped of surrounding whitespace.

        The parser state is reset first, so a single instance can be reused
        for any number of strings.
        """
        self.reset()
        self.text_parts = []
        self.feed(html)
        # feed() treats its input as possibly incomplete: trailing text that
        # contains an "&" not followed by whitespace or ";" (e.g. "Q&A") is
        # buffered in case a character reference is still arriving. close()
        # tells the parser the input is complete and flushes that text.
        self.close()
        return "".join(self.text_parts).strip()
+
+
def slugify(text: str) -> str:
    """Turn a header title into a permalink slug using python-slugify.

    A few Markdown and apostrophe sequences are handed to python-slugify as
    explicit replacements so they do not split words in the resulting slug.
    """
    markup_replacements = [
        ("`", ""),  # `dict`s -> dicts
        ("'s", "s"),  # it's -> its
        ("'t", "t"),  # don't -> dont
        ("**", ""),  # **FastAPI**s -> FastAPIs
    ]
    slug = py_slugify(text, replacements=markup_replacements)
    return slug
def get_en_config() -> Dict[str, Any]:
version_file.write_text(content_format, encoding="utf-8")
@app.command()
def add_permalinks_page(path: Path, update_existing: bool = False):
    """
    Add or update header permalinks in specific page of En docs.

    Every Markdown header outside fenced code blocks gets a trailing
    `{ #slug }` attribute. Headers that already carry one are left untouched
    unless `update_existing` is set, in which case the slug is regenerated
    from the title. Slugs are unique within the page: a clash gets a `_1`,
    `_2`, ... suffix, and permalinks that are kept as-is count as taken.

    Pages listed in `non_translated_sections` are skipped, and the file is
    only rewritten when at least one line changed.

    Raises RuntimeError if `path` is not inside the En docs pages directory.
    """

    pages_root = en_docs_path / "docs"
    if not path.is_relative_to(pages_root):
        # Name the directory that is actually checked, not its parent.
        raise RuntimeError(f"Path must be inside {pages_root}")
    rel_path = path.relative_to(pages_root)

    # Skip excluded sections
    if str(rel_path).startswith(non_translated_sections):
        return

    with path.open("r", encoding="utf-8") as f:
        lines = f.readlines()

    # Pass 1: locate header lines, ignoring anything inside fenced code blocks.
    header_matches = {}  # line index -> header_pattern match
    in_code_block3 = False
    in_code_block4 = False
    for index, line in enumerate(lines):
        # Handle codeblocks start and end. The four-backtick pattern is tested
        # first because the three-backtick pattern also matches such lines.
        if not (in_code_block3 or in_code_block4):
            if code_block4_pattern.match(line):
                in_code_block4 = True
            elif code_block3_pattern.match(line):
                in_code_block3 = True
        else:
            if in_code_block4 and code_block4_pattern.match(line):
                in_code_block4 = False
            elif in_code_block3 and code_block3_pattern.match(line):
                in_code_block3 = False

        if not (in_code_block3 or in_code_block4):
            match = header_pattern.match(line)
            if match:
                header_matches[index] = match

    # Reserve the permalinks that will be kept, wherever they appear in the
    # page, so that a generated slug can never duplicate one of them.
    permalinks = set()
    if not update_existing:
        for match in header_matches.values():
            existing = match.group(3)
            if existing:
                # Group 3 looks like "#some-id " (possibly followed by other
                # attributes); keep only the id itself, without the "#".
                permalinks.add(existing.split()[0][1:])

    # Pass 2: generate the missing (or all, when updating) permalinks.
    visible_text_extractor = VisibleTextExtractor()
    updated_lines = list(lines)
    for index, match in header_matches.items():
        hashes, title, _permalink = match.groups()
        if _permalink and not update_existing:
            continue
        original_slug = slugify(visible_text_extractor.extract_visible_text(title))
        slug = original_slug
        count = 1
        # If the slug is already used, append a number to make it unique
        while slug in permalinks:
            slug = f"{original_slug}_{count}"
            count += 1
        permalinks.add(slug)
        updated_lines[index] = f"{hashes} {title} {{ #{slug} }}\n"

    # Leave the file alone when there is nothing to change.
    if updated_lines != lines:
        with path.open("w", encoding="utf-8") as f:
            f.writelines(updated_lines)
+
+
@app.command()
def add_permalinks(update_existing: bool = False) -> None:
    """
    Add or update header permalinks in all pages of En docs.
    """
    # add_permalinks_page raises RuntimeError for any path outside
    # en_docs_path / "docs", so search only that directory rather than the
    # whole of en_docs_path. Sorting makes the processing order stable.
    pages_root = en_docs_path / "docs"
    for md_file in sorted(pages_root.rglob("*.md")):
        add_permalinks_page(md_file, update_existing=update_existing)
+
+
# Invoke the command-line app only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
 app()