git.ipfire.org Git - thirdparty/fastapi/fastapi.git/commitdiff
🔨 Tweak translation script (#15174)
author: Motov Yurii <109919500+YuriiMotov@users.noreply.github.com>
Thu, 23 Apr 2026 16:35:43 +0000 (18:35 +0200)
committer: GitHub <noreply@github.com>
Thu, 23 Apr 2026 16:35:43 +0000 (18:35 +0200)
scripts/general-llm-prompt.md
scripts/translate.py

index cfad6ff43dcefbef558a8c452b6047001d94ca31..06c26d7afbe7d6c8d3225c333306b6d81c6df68d 100644 (file)
@@ -6,6 +6,8 @@ The original content is written in Markdown, write the translation in Markdown a
 
 The original content will be surrounded by triple percentage signs (%%%). Do not include the triple percentage signs in the translation.
 
+[placeholder_for_additional_instructions]
+
 ### Technical terms in English
 
 For technical terms in English that don't have a common translation term, use the original term in English.
@@ -223,6 +225,8 @@ Result (German):
 
 Use the following rules for links (apply both to Markdown-style links ([text](url)) and to HTML-style <a href="url">text</a> tags):
 
+- The order of links should match the order of links in the English source. Do not change the order of links. Rephrase the sentence if necessary.
+
 - For relative URLs, only translate the link text. Do not translate the URL or its parts.
 
 Example:
index 1bfa92f8878577d402b16cc96f1a67584866454e..e2ce52fd52156432ab41f8874ccd37a10f0624bc 100644 (file)
@@ -57,39 +57,19 @@ def generate_en_path(*, lang: str, path: Path) -> Path:
     return out_path
 
 
-@app.command()
-def translate_page(
-    *,
-    language: Annotated[str, typer.Option(envvar="LANGUAGE")],
-    en_path: Annotated[Path, typer.Option(envvar="EN_PATH")],
-) -> None:
-    assert language != "en", (
-        "`en` is the source language, choose another language as translation target"
+def get_prompt(
+    lang_prompt_content: str,
+    old_translation: str | None,
+    language: str,
+    language_name: str,
+    original_content: str,
+    additional_instructions: str,
+) -> str:
+    general_prompt_with_additional_instructions = general_prompt.replace(
+        "[placeholder_for_additional_instructions]", additional_instructions
     )
-    langs = get_langs()
-    language_name = langs[language]
-    lang_path = Path(f"docs/{language}")
-    lang_path.mkdir(exist_ok=True)
-    lang_prompt_path = lang_path / "llm-prompt.md"
-    assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}"
-    lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8")
-
-    en_docs_path = Path("docs/en/docs")
-    assert str(en_path).startswith(str(en_docs_path)), (
-        f"Path must be inside {en_docs_path}"
-    )
-    out_path = generate_lang_path(lang=language, path=en_path)
-    out_path.parent.mkdir(parents=True, exist_ok=True)
-    original_content = en_path.read_text(encoding="utf-8")
-    old_translation: str | None = None
-    if out_path.exists():
-        print(f"Found existing translation: {out_path}")
-        old_translation = out_path.read_text(encoding="utf-8")
-    print(f"Translating {en_path} to {language} ({language_name})")
-    agent = Agent("openai:gpt-5")
-
     prompt_segments = [
-        general_prompt,
+        general_prompt_with_additional_instructions,
         lang_prompt_content,
     ]
     if old_translation:
@@ -119,12 +99,57 @@ def translate_page(
             f"%%%\n{original_content}%%%",
         ]
     )
-    prompt = "\n\n".join(prompt_segments)
+    return "\n\n".join(prompt_segments)
+
+
+@app.command()
+def translate_page(
+    *,
+    language: Annotated[str, typer.Option(envvar="LANGUAGE")],
+    en_path: Annotated[Path, typer.Option(envvar="EN_PATH")],
+) -> None:
+    assert language != "en", (
+        "`en` is the source language, choose another language as translation target"
+    )
+    langs = get_langs()
+    language_name = langs[language]
+    lang_path = Path(f"docs/{language}")
+    lang_path.mkdir(exist_ok=True)
+    lang_prompt_path = lang_path / "llm-prompt.md"
+    assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}"
+    lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8")
+
+    en_docs_path = Path("docs/en/docs")
+    assert str(en_path).startswith(str(en_docs_path)), (
+        f"Path must be inside {en_docs_path}"
+    )
+    out_path = generate_lang_path(lang=language, path=en_path)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    original_content = en_path.read_text(encoding="utf-8")
+    old_translation: str | None = None
+    if out_path.exists():
+        print(f"Found existing translation: {out_path}")
+        old_translation = out_path.read_text(encoding="utf-8")
+    print(f"Translating {en_path} to {language} ({language_name})")
+    agent = Agent("openai:gpt-5")
 
     MAX_ATTEMPTS = 3
+    additional_instructions = ""
     for attempt_no in range(1, MAX_ATTEMPTS + 1):
         print(f"Running agent for {out_path} (attempt {attempt_no}/{MAX_ATTEMPTS})")
-        result = agent.run_sync(prompt)
+        prompt = get_prompt(
+            lang_prompt_content=lang_prompt_content,
+            old_translation=old_translation,
+            language=language,
+            language_name=language_name,
+            original_content=original_content,
+            additional_instructions=additional_instructions,
+        )
+        result = agent.run_sync(
+            prompt.replace(
+                "[placeholder_for_additional_instructions]", additional_instructions
+            )
+        )
         out_content = f"{result.output.strip()}\n"
         try:
             check_translation(
@@ -139,6 +164,11 @@ def translate_page(
             print(
                 f"Translation check failed on attempt {attempt_no}/{MAX_ATTEMPTS}: {e}"
             )
+            additional_instructions = (
+                f"Current translation fails validation checks ({str(e)}). "
+                "Please, pay special attention to it."
+            )
+            old_translation = out_content
             continue  # Retry if not reached max attempts
     else:  # Max retry attempts reached
         print(f"Translation failed for {out_path} after {MAX_ATTEMPTS} attempts")