From: Motov Yurii <109919500+YuriiMotov@users.noreply.github.com> Date: Sat, 10 Jan 2026 21:48:08 +0000 (+0300) Subject: 🔨 Add LLM translations tool fixer (#14652) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d1c67c0055af1c0d187a55cf8ba3f020a73303c1;p=thirdparty%2Ffastapi%2Ffastapi.git 🔨 Add LLM translations tool fixer (#14652) Co-authored-by: github-actions[bot] Co-authored-by: Sebastián Ramírez --- diff --git a/scripts/doc_parsing_utils.py b/scripts/doc_parsing_utils.py new file mode 100644 index 0000000000..857d808aaa --- /dev/null +++ b/scripts/doc_parsing_utils.py @@ -0,0 +1,729 @@ +import re +from typing import TypedDict + +CODE_INCLUDE_RE = re.compile(r"^\{\*\s*(\S+)\s*(.*)\*\}$") +CODE_INCLUDE_PLACEHOLDER = "" + +HEADER_WITH_PERMALINK_RE = re.compile(r"^(#{1,6}) (.+?)(\s*\{\s*#.*\s*\})?\s*$") +HEADER_LINE_RE = re.compile(r"^(#{1,6}) (.+?)(?:\s*\{\s*(#.*)\s*\})?\s*$") + +TIANGOLO_COM = "https://fastapi.tiangolo.com" +ASSETS_URL_PREFIXES = ("/img/", "/css/", "/js/") + +MARKDOWN_LINK_RE = re.compile( + r"(?.*?)\]" # link text (non-greedy) + r"\(" + r"(?P[^)\s]+)" # url (no spaces and `)`) + r'(?:\s+["\'](?P.*?)["\'])?' # optional title in "" or '' + r"\)" + r"(?:\s*\{(?P<attrs>[^}]*)\})?" # optional attributes in {} +) + +HTML_LINK_RE = re.compile(r"<a\s+[^>]*>.*?</a>") +HTML_LINK_TEXT_RE = re.compile(r"<a\b([^>]*)>(.*?)</a>") +HTML_LINK_OPEN_TAG_RE = re.compile(r"<a\b([^>]*)>") +HTML_ATTR_RE = re.compile(r'(\w+)\s*=\s*([\'"])(.*?)\2') + +CODE_BLOCK_LANG_RE = re.compile(r"^`{3,4}([\w-]*)", re.MULTILINE) + +SLASHES_COMMENT_RE = re.compile( + r"^(?P<code>.*?)(?P<comment>(?:(?<= )// .*)|(?:^// .*))?$" +) + +HASH_COMMENT_RE = re.compile(r"^(?P<code>.*?)(?P<comment>(?:(?<= )# .*)|(?:^# .*))?$") + + +class CodeIncludeInfo(TypedDict): + line_no: int + line: str + + +class HeaderPermalinkInfo(TypedDict): + line_no: int + hashes: str + title: str + permalink: str + + +class MarkdownLinkInfo(TypedDict): + line_no: int + url: str + text: str + title: str | None + attributes: str | None + full_match: str + + +class HTMLLinkAttribute(TypedDict): + name: str + quote: str + value: str + + +class HtmlLinkInfo(TypedDict): + line_no: int + full_tag: str + attributes: list[HTMLLinkAttribute] + text: str + + +class MultilineCodeBlockInfo(TypedDict): + lang: str + start_line_no: int + content: list[str] + + +# Code includes +# -------------------------------------------------------------------------------------- + + +def extract_code_includes(lines: list[str]) -> list[CodeIncludeInfo]: + """ + Extract lines that contain code includes. + + Return list of CodeIncludeInfo, where each dict contains: + - `line_no` - line number (1-based) + - `line` - text of the line + """ + + includes: list[CodeIncludeInfo] = [] + for line_no, line in enumerate(lines, start=1): + if CODE_INCLUDE_RE.match(line): + includes.append(CodeIncludeInfo(line_no=line_no, line=line)) + return includes + + +def replace_code_includes_with_placeholders(text: list[str]) -> list[str]: + """ + Replace code includes with placeholders. + """ + + modified_text = text.copy() + includes = extract_code_includes(text) + for include in includes: + modified_text[include["line_no"] - 1] = CODE_INCLUDE_PLACEHOLDER + return modified_text + + +def replace_placeholders_with_code_includes( + text: list[str], original_includes: list[CodeIncludeInfo] +) -> list[str]: + """ + Replace code includes placeholders with actual code includes from the original (English) document. + Fail if the number of placeholders does not match the number of original includes. + """ + + code_include_lines = [ + line_no + for line_no, line in enumerate(text) + if line.strip() == CODE_INCLUDE_PLACEHOLDER + ] + + if len(code_include_lines) != len(original_includes): + raise ValueError( + "Number of code include placeholders does not match the number of code includes " + "in the original document " + f"({len(code_include_lines)} vs {len(original_includes)})" + ) + + modified_text = text.copy() + for i, line_no in enumerate(code_include_lines): + modified_text[line_no] = original_includes[i]["line"] + + return modified_text + + +# Header permalinks +# -------------------------------------------------------------------------------------- + + +def extract_header_permalinks(lines: list[str]) -> list[HeaderPermalinkInfo]: + """ + Extract list of header permalinks from the given lines. + + Return list of HeaderPermalinkInfo, where each dict contains: + - `line_no` - line number (1-based) + - `hashes` - string of hashes representing header level (e.g., "###") + - `permalink` - permalink string (e.g., "{#permalink}") + """ + + headers: list[HeaderPermalinkInfo] = [] + in_code_block3 = False + in_code_block4 = False + + for line_no, line in enumerate(lines, start=1): + if not (in_code_block3 or in_code_block4): + if line.startswith("```"): + count = len(line) - len(line.lstrip("`")) + if count == 3: + in_code_block3 = True + continue + elif count >= 4: + in_code_block4 = True + continue + + header_match = HEADER_WITH_PERMALINK_RE.match(line) + if header_match: + hashes, title, permalink = header_match.groups() + headers.append( + HeaderPermalinkInfo( + hashes=hashes, line_no=line_no, permalink=permalink, title=title + ) + ) + + elif in_code_block3: + if line.startswith("```"): + count = len(line) - len(line.lstrip("`")) + if count == 3: + in_code_block3 = False + continue + + elif in_code_block4: + if line.startswith("````"): + count = len(line) - len(line.lstrip("`")) + if count >= 4: + in_code_block4 = False + continue + + return headers + + +def remove_header_permalinks(lines: list[str]) -> list[str]: + """ + Remove permalinks from headers in the given lines. + """ + + modified_lines: list[str] = [] + for line in lines: + header_match = HEADER_WITH_PERMALINK_RE.match(line) + if header_match: + hashes, title, _permalink = header_match.groups() + modified_line = f"{hashes} {title}" + modified_lines.append(modified_line) + else: + modified_lines.append(line) + return modified_lines + + +def replace_header_permalinks( + text: list[str], + header_permalinks: list[HeaderPermalinkInfo], + original_header_permalinks: list[HeaderPermalinkInfo], +) -> list[str]: + """ + Replace permalinks in the given text with the permalinks from the original document. + + Fail if the number or level of headers does not match the original. + """ + + modified_text: list[str] = text.copy() + + if len(header_permalinks) != len(original_header_permalinks): + raise ValueError( + "Number of headers with permalinks does not match the number in the " + "original document " + f"({len(header_permalinks)} vs {len(original_header_permalinks)})" + ) + + for header_no in range(len(header_permalinks)): + header_info = header_permalinks[header_no] + original_header_info = original_header_permalinks[header_no] + + if header_info["hashes"] != original_header_info["hashes"]: + raise ValueError( + "Header levels do not match between document and original document" + f" (found {header_info['hashes']}, expected {original_header_info['hashes']})" + f" for header №{header_no + 1} in line {header_info['line_no']}" + ) + line_no = header_info["line_no"] - 1 + hashes = header_info["hashes"] + title = header_info["title"] + permalink = original_header_info["permalink"] + modified_text[line_no] = f"{hashes} {title}{permalink}" + + return modified_text + + +# Markdown links +# -------------------------------------------------------------------------------------- + + +def extract_markdown_links(lines: list[str]) -> list[MarkdownLinkInfo]: + """ + Extract all markdown links from the given lines. + + Return list of MarkdownLinkInfo, where each dict contains: + - `line_no` - line number (1-based) + - `url` - link URL + - `text` - link text + - `title` - link title (if any) + """ + + links: list[MarkdownLinkInfo] = [] + for line_no, line in enumerate(lines, start=1): + for m in MARKDOWN_LINK_RE.finditer(line): + links.append( + MarkdownLinkInfo( + line_no=line_no, + url=m.group("url"), + text=m.group("text"), + title=m.group("title"), + attributes=m.group("attrs"), + full_match=m.group(0), + ) + ) + return links + + +def _add_lang_code_to_url(url: str, lang_code: str) -> str: + if url.startswith(TIANGOLO_COM): + rel_url = url[len(TIANGOLO_COM) :] + if not rel_url.startswith(ASSETS_URL_PREFIXES): + url = url.replace(TIANGOLO_COM, f"{TIANGOLO_COM}/{lang_code}") + return url + + +def _construct_markdown_link( + url: str, text: str, title: str | None, attributes: str | None, lang_code: str +) -> str: + """ + Construct a markdown link, adjusting the URL for the given language code if needed. + """ + url = _add_lang_code_to_url(url, lang_code) + + if title: + link = f'[{text}]({url} "{title}")' + else: + link = f"[{text}]({url})" + + if attributes: + link += f"{{{attributes}}}" + + return link + + +def replace_markdown_links( + text: list[str], + links: list[MarkdownLinkInfo], + original_links: list[MarkdownLinkInfo], + lang_code: str, +) -> list[str]: + """ + Replace markdown links in the given text with the original links. + + Fail if the number of links does not match the original. + """ + + if len(links) != len(original_links): + raise ValueError( + "Number of markdown links does not match the number in the " + "original document " + f"({len(links)} vs {len(original_links)})" + ) + + modified_text = text.copy() + for i, link_info in enumerate(links): + link_text = link_info["text"] + link_title = link_info["title"] + original_link_info = original_links[i] + + # Replace + replacement_link = _construct_markdown_link( + url=original_link_info["url"], + text=link_text, + title=link_title, + attributes=original_link_info["attributes"], + lang_code=lang_code, + ) + line_no = link_info["line_no"] - 1 + modified_line = modified_text[line_no] + modified_line = modified_line.replace( + link_info["full_match"], replacement_link, 1 + ) + modified_text[line_no] = modified_line + + return modified_text + + +# HTML links +# -------------------------------------------------------------------------------------- + + +def extract_html_links(lines: list[str]) -> list[HtmlLinkInfo]: + """ + Extract all HTML links from the given lines. + + Return list of HtmlLinkInfo, where each dict contains: + - `line_no` - line number (1-based) + - `full_tag` - full HTML link tag + - `attributes` - list of HTMLLinkAttribute (name, quote, value) + - `text` - link text + """ + + links = [] + for line_no, line in enumerate(lines, start=1): + for html_link in HTML_LINK_RE.finditer(line): + link_str = html_link.group(0) + + link_text_match = HTML_LINK_TEXT_RE.match(link_str) + assert link_text_match is not None + link_text = link_text_match.group(2) + assert isinstance(link_text, str) + + link_open_tag_match = HTML_LINK_OPEN_TAG_RE.match(link_str) + assert link_open_tag_match is not None + link_open_tag = link_open_tag_match.group(1) + assert isinstance(link_open_tag, str) + + attributes: list[HTMLLinkAttribute] = [] + for attr_name, attr_quote, attr_value in re.findall( + HTML_ATTR_RE, link_open_tag + ): + assert isinstance(attr_name, str) + assert isinstance(attr_quote, str) + assert isinstance(attr_value, str) + attributes.append( + HTMLLinkAttribute( + name=attr_name, quote=attr_quote, value=attr_value + ) + ) + links.append( + HtmlLinkInfo( + line_no=line_no, + full_tag=link_str, + attributes=attributes, + text=link_text, + ) + ) + return links + + +def _construct_html_link( + link_text: str, + attributes: list[HTMLLinkAttribute], + lang_code: str, +) -> str: + """ + Reconstruct HTML link, adjusting the URL for the given language code if needed. + """ + + attributes_upd: list[HTMLLinkAttribute] = [] + for attribute in attributes: + if attribute["name"] == "href": + original_url = attribute["value"] + url = _add_lang_code_to_url(original_url, lang_code) + attributes_upd.append( + HTMLLinkAttribute(name="href", quote=attribute["quote"], value=url) + ) + else: + attributes_upd.append(attribute) + + attrs_str = " ".join( + f"{attribute['name']}={attribute['quote']}{attribute['value']}{attribute['quote']}" + for attribute in attributes_upd + ) + return f"<a {attrs_str}>{link_text}</a>" + + +def replace_html_links( + text: list[str], + links: list[HtmlLinkInfo], + original_links: list[HtmlLinkInfo], + lang_code: str, +) -> list[str]: + """ + Replace HTML links in the given text with the links from the original document. + + Adjust URLs for the given language code. + Fail if the number of links does not match the original. + """ + + if len(links) != len(original_links): + raise ValueError( + "Number of HTML links does not match the number in the " + "original document " + f"({len(links)} vs {len(original_links)})" + ) + + modified_text = text.copy() + for link_index, link in enumerate(links): + original_link_info = original_links[link_index] + + # Replace in the document text + replacement_link = _construct_html_link( + link_text=link["text"], + attributes=original_link_info["attributes"], + lang_code=lang_code, + ) + line_no = link["line_no"] - 1 + modified_text[line_no] = modified_text[line_no].replace( + link["full_tag"], replacement_link, 1 + ) + + return modified_text + + +# Multiline code blocks +# -------------------------------------------------------------------------------------- + + +def get_code_block_lang(line: str) -> str: + match = CODE_BLOCK_LANG_RE.match(line) + if match: + return match.group(1) + return "" + + +def extract_multiline_code_blocks(text: list[str]) -> list[MultilineCodeBlockInfo]: + blocks: list[MultilineCodeBlockInfo] = [] + + in_code_block3 = False + in_code_block4 = False + current_block_lang = "" + current_block_start_line = -1 + current_block_lines = [] + + for line_no, line in enumerate(text, start=1): + stripped = line.lstrip() + + # --- Detect opening fence --- + if not (in_code_block3 or in_code_block4): + if stripped.startswith("```"): + current_block_start_line = line_no + count = len(stripped) - len(stripped.lstrip("`")) + if count == 3: + in_code_block3 = True + current_block_lang = get_code_block_lang(stripped) + current_block_lines = [line] + continue + elif count >= 4: + in_code_block4 = True + current_block_lang = get_code_block_lang(stripped) + current_block_lines = [line] + continue + + # --- Detect closing fence --- + elif in_code_block3: + if stripped.startswith("```"): + count = len(stripped) - len(stripped.lstrip("`")) + if count == 3: + current_block_lines.append(line) + blocks.append( + MultilineCodeBlockInfo( + lang=current_block_lang, + start_line_no=current_block_start_line, + content=current_block_lines, + ) + ) + in_code_block3 = False + current_block_lang = "" + current_block_start_line = -1 + current_block_lines = [] + continue + current_block_lines.append(line) + + elif in_code_block4: + if stripped.startswith("````"): + count = len(stripped) - len(stripped.lstrip("`")) + if count >= 4: + current_block_lines.append(line) + blocks.append( + MultilineCodeBlockInfo( + lang=current_block_lang, + start_line_no=current_block_start_line, + content=current_block_lines, + ) + ) + in_code_block4 = False + current_block_lang = "" + current_block_start_line = -1 + current_block_lines = [] + continue + current_block_lines.append(line) + + return blocks + + +def _split_hash_comment(line: str) -> tuple[str, str | None]: + match = HASH_COMMENT_RE.match(line) + if match: + code = match.group("code").rstrip() + comment = match.group("comment") + return code, comment + return line.rstrip(), None + + +def _split_slashes_comment(line: str) -> tuple[str, str | None]: + match = SLASHES_COMMENT_RE.match(line) + if match: + code = match.group("code").rstrip() + comment = match.group("comment") + return code, comment + return line, None + + +def replace_multiline_code_block( + block_a: MultilineCodeBlockInfo, block_b: MultilineCodeBlockInfo +) -> list[str]: + """ + Replace multiline code block `a` with block `b` leaving comments intact. + + Syntax of comments depends on the language of the code block. + Raises ValueError if the blocks are not compatible (different languages or different number of lines). + """ + + start_line = block_a["start_line_no"] + end_line_no = start_line + len(block_a["content"]) - 1 + + if block_a["lang"] != block_b["lang"]: + raise ValueError( + f"Code block (lines {start_line}-{end_line_no}) " + "has different language than the original block " + f"('{block_a['lang']}' vs '{block_b['lang']}')" + ) + if len(block_a["content"]) != len(block_b["content"]): + raise ValueError( + f"Code block (lines {start_line}-{end_line_no}) " + "has different number of lines than the original block " + f"({len(block_a['content'])} vs {len(block_b['content'])})" + ) + + block_language = block_a["lang"].lower() + if block_language in {"mermaid"}: + if block_a != block_b: + print( + f"Skipping mermaid code block replacement (lines {start_line}-{end_line_no}). " + "This should be checked manually." + ) + return block_a["content"].copy() # We don't handle mermaid code blocks for now + + code_block: list[str] = [] + for line_a, line_b in zip(block_a["content"], block_b["content"]): + line_a_comment: str | None = None + line_b_comment: str | None = None + + # Handle comments based on language + if block_language in { + "python", + "py", + "sh", + "bash", + "dockerfile", + "requirements", + "gitignore", + "toml", + "yaml", + "yml", + "hash-style-comments", + }: + _line_a_code, line_a_comment = _split_hash_comment(line_a) + _line_b_code, line_b_comment = _split_hash_comment(line_b) + res_line = line_b + if line_b_comment: + res_line = res_line.replace(line_b_comment, line_a_comment, 1) + code_block.append(res_line) + elif block_language in {"console", "json", "slash-style-comments"}: + _line_a_code, line_a_comment = _split_slashes_comment(line_a) + _line_b_code, line_b_comment = _split_slashes_comment(line_b) + res_line = line_b + if line_b_comment: + res_line = res_line.replace(line_b_comment, line_a_comment, 1) + code_block.append(res_line) + else: + code_block.append(line_b) + + return code_block + + +def replace_multiline_code_blocks_in_text( + text: list[str], + code_blocks: list[MultilineCodeBlockInfo], + original_code_blocks: list[MultilineCodeBlockInfo], +) -> list[str]: + """ + Update each code block in `text` with the corresponding code block from + `original_code_blocks` with comments taken from `code_blocks`. + + Raises ValueError if the number, language, or shape of code blocks do not match. + """ + + if len(code_blocks) != len(original_code_blocks): + raise ValueError( + "Number of code blocks does not match the number in the original document " + f"({len(code_blocks)} vs {len(original_code_blocks)})" + ) + + modified_text = text.copy() + for block, original_block in zip(code_blocks, original_code_blocks): + updated_content = replace_multiline_code_block(block, original_block) + + start_line_index = block["start_line_no"] - 1 + for i, updated_line in enumerate(updated_content): + modified_text[start_line_index + i] = updated_line + + return modified_text + + +# All checks +# -------------------------------------------------------------------------------------- + + +def check_translation( + doc_lines: list[str], + en_doc_lines: list[str], + lang_code: str, + auto_fix: bool, + path: str, +) -> list[str]: + # Fix code includes + en_code_includes = extract_code_includes(en_doc_lines) + doc_lines_with_placeholders = replace_code_includes_with_placeholders(doc_lines) + fixed_doc_lines = replace_placeholders_with_code_includes( + doc_lines_with_placeholders, en_code_includes + ) + if auto_fix and (fixed_doc_lines != doc_lines): + print(f"Fixing code includes in: {path}") + doc_lines = fixed_doc_lines + + # Fix permalinks + en_permalinks = extract_header_permalinks(en_doc_lines) + doc_permalinks = extract_header_permalinks(doc_lines) + fixed_doc_lines = replace_header_permalinks( + doc_lines, doc_permalinks, en_permalinks + ) + if auto_fix and (fixed_doc_lines != doc_lines): + print(f"Fixing header permalinks in: {path}") + doc_lines = fixed_doc_lines + + # Fix markdown links + en_markdown_links = extract_markdown_links(en_doc_lines) + doc_markdown_links = extract_markdown_links(doc_lines) + fixed_doc_lines = replace_markdown_links( + doc_lines, doc_markdown_links, en_markdown_links, lang_code + ) + if auto_fix and (fixed_doc_lines != doc_lines): + print(f"Fixing markdown links in: {path}") + doc_lines = fixed_doc_lines + + # Fix HTML links + en_html_links = extract_html_links(en_doc_lines) + doc_html_links = extract_html_links(doc_lines) + fixed_doc_lines = replace_html_links( + doc_lines, doc_html_links, en_html_links, lang_code + ) + if auto_fix and (fixed_doc_lines != doc_lines): + print(f"Fixing HTML links in: {path}") + doc_lines = fixed_doc_lines + + # Fix multiline code blocks + en_code_blocks = extract_multiline_code_blocks(en_doc_lines) + doc_code_blocks = extract_multiline_code_blocks(doc_lines) + fixed_doc_lines = replace_multiline_code_blocks_in_text( + doc_lines, doc_code_blocks, en_code_blocks + ) + if auto_fix and (fixed_doc_lines != doc_lines): + print(f"Fixing multiline code blocks in: {path}") + doc_lines = fixed_doc_lines + + return doc_lines diff --git a/scripts/tests/test_translation_fixer/conftest.py b/scripts/tests/test_translation_fixer/conftest.py new file mode 100644 index 0000000000..b2c745de11 --- /dev/null +++ b/scripts/tests/test_translation_fixer/conftest.py @@ -0,0 +1,32 @@ +import shutil +from pathlib import Path + +import pytest +from typer.testing import CliRunner + + +@pytest.fixture(name="runner") +def get_runner(): + runner = CliRunner() + with runner.isolated_filesystem(): + yield runner + + +@pytest.fixture(name="root_dir") +def prepare_paths(runner): + docs_dir = Path("docs") + en_docs_dir = docs_dir / "en" / "docs" + lang_docs_dir = docs_dir / "lang" / "docs" + en_docs_dir.mkdir(parents=True, exist_ok=True) + lang_docs_dir.mkdir(parents=True, exist_ok=True) + yield Path.cwd() + + +@pytest.fixture +def copy_test_files(root_dir: Path, request: pytest.FixtureRequest): + en_file_path = Path(request.param[0]) + translation_file_path = Path(request.param[1]) + shutil.copy(str(en_file_path), str(root_dir / "docs" / "en" / "docs" / "doc.md")) + shutil.copy( + str(translation_file_path), str(root_dir / "docs" / "lang" / "docs" / "doc.md") + ) diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/en_doc.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/en_doc.md new file mode 100644 index 0000000000..cad20e2c70 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/en_doc.md @@ -0,0 +1,44 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +```toml +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Mermaid diagram + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_lines_number_gt.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_lines_number_gt.md new file mode 100644 index 0000000000..f46070156b --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_lines_number_gt.md @@ -0,0 +1,45 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +```toml +# Extra line +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_lines_number_lt.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_lines_number_lt.md new file mode 100644 index 0000000000..e08baa70b1 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_lines_number_lt.md @@ -0,0 +1,45 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +The following block is missing first line: + +```toml +title = "TOML Example" # Title of the document +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_mermaid_not_translated.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_mermaid_not_translated.md new file mode 100644 index 0000000000..cacb9546d0 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_mermaid_not_translated.md @@ -0,0 +1,44 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +```toml +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_mermaid_translated.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_mermaid_translated.md new file mode 100644 index 0000000000..d03dca53eb --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_mermaid_translated.md @@ -0,0 +1,44 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +```toml +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|требует| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_number_gt.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_number_gt.md new file mode 100644 index 0000000000..e77050cc95 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_number_gt.md @@ -0,0 +1,50 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +```toml +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +Extra code block + +``` +$ cd my_project +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_number_lt.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_number_lt.md new file mode 100644 index 0000000000..918cb883f4 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_number_lt.md @@ -0,0 +1,41 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +Missing code block... + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_wrong_lang_code.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_wrong_lang_code.md new file mode 100644 index 0000000000..88aed900d3 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_wrong_lang_code.md @@ -0,0 +1,46 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +The following block has wrong language code (should be TOML): + +```yaml +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_wrong_lang_code_2.md b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_wrong_lang_code_2.md new file mode 100644 index 0000000000..a7fbb39f54 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/data/translated_doc_wrong_lang_code_2.md @@ -0,0 +1,46 @@ +# Code blocks { #code-blocks } + +Some text + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +Some more text + +The following block has wrong language code (should be TOML): + +``` +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +And more text + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +And even more text + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + +Диаграма Mermaid + +```mermaid +flowchart LR + stone(philosophers-stone) -->|requires| harry-1[harry v1] +``` diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_lines_number_mismatch.py b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_lines_number_mismatch.py new file mode 100644 index 0000000000..906c8a560a --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_lines_number_mismatch.py @@ -0,0 +1,58 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_code_blocks/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_lines_number_gt.md")], + indirect=True, +) +def test_gt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_lines_number_gt.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Code block (lines 14-18) has different number of lines than the original block (5 vs 4)" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_lines_number_lt.md")], + indirect=True, +) +def test_lt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + # assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_lines_number_lt.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Code block (lines 16-18) has different number of lines than the original block (3 vs 4)" + ) in result.output diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_mermaid.py b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_mermaid.py new file mode 100644 index 0000000000..75c589fb50 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_mermaid.py @@ -0,0 +1,59 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_code_blocks/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_mermaid_translated.md")], + indirect=True, +) +def test_translated(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 0, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_mermaid_translated.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert ( + "Skipping mermaid code block replacement (lines 41-44). This should be checked manually." + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [ + ( + f"{data_path}/en_doc.md", + f"{data_path}/translated_doc_mermaid_not_translated.md", + ) + ], + indirect=True, +) +def test_not_translated(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 0, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_mermaid_not_translated.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert ("Skipping mermaid code block replacement") not in result.output diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_number_mismatch.py b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_number_mismatch.py new file mode 100644 index 0000000000..b05dac900e --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_number_mismatch.py @@ -0,0 +1,56 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_code_blocks/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_gt.md")], + indirect=True, +) +def test_gt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_gt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of code blocks does not match the number " + "in the original document (6 vs 5)" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_lt.md")], + indirect=True, +) +def test_lt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + # assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_lt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of code blocks does not match the number " + "in the original document (4 vs 5)" + ) in result.output diff --git a/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_wrong_lang_code.py b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_wrong_lang_code.py new file mode 100644 index 0000000000..6c2b18c89a --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_blocks/test_code_blocks_wrong_lang_code.py @@ -0,0 +1,58 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_code_blocks/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_wrong_lang_code.md")], + indirect=True, +) +def test_wrong_lang_code_1(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_wrong_lang_code.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Code block (lines 16-19) has different language than the original block ('yaml' vs 'toml')" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_wrong_lang_code_2.md")], + indirect=True, +) +def test_wrong_lang_code_2(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_wrong_lang_code_2.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Code block (lines 16-19) has different language than the original block ('' vs 'toml')" + ) in result.output diff --git a/scripts/tests/test_translation_fixer/test_code_includes/data/en_doc.md b/scripts/tests/test_translation_fixer/test_code_includes/data/en_doc.md new file mode 100644 index 0000000000..593da0b327 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_includes/data/en_doc.md @@ -0,0 +1,13 @@ +# Header + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +Some text + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +Some more text + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] hl[31:33] *} + +And even more text diff --git a/scripts/tests/test_translation_fixer/test_code_includes/data/translated_doc_number_gt.md b/scripts/tests/test_translation_fixer/test_code_includes/data/translated_doc_number_gt.md new file mode 100644 index 0000000000..c1ad94d276 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_includes/data/translated_doc_number_gt.md @@ -0,0 +1,15 @@ +# Header + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +Some text + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +Some more text + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] hl[31:33] *} + +And even more text + +{* ../../docs_src/python_types/tutorial001_py39.py *} diff --git a/scripts/tests/test_translation_fixer/test_code_includes/data/translated_doc_number_lt.md b/scripts/tests/test_translation_fixer/test_code_includes/data/translated_doc_number_lt.md new file mode 100644 index 0000000000..07eaf2c23d --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_includes/data/translated_doc_number_lt.md @@ -0,0 +1,13 @@ +# Header + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +Some text + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +Some more text + +... + +And even more text diff --git a/scripts/tests/test_translation_fixer/test_code_includes/test_number_mismatch.py b/scripts/tests/test_translation_fixer/test_code_includes/test_number_mismatch.py new file mode 100644 index 0000000000..5e3eee51af --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_code_includes/test_number_mismatch.py @@ -0,0 +1,56 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_code_includes/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_gt.md")], + indirect=True, +) +def test_gt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1 + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_gt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of code include placeholders does not match the number of code includes " + "in the original document (4 vs 3)" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_lt.md")], + indirect=True, +) +def test_lt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1 + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_lt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of code include placeholders does not match the number of code includes " + "in the original document (2 vs 3)" + ) in result.output diff --git a/scripts/tests/test_translation_fixer/test_complex_doc/data/en_doc.md b/scripts/tests/test_translation_fixer/test_complex_doc/data/en_doc.md new file mode 100644 index 0000000000..69cd3f3fd7 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_complex_doc/data/en_doc.md @@ -0,0 +1,244 @@ +# Test translation fixer tool { #test-translation-fixer } + +## Code blocks with and without comments { #code-blocks-with-and-without-comments } + +This is a test page for the translation fixer tool. + +### Code blocks with comments { #code-blocks-with-comments } + +The following code blocks include comments in different styles. +Fixer tool should fix content, but preserve comments correctly. + +```python +# This is a sample Python code block +def hello_world(): + # Comment with indentation + print("Hello, world!") # Print greeting +``` + +```toml +# This is a sample TOML code block +title = "TOML Example" # Title of the document +``` + +```console +// Use the command "live" and pass the language code as a CLI argument +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +```json +{ + // This is a sample JSON code block + "greeting": "Hello, world!" // Greeting +} +``` + + +### Code blocks with comments where language uses different comment styles { #code-blocks-with-different-comment-styles } + +The following code blocks include comments in different styles based on the language. +Fixer tool will not preserve comments in these blocks. + +```json +{ + # This is a sample JSON code block + "greeting": "Hello, world!" # Print greeting +} +``` + +```console +# This is a sample console code block +$ echo "Hello, world!" # Print greeting +``` + +```toml +// This is a sample TOML code block +title = "TOML Example" // Title of the document +``` + + +### Code blocks with comments with unsupported languages or without language specified { #code-blocks-with-unsupported-languages } + +The following code blocks use unsupported languages for comment preservation. +Fixer tool will not preserve comments in these blocks. + +```javascript +// This is a sample JavaScript code block +console.log("Hello, world!"); // Print greeting +``` + +``` +# This is a sample console code block +$ echo "Hello, world!" # Print greeting +``` + +``` +// This is a sample console code block +$ echo "Hello, world!" // Print greeting +``` + + +### Code blocks with comments that don't follow pattern { #code-blocks-with-comments-without-pattern } + +Fixer tool expects comments that follow specific pattern: + +- For hash-style comments: comment starts with `# ` (hash following by whitespace) in the beginning of the string or after a whitespace. +- For slash-style comments: comment starts with `// ` (two slashes following by whitespace) in the beginning of the string or after a whitespace. + +If comment doesn't follow this pattern, fixer tool will not preserve it. + +```python +#Function declaration +def hello_world():# Print greeting + print("Hello, world!") #Print greeting without space after hash +``` + +```console +//Function declaration +def hello_world():// Print greeting + print("Hello, world!") //Print greeting without space after slashes +``` + +## Code blocks with quadruple backticks { #code-blocks-with-quadruple-backticks } + +The following code block uses quadruple backticks. + +````python +# Hello world function +def hello_world(): + print("Hello, world!") # Print greeting +```` + +### Backticks number mismatch is fixable { #backticks-number-mismatch-is-fixable } + +The following code block has triple backticks in the original document, but quadruple backticks in the translated document. +It will be fixed by the fixer tool (will convert to triple backticks). + +```Python +# Some Python code +``` + +### Triple backticks inside quadruple backticks { #triple-backticks-inside-quadruple-backticks } + +Comments inside nested code block will NOT be preserved. + +```` +Here is a code block with quadruple backticks that contains triple backticks inside: + +```python +# This is a sample Python code block +def hello_world(): + print("Hello, world!") # Print greeting +``` + +```` + +# Code includes { #code-includes } + +## Simple code includes { #simple-code-includes } + +{* ../../docs_src/python_types/tutorial001_py39.py *} + +{* ../../docs_src/python_types/tutorial002_py39.py *} + + +## Code includes with highlighting { #code-includes-with-highlighting } + +{* ../../docs_src/python_types/tutorial002_py39.py hl[1] *} + +{* ../../docs_src/python_types/tutorial006_py39.py hl[10] *} + + +## Code includes with line ranges { #code-includes-with-line-ranges } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] *} + + +## Code includes with line ranges and highlighting { #code-includes-with-line-ranges-and-highlighting } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] hl[31:33] *} + +{* ../../docs_src/dependencies/tutorial015_an_py310.py ln[10:15] hl[12:14] *} + + +## Code includes qith title { #code-includes-with-title } + +{* ../../docs_src/bigger_applications/app_an_py39/routers/users.py hl[1,3] title["app/routers/users.py"] *} + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +## Code includes with unknown attributes { #code-includes-with-unknown-attributes } + +{* ../../docs_src/python_types/tutorial001_py39.py unknown[123] *} + +## Some more code includes to test fixing { #some-more-code-includes-to-test-fixing } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] hl[31:33] *} + + + +# Links { #links } + +## Markdown-style links { #markdown-style-links } + +This is a [Markdown link](https://example.com) to an external site. + +This is a link with attributes: [**FastAPI** Project Generators](project-generation.md){.internal-link target=_blank} + +This is a link to the main FastAPI site: [FastAPI](https://fastapi.tiangolo.com) - tool should add language code to the URL. + +This is a link to one of the pages on FastAPI site: [How to](https://fastapi.tiangolo.com/how-to/) - tool should add language code to the URL. + +Link to test wrong attribute: [**FastAPI** Project Generators](project-generation.md){.internal-link} - tool should fix the attribute. + +Link with a title: [Example](https://example.com "Example site") - URL will be fixed, title preserved. + +### Markdown link to static assets { #markdown-link-to-static-assets } + +These are links to static assets: + +* [FastAPI Logo](https://fastapi.tiangolo.com/img/fastapi-logo.png) +* [FastAPI CSS](https://fastapi.tiangolo.com/css/fastapi.css) +* [FastAPI JS](https://fastapi.tiangolo.com/js/fastapi.js) + +Tool should NOT add language code to their URLs. + +## HTML-style links { #html-style-links } + +This is an <a href="https://example.com" target="_blank" class="external-link">HTML link</a> to an external site. + +This is an <a href="https://fastapi.tiangolo.com">link to the main FastAPI site</a> - tool should add language code to the URL. + +This is an <a href="https://fastapi.tiangolo.com/how-to/">link to one of the pages on FastAPI site</a> - tool should add language code to the URL. + +Link to test wrong attribute: <a href="project-generation.md" class="internal-link">**FastAPI** Project Generators</a> - tool should fix the attribute. + +### HTML links to static assets { #html-links-to-static-assets } + +These are links to static assets: + +* <a href="https://fastapi.tiangolo.com/img/fastapi-logo.png">FastAPI Logo</a> +* <a href="https://fastapi.tiangolo.com/css/fastapi.css">FastAPI CSS</a> +* <a href="https://fastapi.tiangolo.com/js/fastapi.js">FastAPI JS</a> + +Tool should NOT add language code to their URLs. + +# Header (with HTML link to <a href="https://tiangolo.com">tiangolo.com</a>) { #header-with-html-link-to-tiangolo-com } + +#Not a header + +```Python +# Also not a header +``` + +Some text diff --git a/scripts/tests/test_translation_fixer/test_complex_doc/data/translated_doc.md b/scripts/tests/test_translation_fixer/test_complex_doc/data/translated_doc.md new file mode 100644 index 0000000000..c922d7b133 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_complex_doc/data/translated_doc.md @@ -0,0 +1,240 @@ +# Тестовый инструмент исправления переводов { #test-translation-fixer } + +## Блоки кода с комментариями и без комментариев { #code-blocks-with-and-without-comments } + +Это тестовая страница для инструмента исправления переводов. + +### Блоки кода с комментариями { #code-blocks-with-comments } + +Следующие блоки кода содержат комментарии в разных стилях. +Инструмент исправления должен исправлять содержимое, но корректно сохранять комментарии. + +```python +# Это пример блока кода на Python +def hello_world(): + # Комментарий с отступом + print("Hello, world!") # Печать приветствия +``` + +```toml +# Это пример блока кода на TOML +title = "TOML Example" # Заголовок документа +``` + +```console +// Используйте команду "live" и передайте код языка в качестве аргумента CLI +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +```json +{ + // Это пример блока кода на JSON + "greeting": "Hello, world!" // Печать приветствия +} +``` + + +### Блоки кода с комментариями, где язык использует другие стили комментариев { #code-blocks-with-different-comment-styles } + +Следующие блоки кода содержат комментарии в разных стилях в зависимости от языка. +Инструмент исправления не будет сохранять комментарии в этих блоках. + +```json +{ + # Это пример блока кода на JSON + "greeting": "Hello, world!" # Печать приветствия +} +``` + +```console +# Это пример блока кода консоли +$ echo "Hello, world!" # Печать приветствия +``` + +```toml +// Это пример блока кода на TOML +title = "TOML Example" // Заголовок документа +``` + +### Блоки кода с комментариями на неподдерживаемых языках или без указания языка { #code-blocks-with-unsupported-languages } + +Следующие блоки кода используют неподдерживаемые языки для сохранения комментариев. +Инструмент исправления не будет сохранять комментарии в этих блоках. + +```javascript +// Это пример блока кода на JavaScript +console.log("Hello, world!"); // Печать приветствия +``` + +``` +# Это пример блока кода консоли +$ echo "Hello, world!" # Печать приветствия +``` + +``` +// Это пример блока кода консоли +$ echo "Hello, world!" // Печать приветствия +``` + +### Блоки кода с комментариями, которые не соответствуют шаблону { #code-blocks-with-comments-without-pattern } + +Инструмент исправления ожидает комментарии, которые соответствуют определённому шаблону: + +- Для комментариев в стиле с решёткой: комментарий начинается с `# ` (решётка, затем пробел) в начале строки или после пробела. +- Для комментариев в стиле со слешами: комментарий начинается с `// ` (два слеша, затем пробел) в начале строки или после пробела. + +Если комментарий не соответствует этому шаблону, инструмент исправления не будет его сохранять. + +```python +#Объявление функции +def hello_world():# Печать приветствия + print("Hello, world!") #Печать приветствия без пробела после решётки +``` + +```console +//Объявление функции +def hello_world():// Печать приветствия + print("Hello, world!") //Печать приветствия без пробела после слешей +``` + +## Блок кода с четырёхкратными обратными кавычками { #code-blocks-with-quadruple-backticks } + +Следующий блок кода содержит четырёхкратные обратные кавычки. + +````python +# Функция приветствия +def hello_world(): + print("Hello, world") # Печать приветствия +```` + +### Несоответствие обратных кавычек фиксится { #backticks-number-mismatch-is-fixable } + +Следующий блок кода имеет тройные обратные кавычки в оригинальном документе, но четырёхкратные обратные кавычки в переведённом документе. +Это будет исправлено инструментом исправления (будет преобразовано в тройные обратные кавычки). + +````Python +# Немного кода на Python +```` + +### Блок кода в тройных обратных кавычка внутри блока кода в четырёхкратных обратных кавычках { #triple-backticks-inside-quadruple-backticks } + +Комментарии внутри вложенного блока кода в тройных обратных кавычках НЕ БУДУТ сохранены. + +```` +Here is a code block with quadruple backticks that contains triple backticks inside: + +```python +# Этот комментарий НЕ будет сохранён +def hello_world(): + print("Hello, world") # Как и этот комментарий +``` + +```` + +# Включения кода { #code-includes } + +## Простые включения кода { #simple-code-includes } + +{* ../../docs_src/python_types/tutorial001_py39.py *} + +{* ../../docs_src/python_types/tutorial002_py39.py *} + + +## Включения кода с подсветкой { #code-includes-with-highlighting } + +{* ../../docs_src/python_types/tutorial002_py39.py hl[1] *} + +{* ../../docs_src/python_types/tutorial006_py39.py hl[10] *} + + +## Включения кода с диапазонами строк { #code-includes-with-line-ranges } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] *} + + +## Включения кода с диапазонами строк и подсветкой { #code-includes-with-line-ranges-and-highlighting } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] hl[31:33] *} + +{* ../../docs_src/dependencies/tutorial015_an_py310.py ln[10:15] hl[12:14] *} + + +## Включения кода с заголовком { #code-includes-with-title } + +{* ../../docs_src/bigger_applications/app_an_py39/routers/users.py hl[1,3] title["app/routers/users.py"] *} + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +## Включения кода с неизвестными атрибутами { #code-includes-with-unknown-attributes } + +{* ../../docs_src/python_types/tutorial001_py39.py unknown[123] *} + +## Ещё включения кода для тестирования исправления { #some-more-code-includes-to-test-fixing } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19 : 21] *} + +{* ../../docs_src/bigger_applications/app_an_py39/wrong.py hl[3] title["app/internal/admin.py"] *} + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[1:30] hl[1:10] *} + +# Ссылки { #links } + +## Ссылки в стиле Markdown { #markdown-style-links } + +Это [Markdown-ссылка](https://example.com) на внешний сайт. + +Это ссылка с атрибутами: [**FastAPI** генераторы проектов](project-generation.md){.internal-link target=_blank} + +Это ссылка на основной сайт FastAPI: [FastAPI](https://fastapi.tiangolo.com) — инструмент должен добавить код языка в URL. + +Это ссылка на одну из страниц на сайте FastAPI: [How to](https://fastapi.tiangolo.com/how-to) — инструмент должен добавить код языка в URL. + +Ссылка для тестирования неправильного атрибута: [**FastAPI** генераторы проектов](project-generation.md){.external-link} - инструмент должен исправить атрибут. + +Ссылка с заголовком: [Пример](http://example.com/ "Сайт для примера") - URL будет исправлен инструментом, заголовок сохранится. + +### Markdown ссылки на статические ресурсы { #markdown-link-to-static-assets } + +Это ссылки на статические ресурсы: + +* [FastAPI Logo](https://fastapi.tiangolo.com/img/fastapi-logo.png) +* [FastAPI CSS](https://fastapi.tiangolo.com/css/fastapi.css) +* [FastAPI JS](https://fastapi.tiangolo.com/js/fastapi.js) + +Инструмент НЕ должен добавлять код языка в их URL. + +## Ссылки в стиле HTML { #html-style-links } + +Это <a href="https://example.com" target="_blank" class="external-link">HTML-ссылка</a> на внешний сайт. + +Это <a href="https://fastapi.tiangolo.com">ссылка на основной сайт FastAPI</a> — инструмент должен добавить код языка в URL. + +Это <a href="https://fastapi.tiangolo.com/how-to/">ссылка на одну из страниц на сайте FastAPI</a> — инструмент должен добавить код языка в URL. + +Ссылка для тестирования неправильного атрибута: <a href="project-generation.md" class="external-link">**FastAPI** генераторы проектов</a> - инструмент должен исправить атрибут. + +### HTML ссылки на статические ресурсы { #html-links-to-static-assets } + +Это ссылки на статические ресурсы: + +* <a href="https://fastapi.tiangolo.com/img/fastapi-logo.png">FastAPI Logo</a> +* <a href="https://fastapi.tiangolo.com/css/fastapi.css">FastAPI CSS</a> +* <a href="https://fastapi.tiangolo.com/js/fastapi.js">FastAPI JS</a> + +Инструмент НЕ должен добавлять код языка в их URL. + +# Заголовок (с HTML ссылкой на <a href="https://tiangolo.com">tiangolo.com</a>) { #header-5 } + +#Не заголовок + +```Python +# Также не заголовок +``` + +Немного текста diff --git a/scripts/tests/test_translation_fixer/test_complex_doc/data/translated_doc_expected.md b/scripts/tests/test_translation_fixer/test_complex_doc/data/translated_doc_expected.md new file mode 100644 index 0000000000..b33f36e772 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_complex_doc/data/translated_doc_expected.md @@ -0,0 +1,240 @@ +# Тестовый инструмент исправления переводов { #test-translation-fixer } + +## Блоки кода с комментариями и без комментариев { #code-blocks-with-and-without-comments } + +Это тестовая страница для инструмента исправления переводов. + +### Блоки кода с комментариями { #code-blocks-with-comments } + +Следующие блоки кода содержат комментарии в разных стилях. +Инструмент исправления должен исправлять содержимое, но корректно сохранять комментарии. + +```python +# Это пример блока кода на Python +def hello_world(): + # Комментарий с отступом + print("Hello, world!") # Печать приветствия +``` + +```toml +# Это пример блока кода на TOML +title = "TOML Example" # Заголовок документа +``` + +```console +// Используйте команду "live" и передайте код языка в качестве аргумента CLI +$ python ./scripts/docs.py live es + +<span style="color: green;">[INFO]</span> Serving on http://127.0.0.1:8008 +<span style="color: green;">[INFO]</span> Start watching changes +<span style="color: green;">[INFO]</span> Start detecting changes +``` + +```json +{ + // Это пример блока кода на JSON + "greeting": "Hello, world!" // Печать приветствия +} +``` + + +### Блоки кода с комментариями, где язык использует другие стили комментариев { #code-blocks-with-different-comment-styles } + +Следующие блоки кода содержат комментарии в разных стилях в зависимости от языка. +Инструмент исправления не будет сохранять комментарии в этих блоках. + +```json +{ + # This is a sample JSON code block + "greeting": "Hello, world!" # Print greeting +} +``` + +```console +# This is a sample console code block +$ echo "Hello, world!" # Print greeting +``` + +```toml +// This is a sample TOML code block +title = "TOML Example" // Title of the document +``` + +### Блоки кода с комментариями на неподдерживаемых языках или без указания языка { #code-blocks-with-unsupported-languages } + +Следующие блоки кода используют неподдерживаемые языки для сохранения комментариев. +Инструмент исправления не будет сохранять комментарии в этих блоках. + +```javascript +// This is a sample JavaScript code block +console.log("Hello, world!"); // Print greeting +``` + +``` +# This is a sample console code block +$ echo "Hello, world!" # Print greeting +``` + +``` +// This is a sample console code block +$ echo "Hello, world!" // Print greeting +``` + +### Блоки кода с комментариями, которые не соответствуют шаблону { #code-blocks-with-comments-without-pattern } + +Инструмент исправления ожидает комментарии, которые соответствуют определённому шаблону: + +- Для комментариев в стиле с решёткой: комментарий начинается с `# ` (решётка, затем пробел) в начале строки или после пробела. +- Для комментариев в стиле со слешами: комментарий начинается с `// ` (два слеша, затем пробел) в начале строки или после пробела. + +Если комментарий не соответствует этому шаблону, инструмент исправления не будет его сохранять. + +```python +#Function declaration +def hello_world():# Print greeting + print("Hello, world!") #Print greeting without space after hash +``` + +```console +//Function declaration +def hello_world():// Print greeting + print("Hello, world!") //Print greeting without space after slashes +``` + +## Блок кода с четырёхкратными обратными кавычками { #code-blocks-with-quadruple-backticks } + +Следующий блок кода содержит четырёхкратные обратные кавычки. + +````python +# Функция приветствия +def hello_world(): + print("Hello, world!") # Печать приветствия +```` + +### Несоответствие обратных кавычек фиксится { #backticks-number-mismatch-is-fixable } + +Следующий блок кода имеет тройные обратные кавычки в оригинальном документе, но четырёхкратные обратные кавычки в переведённом документе. +Это будет исправлено инструментом исправления (будет преобразовано в тройные обратные кавычки). + +```Python +# Немного кода на Python +``` + +### Блок кода в тройных обратных кавычка внутри блока кода в четырёхкратных обратных кавычках { #triple-backticks-inside-quadruple-backticks } + +Комментарии внутри вложенного блока кода в тройных обратных кавычках НЕ БУДУТ сохранены. + +```` +Here is a code block with quadruple backticks that contains triple backticks inside: + +```python +# This is a sample Python code block +def hello_world(): + print("Hello, world!") # Print greeting +``` + +```` + +# Включения кода { #code-includes } + +## Простые включения кода { #simple-code-includes } + +{* ../../docs_src/python_types/tutorial001_py39.py *} + +{* ../../docs_src/python_types/tutorial002_py39.py *} + + +## Включения кода с подсветкой { #code-includes-with-highlighting } + +{* ../../docs_src/python_types/tutorial002_py39.py hl[1] *} + +{* ../../docs_src/python_types/tutorial006_py39.py hl[10] *} + + +## Включения кода с диапазонами строк { #code-includes-with-line-ranges } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] *} + + +## Включения кода с диапазонами строк и подсветкой { #code-includes-with-line-ranges-and-highlighting } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] hl[31:33] *} + +{* ../../docs_src/dependencies/tutorial015_an_py310.py ln[10:15] hl[12:14] *} + + +## Включения кода с заголовком { #code-includes-with-title } + +{* ../../docs_src/bigger_applications/app_an_py39/routers/users.py hl[1,3] title["app/routers/users.py"] *} + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +## Включения кода с неизвестными атрибутами { #code-includes-with-unknown-attributes } + +{* ../../docs_src/python_types/tutorial001_py39.py unknown[123] *} + +## Ещё включения кода для тестирования исправления { #some-more-code-includes-to-test-fixing } + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[19:21] *} + +{* ../../docs_src/bigger_applications/app_an_py39/internal/admin.py hl[3] title["app/internal/admin.py"] *} + +{* ../../docs_src/dependencies/tutorial013_an_py310.py ln[30:38] hl[31:33] *} + +# Ссылки { #links } + +## Ссылки в стиле Markdown { #markdown-style-links } + +Это [Markdown-ссылка](https://example.com) на внешний сайт. + +Это ссылка с атрибутами: [**FastAPI** генераторы проектов](project-generation.md){.internal-link target=_blank} + +Это ссылка на основной сайт FastAPI: [FastAPI](https://fastapi.tiangolo.com/lang) — инструмент должен добавить код языка в URL. + +Это ссылка на одну из страниц на сайте FastAPI: [How to](https://fastapi.tiangolo.com/lang/how-to/) — инструмент должен добавить код языка в URL. + +Ссылка для тестирования неправильного атрибута: [**FastAPI** генераторы проектов](project-generation.md){.internal-link} - инструмент должен исправить атрибут. + +Ссылка с заголовком: [Пример](https://example.com "Сайт для примера") - URL будет исправлен инструментом, заголовок сохранится. + +### Markdown ссылки на статические ресурсы { #markdown-link-to-static-assets } + +Это ссылки на статические ресурсы: + +* [FastAPI Logo](https://fastapi.tiangolo.com/img/fastapi-logo.png) +* [FastAPI CSS](https://fastapi.tiangolo.com/css/fastapi.css) +* [FastAPI JS](https://fastapi.tiangolo.com/js/fastapi.js) + +Инструмент НЕ должен добавлять код языка в их URL. + +## Ссылки в стиле HTML { #html-style-links } + +Это <a href="https://example.com" target="_blank" class="external-link">HTML-ссылка</a> на внешний сайт. + +Это <a href="https://fastapi.tiangolo.com/lang">ссылка на основной сайт FastAPI</a> — инструмент должен добавить код языка в URL. + +Это <a href="https://fastapi.tiangolo.com/lang/how-to/">ссылка на одну из страниц на сайте FastAPI</a> — инструмент должен добавить код языка в URL. + +Ссылка для тестирования неправильного атрибута: <a href="project-generation.md" class="internal-link">**FastAPI** генераторы проектов</a> - инструмент должен исправить атрибут. + +### HTML ссылки на статические ресурсы { #html-links-to-static-assets } + +Это ссылки на статические ресурсы: + +* <a href="https://fastapi.tiangolo.com/img/fastapi-logo.png">FastAPI Logo</a> +* <a href="https://fastapi.tiangolo.com/css/fastapi.css">FastAPI CSS</a> +* <a href="https://fastapi.tiangolo.com/js/fastapi.js">FastAPI JS</a> + +Инструмент НЕ должен добавлять код языка в их URL. + +# Заголовок (с HTML ссылкой на <a href="https://tiangolo.com">tiangolo.com</a>) { #header-with-html-link-to-tiangolo-com } + +#Не заголовок + +```Python +# Также не заголовок +``` + +Немного текста diff --git a/scripts/tests/test_translation_fixer/test_complex_doc/test_compex_doc.py b/scripts/tests/test_translation_fixer/test_complex_doc/test_compex_doc.py new file mode 100644 index 0000000000..86cc4d5eb4 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_complex_doc/test_compex_doc.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_complex_doc/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc.md")], + indirect=True, +) +def test_fix(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 0, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = (data_path / "translated_doc_expected.md").read_text() + assert fixed_content == expected_content + + assert "Fixing multiline code blocks in" in result.output + assert "Fixing markdown links in" in result.output diff --git a/scripts/tests/test_translation_fixer/test_header_permalinks/data/en_doc.md b/scripts/tests/test_translation_fixer/test_header_permalinks/data/en_doc.md new file mode 100644 index 0000000000..878e16fb5d --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_header_permalinks/data/en_doc.md @@ -0,0 +1,19 @@ +# Header 1 { #header-1 } + +Some text + +## Header 2 { #header-2 } + +Some more text + +### Header 3 { #header-3 } + +Even more text + +# Header 4 { #header-4 } + +A bit more text + +#Not a header + +Final portion of text diff --git a/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_level_mismatch_1.md b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_level_mismatch_1.md new file mode 100644 index 0000000000..feefd77705 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_level_mismatch_1.md @@ -0,0 +1,19 @@ +# Header 1 { #header-1 } + +Some text + +# Header 2 { #header-2 } + +Some more text + +### Header 3 { #header-3 } + +Even more text + +# Header 4 { #header-4 } + +A bit more text + +#Not a header + +Final portion of text diff --git a/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_level_mismatch_2.md b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_level_mismatch_2.md new file mode 100644 index 0000000000..ad53a660cd --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_level_mismatch_2.md @@ -0,0 +1,19 @@ +# Header 1 { #header-1 } + +Some text + +## Header 2 { #header-2 } + +Some more text + +### Header 3 { #header-3 } + +Even more text + +## Header 4 { #header-4 } + +A bit more text + +#Not a header + +Final portion of text diff --git a/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_number_gt.md b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_number_gt.md new file mode 100644 index 0000000000..9c517c9bb0 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_number_gt.md @@ -0,0 +1,19 @@ +# Header 1 { #header-1 } + +Some text + +## Header 2 { #header-2 } + +Some more text + +### Header 3 { #header-3 } + +Even more text + +# Header 4 { #header-4 } + +A bit more text + +# Extra header + +Final portion of text diff --git a/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_number_lt.md b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_number_lt.md new file mode 100644 index 0000000000..8a308728b2 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_header_permalinks/data/translated_doc_number_lt.md @@ -0,0 +1,19 @@ +# Header 1 { #header-1 } + +Some text + +## Header 2 { #header-2 } + +Some more text + +### Header 3 { #header-3 } + +Even more text + +Header 4 is missing + +A bit more text + +#Not a header + +Final portion of text diff --git a/scripts/tests/test_translation_fixer/test_header_permalinks/test_header_level_mismatch.py b/scripts/tests/test_translation_fixer/test_header_permalinks/test_header_level_mismatch.py new file mode 100644 index 0000000000..9fe2f7ba70 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_header_permalinks/test_header_level_mismatch.py @@ -0,0 +1,60 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_header_permalinks/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_level_mismatch_1.md")], + indirect=True, +) +def test_level_mismatch_1(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1 + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_level_mismatch_1.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Header levels do not match between document and original document" + " (found #, expected ##) for header №2 in line 5" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_level_mismatch_2.md")], + indirect=True, +) +def test_level_mismatch_2(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1 + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path( + f"{data_path}/translated_doc_level_mismatch_2.md" + ).read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Header levels do not match between document and original document" + " (found ##, expected #) for header №4 in line 13" + ) in result.output diff --git a/scripts/tests/test_translation_fixer/test_header_permalinks/test_header_number_mismatch.py b/scripts/tests/test_translation_fixer/test_header_permalinks/test_header_number_mismatch.py new file mode 100644 index 0000000000..c0e78d0302 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_header_permalinks/test_header_number_mismatch.py @@ -0,0 +1,56 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_header_permalinks/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_gt.md")], + indirect=True, +) +def test_gt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1 + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_gt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of headers with permalinks does not match the number " + "in the original document (5 vs 4)" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_lt.md")], + indirect=True, +) +def test_lt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1 + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_lt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of headers with permalinks does not match the number " + "in the original document (3 vs 4)" + ) in result.output diff --git a/scripts/tests/test_translation_fixer/test_html_links/data/en_doc.md b/scripts/tests/test_translation_fixer/test_html_links/data/en_doc.md new file mode 100644 index 0000000000..4c4d104cf2 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_html_links/data/en_doc.md @@ -0,0 +1,19 @@ +# Header 1 { #header-1 } + +Some text with a link to <a href="https://fastapi.tiangolo.com">FastAPI</a>. + +## Header 2 { #header-2 } + +Two links here: <a href="https://fastapi.tiangolo.com/how-to/">How to</a> and <a href="project-generation.md" class="internal-link" target="_blank">Project Generators</a>. + +### Header 3 { #header-3 } + +Another link: <a href="project-generation.md" class="internal-link" target="_blank" title="Link title">**FastAPI** Project Generators</a> with title. + +# Header 4 { #header-4 } + +Link to anchor: <a href="#header-2">Header 2</a> + +# Header with <a href="http://example.com">link</a> { #header-with-link } + +Some text diff --git a/scripts/tests/test_translation_fixer/test_html_links/data/translated_doc_number_gt.md b/scripts/tests/test_translation_fixer/test_html_links/data/translated_doc_number_gt.md new file mode 100644 index 0000000000..bac40242be --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_html_links/data/translated_doc_number_gt.md @@ -0,0 +1,21 @@ +# Заголовок 1 { #header-1 } + +Немного текста со ссылкой на <a href="https://fastapi.tiangolo.com">FastAPI</a>. + +## Заголовок 2 { #header-2 } + +Две ссылки здесь: <a href="https://fastapi.tiangolo.com/how-to/">How to</a> и <a href="project-generation.md" class="internal-link" target="_blank">Project Generators</a>. + +### Заголовок 3 { #header-3 } + +Ещё ссылка: <a href="project-generation.md" class="internal-link" target="_blank" title="Тайтл">**FastAPI** Генераторы Проектов</a> с тайтлом. + +И ещё одна <a href="https://github.com">экстра ссылка</a>. + +# Заголовок 4 { #header-4 } + +Ссылка на якорь: <a href="#header-2">Заголовок 2</a> + +# Заголовок со <a href="http://example.com">ссылкой</a> { #header-with-link } + +Немного текста diff --git a/scripts/tests/test_translation_fixer/test_html_links/data/translated_doc_number_lt.md b/scripts/tests/test_translation_fixer/test_html_links/data/translated_doc_number_lt.md new file mode 100644 index 0000000000..e2b36b6887 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_html_links/data/translated_doc_number_lt.md @@ -0,0 +1,19 @@ +# Заголовок 1 { #header-1 } + +Немного текста со ссылкой на <a href="https://fastapi.tiangolo.com">FastAPI</a>. + +## Заголовок 2 { #header-2 } + +Две ссылки здесь: <a href="https://fastapi.tiangolo.com/how-to/">How to</a> и <a href="project-generation.md" class="internal-link" target="_blank">Project Generators</a>. + +### Заголовок 3 { #header-3 } + +Ещё ссылка: <a href="project-generation.md" class="internal-link" target="_blank" title="Тайтл">**FastAPI** Генераторы Проектов</a> с тайтлом. + +# Заголовок 4 { #header-4 } + +Ссылка на якорь: <a href="#header-2">Заголовок 2</a> + +# Заголовок с потерянной ссылкой { #header-with-link } + +Немного текста diff --git a/scripts/tests/test_translation_fixer/test_html_links/test_html_links_number_mismatch.py b/scripts/tests/test_translation_fixer/test_html_links/test_html_links_number_mismatch.py new file mode 100644 index 0000000000..271e5d2058 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_html_links/test_html_links_number_mismatch.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path("scripts/tests/test_translation_fixer/test_html_links/data").absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_gt.md")], + indirect=True, +) +def test_gt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_gt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of HTML links does not match the number " + "in the original document (7 vs 6)" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_lt.md")], + indirect=True, +) +def test_lt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + # assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_lt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of HTML links does not match the number " + "in the original document (5 vs 6)" + ) in result.output diff --git a/scripts/tests/test_translation_fixer/test_markdown_links/data/en_doc.md b/scripts/tests/test_translation_fixer/test_markdown_links/data/en_doc.md new file mode 100644 index 0000000000..9d965b0d68 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_markdown_links/data/en_doc.md @@ -0,0 +1,19 @@ +# Header 1 { #header-1 } + +Some text with a link to [FastAPI](https://fastapi.tiangolo.com). + +## Header 2 { #header-2 } + +Two links here: [How to](https://fastapi.tiangolo.com/how-to/) and [Project Generators](project-generation.md){.internal-link target=_blank}. + +### Header 3 { #header-3 } + +Another link: [**FastAPI** Project Generators](project-generation.md "Link title"){.internal-link target=_blank} with title. + +# Header 4 { #header-4 } + +Link to anchor: [Header 2](#header-2) + +# Header with [link](http://example.com) { #header-with-link } + +Some text diff --git a/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc.md b/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc.md new file mode 100644 index 0000000000..804a7e60a6 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc.md @@ -0,0 +1,19 @@ +# Заголовок 1 { #header-1 } + +Немного текста со ссылкой на [FastAPI](https://fastapi.tiangolo.com). + +## Заголовок 2 { #header-2 } + +Две ссылки здесь: [How to](https://fastapi.tiangolo.com/how-to/) и [Project Generators](project-generation.md){.internal-link target=_blank}. + +### Заголовок 3 { #header-3 } + +Ещё ссылка: [**FastAPI** Генераторы Проектов](project-generation.md "Тайтл"){.internal-link target=_blank} с тайтлом. + +# Заголовок 4 { #header-4 } + +Ссылка на якорь: [Заголовок 2](#header-2) + +# Заголовок со [ссылкой](http://example.com) { #header-with-link } + +Немного текста diff --git a/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc_number_gt.md b/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc_number_gt.md new file mode 100644 index 0000000000..9cbedb676e --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc_number_gt.md @@ -0,0 +1,21 @@ +# Заголовок 1 { #header-1 } + +Немного текста со ссылкой на [FastAPI](https://fastapi.tiangolo.com). + +## Заголовок 2 { #header-2 } + +Две ссылки здесь: [How to](https://fastapi.tiangolo.com/how-to/) и [Project Generators](project-generation.md){.internal-link target=_blank}. + +### Заголовок 3 { #header-3 } + +Ещё ссылка: [**FastAPI** Генераторы Проектов](project-generation.md "Тайтл"){.internal-link target=_blank} с тайтлом. + +И ещё одна [экстра ссылка](https://github.com). + +# Заголовок 4 { #header-4 } + +Ссылка на якорь: [Заголовок 2](#header-2) + +# Заголовок со [ссылкой](http://example.com) { #header-with-link } + +Немного текста diff --git a/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc_number_lt.md b/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc_number_lt.md new file mode 100644 index 0000000000..4e9e6ccf78 --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_markdown_links/data/translated_doc_number_lt.md @@ -0,0 +1,19 @@ +# Заголовок 1 { #header-1 } + +Немного текста со ссылкой на [FastAPI](https://fastapi.tiangolo.com). + +## Заголовок 2 { #header-2 } + +Две ссылки здесь: [How to](https://fastapi.tiangolo.com/how-to/) и [Project Generators](project-generation.md){.internal-link target=_blank}. + +### Заголовок 3 { #header-3 } + +Ещё ссылка: [**FastAPI** Генераторы Проектов](project-generation.md "Тайтл"){.internal-link target=_blank} с тайтлом. + +# Заголовок 4 { #header-4 } + +Ссылка на якорь: [Заголовок 2](#header-2) + +# Заголовок с потерянной ссылкой { #header-with-link } + +Немного текста diff --git a/scripts/tests/test_translation_fixer/test_markdown_links/test_mkd_links_number_mismatch.py b/scripts/tests/test_translation_fixer/test_markdown_links/test_mkd_links_number_mismatch.py new file mode 100644 index 0000000000..0f4952f35d --- /dev/null +++ b/scripts/tests/test_translation_fixer/test_markdown_links/test_mkd_links_number_mismatch.py @@ -0,0 +1,56 @@ +from pathlib import Path + +import pytest +from typer.testing import CliRunner + +from scripts.translation_fixer import cli + +data_path = Path( + "scripts/tests/test_translation_fixer/test_markdown_links/data" +).absolute() + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_gt.md")], + indirect=True, +) +def test_gt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_gt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of markdown links does not match the number " + "in the original document (7 vs 6)" + ) in result.output + + +@pytest.mark.parametrize( + "copy_test_files", + [(f"{data_path}/en_doc.md", f"{data_path}/translated_doc_number_lt.md")], + indirect=True, +) +def test_lt(runner: CliRunner, root_dir: Path, copy_test_files): + result = runner.invoke( + cli, + ["fix-pages", "docs/lang/docs/doc.md"], + ) + # assert result.exit_code == 1, result.output + + fixed_content = (root_dir / "docs" / "lang" / "docs" / "doc.md").read_text() + expected_content = Path(f"{data_path}/translated_doc_number_lt.md").read_text() + + assert fixed_content == expected_content # Translated doc remains unchanged + assert "Error processing docs/lang/docs/doc.md" in result.output + assert ( + "Number of markdown links does not match the number " + "in the original document (5 vs 6)" + ) in result.output diff --git a/scripts/translation_fixer.py b/scripts/translation_fixer.py new file mode 100644 index 0000000000..3e1f42d514 --- /dev/null +++ b/scripts/translation_fixer.py @@ -0,0 +1,132 @@ +import os +from collections.abc import Iterable +from pathlib import Path +from typing import Annotated + +import typer + +from scripts.doc_parsing_utils import check_translation + +non_translated_sections = ( + f"reference{os.sep}", + "release-notes.md", + "fastapi-people.md", + "external-links.md", + "newsletter.md", + "management-tasks.md", + "management.md", + "contributing.md", +) + + +cli = typer.Typer() + + +@cli.callback() +def callback(): + pass + + +def iter_all_lang_paths(lang_path_root: Path) -> Iterable[Path]: + """ + Iterate on the markdown files to translate in order of priority. + """ + + first_dirs = [ + lang_path_root / "learn", + lang_path_root / "tutorial", + lang_path_root / "advanced", + lang_path_root / "about", + lang_path_root / "how-to", + ] + first_parent = lang_path_root + yield from first_parent.glob("*.md") + for dir_path in first_dirs: + yield from dir_path.rglob("*.md") + first_dirs_str = tuple(str(d) for d in first_dirs) + for path in lang_path_root.rglob("*.md"): + if str(path).startswith(first_dirs_str): + continue + if path.parent == first_parent: + continue + yield path + + +def get_all_paths(lang: str): + res: list[str] = [] + lang_docs_root = Path("docs") / lang / "docs" + for path in iter_all_lang_paths(lang_docs_root): + relpath = path.relative_to(lang_docs_root) + if not str(relpath).startswith(non_translated_sections): + res.append(str(relpath)) + return res + + +def process_one_page(path: Path) -> bool: + """ + Fix one translated document by comparing it to the English version. + + Returns True if processed successfully, False otherwise. + """ + + try: + lang_code = path.parts[1] + if lang_code == "en": + print(f"Skipping English document: {path}") + return True + + en_doc_path = Path("docs") / "en" / Path(*path.parts[2:]) + + doc_lines = path.read_text(encoding="utf-8").splitlines() + en_doc_lines = en_doc_path.read_text(encoding="utf-8").splitlines() + + doc_lines = check_translation( + doc_lines=doc_lines, + en_doc_lines=en_doc_lines, + lang_code=lang_code, + auto_fix=True, + path=str(path), + ) + + # Write back the fixed document + doc_lines.append("") # Ensure file ends with a newline + path.write_text("\n".join(doc_lines), encoding="utf-8") + + except ValueError as e: + print(f"Error processing {path}: {e}") + return False + return True + + +@cli.command() +def fix_all(ctx: typer.Context, language: str): + docs = get_all_paths(language) + + all_good = True + for page in docs: + doc_path = Path("docs") / language / "docs" / page + res = process_one_page(doc_path) + all_good = all_good and res + + if not all_good: + raise typer.Exit(code=1) + + +@cli.command() +def fix_pages( + doc_paths: Annotated[ + list[Path], + typer.Argument(help="List of paths to documents."), + ], +): + all_good = True + for path in doc_paths: + res = process_one_page(path) + all_good = all_good and res + + if not all_good: + raise typer.Exit(code=1) + + +if __name__ == "__main__": + cli()