"""Check Knowledge articles for markdown markup placed inside HTML.

Markdown that sits inside an HTML element (or on the line directly after a
block-level opening tag, with no blank line in between) is not rendered as
markdown.  This script scans every ``*.md`` file under the Knowledge root and
reports such places, exiting with status 1 when any are found.
"""

# Provenance
# ----------
# Recovered from a gitweb "commitdiff_plain" export whose line breaks were
# collapsed:
#
#   From: Emma Marichal
#   Date: Fri, 15 May 2026 08:00:20 +0000 (+0200)
#   Subject: Add a check for articles formatting
#   X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fcheck_knowledge;p=thirdparty%2Fgoogle%2Ffonts.git
#
# The commit adds two new files:
#   .ci/check_knowledge_markup.py           (index 0000000000..2171c8bc18) - this module
#   .github/workflows/check_knowledge.yaml  (index 0000000000..bee74f262b) - reproduced
#                                           as a comment block at the end of this file

import re
import sys
from pathlib import Path

# NOTE(review): in the recovered text everything between the "(?" of
# MARKDOWN_LINK and the "]*>[^\n]+" tail of INLINE_TAG_RE was lost (it looks
# like it was stripped as if it were an HTML tag).  MARKDOWN_LINK,
# MARKDOWN_BOLD, the descriptions in ALL_PATTERNS and the tag list of
# INLINE_TAG_RE below are therefore RECONSTRUCTIONS - verify them against the
# upstream commit before relying on the exact set of patterns.

# A markdown link "[text](target)".  The negative lookbehind skips image
# syntax "![alt](src)".
MARKDOWN_LINK = re.compile(r'(?<!!)\[[^\]\n]+\]\([^)\n]+\)')

# Markdown strong emphasis "**text**".
MARKDOWN_BOLD = re.compile(r'\*\*[^*\n]+\*\*')

# (compiled pattern, human-readable description) pairs, tried in order; the
# first one that matches is the one reported.
ALL_PATTERNS = [
    (MARKDOWN_LINK, "markdown link"),
    (MARKDOWN_BOLD, "markdown bold"),
]

# An opening tag followed by more text on the same line.  Only the
# "[^>]*>[^\n]+" tail and the IGNORECASE flag survive in the recovered text.
INLINE_TAG_RE = re.compile(
    r'<(figcaption|caption|summary|td|th)\b[^>]*>[^\n]+',
    re.IGNORECASE,
)

# A block-level opening tag whose very next line is non-empty, i.e. there is
# no blank line separating the tag from the content.  Group 2 is that line.
# "[^\n]+" (instead of the original lazy ".*?" followed by a mandatory "\n")
# also catches a final line that has no trailing newline.
ADJACENT_BLOCK_RE = re.compile(
    r'<(figure|aside|div|section|blockquote)[^>]*>\n([^\n]+)',
    re.IGNORECASE,
)

# Markdown image syntax; images are tolerated next to block tags.
_IMAGE_RE = re.compile(r'!\[[^\]]*\]\([^)]*\)')


def _line_number(content, offset):
    """Return the 1-based line number of character *offset* in *content*."""
    # str.count with start/end bounds avoids copying content[:offset].
    return content.count('\n', 0, offset) + 1


def _first_violation(text):
    """Return the description of the first ALL_PATTERNS entry found in *text*.

    Returns ``None`` when no pattern matches.
    """
    for pattern, description in ALL_PATTERNS:
        if pattern.search(text):
            return description
    return None


def check_inline_tags(content, md_file, errors):
    """Catch markdown inside single-line HTML tags.

    Every match of ``INLINE_TAG_RE`` (for example
    ``<figcaption>...</figcaption>`` written on one line) is searched for the
    markdown patterns in ``ALL_PATTERNS``.  At most one error is recorded per
    matched tag line.

    Args:
        content: Full text of the markdown file.
        md_file: Path (or name) of the file; only used in the message.
        errors: List that error messages are appended to (mutated in place).
    """
    for match in INLINE_TAG_RE.finditer(content):
        line = match.group(0)
        description = _first_violation(line)
        if description is None:
            continue
        line_num = _line_number(content, match.start())
        errors.append(
            f"{md_file}:{line_num}: {description} inside inline HTML tag:\n"
            f" {line[:200]!r}"
        )


def check_adjacent_blocks(content, md_file, errors):
    """Catch markdown on the line immediately after a block HTML tag.

    A line is "adjacent" when it directly follows the opening tag with no
    blank line in between.  Markdown images on that line are ignored.  The
    reported line number is the line on which the opening tag starts.

    Args:
        content: Full text of the markdown file.
        md_file: Path (or name) of the file; only used in the message.
        errors: List that error messages are appended to (mutated in place).
    """
    for block_match in ADJACENT_BLOCK_RE.finditer(content):
        inner = block_match.group(2)
        if not inner.strip():
            # Whitespace-only line: nothing that could be markdown.
            continue
        description = _first_violation(_IMAGE_RE.sub('', inner))
        if description is None:
            continue
        line_num = _line_number(content, block_match.start())
        errors.append(
            f"{md_file}:{line_num}: {description} adjacent to HTML block tag "
            f"(no blank line separator):\n"
            f" {block_match.group(0)[:200].strip()!r}"
        )


def main(knowledge_root=None):
    """Run both checks over every ``*.md`` file under the Knowledge root.

    Args:
        knowledge_root: Directory to scan.  Defaults to
            ``cc-by-sa/knowledge`` relative to the current working directory,
            which is what the CI workflow relies on.

    Exits with status 1 when no markdown files are found or when at least one
    error is reported; otherwise prints a success summary and returns.
    """
    if knowledge_root is None:
        root = Path("cc-by-sa/knowledge")
    else:
        root = Path(knowledge_root)
    files = sorted(root.rglob("*.md"))

    if not files:
        print(f"No .md files found under {root.as_posix()}/")
        sys.exit(1)

    errors = []
    for md_file in files:
        content = md_file.read_text(encoding="utf-8")
        check_inline_tags(content, md_file, errors)
        check_adjacent_blocks(content, md_file, errors)

    if errors:
        print(f"❌ Found {len(errors)} markdown-in-HTML error(s):\n")
        for e in errors:
            print(f" {e}\n")
        sys.exit(1)

    print(f"✅ Checked {len(files)} files — no markdown markup inside HTML blocks.")


if __name__ == "__main__":
    # Optional first CLI argument overrides the directory to scan.
    main(sys.argv[1] if len(sys.argv) > 1 else None)


# ---------------------------------------------------------------------------
# .github/workflows/check_knowledge.yaml (added by the same commit)
# ---------------------------------------------------------------------------
# name: Check Knowledge content markup
#
# on:
#   pull_request:
#     paths:
#       - "cc-by-sa/knowledge/**/*.md"
#
# jobs:
#   check-markdown-in-html:
#     name: No markdown inside HTML blocks
#     runs-on: ubuntu-latest
#     steps:
#       - uses: actions/checkout@v4
#
#       - uses: actions/setup-python@v5
#         with:
#           python-version: "3.10"
#
#       - name: Check for markdown markup inside HTML
#         run: python3 .ci/check_knowledge_markup.py