gh-127833: Docs: Add a `grammar-snippet` directive & replace `productionlist` (GH...

author Petr Viktorin <encukou@gmail.com>

Wed, 5 Feb 2025 15:12:23 +0000 (16:12 +0100)

committer GitHub <noreply@github.com>

Wed, 5 Feb 2025 15:12:23 +0000 (16:12 +0100)
author Petr Viktorin <encukou@gmail.com>
Wed, 5 Feb 2025 15:12:23 +0000 (16:12 +0100)
committer GitHub <noreply@github.com>
Wed, 5 Feb 2025 15:12:23 +0000 (16:12 +0100)
diff --git a/Doc/conf.py b/Doc/conf.py

index 94af54084ee3381d49fcbb3e02abd7d5871ea903..a4e0c6286490180f7a3c4ec35753770d92f594ec 100644 (file)
--- a/Doc/conf.py
+++ b/Doc/conf.py
@@ -27,6 +27,7 @@ extensions = [
      'c_annotations',
      'changes',
      'glossary_search',
+    'grammar_snippet',
      'lexers',
      'misc_news',
      'pydoc_topics',
diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst

index dd3d3d6878e2892d92cfdd4d18c738de32490076..f155fafbe4d738e307f58d89438d6111ebf25b78 100644 (file)
--- a/Doc/reference/toplevel_components.rst
+++ b/Doc/reference/toplevel_components.rst
@@ -66,7 +66,9 @@ File input
  
  All input read from non-interactive files has the same form:
  
-.. productionlist:: python-grammar
+.. grammar-snippet::
+   :group: python-grammar
+
     file_input: (NEWLINE | `statement`)*
  
  This syntax is used in the following situations:
@@ -85,7 +87,9 @@ Interactive input
  
  Input in interactive mode is parsed using the following grammar:
  
-.. productionlist:: python-grammar
+.. grammar-snippet::
+   :group: python-grammar
+
     interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE
  
  Note that a (top-level) compound statement must be followed by a blank line in
diff --git a/Doc/tools/extensions/grammar_snippet.py b/Doc/tools/extensions/grammar_snippet.py

new file mode 100644 (file)

index 0000000..03c7e7c
--- /dev/null
+++ b/Doc/tools/extensions/grammar_snippet.py
@@ -0,0 +1,219 @@
+"""Support for documenting Python's grammar."""
+
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+
+from docutils import nodes
+from docutils.parsers.rst import directives
+from sphinx import addnodes
+from sphinx.domains.std import token_xrefs
+from sphinx.util.docutils import SphinxDirective
+from sphinx.util.nodes import make_id
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from typing import Any
+
+    from docutils.nodes import Node
+    from sphinx.application import Sphinx
+    from sphinx.util.typing import ExtensionMetadata
+
+
+class snippet_string_node(nodes.inline):  # noqa: N801 (snake_case is fine)
+    """Node for a string literal in a grammar snippet."""
+
+    def __init__(
+        self,
+        rawsource: str = '',
+        text: str = '',
+        *children: Node,
+        **attributes: Any,
+    ) -> None:
+        super().__init__(rawsource, text, *children, **attributes)
+        # Use the Pygments highlight class for `Literal.String.Other`
+        self['classes'].append('sx')
+
+
+class GrammarSnippetBase(SphinxDirective):
+    """Common functionality for GrammarSnippetDirective & CompatProductionList."""
+
+    # The option/argument handling is left to the individual classes.
+
+    def make_grammar_snippet(
+        self, options: dict[str, Any], content: Sequence[str]
+    ) -> list[nodes.paragraph]:
+        """Create a literal block from options & content."""
+
+        group_name = options['group']
+
+        # Docutils elements have a `rawsource` attribute that is supposed to be
+        # set to the original ReST source.
+        # Sphinx does the following with it:
+        # - if it's empty, set it to `self.astext()`
+        # - if it matches `self.astext()` when generating the output,
+        #   apply syntax highlighting (which is based on the plain-text content
+        #   and thus discards internal formatting, like references).
+        # To get around this, we set it to this non-empty string:
+        rawsource = 'You should not see this.'
+
+        literal = nodes.literal_block(
+            rawsource,
+            '',
+            classes=['highlight'],
+        )
+
+        grammar_re = re.compile(
+            r"""
+                (?P<rule_name>^[a-zA-Z0-9_]+)     # identifier at start of line
+                (?=:)                             # ... followed by a colon
+            |
+                (?P<rule_ref>`[^\s`]+`)           # identifier in backquotes
+            |
+                (?P<single_quoted>'[^']*')        # string in 'quotes'
+            |
+                (?P<double_quoted>"[^"]*")        # string in "quotes"
+            """,
+            re.VERBOSE,
+        )
+
+        for line in content:
+            last_pos = 0
+            for match in grammar_re.finditer(line):
+                # Handle text between matches
+                if match.start() > last_pos:
+                    literal += nodes.Text(line[last_pos : match.start()])
+                last_pos = match.end()
+
+                # Handle matches
+                group_dict = {
+                    name: content
+                    for name, content in match.groupdict().items()
+                    if content is not None
+                }
+                match group_dict:
+                    case {'rule_name': name}:
+                        literal += self.make_link_target_for_token(
+                            group_name, name
+                        )
+                    case {'rule_ref': ref_text}:
+                        literal += token_xrefs(ref_text, group_name)
+                    case {'single_quoted': name} | {'double_quoted': name}:
+                        literal += snippet_string_node('', name)
+                    case _:
+                        raise ValueError('unhandled match')
+            literal += nodes.Text(line[last_pos:] + '\n')
+
+        node = nodes.paragraph(
+            '',
+            '',
+            literal,
+        )
+
+        return [node]
+
+    def make_link_target_for_token(
+        self, group_name: str, name: str
+    ) -> addnodes.literal_strong:
+        """Return a literal node which is a link target for the given token."""
+        name_node = addnodes.literal_strong()
+
+        # Cargo-culted magic to make `name_node` a link target
+        # similar to Sphinx `production`.
+        # This needs to be the same as what Sphinx does
+        # to avoid breaking existing links.
+        domain = self.env.domains['std']
+        obj_name = f"{group_name}:{name}"
+        prefix = f'grammar-token-{group_name}'
+        node_id = make_id(self.env, self.state.document, prefix, name)
+        name_node['ids'].append(node_id)
+        self.state.document.note_implicit_target(name_node, name_node)
+        domain.note_object('token', obj_name, node_id, location=name_node)
+
+        text_node = nodes.Text(name)
+        name_node += text_node
+        return name_node
+
+
+class GrammarSnippetDirective(GrammarSnippetBase):
+    """Transform a grammar-snippet directive to a Sphinx literal_block
+
+    That is, turn something like:
+
+        .. grammar-snippet:: file
+           :group: python-grammar
+
+           file: (NEWLINE | statement)*
+
+    into something similar to Sphinx productionlist, but better suited
+    for our needs:
+    - Instead of `::=`, use a colon, as in `Grammar/python.gram`
+    - Show the listing almost as is, with no auto-aligment.
+      The only special character is the backtick, which marks tokens.
+
+    Unlike Sphinx's productionlist, this directive supports options.
+    The "group" must be given as a named option.
+    The content must be preceded by a blank line (like with most ReST
+    directives).
+    """
+
+    has_content = True
+    option_spec = {
+        'group': directives.unchanged_required,
+    }
+
+    # We currently ignore arguments.
+    required_arguments = 0
+    optional_arguments = 1
+    final_argument_whitespace = True
+
+    def run(self) -> list[nodes.paragraph]:
+        return self.make_grammar_snippet(self.options, self.content)
+
+
+class CompatProductionList(GrammarSnippetBase):
+    """Create grammar snippets from reST productionlist syntax
+
+    This is intended to be a transitional directive, used while we switch
+    from productionlist to grammar-snippet.
+    It makes existing docs that use the ReST syntax look like grammar-snippet,
+    as much as possible.
+    """
+
+    has_content = False
+    required_arguments = 1
+    optional_arguments = 0
+    final_argument_whitespace = True
+    option_spec = {}
+
+    def run(self) -> list[nodes.paragraph]:
+        # The "content" of a productionlist is actually the first and only
+        # argument. The first line is the group; the rest is the content lines.
+        lines = self.arguments[0].splitlines()
+        group = lines[0].strip()
+        options = {'group': group}
+        # We assume there's a colon in each line; align on it.
+        align_column = max(line.index(':') for line in lines[1:]) + 1
+        content = []
+        for line in lines[1:]:
+            rule_name, _colon, text = line.partition(':')
+            rule_name = rule_name.strip()
+            if rule_name:
+                name_part = rule_name + ':'
+            else:
+                name_part = ''
+            content.append(f'{name_part:<{align_column}}{text}')
+        return self.make_grammar_snippet(options, content)
+
+
+def setup(app: Sphinx) -> ExtensionMetadata:
+    app.add_directive('grammar-snippet', GrammarSnippetDirective)
+    app.add_directive_to_domain(
+        'std', 'productionlist', CompatProductionList, override=True
+    )
+    return {
+        'version': '1.0',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
author	Petr Viktorin <encukou@gmail.com>
	Wed, 5 Feb 2025 15:12:23 +0000 (16:12 +0100)
committer	GitHub <noreply@github.com>
	Wed, 5 Feb 2025 15:12:23 +0000 (16:12 +0100)
Doc/conf.py		patch \| blob \| blame \| history
Doc/reference/toplevel_components.rst		patch \| blob \| blame \| history
Doc/tools/extensions/grammar_snippet.py	[new file with mode: 0644]	patch \| blob