and is described in the :ref:`notation <notation>` section,
except for an extra complication:
-* ``~`` ("cut"): commit to the current alternative and fail the rule
- even if this fails to parse
+* ``~`` ("cut"): commit to the current alternative; fail the rule
+ if the alternative fails to parse
+
+ Python mainly uses cuts to optimize parsing or to improve error
+ messages, so they often appear to be useless in the listing below.
+
+ .. see gh-143054, and CutValidator in the source, if you want to change this:
+
+ Cuts currently don't appear inside parentheses, brackets, lookaheads
+ and similar constructs.
+ Their behavior in these contexts is deliberately left unspecified.
.. literalinclude:: ../../Grammar/python.gram
:language: peg
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
- from pegen.validator import SubRuleValidator, ValidationError, RaiseRuleValidator
+ from pegen.validator import SubRuleValidator, ValidationError
+ from pegen.validator import RaiseRuleValidator, CutValidator
from pegen.testutil import parse_string
from pegen.grammar import Grammar
with self.assertRaises(ValidationError):
for rule_name, rule in grammar.rules.items():
validator.validate_rule(rule_name, rule)
+
+ def test_cut_validator(self) -> None:
+ # Every rule below places a cut (``~``) inside a nested construct:
+ # a repeated group, an optional group, or a gather. CutValidator
+ # only accepts cuts written directly in a rule's alternative, so
+ # each of these rules is expected to raise ValidationError.
+ grammar_source = """
+ star: (OP ~ OP)*
+ plus: (OP ~ OP)+
+ bracket: [OP ~ OP]
+ gather: OP.(OP ~ OP)+
+ nested: [OP | NAME ~ OP]
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ validator = CutValidator(grammar)
+ # One subTest per rule, so a rule that is wrongly accepted is
+ # reported by name instead of hiding the remaining rules.
+ for rule_name, rule in grammar.rules.items():
+ with self.subTest(rule_name):
+ with self.assertRaises(ValidationError):
+ validator.validate_rule(rule_name, rule)
],
)
+ def test_cut_is_local_in_rule(self) -> None:
+ # On input "x", the first alternative of `inner` matches 'x',
+ # passes the cut, then fails on 'y'. The cut keeps `inner` from
+ # trying its second alternative, so `inner` fails as a whole.
+ # The cut must not leak into `start`: its second alternative is
+ # still tried, matches 'x', and produces "ok".
+ grammar = """
+ start:
+ | inner
+ | 'x' { "ok" }
+ inner:
+ | 'x' ~ 'y'
+ | 'x'
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("x", parser_class)
+ self.assertEqual(node, 'ok')
+
+ def test_cut_is_local_in_parens(self) -> None:
+ # we currently don't guarantee this behavior, see gh-143054
+ #
+ # Here the cut sits inside a parenthesized group. On input "x",
+ # the group's first alternative matches 'x', passes the cut, then
+ # fails on 'y'. The test pins the current behavior: the cut only
+ # abandons the group's remaining alternative, and `start` still
+ # tries its own second alternative, which produces "ok".
+ grammar = """
+ start:
+ | ('x' ~ 'y' | 'x')
+ | 'x' { "ok" }
+ """
+ parser_class = make_parser(grammar)
+ node = parse_string("x", parser_class)
+ self.assertEqual(node, 'ok')
+
def test_dangling_reference(self) -> None:
grammar = """
start: foo ENDMARKER
+from typing import Any
+
from pegen import grammar
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
)
+class CutValidator(GrammarValidator):
+    """Fail if a Cut is not directly in a rule.
+
+    For simplicity, we currently document that a Cut affects alternatives
+    of the *rule* it is in.
+    However, the implementation makes cuts local to the enclosing Rhs
+    (e.g. a parenthesized list of choices).
+    Additionally, in academic papers about PEG, repeats and optional items
+    are "desugared" to choices with an empty alternative, and thus contain
+    a Cut's effect.
+
+    Please update documentation and tests when allowing cuts in such
+    nested positions, then get rid of this validator.
+
+    See gh-143054.
+    """
+
+    def visit(self, node: Any, parents: tuple[Any, ...] = ()) -> None:
+        """Visit *node*, extending the recorded path of nodes with it.
+
+        *parents* is the path of nodes visited on the way to *node*.
+        *node* itself is appended before delegating to the parent class,
+        so the path handed on always ends in the node being visited.
+        """
+        super().visit(node, parents=(*parents, node))
+
+    def visit_Cut(
+        self, node: grammar.Cut, parents: tuple[Any, ...] = ()
+    ) -> None:
+        """Raise ValidationError unless the cut is at a rule's top level.
+
+        *parents* is the path of nodes leading to this cut, ending with
+        the cut itself (see ``visit``).
+        """
+        parent_types = [type(p).__name__ for p in parents]
+        # The only accepted path is a cut written directly in one of the
+        # rule's own alternatives. Any other path (e.g. through a group,
+        # an optional or a repeat) is rejected.
+        if parent_types != ['Rule', 'Rhs', 'Alt', 'NamedItem', 'Cut']:
+            raise ValidationError(
+                f"Rule {self.rulename!r} contains cut that's not on the "
+                "top level. "
+                "The intended semantics of such cases need "
+                "to be clarified; see the CutValidator docstring."
+                f"\nThe cut is inside: {parent_types}"
+            )
+
def validate_grammar(the_grammar: grammar.Grammar) -> None:
for validator_cls in GrammarValidator.__subclasses__():
validator = validator_cls(the_grammar)