name: Run Ruff (lint) on Argument Clinic
args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml]
files: ^Tools/clinic/|Lib/test/test_clinic.py
+ - id: ruff
+ name: Run Ruff (lint) on Tools/peg_generator/
+ args: [--exit-non-zero-on-fix, --config=Tools/peg_generator/.ruff.toml]
+ files: ^Tools/peg_generator/
- id: ruff-format
name: Run Ruff (format) on Doc/
args: [--check]
--- /dev/null
+extend = "../../.ruff.toml" # Inherit the project-wide settings
+
+extend-exclude = [
+ # Generated files:
+ "Tools/peg_generator/pegen/grammar_parser.py",
+]
+
+[lint]
+select = [
+ "F", # pyflakes
+ "I", # isort
+ "UP", # pyupgrade
+ "RUF100", # Ban unused `# noqa` comments
+ "PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes)
+]
+ignore = [
+    # UP038 suggests PEP 604 unions instead of tuples in isinstance() checks,
+    # which makes the code slower and more verbose. https://github.com/astral-sh/ruff/issues/7871.
+ "UP038",
+]
+unfixable = [
+    # The autofixes sometimes do the wrong thing for these;
+    # it's better to look at the code manually and decide how it needs fixing.
+ "F841", # Detects unused variables
+ "F601", # Detects dictionaries that have duplicate keys
+ "F602", # Also detects dictionaries that have duplicate keys
+]
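For readers unfamiliar with the rule codes in the config above: `F` is pyflakes, `I` is isort, and `UP` is pyupgrade, which drives most of the mechanical rewrites in the hunks below (built-in generics, PEP 604 unions, f-strings). A hedged, hypothetical before/after sketch of the style these rules push toward, and of the `isinstance()` style that ignoring UP038 preserves (none of these names come from pegen):

```python
# Before (flagged): typing aliases (UP006/UP007), %-formatting (UP031),
# and an unused import (F401):
#
#     import os
#     from typing import Dict, Optional
#
#     def lookup(table: Dict[str, int], key: str) -> Optional[int]:
#         print("missing: %s" % key)
#         return table.get(key)

# After the autofixes:
def lookup(table: dict[str, int], key: str) -> int | None:
    print(f"missing: {key}")
    return table.get(key)

# UP038 stays ignored, so isinstance() checks keep using tuples:
def is_number(value: object) -> bool:
    return isinstance(value, (int, float))
```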
import time
import token
import traceback
-from typing import Tuple
from pegen.grammar import Grammar
from pegen.parser import Parser
def generate_c_code(
args: argparse.Namespace,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
from pegen.build import build_c_parser_and_generator
verbose = args.verbose
def generate_python_code(
args: argparse.Namespace,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
from pegen.build import build_python_parser_and_generator
verbose = args.verbose
if __name__ == "__main__":
- if sys.version_info < (3, 8):
+ if sys.version_info < (3, 8): # noqa: UP036
print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
sys.exit(1)
main()
TODO: Remove the above-described hack.
"""
-from typing import Any, Optional, Tuple
+from typing import Any
def ast_dump(
annotate_fields: bool = True,
include_attributes: bool = False,
*,
- indent: Optional[str] = None,
+ indent: str | None = None,
) -> str:
- def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
+ def _format(node: Any, level: int = 0) -> tuple[str, bool]:
if indent is not None:
level += 1
prefix = "\n" + indent * level
value, simple = _format(value, level)
allsimple = allsimple and simple
if keywords:
- args.append("%s=%s" % (name, value))
+ args.append(f"{name}={value}")
else:
args.append(value)
if include_attributes and node._attributes:
continue
value, simple = _format(value, level)
allsimple = allsimple and simple
- args.append("%s=%s" % (name, value))
+ args.append(f"{name}={value}")
if allsimple and len(args) <= 3:
- return "%s(%s)" % (node.__class__.__name__, ", ".join(args)), not args
- return "%s(%s%s)" % (node.__class__.__name__, prefix, sep.join(args)), False
+ return "{}({})".format(node.__class__.__name__, ", ".join(args)), not args
+ return f"{node.__class__.__name__}({prefix}{sep.join(args)})", False
elif isinstance(node, list):
if not node:
return "[]", True
- return "[%s%s]" % (prefix, sep.join(_format(x, level)[0] for x in node)), False
+ return f"[{prefix}{sep.join(_format(x, level)[0] for x in node)}]", False
return repr(node), True
if all(cls.__name__ != "AST" for cls in node.__class__.__mro__):
- raise TypeError("expected AST, got %r" % node.__class__.__name__)
+ raise TypeError(f"expected AST, got {node.__class__.__name__!r}")
return _format(node)[0]
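For orientation, a hedged usage sketch of the recursive `_format` helper's public entry point, assuming this hunk is pegen's `ast_dump` module and the elided first parameter is the AST node:

```python
import ast

from pegen.ast_dump import ast_dump  # assumed import path for this hunk

tree = ast.parse("x = 1")
print(ast_dump(tree, indent="  "))  # mirrors ast.dump(), using the indent logic shown above
```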
import sysconfig
import tempfile
import tokenize
-from typing import IO, Any, Dict, List, Optional, Set, Tuple
+from typing import IO, Any
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
MOD_DIR = pathlib.Path(__file__).resolve().parent
-TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
+TokenDefinitions = tuple[dict[int, str], dict[str, int], set[str]]
Incomplete = Any # TODO: install `types-setuptools` and remove this alias
-def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
+def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> list[str]:
flags = sysconfig.get_config_var(compiler_flags)
py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
if flags is None or py_flags_nodist is None:
def compile_c_extension(
generated_source_path: str,
- build_dir: Optional[str] = None,
+ build_dir: str | None = None,
verbose: bool = False,
keep_asserts: bool = True,
disable_optimization: bool = False,
- library_dir: Optional[str] = None,
+ library_dir: str | None = None,
) -> pathlib.Path:
"""Compile the generated source for a parser generator into an extension module.
"""
import setuptools.command.build_ext
import setuptools.logging
-
- from setuptools import Extension, Distribution
- from setuptools.modified import newer_group
+ from setuptools import Distribution, Extension
from setuptools._distutils.ccompiler import new_compiler
from setuptools._distutils.sysconfig import customize_compiler
+ from setuptools.modified import newer_group
if verbose:
setuptools.logging.set_threshold(logging.DEBUG)
def build_parser(
grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
-) -> Tuple[Grammar, Parser, Tokenizer]:
+) -> tuple[Grammar, Parser, Tokenizer]:
with open(grammar_file) as file:
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
parser = GrammarParser(tokenizer, verbose=verbose_parser)
keep_asserts_in_extension: bool = True,
skip_actions: bool = False,
) -> ParserGenerator:
- with open(tokens_file, "r") as tok_file:
+ with open(tokens_file) as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
with open(output_file, "w") as file:
gen: ParserGenerator = CParserGenerator(
verbose_c_extension: bool = False,
keep_asserts_in_extension: bool = True,
skip_actions: bool = False,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
"""Generate rules, C parser, tokenizer, parser generator for a given grammar
Args:
verbose_tokenizer: bool = False,
verbose_parser: bool = False,
skip_actions: bool = False,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
"""Generate rules, python parser, tokenizer, parser generator for a given grammar
Args:
import ast
import os.path
import re
+from collections.abc import Callable
from dataclasses import dataclass, field
from enum import Enum
-from typing import IO, Any, Callable, Dict, List, Optional, Set, Text, Tuple
+from typing import IO, Any
from pegen import grammar
from pegen.grammar import (
@dataclass
class FunctionCall:
function: str
- arguments: List[Any] = field(default_factory=list)
- assigned_variable: Optional[str] = None
- assigned_variable_type: Optional[str] = None
- return_type: Optional[str] = None
- nodetype: Optional[NodeTypes] = None
+ arguments: list[Any] = field(default_factory=list)
+ assigned_variable: str | None = None
+ assigned_variable_type: str | None = None
+ return_type: str | None = None
+ nodetype: NodeTypes | None = None
force_true: bool = False
- comment: Optional[str] = None
+ comment: str | None = None
def __str__(self) -> str:
parts = []
def __init__(
self,
parser_generator: ParserGenerator,
- exact_tokens: Dict[str, int],
- non_exact_tokens: Set[str],
+ exact_tokens: dict[str, int],
+ non_exact_tokens: set[str],
):
self.gen = parser_generator
self.exact_tokens = exact_tokens
self.non_exact_tokens = non_exact_tokens
- self.cache: Dict[str, str] = {}
- self.cleanup_statements: List[str] = []
+ self.cache: dict[str, str] = {}
+ self.cleanup_statements: list[str] = []
def keyword_helper(self, keyword: str) -> FunctionCall:
return FunctionCall(
)
return FunctionCall(
assigned_variable=f"{name.lower()}_var",
- function=f"_PyPegen_expect_token",
+ function="_PyPegen_expect_token",
arguments=["p", name],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
- function=f"_PyPegen_expect_token",
+ function="_PyPegen_expect_token",
arguments=["p", type],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
- function=f"_PyPegen_expect_forced_token",
+ function="_PyPegen_expect_forced_token",
arguments=["p", type, f'"{val}"'],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
call.comment = None
return FunctionCall(
assigned_variable="_literal",
- function=f"_PyPegen_expect_forced_result",
+ function="_PyPegen_expect_forced_result",
arguments=["p", str(call), f'"{node.node.rhs!s}"'],
return_type="void *",
comment=f"forced_token=({node.node.rhs!s})",
node: Any,
prefix: str,
rule_generation_func: Callable[[], str],
- return_type: Optional[str] = None,
+ return_type: str | None = None,
) -> FunctionCall:
node_str = f"{node}"
key = f"{prefix}_{node_str}"
def __init__(
self,
grammar: grammar.Grammar,
- tokens: Dict[int, str],
- exact_tokens: Dict[str, int],
- non_exact_tokens: Set[str],
- file: Optional[IO[Text]],
+ tokens: dict[int, str],
+ exact_tokens: dict[str, int],
+ non_exact_tokens: set[str],
+ file: IO[str] | None,
debug: bool = False,
skip_actions: bool = False,
):
self._varname_counter = 0
self.debug = debug
self.skip_actions = skip_actions
- self.cleanup_statements: List[str] = []
+ self.cleanup_statements: list[str] = []
def add_level(self) -> None:
self.print("if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) {")
self.print(f"if ({error_var}) {{")
with self.indent():
self.print(f"goto {goto_target};")
- self.print(f"}}")
+ self.print("}")
def out_of_memory_return(
self,
expr: str,
- cleanup_code: Optional[str] = None,
+ cleanup_code: str | None = None,
) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
self.print("p->error_indicator = 1;")
self.print("PyErr_NoMemory();")
self.add_return("NULL")
- self.print(f"}}")
+ self.print("}")
def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
self.print("PyErr_NoMemory();")
self.print(f"goto {goto_target};")
- self.print(f"}}")
+ self.print("}")
def generate(self, filename: str) -> None:
self.collect_rules()
if trailer:
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
- def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
- groups: Dict[int, List[Tuple[str, int]]] = {}
+ def _group_keywords_by_length(self) -> dict[int, list[tuple[str, int]]]:
+ groups: dict[int, list[tuple[str, int]]] = {}
for keyword_str, keyword_type in self.keywords.items():
length = len(keyword_str)
if length in groups:
self.print("if (_raw == NULL || p->mark <= _resmark)")
with self.indent():
self.print("break;")
- self.print(f"_resmark = p->mark;")
+ self.print("_resmark = p->mark;")
self.print("_res = _raw;")
self.print("}")
- self.print(f"p->mark = _resmark;")
+ self.print("p->mark = _resmark;")
self.add_return("_res")
self.print("}")
self.print(f"static {result_type}")
if memoize:
self.print("int _start_mark = p->mark;")
self.print("void **_children = PyMem_Malloc(sizeof(void *));")
- self.out_of_memory_return(f"!_children")
+ self.out_of_memory_return("!_children")
self.print("Py_ssize_t _children_capacity = 1;")
self.print("Py_ssize_t _n = 0;")
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
self.add_return("NULL")
self.print("}")
self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
- self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);")
+ self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
self.print("for (Py_ssize_t i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
self.print("PyMem_Free(_children);")
if memoize and node.name:
self.print(call)
def visit_Rhs(
- self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
+ self, node: Rhs, is_loop: bool, is_gather: bool, rulename: str | None
) -> None:
if is_loop:
assert len(node.alts) == 1
self.visit(item)
self.print(")")
- def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
+ def emit_action(self, node: Alt, cleanup_code: str | None = None) -> None:
self.print(f"_res = {node.action};")
self.print("if (_res == NULL && PyErr_Occurred()) {")
def emit_dummy_action(self) -> None:
self.print("_res = _PyPegen_dummy_name(p);")
- def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
+ def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
self.join_conditions(keyword="if", node=node)
self.print("{")
# We have parsed successfully all the conditions for the option.
self.emit_default_action(is_gather, node)
# As the current option has parsed correctly, do not continue with the rest.
- self.print(f"goto done;")
+ self.print("goto done;")
self.print("}")
- def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
+ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
# Condition of the main body of the alternative
self.join_conditions(keyword="while", node=node)
self.print("{")
self.print(
"void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
)
- self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);")
+ self.out_of_memory_return("!_new_children", cleanup_code="PyMem_Free(_children);")
self.print("_children = _new_children;")
self.print("}")
self.print("_children[_n++] = _res;")
self.print("}")
def visit_Alt(
- self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
+ self, node: Alt, is_loop: bool, is_gather: bool, rulename: str | None
) -> None:
if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
self.print(f"if (p->call_invalid_rules) {{ // {node}")
self.print("}")
self.print("}")
- def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
+ def collect_vars(self, node: Alt) -> dict[str | None, str | None]:
types = {}
with self.local_variable_context():
for item in node.items:
types[name] = type
return types
- def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
+ def add_var(self, node: NamedItem) -> tuple[str | None, str | None]:
call = self.callmakervisitor.generate_call(node.item)
name = node.name if node.name else call.assigned_variable
if name is not None:
import argparse
import pprint
import sys
-from typing import Dict, Set
from pegen.build import build_parser
from pegen.grammar import (
class FirstSetCalculator(GrammarVisitor):
- def __init__(self, rules: Dict[str, Rule]) -> None:
+ def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.nullables = compute_nullables(rules)
- self.first_sets: Dict[str, Set[str]] = dict()
- self.in_process: Set[str] = set()
+ self.first_sets: dict[str, set[str]] = dict()
+ self.in_process: set[str] = set()
- def calculate(self) -> Dict[str, Set[str]]:
+ def calculate(self) -> dict[str, set[str]]:
for name, rule in self.rules.items():
self.visit(rule)
return self.first_sets
- def visit_Alt(self, item: Alt) -> Set[str]:
- result: Set[str] = set()
- to_remove: Set[str] = set()
+ def visit_Alt(self, item: Alt) -> set[str]:
+ result: set[str] = set()
+ to_remove: set[str] = set()
for other in item.items:
new_terminals = self.visit(other)
if isinstance(other.item, NegativeLookahead):
return result
- def visit_Cut(self, item: Cut) -> Set[str]:
+ def visit_Cut(self, item: Cut) -> set[str]:
return set()
- def visit_Group(self, item: Group) -> Set[str]:
+ def visit_Group(self, item: Group) -> set[str]:
return self.visit(item.rhs)
- def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
+ def visit_PositiveLookahead(self, item: Lookahead) -> set[str]:
return self.visit(item.node)
- def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
+ def visit_NegativeLookahead(self, item: NegativeLookahead) -> set[str]:
return self.visit(item.node)
- def visit_NamedItem(self, item: NamedItem) -> Set[str]:
+ def visit_NamedItem(self, item: NamedItem) -> set[str]:
return self.visit(item.item)
- def visit_Opt(self, item: Opt) -> Set[str]:
+ def visit_Opt(self, item: Opt) -> set[str]:
return self.visit(item.node)
- def visit_Gather(self, item: Gather) -> Set[str]:
+ def visit_Gather(self, item: Gather) -> set[str]:
return self.visit(item.node)
- def visit_Repeat0(self, item: Repeat0) -> Set[str]:
+ def visit_Repeat0(self, item: Repeat0) -> set[str]:
return self.visit(item.node)
- def visit_Repeat1(self, item: Repeat1) -> Set[str]:
+ def visit_Repeat1(self, item: Repeat1) -> set[str]:
return self.visit(item.node)
- def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
+ def visit_NameLeaf(self, item: NameLeaf) -> set[str]:
if item.value not in self.rules:
return {item.value}
return self.first_sets[item.value]
- def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
+ def visit_StringLeaf(self, item: StringLeaf) -> set[str]:
return {item.value}
- def visit_Rhs(self, item: Rhs) -> Set[str]:
- result: Set[str] = set()
+ def visit_Rhs(self, item: Rhs) -> set[str]:
+ result: set[str] = set()
for alt in item.alts:
result |= self.visit(alt)
return result
- def visit_Rule(self, item: Rule) -> Set[str]:
+ def visit_Rule(self, item: Rule) -> set[str]:
if item.name in self.in_process:
return set()
elif item.name not in self.first_sets:
try:
grammar, parser, tokenizer = build_parser(args.grammar_file)
except Exception as err:
- print("ERROR: Failed to parse grammar file", file=sys.stderr)
+ print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
sys.exit(1)
firs_sets = FirstSetCalculator(grammar.rules).calculate()
from __future__ import annotations
-from typing import (
- AbstractSet,
- Any,
- Iterable,
- Iterator,
- List,
- Optional,
- Tuple,
- Union,
-)
+from collections.abc import Iterable, Iterator, Set
+from typing import Any
class GrammarError(Exception):
class Grammar:
- def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
+ def __init__(self, rules: Iterable[Rule], metas: Iterable[tuple[str, str | None]]):
# Check if there are repeated rules in "rules"
all_rules = {}
for rule in rules:
class Rule:
- def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
+ def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
self.name = name
self.type = type
self.rhs = rhs
class Rhs:
- def __init__(self, alts: List[Alt]):
+ def __init__(self, alts: list[Alt]):
self.alts = alts
- self.memo: Optional[Tuple[Optional[str], str]] = None
+ self.memo: tuple[str | None, str] | None = None
def __str__(self) -> str:
return " | ".join(str(alt) for alt in self.alts)
def __repr__(self) -> str:
return f"Rhs({self.alts!r})"
- def __iter__(self) -> Iterator[List[Alt]]:
+ def __iter__(self) -> Iterator[list[Alt]]:
yield self.alts
@property
class Alt:
- def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
+ def __init__(self, items: list[NamedItem], *, icut: int = -1, action: str | None = None):
self.items = items
self.icut = icut
self.action = action
args.append(f"action={self.action!r}")
return f"Alt({', '.join(args)})"
- def __iter__(self) -> Iterator[List[NamedItem]]:
+ def __iter__(self) -> Iterator[list[NamedItem]]:
yield self.items
class NamedItem:
- def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
+ def __init__(self, name: str | None, item: Item, type: str | None = None):
self.name = name
self.item = item
self.type = type
def __init__(self, node: Plain):
self.node = node
- self.memo: Optional[Tuple[Optional[str], str]] = None
+ self.memo: tuple[str | None, str] | None = None
def __iter__(self) -> Iterator[Plain]:
yield self.node
pass
def __repr__(self) -> str:
- return f"Cut()"
+ return "Cut()"
def __str__(self) -> str:
- return f"~"
+ return "~"
- def __iter__(self) -> Iterator[Tuple[str, str]]:
+ def __iter__(self) -> Iterator[tuple[str, str]]:
yield from ()
def __eq__(self, other: object) -> bool:
return NotImplemented
return True
- def initial_names(self) -> AbstractSet[str]:
+ def initial_names(self) -> Set[str]:
return set()
-Plain = Union[Leaf, Group]
-Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
-RuleName = Tuple[str, Optional[str]]
-MetaTuple = Tuple[str, Optional[str]]
-MetaList = List[MetaTuple]
-RuleList = List[Rule]
-NamedItemList = List[NamedItem]
-LookaheadOrCut = Union[Lookahead, Cut]
+Plain = Leaf | Group
+Item = Plain | Opt | Repeat | Forced | Lookahead | Rhs | Cut
+RuleName = tuple[str, str | None]
+MetaTuple = tuple[str, str | None]
+MetaList = list[MetaTuple]
+RuleList = list[Rule]
+NamedItemList = list[NamedItem]
+LookaheadOrCut = Lookahead | Cut
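One subtlety in the alias rewrite above: these are ordinary module-level assignments, not annotations, so `from __future__ import annotations` does not defer them; `Leaf | Group` builds a `types.UnionType` at import time and therefore needs Python 3.10+. A minimal sketch with stand-in classes (not the real pegen node types):

```python
# Stand-in classes; the real pegen Leaf/Group carry more structure.
class Leaf: ...
class Group: ...

Plain = Leaf | Group  # evaluated at import time -> types.UnionType (3.10+)

assert isinstance(Leaf(), Plain)        # runtime unions work with isinstance()
assert not isinstance(object(), Plain)
```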
import argparse
import sys
-from typing import Any, Callable, Iterator
+from collections.abc import Callable, Iterator
+from typing import Any
from pegen.build import build_parser
from pegen.grammar import Grammar, Rule
try:
grammar, parser, tokenizer = build_parser(args.filename)
except Exception as err:
- print("ERROR: Failed to parse grammar file", file=sys.stderr)
+ print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
sys.exit(1)
visitor = ASTGrammarPrinter()
import tokenize
import traceback
from abc import abstractmethod
-from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
+from collections.abc import Callable
+from typing import Any, ClassVar, TypeVar, cast
from pegen.tokenizer import Mark, Tokenizer, exact_token_types
def memoize_left_rec(
- method: Callable[["Parser"], Optional[T]]
-) -> Callable[["Parser"], Optional[T]]:
+ method: Callable[["Parser"], T | None]
+) -> Callable[["Parser"], T | None]:
"""Memoize a left-recursive symbol method."""
method_name = method.__name__
- def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]:
+ def memoize_left_rec_wrapper(self: "Parser") -> T | None:
mark = self._mark()
key = mark, method_name, ()
# Fast path: cache hit, and not verbose.
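The wrapper above is the left-recursion variant of pegen's packrat memoization: results are cached per `(mark, method_name, args)` and replayed by resetting the tokenizer position. A condensed, hypothetical sketch of the plain (non-left-recursive) idea, assuming `_mark`, `_reset`, and `_cache` behave as in the `Parser` class below:

```python
from __future__ import annotations

from collections.abc import Callable
from typing import Any, TypeVar

T = TypeVar("T")

def memoize(method: Callable[[Any], T | None]) -> Callable[[Any], T | None]:
    """Hypothetical packrat cache: (position, rule name) -> (result, end position)."""
    name = method.__name__

    def wrapper(self: Any) -> T | None:
        key = (self._mark(), name, ())
        if key in self._cache:
            result, endmark = self._cache[key]
            self._reset(endmark)          # jump to where the cached parse ended
            return result
        result = method(self)
        self._cache[key] = result, self._mark()
        return result

    return wrapper
```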
class Parser:
"""Parsing base class."""
- KEYWORDS: ClassVar[Tuple[str, ...]]
+ KEYWORDS: ClassVar[tuple[str, ...]]
- SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
+ SOFT_KEYWORDS: ClassVar[tuple[str, ...]]
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
self._tokenizer = tokenizer
self._verbose = verbose
self._level = 0
- self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
+ self._cache: dict[tuple[Mark, str, tuple[Any, ...]], tuple[Any, Mark]] = {}
# Integer tracking whether we are in a left recursive rule or not. Can be useful
# for error reporting.
self.in_recursive_rule = 0
return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
@memoize
- def name(self) -> Optional[tokenize.TokenInfo]:
+ def name(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string not in self.KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize
- def number(self) -> Optional[tokenize.TokenInfo]:
+ def number(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NUMBER:
return self._tokenizer.getnext()
return None
@memoize
- def string(self) -> Optional[tokenize.TokenInfo]:
+ def string(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.STRING:
return self._tokenizer.getnext()
return None
@memoize
- def fstring_start(self) -> Optional[tokenize.TokenInfo]:
+ def fstring_start(self) -> tokenize.TokenInfo | None:
FSTRING_START = getattr(token, "FSTRING_START", None)
if not FSTRING_START:
return None
return None
@memoize
- def fstring_middle(self) -> Optional[tokenize.TokenInfo]:
+ def fstring_middle(self) -> tokenize.TokenInfo | None:
FSTRING_MIDDLE = getattr(token, "FSTRING_MIDDLE", None)
if not FSTRING_MIDDLE:
return None
return None
@memoize
- def fstring_end(self) -> Optional[tokenize.TokenInfo]:
+ def fstring_end(self) -> tokenize.TokenInfo | None:
FSTRING_END = getattr(token, "FSTRING_END", None)
if not FSTRING_END:
return None
return None
@memoize
- def op(self) -> Optional[tokenize.TokenInfo]:
+ def op(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.OP:
return self._tokenizer.getnext()
return None
@memoize
- def type_comment(self) -> Optional[tokenize.TokenInfo]:
+ def type_comment(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.TYPE_COMMENT:
return self._tokenizer.getnext()
return None
@memoize
- def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
+ def soft_keyword(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize
- def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
+ def expect(self, type: str) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.string == type:
return self._tokenizer.getnext()
return self._tokenizer.getnext()
return None
- def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
+ def expect_forced(self, res: Any, expectation: str) -> tokenize.TokenInfo | None:
if res is None:
raise self.make_syntax_error(f"expected {expectation}")
return res
return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
-def simple_parser_main(parser_class: Type[Parser]) -> None:
+def simple_parser_main(parser_class: type[Parser]) -> None:
argparser = argparse.ArgumentParser()
argparser.add_argument(
"-v",
endpos = 0
else:
endpos = file.tell()
- except IOError:
+ except OSError:
endpos = 0
finally:
if file is not sys.stdin:
-import sys
import ast
import contextlib
import re
+import sys
from abc import abstractmethod
-from typing import (
- IO,
- AbstractSet,
- Any,
- Dict,
- Iterable,
- Iterator,
- List,
- Optional,
- Set,
- Text,
- Tuple,
- Union,
-)
+from collections.abc import Iterable, Iterator, Set
+from typing import IO, Any
from pegen import sccutils
from pegen.grammar import (
class KeywordCollectorVisitor(GrammarVisitor):
"""Visitor that collects all the keywords and soft keywords in the Grammar"""
- def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
+ def __init__(self, gen: "ParserGenerator", keywords: dict[str, int], soft_keywords: set[str]):
self.generator = gen
self.keywords = keywords
self.soft_keywords = soft_keywords
class RuleCheckingVisitor(GrammarVisitor):
- def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
+ def __init__(self, rules: dict[str, Rule], tokens: set[str]):
self.rules = rules
self.tokens = tokens
# If python < 3.12 add the virtual fstring tokens
class ParserGenerator:
callmakervisitor: GrammarVisitor
- def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
+ def __init__(self, grammar: Grammar, tokens: set[str], file: IO[str] | None):
self.grammar = grammar
self.tokens = tokens
- self.keywords: Dict[str, int] = {}
- self.soft_keywords: Set[str] = set()
+ self.keywords: dict[str, int] = {}
+ self.soft_keywords: set[str] = set()
self.rules = grammar.rules
self.validate_rule_names()
if "trailer" not in grammar.metas and "start" not in self.rules:
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
- self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
- self._local_variable_stack: List[List[str]] = []
+ self.all_rules: dict[str, Rule] = self.rules.copy() # Rules + temporal rules
+ self._local_variable_stack: list[list[str]] = []
def validate_rule_names(self) -> None:
for rule in self.rules:
self._local_variable_stack.pop()
@property
- def local_variable_names(self) -> List[str]:
+ def local_variable_names(self) -> list[str]:
return self._local_variable_stack[-1]
@abstractmethod
keyword_collector.visit(rule)
rule_collector = RuleCollectorVisitor(self.callmakervisitor)
- done: Set[str] = set()
+ done: set[str] = set()
while True:
computed_rules = list(self.all_rules)
todo = [i for i in computed_rules if i not in done]
class NullableVisitor(GrammarVisitor):
- def __init__(self, rules: Dict[str, Rule]) -> None:
+ def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
- self.visited: Set[Any] = set()
- self.nullables: Set[Union[Rule, NamedItem]] = set()
+ self.visited: set[Any] = set()
+ self.nullables: set[Rule | NamedItem] = set()
def visit_Rule(self, rule: Rule) -> bool:
if rule in self.visited:
return not node.value
-def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
+def compute_nullables(rules: dict[str, Rule]) -> set[Any]:
"""Compute which rules in a grammar are nullable.
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
class InitialNamesVisitor(GrammarVisitor):
- def __init__(self, rules: Dict[str, Rule]) -> None:
+ def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.nullables = compute_nullables(rules)
- def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
- names: Set[str] = set()
+ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> set[Any]:
+ names: set[str] = set()
for value in node:
if isinstance(value, list):
for item in value:
names |= self.visit(value, *args, **kwargs)
return names
- def visit_Alt(self, alt: Alt) -> Set[Any]:
- names: Set[str] = set()
+ def visit_Alt(self, alt: Alt) -> set[Any]:
+ names: set[str] = set()
for item in alt.items:
names |= self.visit(item)
if item not in self.nullables:
break
return names
- def visit_Forced(self, force: Forced) -> Set[Any]:
+ def visit_Forced(self, force: Forced) -> set[Any]:
return set()
- def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
+ def visit_LookAhead(self, lookahead: Lookahead) -> set[Any]:
return set()
- def visit_Cut(self, cut: Cut) -> Set[Any]:
+ def visit_Cut(self, cut: Cut) -> set[Any]:
return set()
- def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
+ def visit_NameLeaf(self, node: NameLeaf) -> set[Any]:
return {node.value}
- def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
+ def visit_StringLeaf(self, node: StringLeaf) -> set[Any]:
return set()
def compute_left_recursives(
- rules: Dict[str, Rule]
-) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
+ rules: dict[str, Rule]
+) -> tuple[dict[str, Set[str]], list[Set[str]]]:
graph = make_first_graph(rules)
sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
for scc in sccs:
return graph, sccs
-def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
+def make_first_graph(rules: dict[str, Rule]) -> dict[str, Set[str]]:
"""Compute the graph of left-invocations.
There's an edge from A to B if A may invoke B at its initial
"""
initial_name_visitor = InitialNamesVisitor(rules)
graph = {}
- vertices: Set[str] = set()
+ vertices: set[str] = set()
for rulename, rhs in rules.items():
graph[rulename] = names = initial_name_visitor.visit(rhs)
vertices |= names
import os.path
import token
-from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple
+from collections.abc import Callable, Sequence
+from typing import IO, Any
from pegen import grammar
from pegen.grammar import (
def visit_Opt(self, node: Opt) -> bool:
return self.visit(node.node)
- def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]:
+ def visit_Repeat(self, node: Repeat0) -> tuple[str, str]:
return self.visit(node.node)
- def visit_Gather(self, node: Gather) -> Tuple[str, str]:
+ def visit_Gather(self, node: Gather) -> tuple[str, str]:
return self.visit(node.node)
def visit_Group(self, node: Group) -> bool:
class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
- self.cache: Dict[str, Tuple[str, str]] = {}
+ self.cache: dict[str, tuple[str, str]] = {}
- def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
+ def visit_NameLeaf(self, node: NameLeaf) -> tuple[str | None, str]:
name = node.value
if name == "SOFT_KEYWORD":
return "soft_keyword", "self.soft_keyword()"
return "_" + name.lower(), f"self.expect({name!r})"
return name, f"self.{name}()"
- def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
+ def visit_StringLeaf(self, node: StringLeaf) -> tuple[str, str]:
return "literal", f"self.expect({node.value})"
- def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
+ def visit_NamedItem(self, node: NamedItem) -> tuple[str | None, str]:
name, call = self.visit(node.item)
if node.name:
name = node.name
return name, call
- def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
+ def lookahead_call_helper(self, node: Lookahead) -> tuple[str, str]:
name, call = self.visit(node.node)
head, tail = call.split("(", 1)
assert tail[-1] == ")"
tail = tail[:-1]
return head, tail
- def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
+ def visit_PositiveLookahead(self, node: PositiveLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node)
return None, f"self.positive_lookahead({head}, {tail})"
- def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
+ def visit_NegativeLookahead(self, node: NegativeLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node)
return None, f"self.negative_lookahead({head}, {tail})"
- def visit_Opt(self, node: Opt) -> Tuple[str, str]:
+ def visit_Opt(self, node: Opt) -> tuple[str, str]:
name, call = self.visit(node.node)
# Note trailing comma (the call may already have one comma
# at the end, for example when rules have both repeat0 and optional
prefix: str,
call_by_name_func: Callable[[str], str],
rule_generation_func: Callable[[], str],
- ) -> Tuple[str, str]:
+ ) -> tuple[str, str]:
node_str = f"{node}"
key = f"{prefix}_{node_str}"
if key in self.cache:
self.cache[key] = name, call
return self.cache[key]
- def visit_Rhs(self, node: Rhs) -> Tuple[str, str]:
+ def visit_Rhs(self, node: Rhs) -> tuple[str, str]:
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
return self.visit(node.alts[0].items[0])
lambda: self.gen.artificial_rule_from_rhs(node),
)
- def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
+ def visit_Repeat0(self, node: Repeat0) -> tuple[str, str]:
return self._generate_artificial_rule_call(
node,
"repeat0",
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False),
)
- def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
+ def visit_Repeat1(self, node: Repeat1) -> tuple[str, str]:
return self._generate_artificial_rule_call(
node,
"repeat1",
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True),
)
- def visit_Gather(self, node: Gather) -> Tuple[str, str]:
+ def visit_Gather(self, node: Gather) -> tuple[str, str]:
return self._generate_artificial_rule_call(
node,
"gather",
lambda: self.gen.artificial_rule_from_gather(node),
)
- def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
+ def visit_Group(self, node: Group) -> tuple[str | None, str]:
return self.visit(node.rhs)
- def visit_Cut(self, node: Cut) -> Tuple[str, str]:
+ def visit_Cut(self, node: Cut) -> tuple[str, str]:
return "cut", "True"
- def visit_Forced(self, node: Forced) -> Tuple[str, str]:
+ def visit_Forced(self, node: Forced) -> tuple[str, str]:
if isinstance(node.node, Group):
_, val = self.visit(node.node.rhs)
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
def __init__(
self,
grammar: grammar.Grammar,
- file: Optional[IO[Text]],
- tokens: Set[str] = set(token.tok_name.values()),
- location_formatting: Optional[str] = None,
- unreachable_formatting: Optional[str] = None,
+ file: IO[str] | None,
+ tokens: set[str] = set(token.tok_name.values()),
+ location_formatting: str | None = None,
+ unreachable_formatting: str | None = None,
):
tokens.add("SOFT_KEYWORD")
super().__init__(grammar, tokens, file)
if is_loop:
self.print(f"children.append({action})")
- self.print(f"mark = self._mark()")
+ self.print("mark = self._mark()")
else:
if "UNREACHABLE" in action:
action = action.replace("UNREACHABLE", self.unreachable_formatting)
# Adapted from mypy (mypy/build.py) under the MIT license.
-from typing import *
+from collections.abc import Iterable, Iterator, Set
def strongly_connected_components(
- vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]]
-) -> Iterator[AbstractSet[str]]:
+ vertices: Set[str], edges: dict[str, Set[str]]
+) -> Iterator[Set[str]]:
"""Compute Strongly Connected Components of a directed graph.
Args:
From https://code.activestate.com/recipes/578507-strongly-connected-components-of-a-directed-graph/.
"""
- identified: Set[str] = set()
- stack: List[str] = []
- index: Dict[str, int] = {}
- boundaries: List[int] = []
+ identified: set[str] = set()
+ stack: list[str] = []
+ index: dict[str, int] = {}
+ boundaries: list[int] = []
- def dfs(v: str) -> Iterator[Set[str]]:
+ def dfs(v: str) -> Iterator[set[str]]:
index[v] = len(stack)
stack.append(v)
boundaries.append(index[v])
def topsort(
- data: Dict[AbstractSet[str], Set[AbstractSet[str]]]
-) -> Iterable[AbstractSet[AbstractSet[str]]]:
+ data: dict[Set[str], set[Set[str]]]
+) -> Iterable[Set[Set[str]]]:
"""Topological sort.
Args:
break
yield ready
data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
- assert not data, "A cyclic dependency exists amongst %r" % data
+ assert not data, f"A cyclic dependency exists amongst {data}"
def find_cycles_in_scc(
- graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str
-) -> Iterable[List[str]]:
+ graph: dict[str, Set[str]], scc: Set[str], start: str
+) -> Iterable[list[str]]:
"""Find cycles in SCC emanating from start.
Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
assert start in graph
# Recursive helper that yields cycles.
- def dfs(node: str, path: List[str]) -> Iterator[List[str]]:
+ def dfs(node: str, path: list[str]) -> Iterator[list[str]]:
if node in path:
yield path + [node]
return
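The docstrings above describe the graph utilities adapted from mypy; a small usage sketch, assuming this hunk belongs to `pegen/sccutils.py` so the function is importable as shown:

```python
from pegen.sccutils import strongly_connected_components

# Toy left-invocation graph: a and b invoke each other, c stands alone.
edges = {"a": {"b"}, "b": {"a"}, "c": set()}

for scc in strongly_connected_components(edges.keys(), edges):
    print(sorted(scc))
# Expect one SCC containing both "a" and "b", and a singleton SCC for "c".
```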
import textwrap
import token
import tokenize
-from typing import IO, Any, Dict, Final, Optional, Type, cast
+from typing import IO, Any, Final, cast
from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
}
-def generate_parser(grammar: Grammar) -> Type[Parser]:
+def generate_parser(grammar: Grammar) -> type[Parser]:
# Generate a parser.
out = io.StringIO()
genr = PythonParserGenerator(grammar, out)
genr.generate("<string>")
# Load the generated parser class.
- ns: Dict[str, Any] = {}
+ ns: dict[str, Any] = {}
exec(out.getvalue(), ns)
return ns["GeneratedParser"]
-def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
+def run_parser(file: IO[bytes], parser_class: type[Parser], *, verbose: bool = False) -> Any:
# Run a parser on a file (stream).
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore[arg-type] # typeshed issue #3515
parser = parser_class(tokenizer, verbose=verbose)
def parse_string(
- source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
+ source: str, parser_class: type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
# Run the parser on a string.
if dedent:
return run_parser(file, parser_class, verbose=verbose) # type: ignore[arg-type] # typeshed issue #3515
-def make_parser(source: str) -> Type[Parser]:
+def make_parser(source: str) -> type[Parser]:
# Combine parse_string() and generate_parser().
grammar = parse_string(source, GrammarParser)
return generate_parser(grammar)
grammar: Grammar,
path: pathlib.PurePath,
debug: bool = False,
- library_dir: Optional[str] = None,
+ library_dir: str | None = None,
) -> Any:
"""Generate a parser c extension for the given grammar in the given path
import token
import tokenize
-from typing import Dict, Iterator, List
+from collections.abc import Iterator
Mark = int # NewType('Mark', int)
def shorttok(tok: tokenize.TokenInfo) -> str:
- return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
+ formatted = (
+ f"{tok.start[0]}.{tok.start[1]}: "
+ f"{token.tok_name[tok.type]}:{tok.string!r}"
+ )
+ return f"{formatted:<25.25}"
class Tokenizer:
This is pretty tied to Python's syntax.
"""
- _tokens: List[tokenize.TokenInfo]
+ _tokens: list[tokenize.TokenInfo]
def __init__(
self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
self._tokens = []
self._index = 0
self._verbose = verbose
- self._lines: Dict[int, str] = {}
+ self._lines: dict[int, str] = {}
self._path = path
if verbose:
self.report(False, False)
break
return tok
- def get_lines(self, line_numbers: List[int]) -> List[str]:
+ def get_lines(self, line_numbers: list[int]) -> list[str]:
"""Retrieve source lines corresponding to line numbers."""
if self._lines:
lines = self._lines
-from typing import Optional
-
from pegen import grammar
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar) -> None:
self.grammar = grammar
- self.rulename: Optional[str] = None
+ self.rulename: str | None = None
def validate_rule(self, rulename: str, node: Rule) -> None:
self.rulename = rulename