bpo-40939: Generate keyword.py using the new parser (GH-20800)

author Lysandros Nikolaou <lisandrosnik@gmail.com>

Thu, 11 Jun 2020 12:45:15 +0000 (15:45 +0300)

committer GitHub <noreply@github.com>

Thu, 11 Jun 2020 12:45:15 +0000 (13:45 +0100)
author Lysandros Nikolaou <lisandrosnik@gmail.com>
Thu, 11 Jun 2020 12:45:15 +0000 (15:45 +0300)
committer GitHub <noreply@github.com>
Thu, 11 Jun 2020 12:45:15 +0000 (13:45 +0100)
diff --git a/Lib/keyword.py b/Lib/keyword.py

index ddcbb25d3d3f585de56088850cddafbcd527709c..afc3db3942ccbc15317e5f940089d5080d28b68c 100644 (file)
--- a/Lib/keyword.py
+++ b/Lib/keyword.py
@@ -1,13 +1,14 @@
-"""Keywords (from "Grammar/Grammar")
+"""Keywords (from "Grammar/python.gram")
  
  This file is automatically generated; please don't muck it up!
  
  To update the symbols in this file, 'cd' to the top directory of
  the python source tree and run:
  
-    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
-                                      Grammar/Tokens \
-                                      Lib/keyword.py
+    PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \
+        Grammar/python.gram \
+        Grammar/Tokens \
+        Lib/keyword.py
  
  Alternatively, you can run 'make regen-keyword'.
  """
@@ -18,6 +19,7 @@ kwlist = [
      'False',
      'None',
      'True',
+    '__new_parser__',
      'and',
      'as',
      'assert',
diff --git a/Lib/pydoc.py b/Lib/pydoc.py

index 628f9fc7d1d1efd3938adf39fca29cc316e6e2ab..a5368bf8bfe551b09d1c513c39427235858e0cba 100755 (executable)
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -1817,6 +1817,7 @@ class Helper:
          'False': '',
          'None': '',
          'True': '',
+        '__new_parser__': '',
          'and': 'BOOLEAN',
          'as': 'with',
          'assert': ('assert', ''),
diff --git a/Makefile.pre.in b/Makefile.pre.in

index 7c16d2905fbf4ebe7db0204e5010d7a0c4f47a1e..9a82729aa0f210b34b691809bfc0203530b80793 100644 (file)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -895,9 +895,10 @@ regen-token:
  
  .PHONY: regen-keyword
  regen-keyword:
-       # Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
-       # using Parser/pgen
-       PYTHONPATH=$(srcdir) $(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
+       # Regenerate Lib/keyword.py from Grammar/python.gram and Grammar/Tokens
+       # using Tools/peg_generator/pegen
+       PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen.keywordgen \
+               $(srcdir)/Grammar/python.gram \
                 $(srcdir)/Grammar/Tokens \
                 $(srcdir)/Lib/keyword.py.new
         $(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
diff --git a/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst b/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst

new file mode 100644 (file)

index 0000000..0e83112
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst
@@ -0,0 +1 @@
+Use the new PEG parser when generating the stdlib :mod:`keyword` module.
+\ No newline at end of file
diff --git a/PCbuild/regen.vcxproj b/PCbuild/regen.vcxproj

index d46fb997dbd79c4cdad69e3767cbf48489f6d39c..564a4dd71188ca45ae549c1b1c8ac49df7a9aae0 100644 (file)
--- a/PCbuild/regen.vcxproj
+++ b/PCbuild/regen.vcxproj
@@ -205,8 +205,9 @@
      <Exec Command="&quot;$(PythonExe)&quot; $(PySourcePath)Tools\scripts\generate_token.py py &quot;$(PySourcePath)Grammar\Tokens&quot; &quot;$(PySourcePath)Lib\token.py&quot;" />
    </Target>
    <Target Name="_RegenKeywords" AfterTargets="_RegenTokens">
-    <!-- Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens using Parser/pgen-->
-    <Exec Command="&quot;$(PythonExe)&quot; -m Parser.pgen.keywordgen &quot;$(PySourcePath)Grammar\Grammar&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; &quot;$(IntDir)keyword.py&quot;" />
+    <!-- Regenerate Lib/keyword.py from Grammar/python.gram and Grammar/Tokens using Tools/peg_generator/pegen-->
+    <SetEnv Name="PYTHONPATH" Prefix="true" Value="$(PySourcePath)Tools\peg_generator\" />
+    <Exec Command="&quot;$(PythonExe)&quot; -m pegen.keywordgen &quot;$(PySourcePath)Grammar\python.gram&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; &quot;$(IntDir)keyword.py&quot;" />
      <Copy SourceFiles="$(IntDir)keyword.py" DestinationFiles="$(PySourcePath)Lib\keyword.py">
        <Output TaskParameter="CopiedFiles" ItemName="_Updated" />
      </Copy>
diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py

new file mode 100644 (file)

index 0000000..279c34b
--- /dev/null
+++ b/Tools/peg_generator/pegen/keywordgen.py
@@ -0,0 +1,73 @@
+"""Generate Lib/keyword.py from the Grammar and Tokens files using pegen"""
+
+import argparse
+
+from .build import build_parser, generate_token_definitions
+from .c_generator import CParserGenerator
+
+TEMPLATE = r'''
+"""Keywords (from "Grammar/python.gram")
+
+This file is automatically generated; please don't muck it up!
+
+To update the symbols in this file, 'cd' to the top directory of
+the python source tree and run:
+
+    PYTHONPATH=Tools/peg_generator python3 -m pegen.keywordgen \
+        Grammar/python.gram \
+        Grammar/Tokens \
+        Lib/keyword.py
+
+Alternatively, you can run 'make regen-keyword'.
+"""
+
+__all__ = ["iskeyword", "kwlist"]
+
+kwlist = [
+    {keywords}
+]
+
+iskeyword = frozenset(kwlist).__contains__
+'''.lstrip()
+
+EXTRA_KEYWORDS = ["async", "await"]
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate the Lib/keyword.py file from the grammar."
+    )
+    parser.add_argument(
+        "grammar", type=str, help="The file with the grammar definition in PEG format"
+    )
+    parser.add_argument(
+        "tokens_file",
+        type=argparse.FileType("r"),
+        help="The file with the token definitions"
+    )
+    parser.add_argument(
+        "keyword_file",
+        type=argparse.FileType("w"),
+        help="The path to write the keyword definitions",
+    )
+    args = parser.parse_args()
+
+    grammar, _, _ = build_parser(args.grammar)
+    with args.tokens_file as tok_file:
+        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
+    gen: ParserGenerator = CParserGenerator(
+        grammar, all_tokens, exact_tok, non_exact_tok, file=None
+    )
+    gen.collect_todo()
+
+    with args.keyword_file as thefile:
+        all_keywords = sorted(
+            list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS
+        )
+
+        keywords = ",\n    ".join(map(repr, all_keywords))
+        thefile.write(TEMPLATE.format(keywords=keywords))
+
+
+if __name__ == "__main__":
+    main()
author	Lysandros Nikolaou <lisandrosnik@gmail.com>
	Thu, 11 Jun 2020 12:45:15 +0000 (15:45 +0300)
committer	GitHub <noreply@github.com>
	Thu, 11 Jun 2020 12:45:15 +0000 (13:45 +0100)
Lib/keyword.py		patch \| blob \| blame \| history
Lib/pydoc.py		patch \| blob \| blame \| history
Makefile.pre.in		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2020-06-11-11-07-10.bpo-40939.-D5Asl.rst	[new file with mode: 0644]	patch \| blob
PCbuild/regen.vcxproj		patch \| blob \| blame \| history
Tools/peg_generator/pegen/keywordgen.py	[new file with mode: 0644]	patch \| blob