gh-131178: Add tests for `tokenize` command-line interface (#131274)
author     Semyon Moroz <donbarbos@proton.me>
           Thu, 27 Mar 2025 16:04:16 +0000 (20:04 +0400)
committer  GitHub <noreply@github.com>
           Thu, 27 Mar 2025 16:04:16 +0000 (18:04 +0200)
Lib/test/test_tokenize.py
Lib/tokenize.py

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 5fa4e0d922ed0805e903e4736b5de6c96c6fa333..df2617c680b5e59e515a269e150f33e1bfd0bb19 100644
@@ -1,5 +1,7 @@
+import contextlib
 import os
 import re
+import tempfile
 import token
 import tokenize
 import unittest
@@ -3178,5 +3180,83 @@ class CTokenizerBufferTests(unittest.TestCase):
             run_test_script(file_name)
 
 
+class CommandLineTest(unittest.TestCase):
+    def setUp(self):
+        self.filename = tempfile.mktemp()
+        self.addCleanup(os_helper.unlink, self.filename)
+
+    @staticmethod
+    def text_normalize(string):
+        """Dedent *string* and strip it from its surrounding whitespaces.
+
+        This method is used by the other utility functions so that any
+        string to write or to match against can be freely indented.
+        """
+        return re.sub(r'\s+', ' ', string).strip()
+
+    def set_source(self, content):
+        with open(self.filename, 'w') as fp:
+            fp.write(content)
+
+    def invoke_tokenize(self, *flags):
+        output = StringIO()
+        with contextlib.redirect_stdout(output):
+            tokenize._main(args=[*flags, self.filename])
+        return self.text_normalize(output.getvalue())
+
+    def check_output(self, source, expect, *flags):
+        with self.subTest(source=source, flags=flags):
+            self.set_source(source)
+            res = self.invoke_tokenize(*flags)
+            expect = self.text_normalize(expect)
+            self.assertListEqual(res.splitlines(), expect.splitlines())
+
+    def test_invocation(self):
+        # test various combinations of parameters
+        base_flags = ('-e', '--exact')
+
+        self.set_source('''
+            def f():
+                print(x)
+                return None
+        ''')
+
+        for flag in base_flags:
+            with self.subTest(args=flag):
+                _ = self.invoke_tokenize(flag)
+
+        with self.assertRaises(SystemExit):
+            # suppress argparse error message
+            with contextlib.redirect_stderr(StringIO()):
+                _ = self.invoke_tokenize('--unknown')
+
+    def test_without_flag(self):
+        # test 'python -m tokenize source.py'
+        source = 'a = 1'
+        expect = '''
+            0,0-0,0:            ENCODING       'utf-8'
+            1,0-1,1:            NAME           'a'
+            1,2-1,3:            OP             '='
+            1,4-1,5:            NUMBER         '1'
+            1,5-1,6:            NEWLINE        ''
+            2,0-2,0:            ENDMARKER      ''
+        '''
+        self.check_output(source, expect)
+
+    def test_exact_flag(self):
+        # test 'python -m tokenize -e/--exact source.py'
+        source = 'a = 1'
+        expect = '''
+            0,0-0,0:            ENCODING       'utf-8'
+            1,0-1,1:            NAME           'a'
+            1,2-1,3:            EQUAL          '='
+            1,4-1,5:            NUMBER         '1'
+            1,5-1,6:            NEWLINE        ''
+            2,0-2,0:            ENDMARKER      ''
+        '''
+        for flag in ['-e', '--exact']:
+            self.check_output(source, expect, flag)
+
+
 if __name__ == "__main__":
     unittest.main()
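
The CommandLineTest helpers above boil down to one technique: call the new
`_main(args=...)` entry point directly and capture stdout, instead of spawning
a subprocess. A minimal sketch of the same pattern outside the test suite
(assuming this commit is applied; `example.py` is a hypothetical file name):

    import contextlib
    import tokenize
    from io import StringIO

    # Tokenize 'example.py' with exact operator names (-e/--exact),
    # capturing what the CLI would have printed to stdout.
    output = StringIO()
    with contextlib.redirect_stdout(output):
        tokenize._main(args=['--exact', 'example.py'])
    print(output.getvalue())

Note that `_main` is a private helper; the supported user-facing interface
remains `python -m tokenize`.
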
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 9ce95a62d961ba508209ba5e0a93f6bfa722b412..7afacff7381f1ce482a19bef7441280d42b3fb61 100644
@@ -499,7 +499,7 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
 
-def main():
+def _main(args=None):
     import argparse
 
     # Helper error handling routines
@@ -524,7 +524,7 @@ def main():
                         help='the file to tokenize; defaults to stdin')
     parser.add_argument('-e', '--exact', dest='exact', action='store_true',
                         help='display token names using the exact type')
-    args = parser.parse_args()
+    args = parser.parse_args(args)
 
     try:
         # Tokenize the input
@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False)
 
 
 if __name__ == "__main__":
-    main()
+    _main()
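
For reference, the behaviour these tests pin down looks like this from a
shell (a sketch with a hypothetical `example.py`; `printf` is used so the
file has no trailing newline, matching the test sources, and the expected
lines are copied from `test_exact_flag` above, where whitespace is
normalized before comparison, so the column alignment is cosmetic). Without
`-e`, the `=` token is reported with the generic `OP` type; with
`-e/--exact` it is reported as `EQUAL`:

    $ printf 'a = 1' > example.py
    $ python -m tokenize --exact example.py
    0,0-0,0:            ENCODING       'utf-8'
    1,0-1,1:            NAME           'a'
    1,2-1,3:            EQUAL          '='
    1,4-1,5:            NUMBER         '1'
    1,5-1,6:            NEWLINE        ''
    2,0-2,0:            ENDMARKER      ''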