[3.12] gh-104400: Add more tests to pygettext (GH-108173) (GH-126362)

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Sun, 3 Nov 2024 14:24:41 +0000 (15:24 +0100)

committer GitHub <noreply@github.com>

Sun, 3 Nov 2024 14:24:41 +0000 (14:24 +0000)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Sun, 3 Nov 2024 14:24:41 +0000 (15:24 +0100)
committer GitHub <noreply@github.com>
Sun, 3 Nov 2024 14:24:41 +0000 (14:24 +0000)
diff --git a/Lib/test/test_tools/i18n_data/docstrings.pot b/Lib/test/test_tools/i18n_data/docstrings.pot

new file mode 100644 (file)

index 0000000..5af1d41
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/docstrings.pot
@@ -0,0 +1,40 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: docstrings.py:7
+#, docstring
+msgid ""
+msgstr ""
+
+#: docstrings.py:18
+#, docstring
+msgid ""
+"multiline\n"
+"    docstring\n"
+"    "
+msgstr ""
+
+#: docstrings.py:25
+#, docstring
+msgid "docstring1"
+msgstr ""
+
+#: docstrings.py:30
+#, docstring
+msgid "Hello, {}!"
+msgstr ""
+
diff --git a/Lib/test/test_tools/i18n_data/docstrings.py b/Lib/test/test_tools/i18n_data/docstrings.py

new file mode 100644 (file)

index 0000000..85d7f15
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/docstrings.py
@@ -0,0 +1,41 @@
+# Test docstring extraction
+from gettext import gettext as _
+
+
+# Empty docstring
+def test(x):
+    """"""
+
+
+# Leading empty line
+def test2(x):
+
+    """docstring"""  # XXX This should be extracted but isn't.
+
+
+# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`.
+def test3(x):
+    """multiline
+    docstring
+    """
+
+
+# Multiple docstrings - only the first should be extracted
+def test4(x):
+    """docstring1"""
+    """docstring2"""
+
+
+def test5(x):
+    """Hello, {}!""".format("world!")  # XXX This should not be extracted.
+
+
+# Nested docstrings
+def test6(x):
+    def inner(y):
+        """nested docstring"""  # XXX This should be extracted but isn't.
+
+
+class Outer:
+    class Inner:
+        "nested class docstring"  # XXX This should be extracted but isn't.
diff --git a/Lib/test/test_tools/i18n_data/fileloc.pot b/Lib/test/test_tools/i18n_data/fileloc.pot

new file mode 100644 (file)

index 0000000..dbd2868
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/fileloc.pot
@@ -0,0 +1,35 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: fileloc.py:5 fileloc.py:6
+msgid "foo"
+msgstr ""
+
+#: fileloc.py:9
+msgid "bar"
+msgstr ""
+
+#: fileloc.py:14 fileloc.py:18
+#, docstring
+msgid "docstring"
+msgstr ""
+
+#: fileloc.py:22 fileloc.py:26
+#, docstring
+msgid "baz"
+msgstr ""
+
diff --git a/Lib/test/test_tools/i18n_data/fileloc.py b/Lib/test/test_tools/i18n_data/fileloc.py

new file mode 100644 (file)

index 0000000..c5d4d05
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/fileloc.py
@@ -0,0 +1,26 @@
+# Test file locations
+from gettext import gettext as _
+
+# Duplicate strings
+_('foo')
+_('foo')
+
+# Duplicate strings on the same line should only add one location to the output
+_('bar'), _('bar')
+
+
+# Duplicate docstrings
+class A:
+    """docstring"""
+
+
+def f():
+    """docstring"""
+
+
+# Duplicate message and docstring
+_('baz')
+
+
+def g():
+    """baz"""
diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot

new file mode 100644 (file)

index 0000000..ddfbd18
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/messages.pot
@@ -0,0 +1,67 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: messages.py:5
+msgid ""
+msgstr ""
+
+#: messages.py:8 messages.py:9
+msgid "parentheses"
+msgstr ""
+
+#: messages.py:12
+msgid "Hello, world!"
+msgstr ""
+
+#: messages.py:15
+msgid ""
+"Hello,\n"
+"    multiline!\n"
+msgstr ""
+
+#: messages.py:29
+msgid "Hello, {}!"
+msgstr ""
+
+#: messages.py:33
+msgid "1"
+msgstr ""
+
+#: messages.py:33
+msgid "2"
+msgstr ""
+
+#: messages.py:34 messages.py:35
+msgid "A"
+msgstr ""
+
+#: messages.py:34 messages.py:35
+msgid "B"
+msgstr ""
+
+#: messages.py:36
+msgid "set"
+msgstr ""
+
+#: messages.py:42
+msgid "nested string"
+msgstr ""
+
+#: messages.py:47
+msgid "baz"
+msgstr ""
+
diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py

new file mode 100644 (file)

index 0000000..f220294
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/messages.py
@@ -0,0 +1,64 @@
+# Test message extraction
+from gettext import gettext as _
+
+# Empty string
+_("")
+
+# Extra parentheses
+(_("parentheses"))
+((_("parentheses")))
+
+# Multiline strings
+_("Hello, "
+  "world!")
+
+_("""Hello,
+    multiline!
+""")
+
+# Invalid arguments
+_()
+_(None)
+_(1)
+_(False)
+_(x="kwargs are not allowed")
+_("foo", "bar")
+_("something", x="something else")
+
+# .format()
+_("Hello, {}!").format("world")  # valid
+_("Hello, {}!".format("world"))  # invalid
+
+# Nested structures
+_("1"), _("2")
+arr = [_("A"), _("B")]
+obj = {'a': _("A"), 'b': _("B")}
+{{{_('set')}}}
+
+
+# Nested functions and classes
+def test():
+    _("nested string")  # XXX This should be extracted but isn't.
+    [_("nested string")]
+
+
+class Foo:
+    def bar(self):
+        return _("baz")
+
+
+def bar(x=_('default value')):  # XXX This should be extracted but isn't.
+    pass
+
+
+def baz(x=[_('default value')]):  # XXX This should be extracted but isn't.
+    pass
+
+
+# Shadowing _()
+def _(x):
+    pass
+
+
+def _(x="don't extract me"):
+    pass
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py

index c083a04475e726cb43940b94368de9760f053ac7..21dead8f943bb7d2a2208e26805607e6a7b63b9b 100644 (file)
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -1,9 +1,11 @@
  """Tests to cover the Tools/i18n package"""
  
  import os
+import re
  import sys
  import unittest
  from textwrap import dedent
+from pathlib import Path
  
  from test.support.script_helper import assert_python_ok
  from test.test_tools import skip_if_missing, toolsdir
@@ -12,20 +14,47 @@ from test.support.os_helper import temp_cwd, temp_dir
  
  skip_if_missing()
  
+DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'
+
+
+def normalize_POT_file(pot):
+    """Return the POT text *pot* with its creation date, charset and
+    '#:' location path separators normalized, to ease comparison.
+    Both the argument and the returned value are str.
+    """
+    # Pin the creation date; r'\\n' is a literal backslash + 'n', not a newline.
+    date_pattern = re.compile(r'"POT-Creation-Date: .+?\\n"')
+    header = r'"POT-Creation-Date: 2000-01-01 00:00+0000\\n"'
+    pot = re.sub(date_pattern, header, pot)
+
+    # Normalize charset to UTF-8 (currently there's no way to specify the output charset).
+    charset_pattern = re.compile(r'"Content-Type: text/plain; charset=.+?\\n"')
+    charset = r'"Content-Type: text/plain; charset=UTF-8\\n"'
+    pot = re.sub(charset_pattern, charset, pot)
+
+    # Normalize file location path separators in case this test is
+    # running on Windows (which uses '\'); only '#:' lines are rewritten.
+    fileloc_pattern = re.compile(r'#:.+')
+
+    def replace(match):  # match[0] is the '#:' marker plus the rest of that line
+        return match[0].replace(os.sep, "/")
+    pot = re.sub(fileloc_pattern, replace, pot)
+    return pot
+
  
  class Test_pygettext(unittest.TestCase):
      """Tests for the pygettext.py tool"""
  
-    script = os.path.join(toolsdir,'i18n', 'pygettext.py')
+    script = Path(toolsdir, 'i18n', 'pygettext.py')
  
      def get_header(self, data):
          """ utility: return the header of a .po file as a dictionary """
          headers = {}
          for line in data.split('\n'):
-            if not line or line.startswith(('#', 'msgid','msgstr')):
+            if not line or line.startswith(('#', 'msgid', 'msgstr')):
                  continue
              line = line.strip('"')
-            key, val = line.split(':',1)
+            key, val = line.split(':', 1)
              headers[key] = val.strip()
          return headers
  
@@ -53,13 +82,18 @@ class Test_pygettext(unittest.TestCase):
  
          return msgids
  
+    def assert_POT_equal(self, expected, actual):
+        """Assert that two POT file texts are equal once both are normalized."""
+        self.maxDiff = None  # do not truncate the diff reported on failure
+        self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
+
      def extract_docstrings_from_str(self, module_content):
          """ utility: return all msgids extracted from module_content """
          filename = 'test_docstrings.py'
          with temp_cwd(None) as cwd:
              with open(filename, 'w', encoding='utf-8') as fp:
                  fp.write(module_content)
-            assert_python_ok(self.script, '-D', filename)
+            assert_python_ok('-Xutf8', self.script, '-D', filename)
              with open('messages.pot', encoding='utf-8') as fp:
                  data = fp.read()
          return self.get_msgids(data)
@@ -69,7 +103,7 @@ class Test_pygettext(unittest.TestCase):
             http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
          """
          with temp_cwd(None) as cwd:
-            assert_python_ok(self.script)
+            assert_python_ok('-Xutf8', self.script)
              with open('messages.pot', encoding='utf-8') as fp:
                  data = fp.read()
              header = self.get_header(data)
@@ -96,7 +130,7 @@ class Test_pygettext(unittest.TestCase):
          """ Match the date format from xgettext for POT-Creation-Date """
          from datetime import datetime
          with temp_cwd(None) as cwd:
-            assert_python_ok(self.script)
+            assert_python_ok('-Xutf8', self.script)
              with open('messages.pot', encoding='utf-8') as fp:
                  data = fp.read()
              header = self.get_header(data)
@@ -310,6 +344,20 @@ class Test_pygettext(unittest.TestCase):
          self.assertNotIn('foo', msgids)
          self.assertIn('bar', msgids)
  
+    def test_pygettext_output(self):
+        """Run pygettext on each i18n_data/*.py input and compare with its .pot snapshot."""
+        for input_file in DATA_DIR.glob('*.py'):
+            output_file = input_file.with_suffix('.pot')
+            with self.subTest(input_file=f'i18n_data/{input_file.name}'):
+                contents = input_file.read_text(encoding='utf-8')
+                with temp_cwd(None):  # messages.pot is read back from the cwd below
+                    Path(input_file.name).write_text(contents, encoding='utf-8')
+                    assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
+                    output = Path('messages.pot').read_text(encoding='utf-8')
+
+                expected = output_file.read_text(encoding='utf-8')
+                self.assert_POT_equal(expected, output)
+
      def test_files_list(self):
          """Make sure the directories are inspected for source files
             bpo-31920
@@ -318,21 +366,41 @@ class Test_pygettext(unittest.TestCase):
          text2 = 'Text to translate2'
          text3 = 'Text to ignore'
          with temp_cwd(None), temp_dir(None) as sdir:
-            os.mkdir(os.path.join(sdir, 'pypkg'))
-            with open(os.path.join(sdir, 'pypkg', 'pymod.py'), 'w',
-                      encoding='utf-8') as sfile:
-                sfile.write(f'_({text1!r})')
-            os.mkdir(os.path.join(sdir, 'pkg.py'))
-            with open(os.path.join(sdir, 'pkg.py', 'pymod2.py'), 'w',
-                      encoding='utf-8') as sfile:
-                sfile.write(f'_({text2!r})')
-            os.mkdir(os.path.join(sdir, 'CVS'))
-            with open(os.path.join(sdir, 'CVS', 'pymod3.py'), 'w',
-                      encoding='utf-8') as sfile:
-                sfile.write(f'_({text3!r})')
-            assert_python_ok(self.script, sdir)
-            with open('messages.pot', encoding='utf-8') as fp:
-                data = fp.read()
+            pymod = Path(sdir, 'pypkg', 'pymod.py')
+            pymod.parent.mkdir()
+            pymod.write_text(f'_({text1!r})', encoding='utf-8')
+
+            pymod2 = Path(sdir, 'pkg.py', 'pymod2.py')
+            pymod2.parent.mkdir()
+            pymod2.write_text(f'_({text2!r})', encoding='utf-8')
+
+            pymod3 = Path(sdir, 'CVS', 'pymod3.py')
+            pymod3.parent.mkdir()
+            pymod3.write_text(f'_({text3!r})', encoding='utf-8')
+
+            assert_python_ok('-Xutf8', self.script, sdir)
+            data = Path('messages.pot').read_text(encoding='utf-8')
              self.assertIn(f'msgid "{text1}"', data)
              self.assertIn(f'msgid "{text2}"', data)
              self.assertNotIn(text3, data)
+
+
+def update_POT_snapshots():  # rewrite each i18n_data/*.pot from its sibling *.py
+    for input_file in DATA_DIR.glob('*.py'):
+        output_file = input_file.with_suffix('.pot')
+        contents = input_file.read_bytes()  # copied byte-for-byte, no re-encoding
+        with temp_cwd(None):
+            Path(input_file.name).write_bytes(contents)
+            assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name)
+            output = Path('messages.pot').read_text(encoding='utf-8')
+        # Store the normalized text (fixed date, UTF-8 charset, '/' separators).
+        output = normalize_POT_file(output)
+        output_file.write_text(output, encoding='utf-8')
+
+
+if __name__ == '__main__':
+    # Pass --snapshot-update to regenerate the POT snapshots instead of running the tests.
+    if len(sys.argv) > 1 and sys.argv[1] == '--snapshot-update':
+        update_POT_snapshots()
+        sys.exit(0)
+    unittest.main()
diff --git a/Makefile.pre.in b/Makefile.pre.in

index 14eea08dcfbd3316e98fbf097ee793c51ca1e4fe..f87de823974e96f49b1c12c4d0b9790acde97baf 100644 (file)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -2235,6 +2235,7 @@ TESTSUBDIRS=      idlelib/idle_test \
                 test/test_tomllib/data/valid/dates-and-times \
                 test/test_tomllib/data/valid/multiline-basic-str \
                 test/test_tools \
+               test/test_tools/i18n_data \
                 test/test_ttk \
                 test/test_unittest \
                 test/test_unittest/testmock \
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Sun, 3 Nov 2024 14:24:41 +0000 (15:24 +0100)
committer	GitHub <noreply@github.com>
	Sun, 3 Nov 2024 14:24:41 +0000 (14:24 +0000)
Lib/test/test_tools/i18n_data/docstrings.pot	[new file with mode: 0644]	patch \| blob
Lib/test/test_tools/i18n_data/docstrings.py	[new file with mode: 0644]	patch \| blob
Lib/test/test_tools/i18n_data/fileloc.pot	[new file with mode: 0644]	patch \| blob
Lib/test/test_tools/i18n_data/fileloc.py	[new file with mode: 0644]	patch \| blob
Lib/test/test_tools/i18n_data/messages.pot	[new file with mode: 0644]	patch \| blob
Lib/test/test_tools/i18n_data/messages.py	[new file with mode: 0644]	patch \| blob
Lib/test/test_tools/test_i18n.py		patch \| blob \| blame \| history
Makefile.pre.in		patch \| blob \| blame \| history