[3.12] gh-130197: pygettext: Test the --escape option (GH-131902) (GH-132033)

author Tomas R. <tomas.roun8@gmail.com>

Wed, 2 Apr 2025 20:29:02 +0000 (22:29 +0200)

committer GitHub <noreply@github.com>

Wed, 2 Apr 2025 20:29:02 +0000 (23:29 +0300)
author Tomas R. <tomas.roun8@gmail.com>
Wed, 2 Apr 2025 20:29:02 +0000 (22:29 +0200)
committer GitHub <noreply@github.com>
Wed, 2 Apr 2025 20:29:02 +0000 (23:29 +0300)
diff --git a/Lib/test/test_tools/i18n_data/ascii-escapes.pot b/Lib/test/test_tools/i18n_data/ascii-escapes.pot

new file mode 100644 (file)

index 0000000..18d868b
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/ascii-escapes.pot
@@ -0,0 +1,45 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: escapes.py:5
+msgid ""
+"\"\t\n"
+"\r\\"
+msgstr ""
+
+#: escapes.py:8
+msgid ""
+"\000\001\002\003\004\005\006\007\010\t\n"
+"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+msgstr ""
+
+#: escapes.py:13
+msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
+msgstr ""
+
+#: escapes.py:17
+msgid "\177"
+msgstr ""
+
+#: escapes.py:20
+msgid "\80   ÿ"
+msgstr ""
+
+#: escapes.py:23
+msgid "α ㄱ 𓂀"
+msgstr ""
+
diff --git a/Lib/test/test_tools/i18n_data/escapes.pot b/Lib/test/test_tools/i18n_data/escapes.pot

new file mode 100644 (file)

index 0000000..2c7899d
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/escapes.pot
@@ -0,0 +1,45 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: escapes.py:5
+msgid ""
+"\"\t\n"
+"\r\\"
+msgstr ""
+
+#: escapes.py:8
+msgid ""
+"\000\001\002\003\004\005\006\007\010\t\n"
+"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+msgstr ""
+
+#: escapes.py:13
+msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
+msgstr ""
+
+#: escapes.py:17
+msgid "\177"
+msgstr ""
+
+#: escapes.py:20
+msgid "\302\200 \302\240 \303\277"
+msgstr ""
+
+#: escapes.py:23
+msgid "\316\261 \343\204\261 \360\223\202\200"
+msgstr ""
+
diff --git a/Lib/test/test_tools/i18n_data/escapes.py b/Lib/test/test_tools/i18n_data/escapes.py

new file mode 100644 (file)

index 0000000..900bd97
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/escapes.py
@@ -0,0 +1,23 @@
+import gettext as _
+
+
+# Special characters that are always escaped in the POT file
+_('"\t\n\r\\')
+
+# All ascii characters 0-31
+_('\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n'
+  '\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15'
+  '\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f')
+
+# All ascii characters 32-126
+_(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+  '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
+
+# ascii char 127
+_('\x7f')
+
+# some characters in the 128-255 range
+_('\x80 \xa0 ÿ')
+
+# some characters >= 256 encoded as 2, 3 and 4 bytes, respectively
+_('α ㄱ 𓂀')
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py

index 6f71f0976819f15391933f5539f2c1026e5086e1..ffa1b1178eddbc9d757e1539d6d88d636cfc5a50 100644 (file)
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -369,15 +369,8 @@ class Test_pygettext(unittest.TestCase):
  
      def test_pygettext_output(self):
          """Test that the pygettext output exactly matches snapshots."""
-        for input_file in DATA_DIR.glob('*.py'):
-            output_file = input_file.with_suffix('.pot')
-            with self.subTest(input_file=f'i18n_data/{input_file}'):
-                contents = input_file.read_text(encoding='utf-8')
-                with temp_cwd(None):
-                    Path(input_file.name).write_text(contents)
-                    assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
-                    output = Path('messages.pot').read_text(encoding='utf-8')
-
+        for input_file, output_file, output in extract_from_snapshots():
+            with self.subTest(input_file=input_file):
                  expected = output_file.read_text(encoding='utf-8')
                  self.assert_POT_equal(expected, output)
  
@@ -408,15 +401,37 @@ class Test_pygettext(unittest.TestCase):
              self.assertNotIn(text3, data)
  
  
-def update_POT_snapshots():
-    for input_file in DATA_DIR.glob('*.py'):
-        output_file = input_file.with_suffix('.pot')
+def extract_from_snapshots():
+    snapshots = {
+        'messages.py': ('--docstrings',),
+        'fileloc.py': ('--docstrings',),
+        'docstrings.py': ('--docstrings',),
+        # == Test character escaping
+        # Escape ascii and unicode:
+        'escapes.py': ('--escape',),
+        # Escape only ascii and let unicode pass through:
+        ('escapes.py', 'ascii-escapes.pot'): (),
+    }
+
+    for filename, args in snapshots.items():
+        if isinstance(filename, tuple):
+            filename, output_file = filename
+            output_file = DATA_DIR / output_file
+            input_file = DATA_DIR / filename
+        else:
+            input_file = DATA_DIR / filename
+            output_file = input_file.with_suffix('.pot')
          contents = input_file.read_bytes()
          with temp_cwd(None):
              Path(input_file.name).write_bytes(contents)
-            assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name)
-            output = Path('messages.pot').read_text(encoding='utf-8')
+            assert_python_ok('-Xutf8', Test_pygettext.script, *args,
+                             input_file.name)
+            yield (input_file, output_file,
+                   Path('messages.pot').read_text(encoding='utf-8'))
+
  
+def update_POT_snapshots():
+    for _, output_file, output in extract_from_snapshots():
          output = normalize_POT_file(output)
          output_file.write_text(output, encoding='utf-8')
  
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py

index 0d16e8f7da00711f6acff76b6348bcd824d0077c..8f59982640cbbabd74bd561d79f773a93420ffd2 100755 (executable)
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -207,7 +207,7 @@ def make_escapes(pass_nonascii):
      global escapes, escape
      if pass_nonascii:
          # Allow non-ascii characters to pass through so that e.g. 'msgid
-        # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
+        # "Höhe"' would not result in 'msgid "H\366he"'.  Otherwise we
          # escape any character outside the 32..126 range.
          mod = 128
          escape = escape_ascii
author	Tomas R. <tomas.roun8@gmail.com>
	Wed, 2 Apr 2025 20:29:02 +0000 (22:29 +0200)
committer	GitHub <noreply@github.com>
	Wed, 2 Apr 2025 20:29:02 +0000 (23:29 +0300)
Lib/test/test_tools/i18n_data/ascii-escapes.pot	[new file with mode: 0644]	patch | blob
Lib/test/test_tools/i18n_data/escapes.pot	[new file with mode: 0644]	patch | blob
Lib/test/test_tools/i18n_data/escapes.py	[new file with mode: 0644]	patch | blob
Lib/test/test_tools/test_i18n.py		patch | blob | blame | history
Tools/i18n/pygettext.py		patch | blob | blame | history