git.ipfire.org Git - thirdparty/babel.git/commitdiff
Enclose white spaces in references (#1105) 1062/head
author Daniel Roschka <dunedan@phoenitydawn.de>
Wed, 4 Sep 2024 15:58:12 +0000 (17:58 +0200)
committer GitHub <noreply@github.com>
Wed, 4 Sep 2024 15:58:12 +0000 (18:58 +0300)
Since version 0.22, gettext encloses file names in references that
contain spaces or tabs between a First Strong Isolate (U+2068) and a
Pop Directional Isolate (U+2069) character. This commit adds the same
behavior to Babel.

babel/messages/pofile.py
tests/messages/test_pofile.py

index 89a924255b730301b1295b0d7ded803df5449c80..5cd65d86737dda2bee71fb7f071e4dd16b5fc43a 100644 (file)
@@ -80,6 +80,50 @@ def denormalize(string: str) -> str:
         return unescape(string)
 
 
+def _extract_locations(line: str) -> list[str]:
+    """Extract locations from location comments.
+
+    Locations are extracted while properly handling First Strong
+    Isolate (U+2068) and Pop Directional Isolate (U+2069), used by
+    gettext to enclose filenames with spaces and tabs in their names.
+    """
+    if "\u2068" not in line and "\u2069" not in line:  # fast path: no isolate markers at all
+        return line.lstrip().split()  # plain whitespace-separated references
+
+    locations = []
+    location = ""  # reference currently being assembled, one character at a time
+    in_filename = False  # True while between U+2068 and its matching U+2069
+    for c in line:
+        if c == "\u2068":  # opening isolate
+            if in_filename:  # a second opener before the previous one was closed
+                raise ValueError("location comment contains more First Strong Isolate "
+                                 "characters, than Pop Directional Isolate characters")
+            in_filename = True
+            continue  # the marker itself is never copied into the location
+        elif c == "\u2069":  # closing isolate
+            if not in_filename:  # closer without a preceding opener
+                raise ValueError("location comment contains more Pop Directional Isolate "
+                                 "characters, than First Strong Isolate characters")
+            in_filename = False
+            continue  # the marker itself is never copied into the location
+        elif c == " ":  # NOTE(review): only " " separates here; the fast path's split() takes any whitespace
+            if in_filename:
+                location += c  # a space inside the isolates belongs to the filename
+            elif location:  # separator ends the current location; repeated spaces add nothing
+                locations.append(location)
+                location = ""
+        else:
+            location += c  # everything else, tabs included, is part of the location
+    else:  # the loop has no break, so this always runs once the line is consumed
+        if location:  # NOTE(review): an unclosed U+2068 with no characters after it is not reported
+            if in_filename:  # line ended while still inside an enclosed filename
+                raise ValueError("location comment contains more First Strong Isolate "
+                                 "characters, than Pop Directional Isolate characters")
+            locations.append(location)  # flush the last location (no trailing separator)
+
+    return locations
+
+
 class PoFileError(Exception):
     """Exception thrown by PoParser when an invalid po file is encountered."""
 
@@ -269,7 +313,7 @@ class PoFileParser:
         self._finish_current_message()
 
         if line[1:].startswith(':'):
-            for location in line[2:].lstrip().split():
+            for location in _extract_locations(line[2:]):
                 pos = location.rfind(':')
                 if pos >= 0:
                     try:
@@ -307,7 +351,10 @@ class PoFileParser:
                 if line[1:].startswith('~'):
                     self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
                 else:
-                    self._process_comment(line)
+                    try:
+                        self._process_comment(line)
+                    except ValueError as exc:
+                        self._invalid_pofile(line, lineno, str(exc))
             else:
                 self._process_message_line(lineno, line)
 
@@ -474,6 +521,23 @@ def normalize(string: str, prefix: str = '', width: int = 76) -> str:
     return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])
 
 
+def _enclose_filename_if_necessary(filename: str) -> str:
+    """Enclose filenames which include white spaces or tabs.
+
+    Do the same as gettext and enclose filenames which contain white
+    spaces or tabs with First Strong Isolate (U+2068) and Pop
+    Directional Isolate (U+2069).
+    """
+    if " " not in filename and "\t" not in filename:  # only spaces and tabs trigger enclosing
+        return filename  # returned unchanged, even if it already carries isolate markers
+
+    if not filename.startswith("\u2068"):  # add the opener unless one is already present
+        filename = "\u2068" + filename
+    if not filename.endswith("\u2069"):  # add the closer unless one is already present
+        filename += "\u2069"
+    return filename
+
+
 def write_po(
     fileobj: SupportsWrite[bytes],
     catalog: Catalog,
@@ -626,6 +690,7 @@ def generate_po(
 
             for filename, lineno in locations:
                 location = filename.replace(os.sep, '/')
+                location = _enclose_filename_if_necessary(location)
                 if lineno and include_lineno:
                     location = f"{location}:{lineno:d}"
                 if location not in locs:
index d1a3e2d119b70be07d984f391be533bf1d582bc6..c0ded12967c2d252e62146bcb5e0d7d0ee30f6d7 100644 (file)
@@ -19,6 +19,7 @@ import pytest
 from babel.core import Locale
 from babel.messages import pofile
 from babel.messages.catalog import Catalog, Message
+from babel.messages.pofile import _enclose_filename_if_necessary, _extract_locations
 from babel.util import FixedOffsetTimezone
 
 
@@ -438,6 +439,19 @@ msgstr[2] "Vohs [text]"
         assert message.string[1] == ''
         assert message.string[2] == 'Vohs [text]'
 
+    def test_with_location(self):
+        buf = StringIO('''\
+#: main.py:1 \u2068filename with whitespace.py\u2069:123
+msgid "foo"
+msgstr "bar"
+''')
+        catalog = pofile.read_po(buf, locale='de_DE')  # parse a "#:" line with one plain and one enclosed reference
+        assert len(catalog) == 1
+        message = catalog['foo']
+        assert message.string == 'bar'
+        assert message.locations == [("main.py", 1), ("filename with whitespace.py", 123)]  # markers stripped, spaces kept
+
+
     def test_abort_invalid_po_file(self):
         invalid_po = '''
             msgctxt ""
@@ -841,6 +855,59 @@ msgstr ""'''
 msgid "foo"
 msgstr ""'''
 
+    def test_white_space_in_location(self):
+        catalog = Catalog()
+        catalog.add('foo', locations=[('main.py', 1)])
+        catalog.add('foo', locations=[('utils b.py', 3)])  # same msgid; filename contains a space
+        buf = BytesIO()
+        pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+        assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''  # \xe2\x81\xa8 and \xe2\x81\xa9 are U+2068 and U+2069 encoded as UTF-8
+
+    def test_white_space_in_location_already_enclosed(self):
+        catalog = Catalog()
+        catalog.add('foo', locations=[('main.py', 1)])
+        catalog.add('foo', locations=[('\u2068utils b.py\u2069', 3)])  # filename already carries both markers
+        buf = BytesIO()
+        pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+        assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''  # exactly one pair of markers in the output: they are not doubled
+
+    def test_tab_in_location(self):
+        catalog = Catalog()
+        catalog.add('foo', locations=[('main.py', 1)])
+        catalog.add('foo', locations=[('utils\tb.py', 3)])  # same msgid; filename contains a tab
+        buf = BytesIO()
+        pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+        assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils        b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''  # \xe2\x81\xa8 and \xe2\x81\xa9 are U+2068 and U+2069 encoded as UTF-8
+
+    def test_tab_in_location_already_enclosed(self):
+        catalog = Catalog()
+        catalog.add('foo', locations=[('main.py', 1)])
+        catalog.add('foo', locations=[('\u2068utils\tb.py\u2069', 3)])  # filename already carries both markers
+        buf = BytesIO()
+        pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+        assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils        b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''  # exactly one pair of markers in the output: they are not doubled
+
+
+class RoundtripPoTestCase(unittest.TestCase):  # write_po followed by read_po on the same buffer
+
+    def test_enclosed_filenames_in_location_comment(self):
+        catalog = Catalog()
+        catalog.add("foo", lineno=2, locations=[("main 1.py", 1)], string="")  # filename with a space
+        catalog.add("bar", lineno=6, locations=[("other.py", 2)], string="")  # plain filename
+        catalog.add("baz", lineno=10, locations=[("main 1.py", 3), ("other.py", 4)], string="")  # both kinds together
+        buf = BytesIO()
+        pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+        buf.seek(0)  # rewind so read_po starts at the beginning of what was written
+        catalog2 = pofile.read_po(buf)
+        assert True is catalog.is_identical(catalog2)  # the re-read catalog must be reported as identical
 
 class PofileFunctionsTestCase(unittest.TestCase):
 
@@ -864,6 +931,51 @@ class PofileFunctionsTestCase(unittest.TestCase):
         assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}')
 
 
+@pytest.mark.parametrize(("line", "locations"), [
+    ("\u2068file1.po\u2069", ["file1.po"]),  # enclosed name without any whitespace
+    ("file1.po \u2068file 2.po\u2069 file3.po", ["file1.po", "file 2.po", "file3.po"]),  # enclosed name between plain ones
+    ("file1.po:1 \u2068file 2.po\u2069:2 file3.po:3", ["file1.po:1", "file 2.po:2", "file3.po:3"]),  # ":lineno" stays attached
+    ("\u2068file1.po\u2069:1 \u2068file\t2.po\u2069:2 file3.po:3",
+     ["file1.po:1", "file\t2.po:2", "file3.po:3"]),  # a tab inside the isolates is kept
+    ("file1.po  file2.po", ["file1.po", "file2.po"]),  # repeated spaces yield no empty entries
+    ("file1.po \u2068\u2069 file2.po", ["file1.po", "file2.po"]),  # an empty isolate pair contributes nothing
+])
+def test_extract_locations_valid_location_comment(line, locations):
+    assert locations == _extract_locations(line)
+
+
+@pytest.mark.parametrize(("line",), [
+    ("\u2068file 1.po",),  # opener that is never closed
+    ("file 1.po\u2069",),  # closer without an opener
+    ("\u2069file 1.po\u2068",),  # markers in the wrong order
+    ("\u2068file 1.po:1 \u2068file 2.po\u2069:2",),  # second opener before the first is closed
+    ("\u2068file 1.po\u2069:1 file 2.po\u2069:2",),  # one closer too many
+])
+def test_extract_locations_invalid_location_comment(line):
+    with pytest.raises(ValueError):  # every unbalanced line above must be rejected
+        _extract_locations(line)
+
+
+@pytest.mark.parametrize(("filename",), [
+    ("file.po",),
+    ("file_a.po",),
+    ("file-a.po",),
+    ("file\n.po",),  # a newline is neither a space nor a tab
+    ("\u2068file.po\u2069",),  # markers present, no space or tab
+    ("\u2068file a.po\u2069",),  # contains a space but is already enclosed
+])
+def test_enclose_filename_if_necessary_no_change(filename):
+    assert filename == _enclose_filename_if_necessary(filename)  # input must come back unchanged
+
+
+@pytest.mark.parametrize(("filename",), [
+    ("file a.po",),  # space
+    ("file\ta.po",),  # tab
+])
+def test_enclose_filename_if_necessary_enclosed(filename):
+    assert "\u2068" + filename + "\u2069" == _enclose_filename_if_necessary(filename)  # wrapped in U+2068 ... U+2069
+
+
 def test_unknown_language_roundtrip():
     buf = StringIO(r'''
 msgid ""