Enclose white spaces in references (#1105)

author Daniel Roschka <dunedan@phoenitydawn.de>

Wed, 4 Sep 2024 15:58:12 +0000 (17:58 +0200)

committer GitHub <noreply@github.com>

Wed, 4 Sep 2024 15:58:12 +0000 (18:58 +0300)
author Daniel Roschka <dunedan@phoenitydawn.de>
Wed, 4 Sep 2024 15:58:12 +0000 (17:58 +0200)
committer GitHub <noreply@github.com>
Wed, 4 Sep 2024 15:58:12 +0000 (18:58 +0300)
diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py

index 89a924255b730301b1295b0d7ded803df5449c80..5cd65d86737dda2bee71fb7f071e4dd16b5fc43a 100644 (file)
--- a/babel/messages/pofile.py
+++ b/babel/messages/pofile.py
@@ -80,6 +80,50 @@ def denormalize(string: str) -> str:
          return unescape(string)
  
  
+def _extract_locations(line: str) -> list[str]:
+    """Extract locations from location comments.
+
+    Locations are separated by white space. gettext encloses filenames
+    with spaces and tabs in their names in First Strong Isolate (U+2068)
+    and Pop Directional Isolate (U+2069); everything between such a pair
+    is kept as part of the location and the two markers are dropped.
+
+    :param line: content of a location comment
+    :return: the locations in order of appearance
+    :raises ValueError: if the isolate characters are nested or unbalanced
+    """
+    if "\u2068" not in line and "\u2069" not in line:
+        return line.split()
+
+    locations = []
+    chars = []  # characters of the location currently being collected
+    in_filename = False
+    for c in line:
+        if c == "\u2068":
+            if in_filename:
+                raise ValueError("location comment contains more First Strong Isolate "
+                                 "characters, than Pop Directional Isolate characters")
+            in_filename = True
+        elif c == "\u2069":
+            if not in_filename:
+                raise ValueError("location comment contains more Pop Directional Isolate "
+                                 "characters, than First Strong Isolate characters")
+            in_filename = False
+        elif in_filename or not c.isspace():
+            chars.append(c)
+        elif chars:
+            # Unenclosed white space (not only U+0020) ends a location, so
+            # this path splits the same way str.split() does above.
+            locations.append("".join(chars))
+            chars = []
+
+    # Check the balance even when nothing follows the opening character,
+    # otherwise a trailing U+2068 would be accepted silently.
+    if in_filename:
+        raise ValueError("location comment contains more First Strong Isolate "
+                         "characters, than Pop Directional Isolate characters")
+    if chars:
+        locations.append("".join(chars))
+
+    return locations
+
+
  class PoFileError(Exception):
      """Exception thrown by PoParser when an invalid po file is encountered."""
  
@@ -269,7 +313,7 @@ class PoFileParser:
          self._finish_current_message()
  
          if line[1:].startswith(':'):
-            for location in line[2:].lstrip().split():
+            for location in _extract_locations(line[2:]):
                  pos = location.rfind(':')
                  if pos >= 0:
                      try:
@@ -307,7 +351,10 @@ class PoFileParser:
                  if line[1:].startswith('~'):
                      self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
                  else:
-                    self._process_comment(line)
+                    try:
+                        self._process_comment(line)
+                    except ValueError as exc:
+                        self._invalid_pofile(line, lineno, str(exc))
              else:
                  self._process_message_line(lineno, line)
  
@@ -474,6 +521,23 @@ def normalize(string: str, prefix: str = '', width: int = 76) -> str:
      return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])
  
  
+def _enclose_filename_if_necessary(filename: str) -> str:
+    """Wrap a filename in isolate characters when it contains a space or tab.
+
+    This mirrors gettext, which encloses such filenames with First
+    Strong Isolate (U+2068) and Pop Directional Isolate (U+2069). A
+    marker that is already present at its end of the name is not added
+    a second time; other filenames are returned unchanged.
+    """
+    if not any(blank in filename for blank in (" ", "\t")):
+        return filename
+
+    opener = "" if filename.startswith("\u2068") else "\u2068"
+    closer = "" if filename.endswith("\u2069") else "\u2069"
+    return f"{opener}{filename}{closer}"
+
+
  def write_po(
      fileobj: SupportsWrite[bytes],
      catalog: Catalog,
@@ -626,6 +690,7 @@ def generate_po(
  
              for filename, lineno in locations:
                  location = filename.replace(os.sep, '/')
+                location = _enclose_filename_if_necessary(location)
                  if lineno and include_lineno:
                      location = f"{location}:{lineno:d}"
                  if location not in locs:
diff --git a/tests/messages/test_pofile.py b/tests/messages/test_pofile.py

index d1a3e2d119b70be07d984f391be533bf1d582bc6..c0ded12967c2d252e62146bcb5e0d7d0ee30f6d7 100644 (file)
--- a/tests/messages/test_pofile.py
+++ b/tests/messages/test_pofile.py
@@ -19,6 +19,7 @@ import pytest
  from babel.core import Locale
  from babel.messages import pofile
  from babel.messages.catalog import Catalog, Message
+from babel.messages.pofile import _enclose_filename_if_necessary, _extract_locations
  from babel.util import FixedOffsetTimezone
  
  
@@ -438,6 +439,19 @@ msgstr[2] "Vohs [text]"
          assert message.string[1] == ''
          assert message.string[2] == 'Vohs [text]'
  
+    def test_with_location(self):
+        """Enclosed filenames are read back without the isolate characters."""
+        po_source = '''\
+#: main.py:1 \u2068filename with whitespace.py\u2069:123
+msgid "foo"
+msgstr "bar"
+'''
+        parsed = pofile.read_po(StringIO(po_source), locale='de_DE')
+        assert len(parsed) == 1
+        entry = parsed['foo']
+        assert entry.string == 'bar'
+        expected = [("main.py", 1), ("filename with whitespace.py", 123)]
+        assert entry.locations == expected
+
+
      def test_abort_invalid_po_file(self):
          invalid_po = '''
              msgctxt ""
@@ -841,6 +855,59 @@ msgstr ""'''
  msgid "foo"
  msgstr ""'''
  
+    def test_white_space_in_location(self):
+        """A location filename containing a space is written enclosed."""
+        cat = Catalog()
+        for where in [('main.py', 1), ('utils b.py', 3)]:
+            cat.add('foo', locations=[where])
+        out = BytesIO()
+        pofile.write_po(out, cat, omit_header=True, include_lineno=True)
+        written = out.getvalue().strip()
+        assert written == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+    def test_white_space_in_location_already_enclosed(self):
+        """An already enclosed filename is not enclosed a second time."""
+        cat = Catalog()
+        for where in [('main.py', 1), ('\u2068utils b.py\u2069', 3)]:
+            cat.add('foo', locations=[where])
+        out = BytesIO()
+        pofile.write_po(out, cat, omit_header=True, include_lineno=True)
+        written = out.getvalue().strip()
+        assert written == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+    def test_tab_in_location(self):
+        """A location filename containing a tab is written enclosed."""
+        cat = Catalog()
+        for where in [('main.py', 1), ('utils\tb.py', 3)]:
+            cat.add('foo', locations=[where])
+        out = BytesIO()
+        pofile.write_po(out, cat, omit_header=True, include_lineno=True)
+        written = out.getvalue().strip()
+        assert written == b'''#: main.py:1 \xe2\x81\xa8utils        b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+    def test_tab_in_location_already_enclosed(self):
+        """An already enclosed filename with a tab is not enclosed again."""
+        cat = Catalog()
+        for where in [('main.py', 1), ('\u2068utils\tb.py\u2069', 3)]:
+            cat.add('foo', locations=[where])
+        out = BytesIO()
+        pofile.write_po(out, cat, omit_header=True, include_lineno=True)
+        written = out.getvalue().strip()
+        assert written == b'''#: main.py:1 \xe2\x81\xa8utils        b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+
+class RoundtripPoTestCase(unittest.TestCase):
+    """Write a catalog as PO, read it back and compare with the original."""
+
+    def test_enclosed_filenames_in_location_comment(self):
+        """Filenames with spaces survive a write_po/read_po round trip."""
+        original = Catalog()
+        entries = [
+            ("foo", 2, [("main 1.py", 1)]),
+            ("bar", 6, [("other.py", 2)]),
+            ("baz", 10, [("main 1.py", 3), ("other.py", 4)]),
+        ]
+        for msgid, lineno, locations in entries:
+            original.add(msgid, lineno=lineno, locations=locations, string="")
+        stream = BytesIO()
+        pofile.write_po(stream, original, omit_header=True, include_lineno=True)
+        stream.seek(0)
+        reread = pofile.read_po(stream)
+        assert True is original.is_identical(reread)
  
  class PofileFunctionsTestCase(unittest.TestCase):
  
@@ -864,6 +931,51 @@ class PofileFunctionsTestCase(unittest.TestCase):
          assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}')
  
  
+@pytest.mark.parametrize("line, locations", [
+    ("\u2068file1.po\u2069", ["file1.po"]),
+    ("file1.po \u2068file 2.po\u2069 file3.po", ["file1.po", "file 2.po", "file3.po"]),
+    ("file1.po:1 \u2068file 2.po\u2069:2 file3.po:3", ["file1.po:1", "file 2.po:2", "file3.po:3"]),
+    ("\u2068file1.po\u2069:1 \u2068file\t2.po\u2069:2 file3.po:3",
+     ["file1.po:1", "file\t2.po:2", "file3.po:3"]),
+    ("file1.po  file2.po", ["file1.po", "file2.po"]),
+    ("file1.po \u2068\u2069 file2.po", ["file1.po", "file2.po"]),
+])
+def test_extract_locations_valid_location_comment(line, locations):
+    """Well-formed location comments yield the expected list of locations."""
+    extracted = _extract_locations(line)
+    assert extracted == locations
+
+
+@pytest.mark.parametrize("line", [
+    "\u2068file 1.po",
+    "file 1.po\u2069",
+    "\u2069file 1.po\u2068",
+    "\u2068file 1.po:1 \u2068file 2.po\u2069:2",
+    "\u2068file 1.po\u2069:1 file 2.po\u2069:2",
+])
+def test_extract_locations_invalid_location_comment(line):
+    """Nested or unbalanced isolate characters are rejected with ValueError."""
+    with pytest.raises(ValueError):
+        _extract_locations(line)
+
+
+@pytest.mark.parametrize("filename", [
+    "file.po",
+    "file_a.po",
+    "file-a.po",
+    "file\n.po",
+    "\u2068file.po\u2069",
+    "\u2068file a.po\u2069",
+])
+def test_enclose_filename_if_necessary_no_change(filename):
+    """Names without space/tab, or already enclosed, come back unchanged."""
+    result = _enclose_filename_if_necessary(filename)
+    assert result == filename
+
+
+@pytest.mark.parametrize("filename", [
+    "file a.po",
+    "file\ta.po",
+])
+def test_enclose_filename_if_necessary_enclosed(filename):
+    """Names containing a space or a tab gain both isolate characters."""
+    expected = f"\u2068{filename}\u2069"
+    assert _enclose_filename_if_necessary(filename) == expected
+
+
  def test_unknown_language_roundtrip():
      buf = StringIO(r'''
  msgid ""
author	Daniel Roschka <dunedan@phoenitydawn.de>
	Wed, 4 Sep 2024 15:58:12 +0000 (17:58 +0200)
committer	GitHub <noreply@github.com>
	Wed, 4 Sep 2024 15:58:12 +0000 (18:58 +0300)
babel/messages/pofile.py		patch \| blob \| blame \| history
tests/messages/test_pofile.py		patch \| blob \| blame \| history