return unescape(string)
+def _extract_locations(line: str) -> list[str]:
+ """Extract locations from location comments.
+
+ Locations are extracted while properly handling First Strong
+ Isolate (U+2068) and Pop Directional Isolate (U+2069), used by
+ gettext to enclose filenames with spaces and tabs in their names.
+ """
+ if "\u2068" not in line and "\u2069" not in line:
+ return line.lstrip().split()
+
+ locations = []
+ location = ""
+ in_filename = False
+ for c in line:
+ if c == "\u2068":
+ if in_filename:
+ raise ValueError("location comment contains more First Strong Isolate "
+ "characters, than Pop Directional Isolate characters")
+ in_filename = True
+ continue
+ elif c == "\u2069":
+ if not in_filename:
+ raise ValueError("location comment contains more Pop Directional Isolate "
+ "characters, than First Strong Isolate characters")
+ in_filename = False
+ continue
+ elif c == " ":
+ if in_filename:
+ location += c
+ elif location:
+ locations.append(location)
+ location = ""
+ else:
+ location += c
+ else:
+ if location:
+ if in_filename:
+ raise ValueError("location comment contains more First Strong Isolate "
+ "characters, than Pop Directional Isolate characters")
+ locations.append(location)
+
+ return locations
+
+
class PoFileError(Exception):
"""Exception thrown by PoParser when an invalid po file is encountered."""
self._finish_current_message()
if line[1:].startswith(':'):
- for location in line[2:].lstrip().split():
+ for location in _extract_locations(line[2:]):
pos = location.rfind(':')
if pos >= 0:
try:
if line[1:].startswith('~'):
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
else:
- self._process_comment(line)
+ try:
+ self._process_comment(line)
+ except ValueError as exc:
+ self._invalid_pofile(line, lineno, str(exc))
else:
self._process_message_line(lineno, line)
return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])
+def _enclose_filename_if_necessary(filename: str) -> str:
+ """Enclose filenames which include white spaces or tabs.
+
+ Do the same as gettext and enclose filenames which contain white
+ spaces or tabs with First Strong Isolate (U+2068) and Pop
+ Directional Isolate (U+2069).
+ """
+ if " " not in filename and "\t" not in filename:
+ return filename
+
+ if not filename.startswith("\u2068"):
+ filename = "\u2068" + filename
+ if not filename.endswith("\u2069"):
+ filename += "\u2069"
+ return filename
+
+
def write_po(
fileobj: SupportsWrite[bytes],
catalog: Catalog,
for filename, lineno in locations:
location = filename.replace(os.sep, '/')
+ location = _enclose_filename_if_necessary(location)
if lineno and include_lineno:
location = f"{location}:{lineno:d}"
if location not in locs:
from babel.core import Locale
from babel.messages import pofile
from babel.messages.catalog import Catalog, Message
+from babel.messages.pofile import _enclose_filename_if_necessary, _extract_locations
from babel.util import FixedOffsetTimezone
assert message.string[1] == ''
assert message.string[2] == 'Vohs [text]'
+ def test_with_location(self):
+ buf = StringIO('''\
+#: main.py:1 \u2068filename with whitespace.py\u2069:123
+msgid "foo"
+msgstr "bar"
+''')
+ catalog = pofile.read_po(buf, locale='de_DE')
+ assert len(catalog) == 1
+ message = catalog['foo']
+ assert message.string == 'bar'
+ assert message.locations == [("main.py", 1), ("filename with whitespace.py", 123)]
+
+
def test_abort_invalid_po_file(self):
invalid_po = '''
msgctxt ""
msgid "foo"
msgstr ""'''
+ def test_white_space_in_location(self):
+ catalog = Catalog()
+ catalog.add('foo', locations=[('main.py', 1)])
+ catalog.add('foo', locations=[('utils b.py', 3)])
+ buf = BytesIO()
+ pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+ assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+ def test_white_space_in_location_already_enclosed(self):
+ catalog = Catalog()
+ catalog.add('foo', locations=[('main.py', 1)])
+ catalog.add('foo', locations=[('\u2068utils b.py\u2069', 3)])
+ buf = BytesIO()
+ pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+ assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+ def test_tab_in_location(self):
+ catalog = Catalog()
+ catalog.add('foo', locations=[('main.py', 1)])
+ catalog.add('foo', locations=[('utils\tb.py', 3)])
+ buf = BytesIO()
+ pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+ assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+ def test_tab_in_location_already_enclosed(self):
+ catalog = Catalog()
+ catalog.add('foo', locations=[('main.py', 1)])
+ catalog.add('foo', locations=[('\u2068utils\tb.py\u2069', 3)])
+ buf = BytesIO()
+ pofile.write_po(buf, catalog, omit_header=True, include_lineno=True)
+ assert buf.getvalue().strip() == b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
+msgid "foo"
+msgstr ""'''
+
+
class RoundtripPoTestCase(unittest.TestCase):
    """Write a catalog as PO, read it back and compare the two catalogs."""

    def test_enclosed_filenames_in_location_comment(self):
        """Filenames with spaces survive a write/read round trip."""
        original = Catalog()
        original.add("foo", lineno=2, locations=[("main 1.py", 1)], string="")
        original.add("bar", lineno=6, locations=[("other.py", 2)], string="")
        original.add("baz", lineno=10, locations=[("main 1.py", 3), ("other.py", 4)], string="")

        stream = BytesIO()
        pofile.write_po(stream, original, omit_header=True, include_lineno=True)
        stream.seek(0)  # rewind so read_po starts at the beginning

        reread = pofile.read_po(stream)
        assert original.is_identical(reread) is True
class PofileFunctionsTestCase(unittest.TestCase):
assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}')
@pytest.mark.parametrize(
    ("line", "locations"),
    [
        ("\u2068file1.po\u2069", ["file1.po"]),
        (
            "file1.po \u2068file 2.po\u2069 file3.po",
            ["file1.po", "file 2.po", "file3.po"],
        ),
        (
            "file1.po:1 \u2068file 2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file 2.po:2", "file3.po:3"],
        ),
        (
            "\u2068file1.po\u2069:1 \u2068file\t2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file\t2.po:2", "file3.po:3"],
        ),
        ("file1.po file2.po", ["file1.po", "file2.po"]),
        ("file1.po \u2068\u2069 file2.po", ["file1.po", "file2.po"]),
    ],
)
def test_extract_locations_valid_location_comment(line, locations):
    """Well-formed location comments split into the expected locations."""
    extracted = _extract_locations(line)
    assert locations == extracted
+
+
@pytest.mark.parametrize(
    "line",
    [
        "\u2068file 1.po",
        "file 1.po\u2069",
        "\u2069file 1.po\u2068",
        "\u2068file 1.po:1 \u2068file 2.po\u2069:2",
        "\u2068file 1.po\u2069:1 file 2.po\u2069:2",
    ],
)
def test_extract_locations_invalid_location_comment(line):
    """Unbalanced or nested isolate characters are rejected with ValueError."""
    with pytest.raises(ValueError):
        _extract_locations(line)
+
+
@pytest.mark.parametrize(
    "filename",
    [
        "file.po",
        "file_a.po",
        "file-a.po",
        "file\n.po",
        "\u2068file.po\u2069",
        "\u2068file a.po\u2069",
    ],
)
def test_enclose_filename_if_necessary_no_change(filename):
    """Filenames without space/tab, or already enclosed, come back unchanged."""
    result = _enclose_filename_if_necessary(filename)
    assert filename == result
+
+
@pytest.mark.parametrize(
    "filename",
    [
        "file a.po",
        "file\ta.po",
    ],
)
def test_enclose_filename_if_necessary_enclosed(filename):
    """Filenames with a space or a tab gain the U+2068/U+2069 pair."""
    expected = "\u2068" + filename + "\u2069"
    assert expected == _enclose_filename_if_necessary(filename)
+
+
def test_unknown_language_roundtrip():
buf = StringIO(r'''
msgid ""