git.ipfire.org Git - thirdparty/babel.git/commitdiff
pofile parsing: fix handling of obsolete units and refactor the parser
author: Michael Birtwell <michael.birtwell@starleaf.com>
Wed, 12 Oct 2016 15:13:50 +0000 (16:13 +0100)
committer: Michael Birtwell <michael.birtwell@starleaf.com>
Mon, 21 Nov 2016 14:32:49 +0000 (14:32 +0000)
Fixes the handling of the unit that immediately precedes an obsolete unit.
Previously, the parser would also mark the unit before an obsolete unit as
obsolete.

Some refactoring:
* Make the transition between finishing one unit and starting the next
  clearer.
* Separate the processing of keyword lines and string continuation lines.
* Combine the reset and initialisation code.
* Make the handling of strings consistent.
* Add some nascent error handling and remove some errors from the test inputs.

babel/messages/pofile.py
tests/messages/test_pofile.py

index 741e25f9f9ceff0be3b4ddbf5cbad429ac833ac9..2d1854b1c0a7c86db445f0200409e36e112abe5e 100644 (file)
@@ -73,6 +73,23 @@ def denormalize(string):
         return unescape(string)
 
 
+class _NormalizedString(object):
+
+    def __init__(self, *args):
+        self._strs = []
+        for arg in args:
+            self.append(arg)
+
+    def append(self, s):
+        self._strs.append(s.strip())
+
+    def denormalize(self):
+        return ''.join(map(unescape, self._strs))
+
+    def __nonzero__(self):
+        return bool(self._strs)
+
+
 class PoFileParser(object):
     """Support class to  read messages from a ``gettext`` PO (portable object) file
     and add them to a `Catalog`
@@ -80,19 +97,29 @@ class PoFileParser(object):
     See `read_po` for simple cases.
     """
 
+    _keywords = [
+        'msgid',
+        'msgstr',
+        'msgctxt',
+        'msgid_plural',
+    ]
+
     def __init__(self, catalog, ignore_obsolete=False):
         self.catalog = catalog
         self.ignore_obsolete = ignore_obsolete
         self.counter = 0
         self.offset = 0
+        self._reset_message_state()
+
+    def _reset_message_state(self):
         self.messages = []
         self.translations = []
         self.locations = []
         self.flags = []
         self.user_comments = []
         self.auto_comments = []
+        self.context = None
         self.obsolete = False
-        self.context = []
         self.in_msgid = False
         self.in_msgstr = False
         self.in_msgctxt = False
@@ -104,9 +131,9 @@ class PoFileParser(object):
         """
         self.translations.sort()
         if len(self.messages) > 1:
-            msgid = tuple([denormalize(m) for m in self.messages])
+            msgid = tuple([m.denormalize() for m in self.messages])
         else:
-            msgid = denormalize(self.messages[0])
+            msgid = self.messages[0].denormalize()
         if isinstance(msgid, (list, tuple)):
             string = []
             for idx in range(self.catalog.num_plurals):
@@ -114,11 +141,11 @@ class PoFileParser(object):
                     string.append(self.translations[idx])
                 except IndexError:
                     string.append((idx, ''))
-            string = tuple([denormalize(t[1]) for t in string])
+            string = tuple([t[1].denormalize() for t in string])
         else:
-            string = denormalize(self.translations[0][1])
+            string = self.translations[0][1].denormalize()
         if self.context:
-            msgctxt = denormalize('\n'.join(self.context))
+            msgctxt = self.context.denormalize()
         else:
             msgctxt = None
         message = Message(msgid, string, list(self.locations), set(self.flags),
@@ -129,55 +156,70 @@ class PoFileParser(object):
                 self.catalog.obsolete[msgid] = message
         else:
             self.catalog[msgid] = message
-        del self.messages[:]
-        del self.translations[:]
-        del self.context[:]
-        del self.locations[:]
-        del self.flags[:]
-        del self.auto_comments[:]
-        del self.user_comments[:]
-        self.obsolete = False
         self.counter += 1
+        self._reset_message_state()
 
-    def _process_message_line(self, lineno, line):
-        if line.startswith('msgid_plural'):
-            self.in_msgid = True
-            msg = line[12:].lstrip()
-            self.messages.append(msg)
-        elif line.startswith('msgid'):
-            self.in_msgid = True
+    def _finish_current_message(self):
+        if self.messages:
+            self._add_message()
+
+    def _process_message_line(self, lineno, line, obsolete=False):
+        if line.startswith('"'):
+            self._process_string_continuation_line(line, lineno)
+        else:
+            self._process_keyword_line(lineno, line, obsolete)
+
+    def _process_keyword_line(self, lineno, line, obsolete=False):
+
+        for keyword in self._keywords:
+            if line.startswith(keyword) and line[len(keyword)] in [' ', '[']:
+                arg = line[len(keyword):]
+                break
+        else:
+            self._invalid_pofile(line, lineno, "Start of line didn't match any expected keyword.")
+            return
+
+        if keyword in ['msgid', 'msgctxt']:
+            self._finish_current_message()
+
+        self.obsolete = obsolete
+
+        # The line that has the msgid is stored as the offset of the msg
+        # should this be the msgctxt if it has one?
+        if keyword == 'msgid':
             self.offset = lineno
-            txt = line[5:].lstrip()
-            if self.messages:
-                self._add_message()
-            self.messages.append(txt)
-        elif line.startswith('msgstr'):
+
+        if keyword in ['msgid', 'msgid_plural']:
+            self.in_msgid = True
+            self.messages.append(_NormalizedString(arg))
+
+        elif keyword == 'msgstr':
             self.in_msgid = False
             self.in_msgstr = True
-            msg = line[6:].lstrip()
-            if msg.startswith('['):
-                idx, msg = msg[1:].split(']', 1)
-                self.translations.append([int(idx), msg.lstrip()])
+            if arg.startswith('['):
+                idx, msg = arg[1:].split(']', 1)
+                self.translations.append([int(idx), _NormalizedString(msg)])
             else:
-                self.translations.append([0, msg])
-        elif line.startswith('msgctxt'):
-            if self.messages:
-                self._add_message()
-            self.in_msgid = self.in_msgstr = False
-            self.context.append(line[7:].lstrip())
-        elif line.startswith('"'):
-            if self.in_msgid:
-                self.messages[-1] += u'\n' + line.rstrip()
-            elif self.in_msgstr:
-                self.translations[-1][1] += u'\n' + line.rstrip()
-            elif self.in_msgctxt:
-                self.context.append(line.rstrip())
+                self.translations.append([0, _NormalizedString(arg)])
+        elif keyword == 'msgctxt':
+            self.context = _NormalizedString(arg)
+
+    def _process_string_continuation_line(self, line, lineno):
+        if self.in_msgid:
+            s = self.messages[-1]
+        elif self.in_msgstr:
+            s = self.translations[-1][1]
+        elif self.in_msgctxt:
+            s = self.context
+        else:
+            self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
+            return
+        s.append(line)
 
     def _process_comment(self, line):
 
-        self.in_msgid = self.in_msgstr = False
-        if self.messages and self.translations:
-            self._add_message()
+        self._finish_current_message()
+
         if line[1:].startswith(':'):
             for location in line[2:].lstrip().split():
                 pos = location.rfind(':')
@@ -211,25 +253,29 @@ class PoFileParser(object):
             line = line.strip()
             if not isinstance(line, text_type):
                 line = line.decode(self.catalog.charset)
+            if not line:
+                continue
             if line.startswith('#'):
                 if line[1:].startswith('~'):
-                    self.obsolete = True
-                    self._process_message_line(lineno, line[2:].lstrip())
+                    self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
                 else:
                     self._process_comment(line)
             else:
                 self._process_message_line(lineno, line)
 
-        if self.messages:
-            self._add_message()
+        self._finish_current_message()
 
         # No actual messages found, but there was some info in comments, from which
         # we'll construct an empty header message
-        elif not self.counter and (self.flags or self.user_comments or self.auto_comments):
-            self.messages.append(u'')
-            self.translations.append([0, u''])
+        if not self.counter and (self.flags or self.user_comments or self.auto_comments):
+            self.messages.append(_NormalizedString(u'""'))
+            self.translations.append([0, _NormalizedString(u'""')])
             self._add_message()
 
+    def _invalid_pofile(self, line, lineno, msg):
+        print("WARNING:", msg)
+        print("WARNING: Problem on line {0}: {1}".format(lineno + 1, line))
+
 
 def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None):
     """Read messages from a ``gettext`` PO (portable object) file from the given
index d2a10dd05605f01e258f126cb8da4ecc72e7079b..cdb90f5e9fabb9cd728c64f8c523b90801cb1de8 100644 (file)
@@ -79,7 +79,7 @@ msgstr ""''')
                          message.id)
 
     def test_fuzzy_header(self):
-        buf = StringIO(r'''\
+        buf = StringIO(r'''
 # Translations template for AReallyReallyLongNameForAProject.
 # Copyright (C) 2007 ORGANIZATION
 # This file is distributed under the same license as the
@@ -93,7 +93,7 @@ msgstr ""''')
         self.assertEqual(True, list(catalog)[0].fuzzy)
 
     def test_not_fuzzy_header(self):
-        buf = StringIO(r'''\
+        buf = StringIO(r'''
 # Translations template for AReallyReallyLongNameForAProject.
 # Copyright (C) 2007 ORGANIZATION
 # This file is distributed under the same license as the
@@ -106,7 +106,7 @@ msgstr ""''')
         self.assertEqual(False, list(catalog)[0].fuzzy)
 
     def test_header_entry(self):
-        buf = StringIO(r'''\
+        buf = StringIO(r'''
 # SOME DESCRIPTIVE TITLE.
 # Copyright (C) 2007 THE PACKAGE'S COPYRIGHT HOLDER
 # This file is distributed under the same license as the PACKAGE package.
@@ -208,6 +208,28 @@ msgstr "Bahr"
 #: main.py:1
 msgid "bar"
 msgstr "Bahr"
+''')
+        catalog = pofile.read_po(buf)
+        self.assertEqual(1, len(catalog))
+        message = catalog[u'bar']
+        self.assertEqual(u'bar', message.id)
+        self.assertEqual(u'Bahr', message.string)
+        self.assertEqual(['This message is not obsolete'], message.user_comments)
+
+    def test_unit_before_obsolete_is_not_obsoleted(self):
+        buf = StringIO(r'''
+# This message is not obsolete
+#: main.py:1
+msgid "bar"
+msgstr "Bahr"
+
+# This is an obsolete message
+#~ msgid ""
+#~ "foo"
+#~ "fooooooo"
+#~ msgstr ""
+#~ "Voh"
+#~ "Vooooh"
 ''')
         catalog = pofile.read_po(buf)
         self.assertEqual(1, len(catalog))
@@ -242,6 +264,29 @@ msgstr "Bahr"
         assert out_buf.getvalue().strip() == buf.getvalue().strip(), \
             out_buf.getvalue()
 
+    def test_obsolete_message_with_context(self):
+        buf = StringIO('''
+# This message is not obsolete
+msgid "baz"
+msgstr "Bazczch"
+
+# This is an obsolete message
+#~ msgctxt "other"
+#~ msgid "foo"
+#~ msgstr "Voh"
+
+# This message is not obsolete
+#: main.py:1
+msgid "bar"
+msgstr "Bahr"
+''')
+        catalog = pofile.read_po(buf)
+        self.assertEqual(2, len(catalog))
+        self.assertEqual(1, len(catalog.obsolete))
+        message = catalog.obsolete[u"foo"]
+        self.assertEqual(message.context, "other")
+        self.assertEqual(message.string, "Voh")
+
     def test_with_context_two(self):
         buf = BytesIO(b'''msgctxt "Menu"
 msgid "foo"
@@ -308,6 +353,22 @@ msgstr[1] "Vohs [text]"''')
         message = catalog['foo']
         self.assertEqual(2, len(message.string))
 
+    def test_obsolete_plural_with_square_brackets(self):
+        buf = StringIO('''\
+#~ msgid "foo"
+#~ msgid_plural "foos"
+#~ msgstr[0] "Voh [text]"
+#~ msgstr[1] "Vohs [text]"
+''')
+        catalog = pofile.read_po(buf, locale='nb_NO')
+        self.assertEqual(0, len(catalog))
+        self.assertEqual(1, len(catalog.obsolete))
+        self.assertEqual(2, catalog.num_plurals)
+        message = catalog.obsolete[('foo', 'foos')]
+        self.assertEqual(2, len(message.string))
+        self.assertEqual("Voh [text]", message.string[0])
+        self.assertEqual("Vohs [text]", message.string[1])
+
 
 class WritePoTestCase(unittest.TestCase):