git.ipfire.org Git - thirdparty/babel.git/commitdiff
allow utf8 BOM + magic comment, closes #189
author Leonardo Pistone <leonardo.pistone@camptocamp.com>
Fri, 7 Aug 2015 14:43:41 +0000 (16:43 +0200)
committer Leonardo Pistone <leonardo.pistone@camptocamp.com>
Tue, 25 Aug 2015 15:33:05 +0000 (17:33 +0200)
babel/util.py
tests/messages/test_extract.py

diff --git a/babel/util.py b/babel/util.py
index a65fce36e3169d772a29c0951db49ede48564530..dd7378725a8d7471f6edcf465df82031a0282466 100644
--- a/babel/util.py
+++ b/babel/util.py
@@ -77,9 +77,11 @@ def parse_encoding(fp):
 
         if has_bom:
             if m:
-                raise SyntaxError(
-                    "python refuses to compile code with both a UTF8 "
-                    "byte-order-mark and a magic encoding comment")
+                magic_comment_encoding = m.group(1).decode('latin-1')
+                if magic_comment_encoding != 'utf-8':
+                    raise SyntaxError(
+                        'encoding problem: {0} with BOM'.format(
+                            magic_comment_encoding))
             return 'utf-8'
         elif m:
             return m.group(1).decode('latin-1')
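diff --git a/tests/messages/test_extract.py b/tests/messages/test_extract.py
index 62c72277399efcd75ad967306758723c083bad78..ded697f7fc10007f0b9f0ba7ee158f6e4829ca9f 100644
--- a/tests/messages/test_extract.py
+++ b/tests/messages/test_extract.py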
@@ -343,6 +343,23 @@ msg = _('Bonjour à tous')
         self.assertEqual(u'Bonjour à tous', messages[0][2])
         self.assertEqual([u'NOTE: hello'], messages[0][3])
 
+    def test_utf8_message_with_utf8_bom_and_magic_comment(self):
+        buf = BytesIO(codecs.BOM_UTF8 + u"""# -*- coding: utf-8 -*-
+# NOTE: hello
+msg = _('Bonjour à tous')
+""".encode('utf-8'))
+        messages = list(extract.extract_python(buf, ('_',), ['NOTE:'], {}))
+        self.assertEqual(u'Bonjour à tous', messages[0][2])
+        self.assertEqual([u'NOTE: hello'], messages[0][3])
+
+    def test_utf8_bom_with_latin_magic_comment_fails(self):
+        buf = BytesIO(codecs.BOM_UTF8 + u"""# -*- coding: latin-1 -*-
+# NOTE: hello
+msg = _('Bonjour à tous')
+""".encode('utf-8'))
+        self.assertRaises(SyntaxError, list,
+                          extract.extract_python(buf, ('_',), ['NOTE:'], {}))
+
     def test_utf8_raw_strings_match_unicode_strings(self):
         buf = BytesIO(codecs.BOM_UTF8 + u"""
 msg = _('Bonjour à tous')