From f9b04a5fb2ee166ca75358ff574f00d1cd62a916 Mon Sep 17 00:00:00 2001
From: Roman Imankulov <roman.imankulov@gmail.com>
Date: Tue, 13 Oct 2015 11:18:48 +0000
Subject: [PATCH] Fix UnicodeEncodeError on file encoding detection

If the first line of a Python file is not a valid latin-1 string,
parse_encoding dies with "UnicodeEncodeError". Such strings can nonetheless be
valid in some scenarios (for example, the Mako extractor uses
babel.messages.extract.extract_python), so it makes more sense to ignore this
exception and return None.
---
 babel/util.py      |  2 +-
 tests/test_util.py | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/babel/util.py b/babel/util.py
index 1849e8a0..54f7d2db 100644
--- a/babel/util.py
+++ b/babel/util.py
@@ -65,7 +65,7 @@ def parse_encoding(fp):
             try:
                 import parser
                 parser.suite(line1.decode('latin-1'))
-            except (ImportError, SyntaxError):
+            except (ImportError, SyntaxError, UnicodeEncodeError):
                 # Either it's a real syntax error, in which case the source is
                 # not valid python source, or line2 is a continuation of line1,
                 # in which case we don't want to scan line2 for a magic
diff --git a/tests/test_util.py b/tests/test_util.py
index bb2bfdb4..d4b4be52 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -14,6 +14,7 @@
 import unittest
 
 from babel import util
+from babel._compat import BytesIO
 
 
 def test_distinct():
@@ -52,3 +53,17 @@ class FixedOffsetTimezoneTestCase(unittest.TestCase):
     def test_zone_positive_offset(self):
         self.assertEqual('Etc/GMT+330', util.FixedOffsetTimezone(330).zone)
 
+
+parse_encoding = lambda s: util.parse_encoding(BytesIO(s.encode('utf-8')))
+
+
+def test_parse_encoding_defined():
+    assert parse_encoding(u'# coding: utf-8') == 'utf-8'
+
+
+def test_parse_encoding_undefined():
+    assert parse_encoding(u'') is None
+
+
+def test_parse_encoding_non_ascii():
+    assert parse_encoding(u'K\xf6ln') is None
-- 
2.47.2