From: Roman Imankulov Date: Tue, 13 Oct 2015 11:18:48 +0000 (+0000) Subject: Fix UnicodeEncodeError on file encoding detection X-Git-Tag: 2.3.1~33^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f9b04a5fb2ee166ca75358ff574f00d1cd62a916;p=thirdparty%2Fbabel.git Fix UnicodeEncodeError on file encoding detection If the first line of a Python file contains non-ASCII characters, parse_encoding dies with "UnicodeEncodeError". These strings nonetheless can be valid in some scenarios (for example, Mako extractor uses babel.messages.extract.extract_python), and it makes more sense to ignore this exception and return None. --- diff --git a/babel/util.py b/babel/util.py index 1849e8a0..54f7d2db 100644 --- a/babel/util.py +++ b/babel/util.py @@ -65,7 +65,7 @@ def parse_encoding(fp): try: import parser parser.suite(line1.decode('latin-1')) - except (ImportError, SyntaxError): + except (ImportError, SyntaxError, UnicodeEncodeError): # Either it's a real syntax error, in which case the source is # not valid python source, or line2 is a continuation of line1, # in which case we don't want to scan line2 for a magic diff --git a/tests/test_util.py b/tests/test_util.py index bb2bfdb4..d4b4be52 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -14,6 +14,7 @@ import unittest from babel import util +from babel._compat import BytesIO def test_distinct(): @@ -52,3 +53,17 @@ class FixedOffsetTimezoneTestCase(unittest.TestCase): def test_zone_positive_offset(self): self.assertEqual('Etc/GMT+330', util.FixedOffsetTimezone(330).zone) + +parse_encoding = lambda s: util.parse_encoding(BytesIO(s.encode('utf-8'))) + + +def test_parse_encoding_defined(): + assert parse_encoding(u'# coding: utf-8') == 'utf-8' + + +def test_parse_encoding_undefined(): + assert parse_encoding(u'') is None + + +def test_parse_encoding_non_ascii(): + assert parse_encoding(u'K\xf6ln') is None