From f9b04a5fb2ee166ca75358ff574f00d1cd62a916 Mon Sep 17 00:00:00 2001 From: Roman Imankulov Date: Tue, 13 Oct 2015 11:18:48 +0000 Subject: [PATCH] Fix UnicodeEncodeError on file encoding detection If the first line of a Python file contains non-ASCII characters, parse_encoding dies with "UnicodeEncodeError". Such lines can nonetheless be valid in some scenarios (for example, Mako extractor uses babel.messages.extract.extract_python), and it makes more sense to ignore this exception and return None. --- babel/util.py | 2 +- tests/test_util.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/babel/util.py b/babel/util.py index 1849e8a0..54f7d2db 100644 --- a/babel/util.py +++ b/babel/util.py @@ -65,7 +65,7 @@ def parse_encoding(fp): try: import parser parser.suite(line1.decode('latin-1')) - except (ImportError, SyntaxError): + except (ImportError, SyntaxError, UnicodeEncodeError): # Either it's a real syntax error, in which case the source is # not valid python source, or line2 is a continuation of line1, # in which case we don't want to scan line2 for a magic diff --git a/tests/test_util.py b/tests/test_util.py index bb2bfdb4..d4b4be52 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -14,6 +14,7 @@ import unittest from babel import util +from babel._compat import BytesIO def test_distinct(): @@ -52,3 +53,17 @@ class FixedOffsetTimezoneTestCase(unittest.TestCase): def test_zone_positive_offset(self): self.assertEqual('Etc/GMT+330', util.FixedOffsetTimezone(330).zone) + +parse_encoding = lambda s: util.parse_encoding(BytesIO(s.encode('utf-8'))) + + +def test_parse_encoding_defined(): + assert parse_encoding(u'# coding: utf-8') == 'utf-8' + + +def test_parse_encoding_undefined(): + assert parse_encoding(u'') is None + + +def test_parse_encoding_non_ascii(): + assert parse_encoding(u'K\xf6ln') is None -- 2.47.2