from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
from babel.util import parse_encoding, pathmatch, relpath
+from babel._compat import PY2, text_type
from textwrap import dedent
__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
            # An empty string msgid isn't valid, emit a warning
            where = '%s:%i' % (hasattr(fileobj, 'name') and \
                                   fileobj.name or '(unknown)', lineno)
-            print >> sys.stderr, empty_msgid_warning % where
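+            # "print >> stream" is Python 2-only syntax; sys.stderr.write()
+            # behaves the same on Python 2 and 3.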
+            sys.stderr.write((empty_msgid_warning % where) + '\n')
            continue
        messages = tuple(msgs)
    encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
-    tokens = generate_tokens(fileobj.readline)
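+    # On Python 3, tokenize.generate_tokens() expects a readline callable that
+    # returns text, but the file object yields bytes, so decode each line with
+    # the detected source encoding.  On Python 2, tokenize consumes byte
+    # strings directly.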
+    if PY2:
+        next_line = fileobj.readline
+    else:
+        next_line = lambda: fileobj.readline().decode(encoding)
+
+    tokens = generate_tokens(next_line)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
-            value = value.decode(encoding)[1:].strip()
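+            # Under Python 2 the token value is still a byte string and needs
+            # decoding; under Python 3 the readline wrapper above already
+            # yielded text.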
+            if PY2:
+                value = value.decode(encoding)
+            value = value[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
            # encoding
            # https://sourceforge.net/tracker/?func=detail&atid=355470&
            # aid=617979&group_id=5470
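+            # str() keeps the synthesized source, and therefore its coding
+            # declaration, a native str under both Python 2 and 3.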
-            value = eval('# coding=%s\n%s' % (encoding, value),
+            value = eval('# coding=%s\n%s' % (str(encoding), value),
                         {'__builtins__':{}}, {})
-            if isinstance(value, str):
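+            # Only Python 2 can hand back a byte string here (from a plain,
+            # non-u'' literal); Python 3 string literals always evaluate to text.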
+            if PY2 and not isinstance(value, text_type):
                value = value.decode(encoding)
            buf.append(value)
        elif tok == OP and value == ',':
        if not m:
            try:
                import parser
-                parser.suite(line1)
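+                # parser.suite() accepts only text on Python 3; latin-1 maps
+                # every byte value, so the decode cannot fail and this remains
+                # a pure syntax check.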
+                parser.suite(line1.decode('latin-1'))
            except (ImportError, SyntaxError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
"byte-order-mark and a magic encoding comment")
return 'utf_8'
elif m:
- return m.group(1)
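+            # The magic-comment regex matched against raw bytes, so decode the
+            # captured encoding name to give callers a text string.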
+            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        self.assertEqual([u'NOTE: hello'], messages[0][3])
    def test_utf8_message_with_utf8_bom(self):
-        buf = StringIO(codecs.BOM_UTF8 + u"""
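+        # The extractor now reads raw bytes and detects the encoding itself,
+        # so the test buffer has to be a BytesIO rather than a StringIO.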
+        buf = BytesIO(codecs.BOM_UTF8 + u"""
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
        self.assertEqual([u'NOTE: hello'], messages[0][3])
    def test_utf8_raw_strings_match_unicode_strings(self):
-        buf = StringIO(codecs.BOM_UTF8 + u"""
+        buf = BytesIO(codecs.BOM_UTF8 + u"""
msg = _('Bonjour à tous')
msgu = _(u'Bonjour à tous')
""".encode('utf-8'))
                          (10, (u'Page', u'Pages'), [], None)], messages)
    def test_invalid_extract_method(self):
-        buf = StringIO('')
+        buf = BytesIO(b'')
        self.assertRaises(ValueError, list, extract.extract('spam', buf))
    def test_different_signatures(self):