from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
from babel.util import parse_encoding, pathmatch, relpath
+from babel._compat import PY2, text_type
from textwrap import dedent
__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
            # An empty string msgid isn't valid, emit a warning
            where = '%s:%i' % (hasattr(fileobj, 'name') and \
                                   fileobj.name or '(unknown)', lineno)
-            print >> sys.stderr, empty_msgid_warning % where
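+            # "print >> stream" is Python 2-only syntax; sys.stderr.write()
+            # behaves the same on Python 2 and 3.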
+            sys.stderr.write((empty_msgid_warning % where) + '\n')
            continue
        messages = tuple(msgs)
    encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
-    tokens = generate_tokens(fileobj.readline)
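+    # On Python 3, tokenize.generate_tokens() expects a readline callable that
+    # returns text, but the file object yields bytes, so decode each line with
+    # the detected source encoding.  On Python 2, tokenize consumes byte
+    # strings directly.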
+    if PY2:
+        next_line = fileobj.readline
+    else:
+        next_line = lambda: fileobj.readline().decode(encoding)
+
+    tokens = generate_tokens(next_line)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
-            value = value.decode(encoding)[1:].strip()
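+            # Under Python 2 the token value is still a byte string and needs
+            # decoding; under Python 3 the readline wrapper above already
+            # yielded text.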
+            if PY2:
+                value = value.decode(encoding)
+            value = value[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
            # encoding
            # https://sourceforge.net/tracker/?func=detail&atid=355470&
            # aid=617979&group_id=5470
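+            # str() keeps the synthesized source, and therefore its coding
+            # declaration, a native str under both Python 2 and 3.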
-            value = eval('# coding=%s\n%s' % (encoding, value),
+            value = eval('# coding=%s\n%s' % (str(encoding), value),
                         {'__builtins__':{}}, {})
-            if isinstance(value, str):
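+            # Only Python 2 can hand back a byte string here (from a plain,
+            # non-u'' literal); Python 3 string literals always evaluate to text.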
+            if PY2 and not isinstance(value, text_type):
                value = value.decode(encoding)
            buf.append(value)
        elif tok == OP and value == ',':
        if not m:
            try:
                import parser
-                parser.suite(line1)
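+                # parser.suite() accepts only text on Python 3; latin-1 maps
+                # every byte value, so the decode cannot fail and this remains
+                # a pure syntax check.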
+                parser.suite(line1.decode('latin-1'))
            except (ImportError, SyntaxError):
                # Either it's a real syntax error, in which case the source is
                # not valid python source, or line2 is a continuation of line1,
"byte-order-mark and a magic encoding comment")
return 'utf_8'
elif m:
- return m.group(1)
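+            # The magic-comment regex matched against raw bytes, so decode the
+            # captured encoding name to give callers a text string.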
+            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        self.assertEqual([u'NOTE: hello'], messages[0][3])
    def test_utf8_message_with_utf8_bom(self):
-        buf = StringIO(codecs.BOM_UTF8 + u"""
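+        # The extractor now reads raw bytes and detects the encoding itself,
+        # so the test buffer has to be a BytesIO rather than a StringIO.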
+        buf = BytesIO(codecs.BOM_UTF8 + u"""
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
        self.assertEqual([u'NOTE: hello'], messages[0][3])
    def test_utf8_raw_strings_match_unicode_strings(self):
-        buf = StringIO(codecs.BOM_UTF8 + u"""
+        buf = BytesIO(codecs.BOM_UTF8 + u"""
msg = _('Bonjour à tous')
msgu = _(u'Bonjour à tous')
""".encode('utf-8'))
                          (10, (u'Page', u'Pages'), [], None)], messages)
    def test_invalid_extract_method(self):
-        buf = StringIO('')
+        buf = BytesIO(b'')
        self.assertRaises(ValueError, list, extract.extract('spam', buf))
    def test_different_signatures(self):