Fix 're' to work on bytes. It could do with a few more tests, though.

author Thomas Wouters <thomas@python.org>

Tue, 18 Mar 2008 20:19:54 +0000 (20:19 +0000)

committer Thomas Wouters <thomas@python.org>

Tue, 18 Mar 2008 20:19:54 +0000 (20:19 +0000)
author Thomas Wouters <thomas@python.org>
Tue, 18 Mar 2008 20:19:54 +0000 (20:19 +0000)
committer Thomas Wouters <thomas@python.org>
Tue, 18 Mar 2008 20:19:54 +0000 (20:19 +0000)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py

index f3b415d43f222cfe69bc0a3a729129b23f80c798..4f624161744c8f8c54ec1ce593a67f07b13259db 100644 (file)
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -472,7 +472,7 @@ def _compile_info(code, pattern, flags):
      code[skip] = len(code) - skip
  
  def isstring(obj):
-    return isinstance(obj, str)
+    return isinstance(obj, (str, bytes))
  
  def _code(p, flags):
  
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py

index a04c3432a6d528397b2c70854783e0e5bdf6fefc..6e7002457b611e27c461c0d5c10d6a51230316b7 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -192,8 +192,8 @@ class Tokenizer:
          char = self.string[self.index:self.index+1]
          # Special case for the str8, since indexing returns an integer
          # XXX This is only needed for test_bug_926075 in test_re.py
-        if isinstance(self.string, bytes):
-            char = chr(char)
+        if char and isinstance(char, bytes):
+            char = chr(char[0])
          if char == "\\":
              try:
                  c = self.string[self.index + 1]
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py

index 28e508c7d889adb1a56fd90f55f937db5b1f3c27..7aa69969c189098283fedf5d5b4f83376721dffe 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -83,33 +83,22 @@ class ReTests(unittest.TestCase):
          self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                           'abc\ndef\n')
  
-# This test makes no sense until re supports bytes, and should then probably
-# test for the *in*ability to mix bytes and str this way :)
-#
-#    def test_bug_1140(self):
-#        # re.sub(x, y, b'') should return b'', not '', and
-#        # re.sub(x, y, '') should return '', not b''.
-#        # Also:
-#        # re.sub(x, y, str(x)) should return str(y), and
-#        # re.sub(x, y, bytes(x)) should return
-#        #     str(y) if isinstance(y, str) else unicode(y).
-#        for x in 'x', u'x':
-#            for y in 'y', u'y':
-#                z = re.sub(x, y, u'')
-#                self.assertEqual(z, u'')
-#                self.assertEqual(type(z), unicode)
-#                #
-#                z = re.sub(x, y, '')
-#                self.assertEqual(z, '')
-#                self.assertEqual(type(z), str)
-#                #
-#                z = re.sub(x, y, unicode(x))
-#                self.assertEqual(z, y)
-#                self.assertEqual(type(z), unicode)
-#                #
-#                z = re.sub(x, y, str(x))
-#                self.assertEqual(z, y)
-#                self.assertEqual(type(z), type(y))
+    def test_bug_1140(self):
+        # re.sub(x, y, b'') should return b'', not '', and
+        # re.sub(x, y, '') should return '', not b''.
+        # Also:
+        # re.sub(x, y, str(x)) should return str(y), and
+        # re.sub(x, y, bytes(x)) should return
+        #     str(y) if isinstance(y, str) else unicode(y).
+        for x in 'x',  b'x':
+            for y in 'y', b'y':
+                z = re.sub(x, y, b'')
+                self.assertEqual(z, b'')
+                self.assertEqual(type(z), bytes)
+                #
+                z = re.sub(x, y, '')
+                self.assertEqual(z, '')
+                self.assertEqual(type(z), str)
  
      def test_bug_1661(self):
          # Verify that flags do not get silently ignored with compiled patterns
@@ -599,10 +588,9 @@ class ReTests(unittest.TestCase):
          self.assertEqual([item.group(0) for item in iter],
                           [":", "::", ":::"])
  
-    # XXX This needs to be restored for str vs. bytes.
-##     def test_bug_926075(self):
-##         self.assert_(re.compile('bug_926075') is not
-##                      re.compile(str8('bug_926075')))
+    def test_bug_926075(self):
+        self.assert_(re.compile('bug_926075') is not
+                     re.compile(b'bug_926075'))
  
      def test_bug_931848(self):
          pattern = eval('"[\u002E\u3002\uFF0E\uFF61]"')
author	Thomas Wouters <thomas@python.org>
	Tue, 18 Mar 2008 20:19:54 +0000 (20:19 +0000)
committer	Thomas Wouters <thomas@python.org>
	Tue, 18 Mar 2008 20:19:54 +0000 (20:19 +0000)
Lib/sre_compile.py		patch \| blob \| blame \| history
Lib/sre_parse.py		patch \| blob \| blame \| history
Lib/test/test_re.py		patch \| blob \| blame \| history