git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
#27364: Deprecate invalid escape strings in str/bytes.
author: R David Murray <rdmurray@bitdance.com>
Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)
committer: R David Murray <rdmurray@bitdance.com>
Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)
Patch by Emanuel Barry, reviewed by Serhiy Storchaka and Martin Panter.

Doc/reference/lexical_analysis.rst
Doc/whatsnew/3.6.rst
Lib/test/test_codecs.py
Lib/test/test_unicode.py
Misc/NEWS
Objects/bytesobject.c
Objects/unicodeobject.c

index b3b71aff51dc8fae3229b13bd816358557867c70..48f20434f01f527d5fbd5f7240efb843bc0fa9ec 100644 (file)
@@ -560,6 +560,10 @@ is more easily recognized as broken.)  It is also important to note that the
 escape sequences only recognized in string literals fall into the category of
 unrecognized escapes for bytes literals.
 
+   .. versionchanged:: 3.6
+      Unrecognized escape sequences produce a DeprecationWarning.  In
+      some future version of Python they will be a SyntaxError.
+
 Even in a raw literal, quotes can be escaped with a backslash, but the
 backslash remains in the result; for example, ``r"\""`` is a valid string
 literal consisting of two characters: a backslash and a double quote; ``r"\"``
index e53d48e7fa71ebc206a0bd09b67087fa240a178f..a76ac9d38e6d847523f95f5d6df1f5b8ff3a5ee8 100644 (file)
@@ -952,6 +952,11 @@ Deprecated features
   parameter will be dropped in a future Python release and likely earlier
   through third party tools. See :issue:`27919` for details.
 
+* A backslash-character pair that is not a valid escape sequence now generates
+  a DeprecationWarning.  Although this will eventually become a SyntaxError,
+  that will not be for several Python releases.  (Contributed by Emanuel Barry
+  in :issue:`27364`.)
+
 
 Deprecated Python behavior
 --------------------------
index 1af552405c2296bd1c058f508f3fdfb9b234c6e1..4d91a07868f6fc36f83e0a9a8499837b9df225dc 100644 (file)
@@ -1175,7 +1175,7 @@ class EscapeDecodeTest(unittest.TestCase):
         check(b"[\\\n]", b"[]")
         check(br'[\"]', b'["]')
         check(br"[\']", b"[']")
-        check(br"[\\]", br"[\]")
+        check(br"[\\]", b"[\\]")
         check(br"[\a]", b"[\x07]")
         check(br"[\b]", b"[\x08]")
         check(br"[\t]", b"[\x09]")
@@ -1184,7 +1184,6 @@ class EscapeDecodeTest(unittest.TestCase):
         check(br"[\f]", b"[\x0c]")
         check(br"[\r]", b"[\x0d]")
         check(br"[\7]", b"[\x07]")
-        check(br"[\8]", br"[\8]")
         check(br"[\78]", b"[\x078]")
         check(br"[\41]", b"[!]")
         check(br"[\418]", b"[!8]")
@@ -1192,12 +1191,18 @@ class EscapeDecodeTest(unittest.TestCase):
         check(br"[\1010]", b"[A0]")
         check(br"[\501]", b"[A]")
         check(br"[\x41]", b"[A]")
-        check(br"[\X41]", br"[\X41]")
         check(br"[\x410]", b"[A0]")
-        for b in range(256):
-            if b not in b'\n"\'\\abtnvfr01234567x':
-                b = bytes([b])
-                check(b'\\' + b, b'\\' + b)
+        for i in range(97, 123):
+            b = bytes([i])
+            if b not in b'abfnrtvx':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b, b"\\" + b)
+            with self.assertWarns(DeprecationWarning):
+                check(b"\\" + b.upper(), b"\\" + b.upper())
+        with self.assertWarns(DeprecationWarning):
+            check(br"\8", b"\\8")
+        with self.assertWarns(DeprecationWarning):
+            check(br"\9", b"\\9")
 
     def test_errors(self):
         decode = codecs.escape_decode
@@ -2448,7 +2453,6 @@ class UnicodeEscapeTest(unittest.TestCase):
         check(br"[\f]", "[\x0c]")
         check(br"[\r]", "[\x0d]")
         check(br"[\7]", "[\x07]")
-        check(br"[\8]", r"[\8]")
         check(br"[\78]", "[\x078]")
         check(br"[\41]", "[!]")
         check(br"[\418]", "[!8]")
@@ -2458,9 +2462,18 @@ class UnicodeEscapeTest(unittest.TestCase):
         check(br"[\x410]", "[A0]")
         check(br"\u20ac", "\u20ac")
         check(br"\U0001d120", "\U0001d120")
-        for b in range(256):
-            if b not in b'\n"\'\\abtnvfr01234567xuUN':
-                check(b'\\' + bytes([b]), '\\' + chr(b))
+        for i in range(97, 123):
+            b = bytes([i])
+            if b not in b'abfnrtuvx':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b, "\\" + chr(i))
+            if b.upper() not in b'UN':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b.upper(), "\\" + chr(i-32))
+        with self.assertWarns(DeprecationWarning):
+            check(br"\8", "\\8")
+        with self.assertWarns(DeprecationWarning):
+            check(br"\9", "\\9")
 
     def test_decode_errors(self):
         decode = codecs.unicode_escape_decode
index 9ab624e6fc5db6527764f03dca70b0f4e04e0d53..2684b940ef5c7e626acd20d318e313e51bd30623 100644 (file)
@@ -10,6 +10,7 @@ import codecs
 import itertools
 import operator
 import struct
+import string
 import sys
 import unittest
 import warnings
@@ -2752,6 +2753,12 @@ class UnicodeTest(string_tests.CommonTest,
         support.check_free_after_iterating(self, iter, str)
         support.check_free_after_iterating(self, reversed, str)
 
+    def test_invalid_sequences(self):
+        for letter in string.ascii_letters + "89": # 0-7 are octal escapes
+            if letter in "abfnrtuvxNU":
+                continue
+            with self.assertWarns(DeprecationWarning):
+                eval(r"'\%s'" % letter)
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
index a55400ff5dc90799ce1a7c500e946174888b810a..8f1b724f674792f01de709c5f2810e85b8829c68 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
 Core and Builtins
 -----------------
 
+- Issue #27364: A backslash-character pair that is not a valid escape sequence
+  now generates a DeprecationWarning.
+
 - Issue #27350: `dict` implementation is changed like PyPy. It is more compact
   and preserves insertion order.
   (Concept developed by Raymond Hettinger and patch by Inada Naoki.)
index b0d9b398252a41cdf4d0f932ac047fe53de3f2cc..6e7c4fa1886aa99c7ecf7c254245688f2c5826de 100644 (file)
@@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
             break;
 
         default:
+            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
+                goto failed;
             *p++ = '\\';
-            s--;
             goto non_esc; /* an arbitrary number of unescaped
                              UTF-8 bytes may follow. */
         }
index 7979eec84562f4452f3100aac08f88dd4dc05c56..e0c3bfecdd8372a5948d2148de0021c85b3090ee 100644 (file)
@@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
             goto error;
 
         default:
+            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                                 "invalid escape sequence '\\%c'", c) < 0)
+                goto onError;
             WRITE_ASCII_CHAR('\\');
             WRITE_CHAR(c);
             continue;