#27364: Deprecate invalid escape strings in str/bytes.

author R David Murray <rdmurray@bitdance.com>

Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)

committer R David Murray <rdmurray@bitdance.com>

Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)
author R David Murray <rdmurray@bitdance.com>
Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)
committer R David Murray <rdmurray@bitdance.com>
Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst

index b3b71aff51dc8fae3229b13bd816358557867c70..48f20434f01f527d5fbd5f7240efb843bc0fa9ec 100644 (file)
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -560,6 +560,10 @@ is more easily recognized as broken.)  It is also important to note that the
  escape sequences only recognized in string literals fall into the category of
  unrecognized escapes for bytes literals.
  
+   .. versionchanged:: 3.6
+      Unrecognized escape sequences produce a DeprecationWarning.  In
+      some future version of Python they will be a SyntaxError.
+
  Even in a raw literal, quotes can be escaped with a backslash, but the
  backslash remains in the result; for example, ``r"\""`` is a valid string
  literal consisting of two characters: a backslash and a double quote; ``r"\"``
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst

index e53d48e7fa71ebc206a0bd09b67087fa240a178f..a76ac9d38e6d847523f95f5d6df1f5b8ff3a5ee8 100644 (file)
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -952,6 +952,11 @@ Deprecated features
    parameter will be dropped in a future Python release and likely earlier
    through third party tools. See :issue:`27919` for details.
  
+* A backslash-character pair that is not a valid escape sequence now generates
+  a DeprecationWarning.  Although this will eventually become a SyntaxError,
+  that will not be for several Python releases.  (Contributed by Emanuel Barry
+  in :issue:`27364`.)
+
  
  Deprecated Python behavior
  --------------------------
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py

index 1af552405c2296bd1c058f508f3fdfb9b234c6e1..4d91a07868f6fc36f83e0a9a8499837b9df225dc 100644 (file)
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1175,7 +1175,7 @@ class EscapeDecodeTest(unittest.TestCase):
          check(b"[\\\n]", b"[]")
          check(br'[\"]', b'["]')
          check(br"[\']", b"[']")
-        check(br"[\\]", br"[\]")
+        check(br"[\\]", b"[\\]")
          check(br"[\a]", b"[\x07]")
          check(br"[\b]", b"[\x08]")
          check(br"[\t]", b"[\x09]")
@@ -1184,7 +1184,6 @@ class EscapeDecodeTest(unittest.TestCase):
          check(br"[\f]", b"[\x0c]")
          check(br"[\r]", b"[\x0d]")
          check(br"[\7]", b"[\x07]")
-        check(br"[\8]", br"[\8]")
          check(br"[\78]", b"[\x078]")
          check(br"[\41]", b"[!]")
          check(br"[\418]", b"[!8]")
@@ -1192,12 +1191,18 @@ class EscapeDecodeTest(unittest.TestCase):
          check(br"[\1010]", b"[A0]")
          check(br"[\501]", b"[A]")
          check(br"[\x41]", b"[A]")
-        check(br"[\X41]", br"[\X41]")
          check(br"[\x410]", b"[A0]")
-        for b in range(256):
-            if b not in b'\n"\'\\abtnvfr01234567x':
-                b = bytes([b])
-                check(b'\\' + b, b'\\' + b)
+        for i in range(97, 123):
+            b = bytes([i])
+            if b not in b'abfnrtvx':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b, b"\\" + b)
+            with self.assertWarns(DeprecationWarning):
+                check(b"\\" + b.upper(), b"\\" + b.upper())
+        with self.assertWarns(DeprecationWarning):
+            check(br"\8", b"\\8")
+        with self.assertWarns(DeprecationWarning):
+            check(br"\9", b"\\9")
  
      def test_errors(self):
          decode = codecs.escape_decode
@@ -2448,7 +2453,6 @@ class UnicodeEscapeTest(unittest.TestCase):
          check(br"[\f]", "[\x0c]")
          check(br"[\r]", "[\x0d]")
          check(br"[\7]", "[\x07]")
-        check(br"[\8]", r"[\8]")
          check(br"[\78]", "[\x078]")
          check(br"[\41]", "[!]")
          check(br"[\418]", "[!8]")
@@ -2458,9 +2462,18 @@ class UnicodeEscapeTest(unittest.TestCase):
          check(br"[\x410]", "[A0]")
          check(br"\u20ac", "\u20ac")
          check(br"\U0001d120", "\U0001d120")
-        for b in range(256):
-            if b not in b'\n"\'\\abtnvfr01234567xuUN':
-                check(b'\\' + bytes([b]), '\\' + chr(b))
+        for i in range(97, 123):
+            b = bytes([i])
+            if b not in b'abfnrtuvx':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b, "\\" + chr(i))
+            if b.upper() not in b'UN':
+                with self.assertWarns(DeprecationWarning):
+                    check(b"\\" + b.upper(), "\\" + chr(i-32))
+        with self.assertWarns(DeprecationWarning):
+            check(br"\8", "\\8")
+        with self.assertWarns(DeprecationWarning):
+            check(br"\9", "\\9")
  
      def test_decode_errors(self):
          decode = codecs.unicode_escape_decode
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 9ab624e6fc5db6527764f03dca70b0f4e04e0d53..2684b940ef5c7e626acd20d318e313e51bd30623 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -10,6 +10,7 @@ import codecs
  import itertools
  import operator
  import struct
+import string
  import sys
  import unittest
  import warnings
@@ -2752,6 +2753,12 @@ class UnicodeTest(string_tests.CommonTest,
          support.check_free_after_iterating(self, iter, str)
          support.check_free_after_iterating(self, reversed, str)
  
+    def test_invalid_sequences(self):
+        for letter in string.ascii_letters + "89": # 0-7 are octal escapes
+            if letter in "abfnrtuvxNU":
+                continue
+            with self.assertWarns(DeprecationWarning):
+                eval(r"'\%s'" % letter)
  
  class StringModuleTest(unittest.TestCase):
      def test_formatter_parser(self):
diff --git a/Misc/NEWS b/Misc/NEWS

index a55400ff5dc90799ce1a7c500e946174888b810a..8f1b724f674792f01de709c5f2810e85b8829c68 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
  Core and Builtins
  -----------------
  
+- Issue #27364: A backslash-character pair that is not a valid escape sequence
+  now generates a DeprecationWarning.
+
  - Issue #27350: `dict` implementation is changed like PyPy. It is more compact
    and preserves insertion order.
    (Concept developed by Raymond Hettinger and patch by Inada Naoki.)
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c

index b0d9b398252a41cdf4d0f932ac047fe53de3f2cc..6e7c4fa1886aa99c7ecf7c254245688f2c5826de 100644 (file)
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1207,8 +1207,9 @@ PyObject *PyBytes_DecodeEscape(const char *s,
              break;
  
          default:
+            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0)
+                goto failed;
              *p++ = '\\';
-            s--;
              goto non_esc; /* an arbitrary number of unescaped
                               UTF-8 bytes may follow. */
          }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 7979eec84562f4452f3100aac08f88dd4dc05c56..e0c3bfecdd8372a5948d2148de0021c85b3090ee 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6065,6 +6065,9 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
              goto error;
  
          default:
+            if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+                                 "invalid escape sequence '\\%c'", c) < 0)
+                goto onError;
              WRITE_ASCII_CHAR('\\');
              WRITE_CHAR(c);
              continue;
author	R David Murray <rdmurray@bitdance.com>
	Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)
committer	R David Murray <rdmurray@bitdance.com>
	Thu, 8 Sep 2016 19:34:08 +0000 (15:34 -0400)
Doc/reference/lexical_analysis.rst		patch \| blob \| blame \| history
Doc/whatsnew/3.6.rst		patch \| blob \| blame \| history
Lib/test/test_codecs.py		patch \| blob \| blame \| history
Lib/test/test_unicode.py		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history
Objects/bytesobject.c		patch \| blob \| blame \| history
Objects/unicodeobject.c		patch \| blob \| blame \| history