Merged revisions 76230 via svnmerge from

author Benjamin Peterson <benjamin@python.org>

Fri, 13 Nov 2009 00:17:59 +0000 (00:17 +0000)

committer Benjamin Peterson <benjamin@python.org>

Fri, 13 Nov 2009 00:17:59 +0000 (00:17 +0000)
author Benjamin Peterson <benjamin@python.org>
Fri, 13 Nov 2009 00:17:59 +0000 (00:17 +0000)
committer Benjamin Peterson <benjamin@python.org>
Fri, 13 Nov 2009 00:17:59 +0000 (00:17 +0000)
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst

index 6008e97edb5c02b99e1fb90b879f165f80f2bfbb..7eb838a16a43bb88879156468aed4e8c965a9383 100644 (file)
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -176,11 +176,15 @@ are always available.  They are listed here in alphabetical order.
  
     .. note::
  
-      When compiling a string with multi-line statements, line endings must be
-      represented by a single newline character (``'\n'``), and the input must
-      be terminated by at least one newline character.  If line endings are
-      represented by ``'\r\n'``, use :meth:`str.replace` to change them into
-      ``'\n'``.
+      When compiling a string with multi-line statements in ``'single'`` or
+      ``'eval'`` mode, the input must be terminated by at least one newline
+      character.  This lets the :mod:`code` module tell incomplete statements
+      from complete ones.
+
+
+   .. versionchanged:: 3.2
+      Windows (``'\r\n'``) and old Mac (``'\r'``) newlines are now accepted.
+      Also, input in ``'exec'`` mode no longer has to end in a newline.
  
  
  .. function:: complex([real[, imag]])
diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py

index ad4412196552fe066ad7087e2c3c37b902c29c7b..80a73f3ad505c3b1ad82109147b6546297d54d3c 100644 (file)
--- a/Lib/test/test_codeop.py
+++ b/Lib/test/test_codeop.py
@@ -295,10 +295,6 @@ class CodeopTests(unittest.TestCase):
          self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
                               compile("a = 1\n", "def", 'single').co_filename)
  
-    def test_no_universal_newlines(self):
-        code = compile_command("'\rfoo\r'", symbol='eval')
-        self.assertEqual(eval(code), '\rfoo\r')
-
  
  def test_main():
      run_unittest(CodeopTests)
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py

index 32dd656e53adea331f44ebbfffbcfb5a89124c70..563a7eec63de59cbe54590e8f2cab01c16b8e43f 100644 (file)
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -5,6 +5,19 @@ from test import support
  
  class TestSpecifics(unittest.TestCase):
  
+    def test_no_ending_newline(self):
+        for source in ("hi", "hi\r"):  # neither source ends in "\n"
+            compile(source, "<test>", "exec")
+
+    def test_empty(self):
+        compile(source="", filename="<test>", mode="exec")
+
+    def test_other_newlines(self):
+        # Windows ("\r\n") and old Mac ("\r") line endings must both compile.
+        for source in ("\r\n", "\r", "hi\r\nstuff\r\ndef f():\n    pass\r",
+                       "this_is\rreally_old_mac\rdef f():\n    pass"):
+            compile(source, "<test>", "exec")
+
      def test_debug_assignment(self):
          # catch assignments to __debug__
          self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py

index 0ac49da338ca7fec8c1aa6179f1dd7b0eec549b9..d8df6a8616651de9159082ee3952081f74fe67ee 100644 (file)
--- a/Lib/test/test_parser.py
+++ b/Lib/test/test_parser.py
@@ -237,9 +237,9 @@ class RoundtripLegalSyntaxTestCase(unittest.TestCase):
              (14, '+', 2, 13),
              (2, '1', 2, 15),
              (4, '', 2, 16),
-            (6, '', 2, -1),
-            (4, '', 2, -1),
-            (0, '', 2, -1)],
+            (6, '', 3, -1),
+            (4, '', 3, -1),
+            (0, '', 3, -1)],
                           terminals)
  
      def test_extended_unpacking(self):
diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py

index 587b2fcc11adaaead47ac67dc645e20809f45562..8c1fbe770ac2e78749ec3c12c2a656985ce80da1 100644 (file)
--- a/Lib/test/test_pep263.py
+++ b/Lib/test/test_pep263.py
@@ -26,7 +26,7 @@ class PEP263Test(unittest.TestCase):
          try:
              compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
          except SyntaxError as v:
-            self.assertEquals(v.text, "print '\u5e74'")
+            self.assertEquals(v.text, "print '\u5e74'\n")
          else:
              self.fail()
  
diff --git a/Parser/parsetok.c b/Parser/parsetok.c

index 90a90a7e73d8cec9f23f670cfcff85109e508796..ff4ca70922f4092e4f24b64c0b2c8b97c2083797 100644 (file)
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
                           perrdetail *err_ret, int *flags)
  {
         struct tok_state *tok;
+       int exec_input = start == file_input;
  
         initerr(err_ret, filename);
  
         if (*flags & PyPARSE_IGNORE_COOKIE)
-               tok = PyTokenizer_FromUTF8(s);
+               tok = PyTokenizer_FromUTF8(s, exec_input);
         else
-               tok = PyTokenizer_FromString(s);
+               tok = PyTokenizer_FromString(s, exec_input);
         if (tok == NULL) {
                 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
                 return NULL;
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index daf18dc2be61fe7cec7af8844650216d4a221b14..e637cb37e5b1c2e4806ccfe9232edd1e395302b1 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -119,6 +119,7 @@ tok_new(void)
         tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
         tok->done = E_OK;
         tok->fp = NULL;
+       tok->input = NULL;
         tok->tabsize = TABSIZE;
         tok->indent = 0;
         tok->indstack[0] = 0;
@@ -145,6 +146,17 @@ tok_new(void)
         return tok;
  }
  
+/* Return a fresh NUL-terminated copy of S[0..LEN), or NULL on failure. */
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+       char *copy = (char *)PyMem_MALLOC(len + 1);
+       if (copy == NULL)
+               return NULL;
+       copy[len] = '\0';
+       return memcpy(copy, s, len);
+}
+
  #ifdef PGEN
  
  static char *
@@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
         return feof(tok->fp);
  }
  
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
  {
-       return str;
+       return new_string(str, strlen(str));
  }
  
  #else /* PGEN */
@@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
         return NULL;            /* as if it were EOF */
  }
  
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
-       char* result = (char *)PyMem_MALLOC(len + 1);
-       if (result != NULL) {
-               memcpy(result, s, len);
-               result[len] = '\0';
-       }
-       return result;
-}
  
  static char *
  get_normal_name(char *s)       /* for utf-8 and latin-1 */
@@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) {
         return utf8;
  }
  
+
+/* Return a new PyMem buffer holding S with "\r\n" and lone "\r" turned into
+   "\n"; for EXEC_INPUT the result is made to end with a newline.  On
+   allocation failure, set tok->done to E_NOMEM and return NULL. */
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+       int skip_next_lf = 0;
+       size_t length = strlen(s), used;
+       char *buf, *current, *shrunk;
+       char c;
+       /* +2: room for an appended newline and the terminating NUL. */
+       buf = PyMem_MALLOC(length + 2);
+       if (buf == NULL) {
+               tok->done = E_NOMEM;
+               return NULL;
+       }
+       for (current = buf; (c = *s++);) {
+               if (skip_next_lf) {
+                       /* Previous char was '\r': drop the '\n' of a "\r\n". */
+                       skip_next_lf = 0;
+                       if (c == '\n' && !(c = *s++))
+                               break;
+               }
+               if (c == '\r') {
+                       skip_next_lf = 1;
+                       c = '\n';
+               }
+               *current++ = c;
+       }
+       /* If this is exec input, add a newline to the end of the string if
+          there isn't one already.  Test the last byte written: *current
+          itself has not been stored to yet, so reading it is undefined. */
+       if (exec_input && (current == buf || current[-1] != '\n'))
+               *current++ = '\n';
+       *current = '\0';
+       used = (size_t)(current - buf);
+       /* Shrinking is optional; keep the original buffer if it fails. */
+       if (used < length && (shrunk = PyMem_REALLOC(buf, used + 1)) != NULL)
+               buf = shrunk;
+       return buf;
+}
+
  /* Decode a byte string STR for use as the buffer of TOK.
     Look for encoding declarations inside STR, and record them
     inside TOK.  */
  
  static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
  {
         PyObject* utf8 = NULL;
+       const char *str;
         const char *s;
         const char *newl[2] = {NULL, NULL};
         int lineno = 0;
+       tok->input = str = translate_newlines(input, single, tok);
+       if (str == NULL)
+               return NULL;
         tok->enc = NULL;
         tok->str = str;
         if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok)
  /* Set up tokenizer for string */
  
  struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
  {
         struct tok_state *tok = tok_new();
         if (tok == NULL)
                 return NULL;
-       str = (char *)decode_str(str, tok);
+       str = (char *)decode_str(str, exec_input, tok);
         if (str == NULL) {
                 PyTokenizer_Free(tok);
                 return NULL;
@@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str)
  }
  
  struct tok_state *
-PyTokenizer_FromUTF8(const char *str)
+PyTokenizer_FromUTF8(const char *str, int exec_input)
  {
         struct tok_state *tok = tok_new();
         if (tok == NULL)
                 return NULL;
+#ifndef PGEN
+       tok->input = str = translate_newlines(str, exec_input, tok);
+#endif
+       if (str == NULL) {
+               PyTokenizer_Free(tok);
+               return NULL;
+       }
         tok->decoding_state = STATE_RAW;
         tok->read_coding_spec = 1;
         tok->enc = NULL;
@@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str)
         return tok;
  }
  
-
  /* Set up tokenizer for file */
  
  struct tok_state *
@@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok)
  #endif
         if (tok->fp != NULL && tok->buf != NULL)
                 PyMem_FREE(tok->buf);
+       if (tok->input)
+               PyMem_FREE((char *)tok->input);
         PyMem_FREE(tok);
  }
  
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h

index e3328f1a5865defdf5a3a233aa369c88d5a28ce0..1a81e332edf0a5975f7c53556f284965cc414929 100644 (file)
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -58,10 +58,11 @@ struct tok_state {
  #endif
         const char* enc;        /* Encoding for the current str. */
         const char* str;
+       const char* input; /* Tokenizer's newline translated copy of the string. */
  };
  
-extern struct tok_state *PyTokenizer_FromString(const char *);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
  extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
                                               char *, char *);
  extern void PyTokenizer_Free(struct tok_state *);
author	Benjamin Peterson <benjamin@python.org>
	Fri, 13 Nov 2009 00:17:59 +0000 (00:17 +0000)
committer	Benjamin Peterson <benjamin@python.org>
	Fri, 13 Nov 2009 00:17:59 +0000 (00:17 +0000)
Doc/library/functions.rst		patch \| blob \| blame \| history
Lib/test/test_codeop.py		patch \| blob \| blame \| history
Lib/test/test_compile.py		patch \| blob \| blame \| history
Lib/test/test_parser.py		patch \| blob \| blame \| history
Lib/test/test_pep263.py		patch \| blob \| blame \| history
Parser/parsetok.c		patch \| blob \| blame \| history
Parser/tokenizer.c		patch \| blob \| blame \| history
Parser/tokenizer.h		patch \| blob \| blame \| history