Issue #26581: Use the first coding cookie on a line, not the last one.

author Serhiy Storchaka <storchaka@gmail.com>
Sun, 20 Mar 2016 21:36:29 +0000 (23:36 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 20 Mar 2016 21:36:29 +0000 (23:36 +0200)
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py

index a4cc205c3072ec5e4007855669eca8149de5c1a4..84f39a2fee8e55d4aed872d0b066fe05d4157735 100644 (file)
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -62,7 +62,7 @@ locale_encoding = locale_encoding.lower()
  encoding = locale_encoding  ### KBK 07Sep07  This is used all over IDLE, check!
                              ### 'encoding' is used below in encode(), check!
  
-coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
  blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
  
  def coding_spec(data):
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py

index 1ff1c61ee2250a8114be4fb8e09d05134552baf7..d14db60f7da89ea4d63e9a1af1c30e70ac4aeb1b 100644 (file)
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -236,7 +236,7 @@ class Untokenizer:
                  startline = False
              toks_append(tokval)
  
-cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
  blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
  
  def _get_normal_name(orig_enc):
diff --git a/Lib/test/test_importlib/source/test_source_encoding.py b/Lib/test/test_importlib/source/test_source_encoding.py

index b604afb5ec882dbf6ac34f98a6e7e81f678bc2cd..1e0771b19debce6521a947e8c8a7e578cbe312d7 100644 (file)
--- a/Lib/test/test_importlib/source/test_source_encoding.py
+++ b/Lib/test/test_importlib/source/test_source_encoding.py
@@ -14,7 +14,7 @@ import unittest
  import warnings
  
  
-CODING_RE = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
  
  
  class EncodingTest:
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py

index 7979c820109c9c56fb2900fb6a570364ef987071..38734009c0085248de1b92841d787b79715415b6 100644 (file)
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -178,7 +178,7 @@ class AbstractSourceEncodingTest:
      def test_double_coding_same_line(self):
          src = (b'#coding:iso8859-15 coding:latin1\n'
                 b'print(ascii("\xc3\xa4"))\n')
-        self.check_script_output(src, br"'\xc3\xa4'")
+        self.check_script_output(src, br"'\xc3\u20ac'")
  
      def test_first_non_utf8_coding_line(self):
          src = (b'#coding:iso-8859-15 \xa4\n'
diff --git a/Lib/tokenize.py b/Lib/tokenize.py

index 9fd676c5b24b36ff8eadbfdc1b6e4c296af86940..b1d0c8326361939a45f62ba147c9214ae685470c 100644 (file)
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -33,7 +33,7 @@ import re
  import sys
  from token import *
  
-cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
+cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
  blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
  
  import token
diff --git a/Misc/NEWS b/Misc/NEWS

index bdcfebf856545390cd284de5d091220689be89d9..b5672c228baa9f21076604cef1f4e1ce750b991a 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ Release date: tba
  Core and Builtins
  -----------------
  
+- Issue #26581: If a coding cookie is specified multiple times on a line in a
+  Python source code file, only the first one is taken into account.
+
  - Issue #26464: Fix str.translate() when string is ASCII and first replacements
    removes character, but next replacement uses a non-ASCII character or a
    string longer than 1 character. Regression introduced in Python 3.5.0.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index 1cdbae20dc4bc69b5abc17a19db36fa86dd3f1a5..50ce2e88c969356c07be01c90c6a9fe9040d5a68 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -275,6 +275,7 @@ get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *t
                          return 0;
                  }
                  *spec = r;
+                break;
              }
          }
      }
diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py

index 5f3795e65754dbcfbf70c2a0028532628b5b75f8..6c16b1ce151852da1a54e7aedb515c0e5c898290 100755 (executable)
--- a/Tools/scripts/findnocoding.py
+++ b/Tools/scripts/findnocoding.py
@@ -32,7 +32,7 @@ except ImportError:
                           "no sophisticated Python source file search will be done.", file=sys.stderr)
  
  
-decl_re = re.compile(rb'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
  blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')
  
  def get_declaration(line):
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 20 Mar 2016 21:36:29 +0000 (23:36 +0200)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 20 Mar 2016 21:36:29 +0000 (23:36 +0200)
Lib/idlelib/IOBinding.py		patch \| blob \| blame \| history
Lib/lib2to3/pgen2/tokenize.py		patch \| blob \| blame \| history
Lib/test/test_importlib/source/test_source_encoding.py		patch \| blob \| blame \| history
Lib/test/test_source_encoding.py		patch \| blob \| blame \| history
Lib/tokenize.py		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history
Parser/tokenizer.c		patch \| blob \| blame \| history
Tools/scripts/findnocoding.py		patch \| blob \| blame \| history