git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
kill py_compile's homemade encoding detection in favor of tokenize.detect_encoding...
author: Benjamin Peterson <benjamin@python.org>
Thu, 18 Mar 2010 22:37:38 +0000 (22:37 +0000)
committer: Benjamin Peterson <benjamin@python.org>
Thu, 18 Mar 2010 22:37:38 +0000 (22:37 +0000)
Lib/py_compile.py
Misc/NEWS

index 10af1bb58eaef1429fa7f35166d10d7214c1f38a..859136550d362349a80fa3b3f646c6edb31ebad0 100644 (file)
@@ -7,8 +7,8 @@ import builtins
 import imp
 import marshal
 import os
-import re
 import sys
+import tokenize
 import traceback
 
 MAGIC = imp.get_magic()
@@ -69,21 +69,6 @@ def wr_long(f, x):
                    (x >> 16) & 0xff,
                    (x >> 24) & 0xff]))
 
-def read_encoding(file, default):
-    """Read the first two lines of the file looking for coding: xyzzy."""
-    f = open(file, "rb")
-    try:
-        for i in range(2):
-            line = f.readline()
-            if not line:
-                break
-            m = re.match(br".*\bcoding:\s*(\S+)\b", line)
-            if m:
-                return m.group(1).decode("ascii")
-        return default
-    finally:
-        f.close()
-
 def compile(file, cfile=None, dfile=None, doraise=False):
     """Byte-compile one Python source file to Python bytecode.
 
@@ -119,7 +104,8 @@ def compile(file, cfile=None, dfile=None, doraise=False):
     directories).
 
     """
-    encoding = read_encoding(file, "utf-8")
+    with open(file, "rb") as f:
+        encoding = tokenize.detect_encoding(f.readline)[0]
     with open(file, encoding=encoding) as f:
         try:
             timestamp = int(os.fstat(f.fileno()).st_mtime)
index f1b068b47dc2b95d7845dca61a6ee989b9130923..5484d3941b7912ad1534261b7158a6306cc1365b 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -283,6 +283,8 @@ C-API
 Library
 -------
 
+- Issue #8168: py_compile now handles files with UTF-8 BOMs.
+
 - ``tokenize.detect_encoding`` now returns ``'utf-8-sig'`` when a UTF-8 BOM is
   detected.