]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-77102: site: try utf-8 and fallback to locale encoding when reading .pth file...
author: Inada Naoki <songofacandy@gmail.com>
Tue, 16 Apr 2024 03:56:16 +0000 (12:56 +0900)
committer: GitHub <noreply@github.com>
Tue, 16 Apr 2024 03:56:16 +0000 (12:56 +0900)
Doc/library/site.rst
Doc/whatsnew/3.13.rst
Lib/site.py
Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst [new file with mode: 0644]

index 2dc9fb09d727e2cb7a12eda535fe04690e019765..e52bbd32d4d493c3af3d573feda9edb7ae4f90a7 100644 (file)
@@ -74,6 +74,10 @@ with ``import`` (followed by space or tab) are executed.
    Limiting a code chunk to a single line is a deliberate measure
    to discourage putting anything more complex here.
 
+.. versionchanged:: 3.13
+   The :file:`.pth` files are now decoded as UTF-8 first, falling back to the
+   :term:`locale encoding` if UTF-8 decoding fails.
+
 .. index::
    single: package
    triple: path; configuration; file
index 83c528814c967ec5158ca9258773751ab60f4d8a..f957698ecb06d8ff07300b6a2b57af8ecbc4e722 100644 (file)
@@ -630,6 +630,13 @@ re
 * Rename :exc:`!re.error` to :exc:`re.PatternError` for improved clarity.
   :exc:`!re.error` is kept for backward compatibility.
 
+site
+----
+
+* :file:`.pth` files are now decoded by UTF-8 first, and then by the
+  :term:`locale encoding` if the UTF-8 decoding fails.
+  (Contributed by Inada Naoki in :gh:`117802`.)
+
 sqlite3
 -------
 
index 162bbec4f8f41b9446b6e82447131cc713804433..93af9c453ac7bb26e195465eae086b04a121ce8f 100644 (file)
@@ -179,35 +179,44 @@ def addpackage(sitedir, name, known_paths):
         return
     _trace(f"Processing .pth file: {fullname!r}")
     try:
-        # locale encoding is not ideal especially on Windows. But we have used
-        # it for a long time. setuptools uses the locale encoding too.
-        f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
+        with io.open_code(fullname) as f:
+            pth_content = f.read()
     except OSError:
         return
-    with f:
-        for n, line in enumerate(f):
-            if line.startswith("#"):
-                continue
-            if line.strip() == "":
+
+    try:
+        pth_content = pth_content.decode()
+    except UnicodeDecodeError:
+        # Fall back to the locale encoding for backward compatibility.
+        # This fallback will be deprecated in the future.
+        import locale
+        pth_content = pth_content.decode(locale.getencoding())
+        _trace(f"Cannot read {fullname!r} as UTF-8. "
+               f"Using fallback encoding {locale.getencoding()!r}")
+
+    for n, line in enumerate(pth_content.splitlines(), 1):
+        if line.startswith("#"):
+            continue
+        if line.strip() == "":
+            continue
+        try:
+            if line.startswith(("import ", "import\t")):
+                exec(line)
                 continue
-            try:
-                if line.startswith(("import ", "import\t")):
-                    exec(line)
-                    continue
-                line = line.rstrip()
-                dir, dircase = makepath(sitedir, line)
-                if not dircase in known_paths and os.path.exists(dir):
-                    sys.path.append(dir)
-                    known_paths.add(dircase)
-            except Exception as exc:
-                print("Error processing line {:d} of {}:\n".format(n+1, fullname),
-                      file=sys.stderr)
-                import traceback
-                for record in traceback.format_exception(exc):
-                    for line in record.splitlines():
-                        print('  '+line, file=sys.stderr)
-                print("\nRemainder of file ignored", file=sys.stderr)
-                break
+            line = line.rstrip()
+            dir, dircase = makepath(sitedir, line)
+            if dircase not in known_paths and os.path.exists(dir):
+                sys.path.append(dir)
+                known_paths.add(dircase)
+        except Exception as exc:
+            print(f"Error processing line {n:d} of {fullname}:\n",
+                  file=sys.stderr)
+            import traceback
+            for record in traceback.format_exception(exc):
+                for line in record.splitlines():
+                    print('  '+line, file=sys.stderr)
+            print("\nRemainder of file ignored", file=sys.stderr)
+            break
     if reset:
         known_paths = None
     return known_paths
diff --git a/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst
new file mode 100644 (file)
index 0000000..6f91251
--- /dev/null
@@ -0,0 +1,3 @@
+The :mod:`site` module now decodes ``.pth`` files as UTF-8 first, falling back
+to the :term:`locale encoding` if a ``UnicodeDecodeError`` is raised.
+Previously, only the locale encoding was supported.