git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-131020: py.exe launcher does not correctly detect a BOM when searching for the shebang
author: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
Mon, 10 Mar 2025 17:59:23 +0000 (18:59 +0100)
committer: GitHub <noreply@github.com>
Mon, 10 Mar 2025 17:59:23 +0000 (17:59 +0000)
Lib/test/test_launcher.py
Misc/NEWS.d/next/Windows/2025-03-09-19-57-35.gh-issue-131020._c87wf.rst [new file with mode: 0644]
PC/launcher2.c

diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py
index 58baae25df3df789675ae8e4c425b696eb718513..173fc743cf68ae0cc8762737418e08b7d260ef5e 100644 (file)
@@ -271,7 +271,10 @@ class RunPyMixin:
     @contextlib.contextmanager
     def script(self, content, encoding="utf-8"):
         file = Path(tempfile.mktemp(dir=os.getcwd()) + ".py")
-        file.write_text(content, encoding=encoding)
+        if isinstance(content, bytes):
+            file.write_bytes(content)
+        else:
+            file.write_text(content, encoding=encoding)
         try:
             yield file
         finally:
@@ -624,6 +627,25 @@ class TestLauncher(unittest.TestCase, RunPyMixin):
         self.assertEqual("3.100", data["SearchInfo.tag"])
         self.assertEqual(f'X.Y.exe -prearg "{script}" -postarg', data["stdout"].strip())
 
+    def test_py_shebang_valid_bom(self):
+        with self.py_ini(TEST_PY_DEFAULTS):
+            content = "#! /usr/bin/python -prearg".encode("utf-8")
+            with self.script(b"\xEF\xBB\xBF" + content) as script:
+                data = self.run_py([script, "-postarg"])
+        self.assertEqual("PythonTestSuite", data["SearchInfo.company"])
+        self.assertEqual("3.100", data["SearchInfo.tag"])
+        self.assertEqual(f"X.Y.exe -prearg {quote(script)} -postarg", data["stdout"].strip())
+
+    def test_py_shebang_invalid_bom(self):
+        with self.py_ini(TEST_PY_DEFAULTS):
+            content = "#! /usr/bin/python3 -prearg".encode("utf-8")
+            with self.script(b"\xEF\xAA\xBF" + content) as script:
+                data = self.run_py([script, "-postarg"])
+        self.assertIn("Invalid BOM", data["stderr"])
+        self.assertEqual("PythonTestSuite", data["SearchInfo.company"])
+        self.assertEqual("3.100", data["SearchInfo.tag"])
+        self.assertEqual(f"X.Y.exe {quote(script)} -postarg", data["stdout"].strip())
+
     def test_py_handle_64_in_ini(self):
         with self.py_ini("\n".join(["[defaults]", "python=3.999-64"])):
             # Expect this to fail, but should get oldStyleTag flipped on
diff --git a/Misc/NEWS.d/next/Windows/2025-03-09-19-57-35.gh-issue-131020._c87wf.rst b/Misc/NEWS.d/next/Windows/2025-03-09-19-57-35.gh-issue-131020._c87wf.rst
new file mode 100644 (file)
index 0000000..35cd1dd
--- /dev/null
@@ -0,0 +1,2 @@
+:source:`pylauncher <PC/launcher2.c>` correctly detects a BOM when searching for the
+shebang. Fix by Chris Eibl.
diff --git a/PC/launcher2.c b/PC/launcher2.c
index befcbe30600f2cbae3c14abe83052d64f495a9c7..72121724726ccb9f0a019d5114393e284768afee 100644 (file)
@@ -1062,7 +1062,7 @@ checkShebang(SearchInfo *search)
     }
 
     DWORD bytesRead = 0;
-    char buffer[4096];
+    unsigned char buffer[4096];
     if (!ReadFile(hFile, buffer, sizeof(buffer), &bytesRead, NULL)) {
         debug(L"# Failed to read %s for shebang parsing (0x%08X)\n",
               scriptFile, GetLastError());
@@ -1075,7 +1075,7 @@ checkShebang(SearchInfo *search)
     free(scriptFile);
 
 
-    char *b = buffer;
+    unsigned char *b = buffer;
     bool onlyUtf8 = false;
     if (bytesRead > 3 && *b == 0xEF) {
         if (*++b == 0xBB && *++b == 0xBF) {
@@ -1096,13 +1096,13 @@ checkShebang(SearchInfo *search)
     ++b;
     --bytesRead;
     while (--bytesRead > 0 && isspace(*++b)) { }
-    char *start = b;
+    const unsigned char *start = b;
     while (--bytesRead > 0 && *++b != '\r' && *b != '\n') { }
     wchar_t *shebang;
     int shebangLength;
     // We add 1 when bytesRead==0, as in that case we hit EOF and b points
     // to the last character in the file, not the newline
-    int exitCode = _decodeShebang(search, start, (int)(b - start + (bytesRead == 0)), onlyUtf8, &shebang, &shebangLength);
+    int exitCode = _decodeShebang(search, (const char*)start, (int)(b - start + (bytesRead == 0)), onlyUtf8, &shebang, &shebangLength);
     if (exitCode) {
         return exitCode;
     }