From: Xiao Yuan Date: Mon, 15 Jun 2026 15:05:29 +0000 (+0300) Subject: gh-92455: Respect case-sensitive mimetype suffixes (GH-148782) X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=46107ad9da0add7aa5c0a899e159d89c1376d6be;p=thirdparty%2FPython%2Fcpython.git gh-92455: Respect case-sensitive mimetype suffixes (GH-148782) --- diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst index f33098faf7d8..5c29fff146ee 100644 --- a/Doc/library/mimetypes.rst +++ b/Doc/library/mimetypes.rst @@ -39,8 +39,8 @@ the information :func:`init` sets up. (e.g. :program:`compress` or :program:`gzip`). The encoding is suitable for use as a :mailheader:`Content-Encoding` header, **not** as a :mailheader:`Content-Transfer-Encoding` header. The mappings are table driven. - Encoding suffixes are case sensitive; type suffixes are first tried case - sensitively, then case insensitively. + Encoding suffixes are case-sensitive. Suffix mappings and type suffixes are + first tried case-sensitively, then case-insensitively. The optional *strict* argument is a flag specifying whether the list of known MIME types is limited to only the official types `registered with IANA @@ -131,6 +131,8 @@ behavior of the module. is already known the extension will be added to the list of known extensions. Valid extensions are empty or start with a ``'.'``. + Registered lower-case extensions are matched case-insensitively. + When *strict* is ``True`` (the default), the mapping will be added to the official MIME types, otherwise to the non-standard ones. @@ -312,6 +314,8 @@ than one MIME-type database; it provides an interface similar to the one of the extension is already known, the new type will replace the old one. When the type is already known the extension will be added to the list of known extensions. + Registered lower-case extensions are matched case-insensitively. + When *strict* is ``True`` (the default), the mapping will be added to the official MIME types, otherwise to the non-standard ones. diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 15e8c0a437bf..4339ef5a6139 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -86,6 +86,9 @@ class MimeTypes: is already known the extension will be added to the list of known extensions. + Registered lower-case extensions are matched + case-insensitively. + If strict is true, information will be added to list of standard types, else to the list of non-standard types. @@ -172,23 +175,33 @@ class MimeTypes: def _guess_file_type(self, path, strict, splitext): base, ext = splitext(path) - while (ext_lower := ext.lower()) in self.suffix_map: - base, ext = splitext(base + self.suffix_map[ext_lower]) + while True: + if ext in self.suffix_map: + suffix = self.suffix_map[ext] + elif (ext_lower := ext.lower()) in self.suffix_map: + suffix = self.suffix_map[ext_lower] + else: + break + base, ext = splitext(base + suffix) # encodings_map is case sensitive if ext in self.encodings_map: encoding = self.encodings_map[ext] base, ext = splitext(base) else: encoding = None - ext = ext.lower() + ext_lower = ext.lower() types_map = self.types_map[True] if ext in types_map: return types_map[ext], encoding + if ext_lower in types_map: + return types_map[ext_lower], encoding elif strict: return None, encoding types_map = self.types_map[False] if ext in types_map: return types_map[ext], encoding + if ext_lower in types_map: + return types_map[ext_lower], encoding else: return None, encoding @@ -386,6 +399,9 @@ def add_type(type, ext, strict=True): is already known the extension will be added to the list of known extensions. + Registered lower-case extensions are matched + case-insensitively. + If strict is true, information will be added to list of standard types, else to the list of non-standard types. diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 1a3b49b87b12..19983fa3fa76 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -287,6 +287,50 @@ class MimeTypesClassTestCase(unittest.TestCase): eq(self.db.guess_file_type("foobar.tar.z"), (None, None)) eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None)) + def test_suffix_map_case_sensitive_preferred(self): + self.db.suffix_map[".TEST-SUFFIX"] = ".tar.gz" + self.db.suffix_map[".test-suffix"] = ".tar.xz" + self.assertEqual( + self.db.guess_file_type("example.TEST-SUFFIX"), + ("application/x-tar", "gzip"), + ) + self.assertEqual( + self.db.guess_file_type("example.test-suffix"), + ("application/x-tar", "xz"), + ) + + def test_added_types_case_sensitive_preferred(self): + self.db.add_type("text/x-test-uppercase-r", ".R") + self.db.add_type("text/x-test-lowercase-r", ".r") + self.assertEqual( + self.db.guess_file_type("example.R"), + ("text/x-test-uppercase-r", None), + ) + self.assertEqual( + self.db.guess_file_type("example.r"), + ("text/x-test-lowercase-r", None), + ) + self.db.add_type("text/x-test-uppercase-non-strict", + ".NON-STRICT-EXT", strict=False) + self.db.add_type("text/x-test-lowercase-non-strict", + ".non-strict-ext", strict=False) + self.assertEqual( + self.db.guess_file_type("example.NON-STRICT-EXT"), + (None, None), + ) + self.assertEqual( + self.db.guess_file_type("example.non-strict-ext"), + (None, None), + ) + self.assertEqual( + self.db.guess_file_type("example.NON-STRICT-EXT", strict=False), + ("text/x-test-uppercase-non-strict", None), + ) + self.assertEqual( + self.db.guess_file_type("example.non-strict-ext", strict=False), + ("text/x-test-lowercase-non-strict", None), + ) + def test_default_data(self): eq = self.assertEqual eq(self.db.guess_file_type("foo.html"), ("text/html", None)) diff --git a/Misc/NEWS.d/next/Library/2026-04-20-01-24-22.gh-issue-92455.vXhmad.rst b/Misc/NEWS.d/next/Library/2026-04-20-01-24-22.gh-issue-92455.vXhmad.rst new file mode 100644 index 000000000000..8d2a11cb7761 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-20-01-24-22.gh-issue-92455.vXhmad.rst @@ -0,0 +1,3 @@ +Fix :mod:`mimetypes` to prefer case-sensitive matches for suffix mappings and +MIME type suffixes before falling back to case-insensitive matches. +Contributed by Xiao Yuan.