git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-66543: Add mimetypes.guess_file_type() (GH-117258)
author: Serhiy Storchaka <storchaka@gmail.com>
Mon, 6 May 2024 12:50:52 +0000 (15:50 +0300)
committer: GitHub <noreply@github.com>
Mon, 6 May 2024 12:50:52 +0000 (15:50 +0300)
Doc/includes/email-dir.py
Doc/library/mimetypes.rst
Doc/library/wsgiref.rst
Doc/whatsnew/3.13.rst
Lib/http/server.py
Lib/mimetypes.py
Lib/test/test_mimetypes.py
Misc/NEWS.d/next/Library/2024-03-26-15-29-39.gh-issue-66543.OZBhU5.rst [new file with mode: 0644]

index 2fc1570e654db6ab5650f7b8857c2570aaebdbdb..aa2a5c7cda52aaee03e13d68b00692870560f4a2 100644 (file)
@@ -53,7 +53,7 @@ must be running an SMTP server.
         # Guess the content type based on the file's extension.  Encoding
         # will be ignored, although we should check for simple things like
         # gzip'd or compressed files.
-        ctype, encoding = mimetypes.guess_type(path)
+        ctype, encoding = mimetypes.guess_file_type(path)
         if ctype is None or encoding is not None:
             # No guess could be made, or the file is encoded (compressed), so
             # use a generic bag-of-bits type.
index f610032acbe41779b8c57c4eefe1513ba4aefd2f..a24eab21d5734372296d5bbf5eae8a80905f4e31 100644 (file)
@@ -52,7 +52,22 @@ the information :func:`init` sets up.
    are also recognized.
 
    .. versionchanged:: 3.8
-      Added support for url being a :term:`path-like object`.
+      Added support for *url* being a :term:`path-like object`.
+
+   .. deprecated:: 3.13
+      Passing a file path instead of a URL is :term:`soft deprecated`.
+      Use :func:`guess_file_type` for this.
+
+
+.. function:: guess_file_type(path, *, strict=True)
+
+   .. index:: pair: MIME; headers
+
+   Guess the type of a file based on its path, given by *path*.
+   Similar to the :func:`guess_type` function, but accepts a path instead of a URL.
+   *path* can be a string, a bytes object or a :term:`path-like object`.
+
+   .. versionadded:: 3.13
 
 
 .. function:: guess_all_extensions(type, strict=True)
@@ -61,7 +76,7 @@ the information :func:`init` sets up.
    return value is a list of strings giving all possible filename extensions,
    including the leading dot (``'.'``).  The extensions are not guaranteed to have
    been associated with any particular data stream, but would be mapped to the MIME
-   type *type* by :func:`guess_type`.
+   type *type* by :func:`guess_type` and :func:`guess_file_type`.
 
    The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
 
@@ -72,8 +87,8 @@ the information :func:`init` sets up.
    return value is a string giving a filename extension, including the leading dot
    (``'.'``).  The extension is not guaranteed to have been associated with any
    particular data stream, but would be mapped to the MIME type *type* by
-   :func:`guess_type`.  If no extension can be guessed for *type*, ``None`` is
-   returned.
+   :func:`guess_type` and :func:`guess_file_type`.
+   If no extension can be guessed for *type*, ``None`` is returned.
 
    The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
 
@@ -238,6 +253,14 @@ than one MIME-type database; it provides an interface similar to the one of the
       the object.
 
 
+   .. method:: MimeTypes.guess_file_type(path, *, strict=True)
+
+      Similar to the :func:`guess_file_type` function, using the tables stored
+      as part of the object.
+
+      .. versionadded:: 3.13
+
+
    .. method:: MimeTypes.guess_all_extensions(type, strict=True)
 
       Similar to the :func:`guess_all_extensions` function, using the tables stored
index c2b0ba7046967edce2f6bb8f0c5f76a5dae8f859..7fe84a2de1fcebf97507359262d65fef38164cfe 100644 (file)
@@ -865,7 +865,7 @@ directory and port number (default: 8000) on the command line::
         fn = os.path.join(path, environ["PATH_INFO"][1:])
         if "." not in fn.split(os.path.sep)[-1]:
             fn = os.path.join(fn, "index.html")
-        mime_type = mimetypes.guess_type(fn)[0]
+        mime_type = mimetypes.guess_file_type(fn)[0]
 
         # Return 200 OK if file exists, otherwise 404 Not Found
         if os.path.exists(fn):
index 558565ccbbeeb51956321d3c53b0fec1f94724fd..0b75665ab9c9ba994fd86f81ed2d862589f45349 100644 (file)
@@ -623,6 +623,13 @@ math
   "fusedMultiplyAdd" operation for special cases.
   (Contributed by Mark Dickinson and Victor Stinner in :gh:`73468`.)
 
+mimetypes
+---------
+
+* Add the :func:`~mimetypes.guess_file_type` function which works with file paths.
+  Passing a file path instead of a URL to :func:`~mimetypes.guess_type` is :term:`soft deprecated`.
+  (Contributed by Serhiy Storchaka in :gh:`66543`.)
+
 mmap
 ----
 
@@ -1167,6 +1174,10 @@ Deprecated
 
 .. Add deprecations above alphabetically, not here at the end.
 
+* Passing a file path instead of a URL to :func:`~mimetypes.guess_type` is :term:`soft deprecated`.
+  Use :func:`~mimetypes.guess_file_type` instead.
+  (Contributed by Serhiy Storchaka in :gh:`66543`.)
+
 Pending Removal in Python 3.14
 ------------------------------
 
index ee7a9b6aa55b887d3372e66cbb9f9c6d6737df0b..7d0da5052d2d4d57f0e10b394023b4d08124fb9f 100644 (file)
@@ -897,7 +897,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         ext = ext.lower()
         if ext in self.extensions_map:
             return self.extensions_map[ext]
-        guess, _ = mimetypes.guess_type(path)
+        guess, _ = mimetypes.guess_file_type(path)
         if guess:
             return guess
         return 'application/octet-stream'
index 65a049ae9945f666794ca01cb16b449ec141942c..8604000ed77a196af6da0d77c7e614588d2dad9e 100644 (file)
@@ -40,7 +40,7 @@ except ImportError:
 
 __all__ = [
     "knownfiles", "inited", "MimeTypes",
-    "guess_type", "guess_all_extensions", "guess_extension",
+    "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension",
     "add_type", "init", "read_mime_types",
     "suffix_map", "encodings_map", "types_map", "common_types"
 ]
@@ -119,14 +119,14 @@ class MimeTypes:
         Optional `strict' argument when False adds a bunch of commonly found,
         but non-standard types.
         """
+        # TODO: Deprecate accepting file paths (in particular path-like objects).
         url = os.fspath(url)
         p = urllib.parse.urlparse(url)
         if p.scheme and len(p.scheme) > 1:
             scheme = p.scheme
             url = p.path
         else:
-            scheme = None
-            url = os.path.splitdrive(url)[1]
+            return self.guess_file_type(url, strict=strict)
         if scheme == 'data':
             # syntax of data URLs:
             # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
@@ -146,13 +146,25 @@ class MimeTypes:
             if '=' in type or '/' not in type:
                 type = 'text/plain'
             return type, None           # never compressed, so encoding is None
-        base, ext = posixpath.splitext(url)
+        return self._guess_file_type(url, strict, posixpath.splitext)
+
+    def guess_file_type(self, path, *, strict=True):
+        """Guess the type of a file based on its path.
+
+        Similar to guess_type(), but takes a file path instead of a URL.
+        """
+        path = os.fsdecode(path)
+        path = os.path.splitdrive(path)[1]
+        return self._guess_file_type(path, strict, os.path.splitext)
+
+    def _guess_file_type(self, path, strict, splitext):
+        base, ext = splitext(path)
         while (ext_lower := ext.lower()) in self.suffix_map:
-            base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
+            base, ext = splitext(base + self.suffix_map[ext_lower])
         # encodings_map is case sensitive
         if ext in self.encodings_map:
             encoding = self.encodings_map[ext]
-            base, ext = posixpath.splitext(base)
+            base, ext = splitext(base)
         else:
             encoding = None
         ext = ext.lower()
@@ -310,6 +322,16 @@ def guess_type(url, strict=True):
     return _db.guess_type(url, strict)
 
 
+def guess_file_type(path, *, strict=True):
+    """Guess the type of a file based on its path.
+
+    Similar to guess_type(), but takes a file path instead of a URL.
+    """
+    if _db is None:
+        init()
+    return _db.guess_file_type(path, strict=strict)
+
+
 def guess_all_extensions(type, strict=True):
     """Guess the extensions for a file based on its MIME type.
 
index bf6eae7d0ac9c673f85c0e5e36032952e2ce792c..2e0ad0606ae9c223d871dbddd53a3b81de76bad0 100644 (file)
@@ -36,20 +36,28 @@ class MimeTypesTestCase(unittest.TestCase):
 
     def test_case_sensitivity(self):
         eq = self.assertEqual
-        eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html"))
-        eq(self.db.guess_type("foobar.TGZ"), self.db.guess_type("foobar.tgz"))
-        eq(self.db.guess_type("foobar.tar.Z"), ("application/x-tar", "compress"))
-        eq(self.db.guess_type("foobar.tar.z"), (None, None))
+        eq(self.db.guess_file_type("foobar.html"), ("text/html", None))
+        eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None))
+        eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None))
+        eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None))
+        eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip"))
+        eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip"))
+        eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip"))
+        eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip"))
+        eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress"))
+        eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress"))
+        eq(self.db.guess_file_type("foobar.tar.z"), (None, None))
+        eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None))
 
     def test_default_data(self):
         eq = self.assertEqual
-        eq(self.db.guess_type("foo.html"), ("text/html", None))
-        eq(self.db.guess_type("foo.HTML"), ("text/html", None))
-        eq(self.db.guess_type("foo.tgz"), ("application/x-tar", "gzip"))
-        eq(self.db.guess_type("foo.tar.gz"), ("application/x-tar", "gzip"))
-        eq(self.db.guess_type("foo.tar.Z"), ("application/x-tar", "compress"))
-        eq(self.db.guess_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
-        eq(self.db.guess_type("foo.tar.xz"), ("application/x-tar", "xz"))
+        eq(self.db.guess_file_type("foo.html"), ("text/html", None))
+        eq(self.db.guess_file_type("foo.HTML"), ("text/html", None))
+        eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip"))
+        eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip"))
+        eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress"))
+        eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
+        eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz"))
 
     def test_data_urls(self):
         eq = self.assertEqual
@@ -63,7 +71,7 @@ class MimeTypesTestCase(unittest.TestCase):
         eq = self.assertEqual
         sio = io.StringIO("x-application/x-unittest pyunit\n")
         self.db.readfp(sio)
-        eq(self.db.guess_type("foo.pyunit"),
+        eq(self.db.guess_file_type("foo.pyunit"),
            ("x-application/x-unittest", None))
         eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit")
 
@@ -95,12 +103,12 @@ class MimeTypesTestCase(unittest.TestCase):
     def test_non_standard_types(self):
         eq = self.assertEqual
         # First try strict
-        eq(self.db.guess_type('foo.xul', strict=True), (None, None))
+        eq(self.db.guess_file_type('foo.xul', strict=True), (None, None))
         eq(self.db.guess_extension('image/jpg', strict=True), None)
         # And then non-strict
-        eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None))
-        eq(self.db.guess_type('foo.XUL', strict=False), ('text/xul', None))
-        eq(self.db.guess_type('foo.invalid', strict=False), (None, None))
+        eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None))
+        eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None))
+        eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None))
         eq(self.db.guess_extension('image/jpg', strict=False), '.jpg')
         eq(self.db.guess_extension('image/JPG', strict=False), '.jpg')
 
@@ -124,15 +132,26 @@ class MimeTypesTestCase(unittest.TestCase):
                            '//share/server/', '\\\\share\\server\\'):
                 path = prefix + name
                 with self.subTest(path=path):
+                    eq(self.db.guess_file_type(path), gzip_expected)
                     eq(self.db.guess_type(path), gzip_expected)
             expected = (None, None) if os.name == 'nt' else gzip_expected
             for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
                 path = prefix + name
                 with self.subTest(path=path):
+                    eq(self.db.guess_file_type(path), expected)
                     eq(self.db.guess_type(path), expected)
+        eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
         eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
 
+        eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip'))
+        eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip'))
+        expected = (None, 'gzip') if os.name == 'nt' else gzip_expected
+        eq(self.db.guess_file_type(r'foo\.tar.gz'), expected)
+        eq(self.db.guess_type(r'foo\.tar.gz'), expected)
+        eq(self.db.guess_type(r'scheme:foo\.tar.gz'), gzip_expected)
+
     def test_url(self):
+        result = self.db.guess_type('http://example.com/host.html')
         result = self.db.guess_type('http://host.html')
         msg = 'URL only has a host name, not a file'
         self.assertSequenceEqual(result, (None, None), msg)
@@ -242,22 +261,38 @@ class MimeTypesTestCase(unittest.TestCase):
 
     def test_path_like_ob(self):
         filename = "LICENSE.txt"
-        filepath = pathlib.Path(filename)
-        filepath_with_abs_dir = pathlib.Path('/dir/'+filename)
-        filepath_relative = pathlib.Path('../dir/'+filename)
-        path_dir = pathlib.Path('./')
+        filepath = os_helper.FakePath(filename)
+        filepath_with_abs_dir = os_helper.FakePath('/dir/'+filename)
+        filepath_relative = os_helper.FakePath('../dir/'+filename)
+        path_dir = os_helper.FakePath('./')
 
-        expected = self.db.guess_type(filename)
+        expected = self.db.guess_file_type(filename)
 
+        self.assertEqual(self.db.guess_file_type(filepath), expected)
         self.assertEqual(self.db.guess_type(filepath), expected)
+        self.assertEqual(self.db.guess_file_type(
+            filepath_with_abs_dir), expected)
         self.assertEqual(self.db.guess_type(
             filepath_with_abs_dir), expected)
+        self.assertEqual(self.db.guess_file_type(filepath_relative), expected)
         self.assertEqual(self.db.guess_type(filepath_relative), expected)
+
+        self.assertEqual(self.db.guess_file_type(path_dir), (None, None))
         self.assertEqual(self.db.guess_type(path_dir), (None, None))
 
+    def test_bytes_path(self):
+        self.assertEqual(self.db.guess_file_type(b'foo.html'),
+                         self.db.guess_file_type('foo.html'))
+        self.assertEqual(self.db.guess_file_type(b'foo.tar.gz'),
+                         self.db.guess_file_type('foo.tar.gz'))
+        self.assertEqual(self.db.guess_file_type(b'foo.tgz'),
+                         self.db.guess_file_type('foo.tgz'))
+
     def test_keywords_args_api(self):
+        self.assertEqual(self.db.guess_file_type(
+            path="foo.html", strict=True), ("text/html", None))
         self.assertEqual(self.db.guess_type(
-            url="foo.html", strict=True), ("text/html", None))
+            url="scheme:foo.html", strict=True), ("text/html", None))
         self.assertEqual(self.db.guess_all_extensions(
             type='image/jpg', strict=True), [])
         self.assertEqual(self.db.guess_extension(
diff --git a/Misc/NEWS.d/next/Library/2024-03-26-15-29-39.gh-issue-66543.OZBhU5.rst b/Misc/NEWS.d/next/Library/2024-03-26-15-29-39.gh-issue-66543.OZBhU5.rst
new file mode 100644 (file)
index 0000000..12ea508
--- /dev/null
@@ -0,0 +1,3 @@
+Add the :func:`mimetypes.guess_file_type` function which works with file
+paths. Passing a file path instead of a URL to :func:`~mimetypes.guess_type`
+is :term:`soft deprecated`.