are also recognized.
.. versionchanged:: 3.8
- Added support for url being a :term:`path-like object`.
+ Added support for *url* being a :term:`path-like object`.
+
+ .. deprecated:: 3.13
+ Passing a file path instead of a URL is :term:`soft deprecated`.
+ Use :func:`guess_file_type` for this.
+
+
+.. function:: guess_file_type(path, *, strict=True)
+
+ .. index:: pair: MIME; headers
+
+ Guess the type of a file based on its path, given by *path*.
+ Similar to the :func:`guess_type` function, but accepts a path instead of a URL.
+ *path* can be a string, a bytes object or a :term:`path-like object`.
+
+ .. versionadded:: 3.13
.. function:: guess_all_extensions(type, strict=True)
return value is a list of strings giving all possible filename extensions,
including the leading dot (``'.'``). The extensions are not guaranteed to have
been associated with any particular data stream, but would be mapped to the MIME
- type *type* by :func:`guess_type`.
+ type *type* by :func:`guess_type` and :func:`guess_file_type`.
The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
return value is a string giving a filename extension, including the leading dot
(``'.'``). The extension is not guaranteed to have been associated with any
particular data stream, but would be mapped to the MIME type *type* by
- :func:`guess_type`. If no extension can be guessed for *type*, ``None`` is
- returned.
+ :func:`guess_type` and :func:`guess_file_type`.
+ If no extension can be guessed for *type*, ``None`` is returned.
The optional *strict* argument has the same meaning as with the :func:`guess_type` function.
the object.
+ .. method:: MimeTypes.guess_file_type(path, *, strict=True)
+
+ Similar to the :func:`guess_file_type` function, using the tables stored
+ as part of the object.
+
+ .. versionadded:: 3.13
+
+
.. method:: MimeTypes.guess_all_extensions(type, strict=True)
Similar to the :func:`guess_all_extensions` function, using the tables stored
__all__ = [
"knownfiles", "inited", "MimeTypes",
- "guess_type", "guess_all_extensions", "guess_extension",
+ "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension",
"add_type", "init", "read_mime_types",
"suffix_map", "encodings_map", "types_map", "common_types"
]
Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types.
"""
+ # TODO: Deprecate accepting file paths (in particular path-like objects).
url = os.fspath(url)
p = urllib.parse.urlparse(url)
if p.scheme and len(p.scheme) > 1:
scheme = p.scheme
url = p.path
else:
- scheme = None
- url = os.path.splitdrive(url)[1]
+ return self.guess_file_type(url, strict=strict)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
if '=' in type or '/' not in type:
type = 'text/plain'
return type, None # never compressed, so encoding is None
- base, ext = posixpath.splitext(url)
+ return self._guess_file_type(url, strict, posixpath.splitext)
+
+ def guess_file_type(self, path, *, strict=True):
+ """Guess the type of a file based on its path.
+
+ Similar to guess_type(), but takes a file path instead of a URL.
+ """
+ path = os.fsdecode(path)
+ path = os.path.splitdrive(path)[1]
+ return self._guess_file_type(path, strict, os.path.splitext)
+
+ def _guess_file_type(self, path, strict, splitext):
+ base, ext = splitext(path)
while (ext_lower := ext.lower()) in self.suffix_map:
- base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
+ base, ext = splitext(base + self.suffix_map[ext_lower])
# encodings_map is case sensitive
if ext in self.encodings_map:
encoding = self.encodings_map[ext]
- base, ext = posixpath.splitext(base)
+ base, ext = splitext(base)
else:
encoding = None
ext = ext.lower()
return _db.guess_type(url, strict)
+def guess_file_type(path, *, strict=True):
+ """Guess the type of a file based on its path.
+
+ Similar to guess_type(), but takes a file path instead of a URL.
+ """
+ if _db is None:
+ init()
+ return _db.guess_file_type(path, strict=strict)
+
+
def guess_all_extensions(type, strict=True):
"""Guess the extensions for a file based on its MIME type.
def test_case_sensitivity(self):
eq = self.assertEqual
- eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html"))
- eq(self.db.guess_type("foobar.TGZ"), self.db.guess_type("foobar.tgz"))
- eq(self.db.guess_type("foobar.tar.Z"), ("application/x-tar", "compress"))
- eq(self.db.guess_type("foobar.tar.z"), (None, None))
+ eq(self.db.guess_file_type("foobar.html"), ("text/html", None))
+ eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None))
+ eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None))
+ eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None))
+ eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip"))
+ eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip"))
+ eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip"))
+ eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip"))
+ eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress"))
+ eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress"))
+ eq(self.db.guess_file_type("foobar.tar.z"), (None, None))
+ eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None))
def test_default_data(self):
eq = self.assertEqual
- eq(self.db.guess_type("foo.html"), ("text/html", None))
- eq(self.db.guess_type("foo.HTML"), ("text/html", None))
- eq(self.db.guess_type("foo.tgz"), ("application/x-tar", "gzip"))
- eq(self.db.guess_type("foo.tar.gz"), ("application/x-tar", "gzip"))
- eq(self.db.guess_type("foo.tar.Z"), ("application/x-tar", "compress"))
- eq(self.db.guess_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
- eq(self.db.guess_type("foo.tar.xz"), ("application/x-tar", "xz"))
+ eq(self.db.guess_file_type("foo.html"), ("text/html", None))
+ eq(self.db.guess_file_type("foo.HTML"), ("text/html", None))
+ eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip"))
+ eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip"))
+ eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress"))
+ eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2"))
+ eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz"))
def test_data_urls(self):
eq = self.assertEqual
eq = self.assertEqual
sio = io.StringIO("x-application/x-unittest pyunit\n")
self.db.readfp(sio)
- eq(self.db.guess_type("foo.pyunit"),
+ eq(self.db.guess_file_type("foo.pyunit"),
("x-application/x-unittest", None))
eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit")
def test_non_standard_types(self):
eq = self.assertEqual
# First try strict
- eq(self.db.guess_type('foo.xul', strict=True), (None, None))
+ eq(self.db.guess_file_type('foo.xul', strict=True), (None, None))
eq(self.db.guess_extension('image/jpg', strict=True), None)
# And then non-strict
- eq(self.db.guess_type('foo.xul', strict=False), ('text/xul', None))
- eq(self.db.guess_type('foo.XUL', strict=False), ('text/xul', None))
- eq(self.db.guess_type('foo.invalid', strict=False), (None, None))
+ eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None))
+ eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None))
+ eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None))
eq(self.db.guess_extension('image/jpg', strict=False), '.jpg')
eq(self.db.guess_extension('image/JPG', strict=False), '.jpg')
'//share/server/', '\\\\share\\server\\'):
path = prefix + name
with self.subTest(path=path):
+ eq(self.db.guess_file_type(path), gzip_expected)
eq(self.db.guess_type(path), gzip_expected)
expected = (None, None) if os.name == 'nt' else gzip_expected
for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
path = prefix + name
with self.subTest(path=path):
+ eq(self.db.guess_file_type(path), expected)
eq(self.db.guess_type(path), expected)
+ eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
+ eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip'))
+ eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip'))
+ expected = (None, 'gzip') if os.name == 'nt' else gzip_expected
+ eq(self.db.guess_file_type(r'foo\.tar.gz'), expected)
+ eq(self.db.guess_type(r'foo\.tar.gz'), expected)
+ eq(self.db.guess_type(r'scheme:foo\.tar.gz'), gzip_expected)
+
def test_url(self):
+ result = self.db.guess_type('http://example.com/host.html')
result = self.db.guess_type('http://host.html')
msg = 'URL only has a host name, not a file'
self.assertSequenceEqual(result, (None, None), msg)
def test_path_like_ob(self):
filename = "LICENSE.txt"
- filepath = pathlib.Path(filename)
- filepath_with_abs_dir = pathlib.Path('/dir/'+filename)
- filepath_relative = pathlib.Path('../dir/'+filename)
- path_dir = pathlib.Path('./')
+ filepath = os_helper.FakePath(filename)
+ filepath_with_abs_dir = os_helper.FakePath('/dir/'+filename)
+ filepath_relative = os_helper.FakePath('../dir/'+filename)
+ path_dir = os_helper.FakePath('./')
- expected = self.db.guess_type(filename)
+ expected = self.db.guess_file_type(filename)
+ self.assertEqual(self.db.guess_file_type(filepath), expected)
self.assertEqual(self.db.guess_type(filepath), expected)
+ self.assertEqual(self.db.guess_file_type(
+ filepath_with_abs_dir), expected)
self.assertEqual(self.db.guess_type(
filepath_with_abs_dir), expected)
+ self.assertEqual(self.db.guess_file_type(filepath_relative), expected)
self.assertEqual(self.db.guess_type(filepath_relative), expected)
+
+ self.assertEqual(self.db.guess_file_type(path_dir), (None, None))
self.assertEqual(self.db.guess_type(path_dir), (None, None))
+ def test_bytes_path(self):
+ self.assertEqual(self.db.guess_file_type(b'foo.html'),
+ self.db.guess_file_type('foo.html'))
+ self.assertEqual(self.db.guess_file_type(b'foo.tar.gz'),
+ self.db.guess_file_type('foo.tar.gz'))
+ self.assertEqual(self.db.guess_file_type(b'foo.tgz'),
+ self.db.guess_file_type('foo.tgz'))
+
def test_keywords_args_api(self):
+ self.assertEqual(self.db.guess_file_type(
+ path="foo.html", strict=True), ("text/html", None))
self.assertEqual(self.db.guess_type(
- url="foo.html", strict=True), ("text/html", None))
+ url="scheme:foo.html", strict=True), ("text/html", None))
self.assertEqual(self.db.guess_all_extensions(
type='image/jpg', strict=True), [])
self.assertEqual(self.db.guess_extension(