git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-107465: Add `pathlib.Path.from_uri()` classmethod. (#107640)
author: Barney Gale <barney.gale@gmail.com>
Sun, 1 Oct 2023 15:14:02 +0000 (16:14 +0100)
committer: GitHub <noreply@github.com>
Sun, 1 Oct 2023 15:14:02 +0000 (16:14 +0100)
This method supports file URIs (including variants) as described in RFC 8089, such as URIs generated by `pathlib.Path.as_uri()` and `urllib.request.pathname2url()`.

The method is added to `Path` rather than `PurePath` because it uses `os.fsdecode()`, and so its results vary from system to system. I intend to deprecate `PurePath.as_uri()` and move it to `Path` for the same reason.

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Doc/library/pathlib.rst
Doc/whatsnew/3.13.rst
Lib/pathlib.py
Lib/test/test_pathlib.py
Misc/NEWS.d/next/Library/2023-08-04-19-00-53.gh-issue-107465.Vc1Il3.rst [new file with mode: 0644]

index 48d6176d26bb8f4028ff11254c0e98becdb33284..8ee89a003a339aadbc95c12ec2d692589ed3b205 100644 (file)
@@ -850,6 +850,42 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.5
 
 
+.. classmethod:: Path.from_uri(uri)
+
+   Return a new path object from parsing a 'file' URI conforming to
+   :rfc:`8089`. For example::
+
+       >>> Path.from_uri('file:///etc/hosts')
+       PosixPath('/etc/hosts')
+
+   On Windows, DOS drive and UNC paths may be parsed from URIs::
+
+       >>> Path.from_uri('file:///c:/windows')
+       WindowsPath('c:/windows')
+       >>> Path.from_uri('file://server/share')
+       WindowsPath('//server/share')
+
+   Several variant forms are supported::
+
+       >>> Path.from_uri('file:////server/share')
+       WindowsPath('//server/share')
+       >>> Path.from_uri('file://///server/share')
+       WindowsPath('//server/share')
+       >>> Path.from_uri('file:c:/windows')
+       WindowsPath('c:/windows')
+       >>> Path.from_uri('file:/c|/windows')
+       WindowsPath('c:/windows')
+
+   :exc:`ValueError` is raised if the URI does not start with ``file:``, or
+   the parsed path isn't absolute.
+
+   :func:`os.fsdecode` is used to decode percent-escaped byte sequences, and
+   so file URIs are not portable across machines with different
+   :ref:`filesystem encodings <filesystem-encoding>`.
+
+   .. versionadded:: 3.13
+
+
 .. method:: Path.stat(*, follow_symlinks=True)
 
    Return a :class:`os.stat_result` object containing information about this path, like :func:`os.stat`.
index a789084a79c397e5154533f3a00fdff1b231a2f1..1de5479a9243751bb0f84a0410f37bb4e8afcb67 100644 (file)
@@ -184,6 +184,10 @@ pathlib
   :exc:`NotImplementedError` when a path operation isn't supported.
   (Contributed by Barney Gale in :gh:`89812`.)
 
+* Add :meth:`pathlib.Path.from_uri`, a new constructor to create a :class:`pathlib.Path`
+  object from a 'file' URI (``file:/``).
+  (Contributed by Barney Gale in :gh:`107465`.)
+
 * Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
   (Contributed by Barney Gale in :gh:`73435`.)
 
index e6be9061013a8aa82b78c04770fafd72906462e6..9e6d0754eccf3ea5b69b64e187505db458b05a1b 100644 (file)
@@ -18,7 +18,6 @@ import warnings
 from _collections_abc import Sequence
 from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
 from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
-from urllib.parse import quote_from_bytes as urlquote_from_bytes
 
 try:
     import pwd
@@ -452,7 +451,8 @@ class PurePath:
             # It's a posix path => 'file:///etc/hosts'
             prefix = 'file://'
             path = str(self)
-        return prefix + urlquote_from_bytes(os.fsencode(path))
+        from urllib.parse import quote_from_bytes
+        return prefix + quote_from_bytes(os.fsencode(path))
 
     @property
     def _str_normcase(self):
@@ -814,9 +814,10 @@ class _PathBase(PurePath):
     __bytes__ = None
     __fspath__ = None  # virtual paths have no local file system representation
 
-    def _unsupported(self, method_name):
-        msg = f"{type(self).__name__}.{method_name}() is unsupported"
-        if isinstance(self, Path):
+    @classmethod
+    def _unsupported(cls, method_name):
+        msg = f"{cls.__name__}.{method_name}() is unsupported"
+        if issubclass(cls, Path):
             msg += " on this system"
         raise UnsupportedOperation(msg)
 
@@ -1418,6 +1419,11 @@ class _PathBase(PurePath):
         """
         self._unsupported("group")
 
+    @classmethod
+    def from_uri(cls, uri):
+        """Return a new path from the given 'file' URI."""
+        cls._unsupported("from_uri")
+
     def as_uri(self):
         """Return the path as a URI."""
         self._unsupported("as_uri")
@@ -1661,6 +1667,30 @@ class Path(_PathBase):
 
         return self
 
+    @classmethod
+    def from_uri(cls, uri):
+        """Return a new path from the given 'file' URI, which must be absolute."""
+        if not uri.startswith('file:'):
+            raise ValueError(f"URI does not start with 'file:': {uri!r}")
+        path = uri[5:]
+        if path[:3] == '///':
+            # Remove empty authority ('file:///x' -> '/x')
+            path = path[2:]
+        elif path[:12] == '//localhost/':
+            # Remove 'localhost' authority ('file://localhost/x' -> '/x')
+            path = path[11:]
+        if path[:3] == '///' or (path[:1] == '/' and path[2:3] in (':', '|')):
+            # Remove slash before DOS drive/UNC path (not from '/' or '/x')
+            path = path[1:]
+        if path[1:2] == '|':
+            # Replace bar with colon in DOS drive
+            path = path[:1] + ':' + path[2:]
+        from urllib.parse import unquote_to_bytes
+        path = cls(os.fsdecode(unquote_to_bytes(path)))
+        if not path.is_absolute():
+            raise ValueError(f"URI is not absolute: {uri!r}")
+        return path
+
 
 class PosixPath(Path, PurePosixPath):
     """Path subclass for non-Windows systems.
index 319148e9065a6517054c2efc0dcf31ea6fba10b0..76918addf8b613e966ddce12dc9cca91c4aceb45 100644 (file)
@@ -11,6 +11,7 @@ import stat
 import tempfile
 import unittest
 from unittest import mock
+from urllib.request import pathname2url
 
 from test.support import import_helper
 from test.support import set_recursion_limit
@@ -3602,6 +3603,24 @@ class PosixPathTest(PathTest):
                 self.fail("Bad file descriptor not handled.")
             raise
 
+    def test_from_uri(self):
+        P = self.cls
+        self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar'))
+        self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar'))
+        self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar'))
+        self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar'))
+        self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar'))
+        self.assertRaises(ValueError, P.from_uri, 'foo/bar')
+        self.assertRaises(ValueError, P.from_uri, '/foo/bar')
+        self.assertRaises(ValueError, P.from_uri, '//foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'file:foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'http://foo/bar')
+
+    def test_from_uri_pathname2url(self):
+        P = self.cls
+        self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar'))
+        self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar'))
+
 
 @only_nt
 class WindowsPathTest(PathTest):
@@ -3721,6 +3740,31 @@ class WindowsPathTest(PathTest):
             env['HOME'] = 'C:\\Users\\eve'
             check()
 
+    def test_from_uri(self):
+        P = self.cls
+        # DOS drive paths
+        self.assertEqual(P.from_uri('file:c:/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:c|/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:/c|/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:///c|/path/to/file'), P('c:/path/to/file'))
+        # UNC paths
+        self.assertEqual(P.from_uri('file://server/path/to/file'), P('//server/path/to/file'))
+        self.assertEqual(P.from_uri('file:////server/path/to/file'), P('//server/path/to/file'))
+        self.assertEqual(P.from_uri('file://///server/path/to/file'), P('//server/path/to/file'))
+        # Localhost paths
+        self.assertEqual(P.from_uri('file://localhost/c:/path/to/file'), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file://localhost/c|/path/to/file'), P('c:/path/to/file'))
+        # Invalid paths
+        self.assertRaises(ValueError, P.from_uri, 'foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'c:/foo/bar')
+        self.assertRaises(ValueError, P.from_uri, '//foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'file:foo/bar')
+        self.assertRaises(ValueError, P.from_uri, 'http://foo/bar')
+
+    def test_from_uri_pathname2url(self):
+        P = self.cls
+        self.assertEqual(P.from_uri('file:' + pathname2url(r'c:\path\to\file')), P('c:/path/to/file'))
+        self.assertEqual(P.from_uri('file:' + pathname2url(r'\\server\path\to\file')), P('//server/path/to/file'))
 
 
 class PathSubclassTest(PathTest):
diff --git a/Misc/NEWS.d/next/Library/2023-08-04-19-00-53.gh-issue-107465.Vc1Il3.rst b/Misc/NEWS.d/next/Library/2023-08-04-19-00-53.gh-issue-107465.Vc1Il3.rst
new file mode 100644 (file)
index 0000000..e98092f
--- /dev/null
@@ -0,0 +1 @@
+Add :meth:`pathlib.Path.from_uri` classmethod.