From: CAM Gerlach Date: Thu, 21 Mar 2019 14:44:51 +0000 (-0500) Subject: bpo-36268: Change default tar format to pax from GNU. (GH-12355) X-Git-Tag: v3.8.0a3~34 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e680c3db80efc4a1d637dd871af21276db45ae03;p=thirdparty%2FPython%2Fcpython.git bpo-36268: Change default tar format to pax from GNU. (GH-12355) --- diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 9cd07158e7f6..c7012a7d48f6 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -229,7 +229,11 @@ details. .. data:: DEFAULT_FORMAT - The default format for creating archives. This is currently :const:`GNU_FORMAT`. + The default format for creating archives. This is currently :const:`PAX_FORMAT`. + + .. versionchanged:: 3.8 + The default format for new archives was changed to + :const:`PAX_FORMAT` from :const:`GNU_FORMAT`. .. seealso:: @@ -820,8 +824,10 @@ There are three tar formats that can be created with the :mod:`tarfile` module: * The POSIX.1-2001 pax format (:const:`PAX_FORMAT`). It is the most flexible format with virtually no limits. It supports long filenames and linknames, large - files and stores pathnames in a portable way. However, not all tar - implementations today are able to handle pax archives properly. + files and stores pathnames in a portable way. Modern tar implementations, + including GNU tar, bsdtar/libarchive and star, fully support extended *pax* + features; some older or unmaintained libraries may not, but should treat + *pax* archives as if they were in the universally-supported *ustar* format. The *pax* format is an extension to the existing *ustar* format. It uses extra headers for information that cannot be stored otherwise. There are two flavours @@ -871,7 +877,7 @@ converted. Possible values are listed in section :ref:`error-handlers`. The default scheme is ``'surrogateescape'`` which Python also uses for its file system calls, see :ref:`os-filenames`. 
-In case of :const:`PAX_FORMAT` archives, *encoding* is generally not needed +For :const:`PAX_FORMAT` archives (the default), *encoding* is generally not needed because all the metadata is stored using *UTF-8*. *encoding* is only used in the rare cases when binary pax headers are decoded or when strings with surrogate characters are stored. diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 2e311ab1c1f5..18ec2c2f662d 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -316,6 +316,16 @@ and manipulating normal distributions of a random variable. [7.672102882379219, 12.000027119750287, 4.647488369766392] +tarfile +------- + +The :mod:`tarfile` module now defaults to the modern pax (POSIX.1-2001) +format for new archives, instead of the previous GNU-specific one. +This improves cross-platform portability with a consistent encoding (UTF-8) +in a standardized and extensible format, and offers several other benefits. +(Contributed by C.A.M. Gerlach in :issue:`36268`.) + + tokenize -------- diff --git a/Lib/tarfile.py b/Lib/tarfile.py index bb09d10925b2..30cecffd1a84 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -105,7 +105,7 @@ SOLARIS_XHDTYPE = b"X" # Solaris extended header USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format GNU_FORMAT = 1 # GNU tar format PAX_FORMAT = 2 # POSIX.1-2001 (pax) format -DEFAULT_FORMAT = GNU_FORMAT +DEFAULT_FORMAT = PAX_FORMAT #--------------------------------------------------------- # tarfile constants diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 5e5a3c3cea83..7e32cbccd6c5 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2136,15 +2136,16 @@ class MiscTest(unittest.TestCase): def test_write_number_fields(self): self.assertEqual(tarfile.itn(1), b"0000001\x00") self.assertEqual(tarfile.itn(0o7777777), b"7777777\x00") - self.assertEqual(tarfile.itn(0o10000000), + self.assertEqual(tarfile.itn(0o10000000, format=tarfile.GNU_FORMAT), b"\x80\x00\x00\x00\x00\x20\x00\x00") - 
self.assertEqual(tarfile.itn(0xffffffff), + self.assertEqual(tarfile.itn(0xffffffff, format=tarfile.GNU_FORMAT), b"\x80\x00\x00\x00\xff\xff\xff\xff") - self.assertEqual(tarfile.itn(-1), + self.assertEqual(tarfile.itn(-1, format=tarfile.GNU_FORMAT), b"\xff\xff\xff\xff\xff\xff\xff\xff") - self.assertEqual(tarfile.itn(-100), + self.assertEqual(tarfile.itn(-100, format=tarfile.GNU_FORMAT), b"\xff\xff\xff\xff\xff\xff\xff\x9c") - self.assertEqual(tarfile.itn(-0x100000000000000), + self.assertEqual(tarfile.itn(-0x100000000000000, + format=tarfile.GNU_FORMAT), b"\xff\x00\x00\x00\x00\x00\x00\x00") # Issue 32713: Test if itn() supports float values outside the diff --git a/Misc/NEWS.d/next/Library/2019-03-14-16-25-17.bpo-36268.MDXLw6.rst b/Misc/NEWS.d/next/Library/2019-03-14-16-25-17.bpo-36268.MDXLw6.rst new file mode 100644 index 000000000000..55f4e0f0d051 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-03-14-16-25-17.bpo-36268.MDXLw6.rst @@ -0,0 +1,3 @@ +Switch the default format used for writing tars with :mod:`tarfile` to +the modern POSIX.1-2001 pax standard, from the vendor-specific GNU. +Contributed by C.A.M. Gerlach.