]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-45863: tarfile: don't zero out header fields unnecessarily (GH-29693)
authorJoshua Root <jmr@macports.org>
Wed, 9 Feb 2022 17:06:19 +0000 (04:06 +1100)
committerGitHub <noreply@github.com>
Wed, 9 Feb 2022 17:06:19 +0000 (18:06 +0100)
Numeric fields of type float, notably mtime, can't be represented
exactly in the ustar header, so the pax header is used. But it is
helpful to set them to the nearest int (i.e. second rather than
nanosecond precision mtimes) in the ustar header as well, for the
benefit of unarchivers that don't understand the pax header.

Add test for tarfile.TarInfo.create_pax_header to confirm correct
behaviour.

Lib/tarfile.py
Lib/test/test_tarfile.py
Misc/NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst [new file with mode: 0644]

index e795100158748f81b59f73066c7fac6fd4efba0f..8d43d0da7b988032fa4386b9c5d1e19eea8397ea 100755 (executable)
@@ -888,15 +888,24 @@ class TarInfo(object):
         # Test number fields for values that exceed the field limit or values
         # that like to be stored as float.
         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
-            if name in pax_headers:
-                # The pax header has priority. Avoid overflow.
-                info[name] = 0
-                continue
+            needs_pax = False
 
             val = info[name]
-            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
-                pax_headers[name] = str(val)
+            val_is_float = isinstance(val, float)
+            val_int = round(val) if val_is_float else val
+            if not 0 <= val_int < 8 ** (digits - 1):
+                # Avoid overflow.
                 info[name] = 0
+                needs_pax = True
+            elif val_is_float:
+                # Put rounded value in ustar header, and full
+                # precision value in pax header.
+                info[name] = val_int
+                needs_pax = True
+
+            # The existing pax header has priority.
+            if needs_pax and name not in pax_headers:
+                pax_headers[name] = str(val)
 
         # Create a pax extended header if necessary.
         if pax_headers:
index 66c1931451748d294912a43c084cb4cee4846c11..0a67bcb0b09d7e54a1bc19ec767be4f37f7f9c6e 100644 (file)
@@ -1911,6 +1911,61 @@ class PaxWriteTest(GNUWriteTest):
         finally:
             tar.close()
 
+    def test_create_pax_header(self):
+        # The ustar header should contain values that can be
+        # represented reasonably, even if a better (e.g. higher
+        # precision) version is set in the pax header.
+        # Issue #45863
+
+        # values that should be kept
+        t = tarfile.TarInfo()
+        t.name = "foo"
+        t.mtime = 1000.1
+        t.size = 100
+        t.uid = 123
+        t.gid = 124
+        info = t.get_info()
+        header = t.create_pax_header(info, encoding="iso8859-1")
+        self.assertEqual(info['name'], "foo")
+        # mtime should be rounded to nearest second
+        self.assertIsInstance(info['mtime'], int)
+        self.assertEqual(info['mtime'], 1000)
+        self.assertEqual(info['size'], 100)
+        self.assertEqual(info['uid'], 123)
+        self.assertEqual(info['gid'], 124)
+        self.assertEqual(header,
+            b'././@PaxHeader' + bytes(86) \
+            + b'0000000\x000000000\x000000000\x0000000000020\x0000000000000\x00010205\x00 x' \
+            + bytes(100) + b'ustar\x0000'+ bytes(247) \
+            + b'16 mtime=1000.1\n' + bytes(496) + b'foo' + bytes(97) \
+            + b'0000644\x000000173\x000000174\x0000000000144\x0000000001750\x00006516\x00 0' \
+            + bytes(100) + b'ustar\x0000' + bytes(247))
+
+        # values that should be changed
+        t = tarfile.TarInfo()
+        t.name = "foo\u3374" # can't be represented in ascii
+        t.mtime = 10**10 # too big
+        t.size = 10**10 # too big
+        t.uid = 8**8 # too big
+        t.gid = 8**8+1 # too big
+        info = t.get_info()
+        header = t.create_pax_header(info, encoding="iso8859-1")
+        # name is kept as-is in info but should be added to pax header
+        self.assertEqual(info['name'], "foo\u3374")
+        self.assertEqual(info['mtime'], 0)
+        self.assertEqual(info['size'], 0)
+        self.assertEqual(info['uid'], 0)
+        self.assertEqual(info['gid'], 0)
+        self.assertEqual(header,
+            b'././@PaxHeader' + bytes(86) \
+            + b'0000000\x000000000\x000000000\x0000000000130\x0000000000000\x00010207\x00 x' \
+            + bytes(100) + b'ustar\x0000' + bytes(247) \
+            + b'15 path=foo\xe3\x8d\xb4\n16 uid=16777216\n' \
+            + b'16 gid=16777217\n20 size=10000000000\n' \
+            + b'21 mtime=10000000000\n'+ bytes(424) + b'foo?' + bytes(96) \
+            + b'0000644\x000000000\x000000000\x0000000000000\x0000000000000\x00006540\x00 0' \
+            + bytes(100) + b'ustar\x0000' + bytes(247))
+
 
 class UnicodeTest:
 
diff --git a/Misc/NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst b/Misc/NEWS.d/next/Library/2022-02-09-00-53-23.bpo-45863.zqQXVv.rst
new file mode 100644 (file)
index 0000000..3a1335c
--- /dev/null
@@ -0,0 +1 @@
+When the :mod:`tarfile` module creates a pax format archive, it will put an integer representation of timestamps in the ustar header (if possible) for the benefit of older unarchivers, in addition to the existing full-precision timestamps in the pax extended header.
\ No newline at end of file