gh-132983: Introduce `compression` package and move `_compression` module (GH-133018)

author Emma Smith <emma@emmatyping.dev>

Sun, 27 Apr 2025 21:41:30 +0000 (14:41 -0700)

committer GitHub <noreply@github.com>

Sun, 27 Apr 2025 21:41:30 +0000 (14:41 -0700)
author Emma Smith <emma@emmatyping.dev>
Sun, 27 Apr 2025 21:41:30 +0000 (14:41 -0700)
committer GitHub <noreply@github.com>
Sun, 27 Apr 2025 21:41:30 +0000 (14:41 -0700)
diff --git a/Lib/bz2.py b/Lib/bz2.py

index 2420cd019069b4f8a620efa77ee359e2e18d48ec..eb58f4da596ea18ff5764b43c3233dded3c2a112 100644 (file)
--- a/Lib/bz2.py
+++ b/Lib/bz2.py
@@ -10,9 +10,9 @@ __all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
  __author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
  
  from builtins import open as _builtin_open
+from compression._common import _streams
  import io
  import os
-import _compression
  
  from _bz2 import BZ2Compressor, BZ2Decompressor
  
@@ -23,7 +23,7 @@ _MODE_READ     = 1
  _MODE_WRITE    = 3
  
  
-class BZ2File(_compression.BaseStream):
+class BZ2File(_streams.BaseStream):
  
      """A file object providing transparent bzip2 (de)compression.
  
@@ -88,7 +88,7 @@ class BZ2File(_compression.BaseStream):
              raise TypeError("filename must be a str, bytes, file or PathLike object")
  
          if self._mode == _MODE_READ:
-            raw = _compression.DecompressReader(self._fp,
+            raw = _streams.DecompressReader(self._fp,
                  BZ2Decompressor, trailing_error=OSError)
              self._buffer = io.BufferedReader(raw)
          else:
@@ -248,7 +248,7 @@ class BZ2File(_compression.BaseStream):
  
          Line separators are not added between the written byte strings.
          """
-        return _compression.BaseStream.writelines(self, seq)
+        return _streams.BaseStream.writelines(self, seq)
  
      def seek(self, offset, whence=io.SEEK_SET):
          """Change the file position.
diff --git a/Lib/compression/__init__.py b/Lib/compression/__init__.py

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/Lib/_compression.py b/Lib/compression/_common/_streams.py

similarity index 98%

rename from Lib/_compression.py

rename to Lib/compression/_common/_streams.py

index e8b70aa0a3e6806c0f2b60ffaf9944291abcf4c4..9f367d4e30440f75d9a32c035713b281f3fbb26a 100644 (file)
--- a/Lib/_compression.py
+++ b/Lib/compression/_common/_streams.py
@@ -1,4 +1,4 @@
-"""Internal classes used by the gzip, lzma and bz2 modules"""
+"""Internal classes used by compression modules"""
  
  import io
  import sys
diff --git a/Lib/compression/bz2/__init__.py b/Lib/compression/bz2/__init__.py

new file mode 100644 (file)

index 0000000..16815d6
--- /dev/null
+++ b/Lib/compression/bz2/__init__.py
@@ -0,0 +1,5 @@
+import bz2
+__doc__ = bz2.__doc__
+del bz2
+
+from bz2 import *
diff --git a/Lib/compression/gzip/__init__.py b/Lib/compression/gzip/__init__.py

new file mode 100644 (file)

index 0000000..552f48f
--- /dev/null
+++ b/Lib/compression/gzip/__init__.py
@@ -0,0 +1,5 @@
+import gzip
+__doc__ = gzip.__doc__
+del gzip
+
+from gzip import *
diff --git a/Lib/compression/lzma/__init__.py b/Lib/compression/lzma/__init__.py

new file mode 100644 (file)

index 0000000..b4bc7cc
--- /dev/null
+++ b/Lib/compression/lzma/__init__.py
@@ -0,0 +1,5 @@
+import lzma
+__doc__ = lzma.__doc__
+del lzma
+
+from lzma import *
diff --git a/Lib/compression/zlib/__init__.py b/Lib/compression/zlib/__init__.py

new file mode 100644 (file)

index 0000000..3aa7e2d
--- /dev/null
+++ b/Lib/compression/zlib/__init__.py
@@ -0,0 +1,5 @@
+import zlib
+__doc__ = zlib.__doc__
+del zlib
+
+from zlib import *
diff --git a/Lib/gzip.py b/Lib/gzip.py

index 2a6eea1b3939b7806307a70e73ec3645572a7aee..b7375b2547314f07b72275e8cecec68b07708a3c 100644 (file)
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -5,7 +5,6 @@ but random access is not allowed."""
  
  # based on Andrew Kuchling's minigzip.py distributed with the zlib module
  
-import _compression
  import builtins
  import io
  import os
@@ -14,6 +13,7 @@ import sys
  import time
  import weakref
  import zlib
+from compression._common import _streams
  
  __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]
  
@@ -144,7 +144,7 @@ class _WriteBufferStream(io.RawIOBase):
          return True
  
  
-class GzipFile(_compression.BaseStream):
+class GzipFile(_streams.BaseStream):
      """The GzipFile class simulates most of the methods of a file object with
      the exception of the truncate() method.
  
@@ -523,7 +523,7 @@ def _read_gzip_header(fp):
      return last_mtime
  
  
-class _GzipReader(_compression.DecompressReader):
+class _GzipReader(_streams.DecompressReader):
      def __init__(self, fp):
          super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
                           wbits=-zlib.MAX_WBITS)
diff --git a/Lib/lzma.py b/Lib/lzma.py

index 946066aa0fba56368b34810d05f3724faced1403..316066d024ea02f3bf5edda62df3127b2f0247f8 100644 (file)
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -24,9 +24,9 @@ __all__ = [
  import builtins
  import io
  import os
+from compression._common import _streams
  from _lzma import *
  from _lzma import _encode_filter_properties, _decode_filter_properties  # noqa: F401
-import _compression
  
  
  # Value 0 no longer used
@@ -35,7 +35,7 @@ _MODE_READ     = 1
  _MODE_WRITE    = 3
  
  
-class LZMAFile(_compression.BaseStream):
+class LZMAFile(_streams.BaseStream):
  
      """A file object providing transparent LZMA (de)compression.
  
@@ -127,7 +127,7 @@ class LZMAFile(_compression.BaseStream):
              raise TypeError("filename must be a str, bytes, file or PathLike object")
  
          if self._mode == _MODE_READ:
-            raw = _compression.DecompressReader(self._fp, LZMADecompressor,
+            raw = _streams.DecompressReader(self._fp, LZMADecompressor,
                  trailing_error=LZMAError, format=format, filters=filters)
              self._buffer = io.BufferedReader(raw)
  
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py

index 7d786be1d25b1c7ce04d633cb921d92d51772eb6..f32b24b39bad00f682cc1963d22ed699fdc24f58 100644 (file)
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -16,7 +16,7 @@ import threading
  from test.support import import_helper
  from test.support import threading_helper
  from test.support.os_helper import unlink, FakePath
-import _compression
+from compression._common import _streams
  import sys
  
  
@@ -126,15 +126,15 @@ class BZ2FileTest(BaseTest):
      def testReadMonkeyMultiStream(self):
          # Test BZ2File.read() on a multi-stream archive where a stream
          # boundary coincides with the end of the raw read buffer.
-        buffer_size = _compression.BUFFER_SIZE
-        _compression.BUFFER_SIZE = len(self.DATA)
+        buffer_size = _streams.BUFFER_SIZE
+        _streams.BUFFER_SIZE = len(self.DATA)
          try:
              self.createTempFile(streams=5)
              with BZ2File(self.filename) as bz2f:
                  self.assertRaises(TypeError, bz2f.read, float())
                  self.assertEqual(bz2f.read(), self.TEXT * 5)
          finally:
-            _compression.BUFFER_SIZE = buffer_size
+            _streams.BUFFER_SIZE = buffer_size
  
      def testReadTrailingJunk(self):
          self.createTempFile(suffix=self.BAD_DATA)
@@ -742,7 +742,7 @@ class BZ2FileTest(BaseTest):
      def testDecompressLimited(self):
          """Decompressed data buffering should be limited"""
          bomb = bz2.compress(b'\0' * int(2e6), compresslevel=9)
-        self.assertLess(len(bomb), _compression.BUFFER_SIZE)
+        self.assertLess(len(bomb), _streams.BUFFER_SIZE)
  
          decomp = BZ2File(BytesIO(bomb))
          self.assertEqual(decomp.read(1), b'\0')
diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py

index 4dd10faf71360aa021ed4cf043514aae97d495df..d7e8327cfee18ad05119da98e2b83fed4de1814c 100644 (file)
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@@ -1,4 +1,3 @@
-import _compression
  import array
  from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
  import os
@@ -7,6 +6,7 @@ import random
  import sys
  from test import support
  import unittest
+from compression._common import _streams
  
  from test.support import _4G, bigmemtest
  from test.support.import_helper import import_module
@@ -861,13 +861,13 @@ class FileTestCase(unittest.TestCase):
      def test_read_multistream_buffer_size_aligned(self):
          # Test the case where a stream boundary coincides with the end
          # of the raw read buffer.
-        saved_buffer_size = _compression.BUFFER_SIZE
-        _compression.BUFFER_SIZE = len(COMPRESSED_XZ)
+        saved_buffer_size = _streams.BUFFER_SIZE
+        _streams.BUFFER_SIZE = len(COMPRESSED_XZ)
          try:
              with LZMAFile(BytesIO(COMPRESSED_XZ *  5)) as f:
                  self.assertEqual(f.read(), INPUT * 5)
          finally:
-            _compression.BUFFER_SIZE = saved_buffer_size
+            _streams.BUFFER_SIZE = saved_buffer_size
  
      def test_read_trailing_junk(self):
          with LZMAFile(BytesIO(COMPRESSED_XZ + COMPRESSED_BOGUS)) as f:
@@ -1066,7 +1066,7 @@ class FileTestCase(unittest.TestCase):
      def test_decompress_limited(self):
          """Decompressed data buffering should be limited"""
          bomb = lzma.compress(b'\0' * int(2e6), preset=6)
-        self.assertLess(len(bomb), _compression.BUFFER_SIZE)
+        self.assertLess(len(bomb), _streams.BUFFER_SIZE)
  
          decomp = LZMAFile(BytesIO(bomb))
          self.assertEqual(decomp.read(1), b'\0')
diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h

index 26f6272ae9cfbcfc58c447915b76c0f13e4e5a96..fcef7419bd397b8c54399fa32a2d2b4371c4cb51 100644 (file)
--- a/Python/stdlib_module_names.h
+++ b/Python/stdlib_module_names.h
@@ -24,7 +24,6 @@ static const char* _Py_stdlib_module_names[] = {
  "_collections_abc",
  "_colorize",
  "_compat_pickle",
-"_compression",
  "_contextvars",
  "_csv",
  "_ctypes",
@@ -128,6 +127,7 @@ static const char* _Py_stdlib_module_names[] = {
  "collections",
  "colorsys",
  "compileall",
+"compression",
  "concurrent",
  "configparser",
  "contextlib",
author	Emma Smith <emma@emmatyping.dev>
	Sun, 27 Apr 2025 21:41:30 +0000 (14:41 -0700)
committer	GitHub <noreply@github.com>
	Sun, 27 Apr 2025 21:41:30 +0000 (14:41 -0700)
Lib/bz2.py		patch | blob | blame | history
Lib/compression/__init__.py	[new file with mode: 0644]	patch | blob
Lib/compression/_common/_streams.py	[moved from Lib/_compression.py with 98% similarity]	patch | blob | blame | history
Lib/compression/bz2/__init__.py	[new file with mode: 0644]	patch | blob
Lib/compression/gzip/__init__.py	[new file with mode: 0644]	patch | blob
Lib/compression/lzma/__init__.py	[new file with mode: 0644]	patch | blob
Lib/compression/zlib/__init__.py	[new file with mode: 0644]	patch | blob
Lib/gzip.py		patch | blob | blame | history
Lib/lzma.py		patch | blob | blame | history
Lib/test/test_bz2.py		patch | blob | blame | history
Lib/test/test_lzma.py		patch | blob | blame | history
Python/stdlib_module_names.h		patch | blob | blame | history