From 97d7d682d8ca2d8381e5b998442b4224290a6fe2 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Fri, 9 Dec 2011 20:41:03 +0100 Subject: [PATCH] Compress/uncompress files on the fly. This will give us a (hopefully) big speed boost when extracting a lot of packages. There is no temporary uncompressed version of the package on disk needed. --- INSTALL | 2 +- po/pakfire.pot | 40 +- python/pakfire/compress.py | 182 ++--- python/pakfire/errors.py | 4 + python/pakfire/lzma.py | 398 ++++++++++ python/pakfire/packages/file.py | 90 +-- python/pakfire/packages/packager.py | 50 +- python/pakfire/repository/index.py | 39 +- python/pakfire/repository/local.py | 39 +- python/src/Makefile | 46 +- python/src/_lzmamodule.c | 1098 +++++++++++++++++++++++++++ 11 files changed, 1741 insertions(+), 247 deletions(-) create mode 100644 python/pakfire/lzma.py create mode 100644 python/src/_lzmamodule.c diff --git a/INSTALL b/INSTALL index 0eb1dd6a4..02b60260b 100644 --- a/INSTALL +++ b/INSTALL @@ -3,11 +3,11 @@ For general information about pakfire see README. Requirements: * Python 2.6 or greater (not Python 3.x) - * pyliblzma * python-progressbar * python-argsparse (included in Python 2.7) * libcap * libsolv + * xz Install instructions: diff --git a/po/pakfire.pot b/po/pakfire.pot index b188c0994..2777e36b4 100644 --- a/po/pakfire.pot +++ b/po/pakfire.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2011-12-08 23:57+0100\n" +"POT-Creation-Date: 2011-12-09 20:38+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -489,10 +489,9 @@ msgstr "" msgid "Do not verify build dependencies." msgstr "" -#: ../python/pakfire/compress.py:133 -#: ../python/pakfire/packages/packager.py:504 +#: ../python/pakfire/compress.py:85 ../python/pakfire/compress.py:95 #, python-format -msgid "Compressing %s" +msgid "Given algorithm '%s' is not supported." 
msgstr "" #: ../python/pakfire/downloader.py:134 @@ -503,22 +502,26 @@ msgstr "" msgid "An unhandled error occured." msgstr "" -#: ../python/pakfire/errors.py:54 +#: ../python/pakfire/errors.py:46 +msgid "Could not compress/decompress data." +msgstr "" + +#: ../python/pakfire/errors.py:58 msgid "One or more dependencies could not been resolved." msgstr "" -#: ../python/pakfire/errors.py:69 +#: ../python/pakfire/errors.py:73 msgid "" "The requested action cannot be done on offline mode.\n" "Please connect your system to the network, remove --offline from the command " "line and try again." msgstr "" -#: ../python/pakfire/errors.py:81 +#: ../python/pakfire/errors.py:85 msgid "Running pakfire-build in a pakfire container?" msgstr "" -#: ../python/pakfire/errors.py:85 ../python/pakfire/transaction.py:417 +#: ../python/pakfire/errors.py:89 ../python/pakfire/transaction.py:417 msgid "Transaction test was not successful" msgstr "" @@ -628,27 +631,27 @@ msgstr "" msgid "Config file saved as %s." msgstr "" -#: ../python/pakfire/packages/file.py:97 +#: ../python/pakfire/packages/file.py:93 #, python-format msgid "Could not extract file: /%(src)s - %(dst)s" msgstr "" -#: ../python/pakfire/packages/file.py:147 +#: ../python/pakfire/packages/file.py:159 #, python-format msgid "Filename: %s" msgstr "" -#: ../python/pakfire/packages/file.py:256 +#: ../python/pakfire/packages/file.py:250 #, python-format msgid "File in archive is missing in file metadata: /%s. Skipping." msgstr "" -#: ../python/pakfire/packages/file.py:312 +#: ../python/pakfire/packages/file.py:306 #, python-format msgid "Config file created as %s" msgstr "" -#: ../python/pakfire/packages/file.py:326 +#: ../python/pakfire/packages/file.py:320 #, python-format msgid "Could not remove file: /%s" msgstr "" @@ -662,7 +665,7 @@ msgid "Package version is undefined." msgstr "" #. Load progressbar. 
-#: ../python/pakfire/packages/packager.py:342 +#: ../python/pakfire/packages/packager.py:358 msgid "Packaging" msgstr "" @@ -698,16 +701,21 @@ msgid "%s: package database" msgstr "" #. Create progress bar. -#: ../python/pakfire/repository/index.py:375 +#: ../python/pakfire/repository/index.py:396 #, python-format msgid "Loading from %s" msgstr "" #. Add all packages from the database to the index. -#: ../python/pakfire/repository/index.py:438 +#: ../python/pakfire/repository/index.py:459 msgid "Loading installed packages" msgstr "" +#. Make a nice progress bar. +#: ../python/pakfire/repository/local.py:149 +msgid "Compressing database..." +msgstr "" + #: ../python/pakfire/repository/remote.py:108 #, python-format msgid "Cannot download this file in offline mode: %s" diff --git a/python/pakfire/compress.py b/python/pakfire/compress.py index a432e076d..e07715d0b 100644 --- a/python/pakfire/compress.py +++ b/python/pakfire/compress.py @@ -19,157 +19,81 @@ # # ############################################################################### -import lzma -import os -import progressbar -import zlib +import pakfire.lzma as lzma from constants import * from i18n import _ -PROGRESS_WIDGETS = [ - progressbar.Bar(left="[", right="]"), - " ", - progressbar.Percentage(), - " ", - progressbar.ETA(), - " ", -] +ALGO_DEFAULT = "xz" -def __compress_helper(i, o, comp, flush, progress=None): - if progress: - widgets = [ "%-30s " % os.path.basename(filename)] + PROGRESS_WIDGETS +# A dictionary with all compression types +# we do support. +# XXX add bzip2, and more here. 
+MAGICS = { + #"gzip" : "\037\213\010", + "xz" : "\xfd7zXZ", +} - maxval = os.path.getsize(filename) +FILES = { + "xz" : lzma.LZMAFile, +} - progress = progressbar.ProgressBar( - widgets=widgets, - maxval=maxval, - ) +COMPRESSORS = { + "xz" : lzma.LZMACompressor +} - progress.start() +DECOMPRESSORS = { + "xz" : lzma.LZMADecompressor, +} - size = 0 - buf = i.read(BUFFER_SIZE) - while buf: - if progress: - size += len(buf) - progress.update(size) - - o.write(comp(buf)) - - buf = i.read(BUFFER_SIZE) - - o.write(flush()) - - if progress: - progress.finish() - -def compress(filename, filename2=None, algo="xz", progress=None): - i = open(filename) - - if not filename2: - filename2 = filename - os.unlink(filename) - - o = open(filename2, "w") - - compressobj(i, o, algo="xz", progress=None) - - i.close() - o.close() - -def compressobj(i, o, algo="xz", progress=None): - comp = None - if algo == "xz": - comp = lzma.LZMACompressor() - - elif algo == "zlib": - comp = zlib.compressobj(9) - - return __compress_helper(i, o, comp.compress, comp.flush, progress=progress) - -def decompress(filename, filename2=None, algo="xz", progress=None): - i = open(filename) - - if not filename2: - filename2 = filename - os.unlink(filename) - - o = open(filename2, "w") - - decompressobj(i, o, algo="xz", progress=None) - - i.close() - o.close() - -def decompressobj(i, o, algo="xz", progress=None): - comp = None - if algo == "xz": - comp = lzma.LZMADecompressor() - - elif algo == "zlib": - comp = zlib.decompressobj(9) - - return __compress_helper(i, o, comp.decompress, comp.flush, progress=progress) - -def compress_file(inputfile, outputfile, message="", algo="xz", progress=True): +def guess_algo(name=None, fileobj=None): """ - Compress a file in place. + This function takes a filename or a file descriptor + and tells the name of the algorithm the file was + compressed with. + If an unknown or no compression was used, None is returned. 
"""
 - assert os.path.exists(inputfile) + ret = None - # Get total size of the file for the progressbar. - total_size = os.path.getsize(inputfile) + if name: + fileobj = open(name) - # Open the input file for reading. - i = open(inputfile, "r") + # Save position of pointer. + pos = fileobj.tell() - # Open the output file for wrinting. - o = open(outputfile, "w") + # Iterate over all algorithms and their magic values + # and check for a match. + for algo, magic in MAGICS.items(): + fileobj.seek(0) - if progress: - if not message: - message = _("Compressing %s") % os.path.basename(filename) + start_sequence = fileobj.read(len(magic)) + if start_sequence == magic: + ret = algo + break - progress = progressbar.ProgressBar( - widgets = ["%-40s" % message, " ",] + PROGRESS_WIDGETS, - maxval = total_size, - ) + # Reset file pointer. + fileobj.seek(pos) - progress.start() + if name: + fileobj.close() - if algo == "xz": - compressor = lzma.LZMACompressor() - elif algo == "zlib": - comp = zlib.decompressobj(9) - else: - raise Exception, "Unknown compression choosen: %s" % algo + return ret - size = 0 - while True: - buf = i.read(BUFFER_SIZE) - if not buf: - break +def decompressobj(name=None, fileobj=None, algo=ALGO_DEFAULT): + f_cls = FILES.get(algo, None) + if not f_cls: + raise CompressionError, _("Given algorithm '%s' is not supported.") % algo - # Update progressbar. - size += len(buf) - if progress: - progress.update(size) + f = f_cls(name, fileobj=fileobj, mode="r") - # Compress the bits in buf. - buf = compressor.compress(buf) + return f - # Write the compressed output. - o.write(buf) - # Flush all buffers. - buf = compressor.flush() - o.write(buf) +def compressobj(name=None, fileobj=None, algo=ALGO_DEFAULT): + f_cls = FILES.get(algo, None) + if not f_cls: + raise CompressionError, _("Given algorithm '%s' is not supported.") % algo - # Close the progress bar. 
- if progress: - progress.finish() + f = f_cls(name, fileobj=fileobj, mode="w") - i.close() - o.close() + return f diff --git a/python/pakfire/errors.py b/python/pakfire/errors.py index 29103a534..2daa7283d 100644 --- a/python/pakfire/errors.py +++ b/python/pakfire/errors.py @@ -42,6 +42,10 @@ class BuildError(Error): class BuildRootLocked(Error): pass +class CompressionError(Error): + message = _("Could not compress/decompress data.") + + class ConfigError(Error): pass diff --git a/python/pakfire/lzma.py b/python/pakfire/lzma.py new file mode 100644 index 000000000..2c3806461 --- /dev/null +++ b/python/pakfire/lzma.py @@ -0,0 +1,398 @@ +"""Interface to the liblzma compression library. + +This module provides a class for reading and writing compressed files, +classes for incremental (de)compression, and convenience functions for +one-shot (de)compression. + +These classes and functions support both the XZ and legacy LZMA +container formats, as well as raw compressed data streams. +""" + +__all__ = [ + "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256", + "CHECK_ID_MAX", "CHECK_UNKNOWN", + "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64", + "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC", + "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW", + "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4", + "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME", + + "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError", + "compress", "decompress", "check_is_supported", +] + +import io +from _lzma import * + + +_MODE_CLOSED = 0 +_MODE_READ = 1 +_MODE_READ_EOF = 2 +_MODE_WRITE = 3 + +_BUFFER_SIZE = 8192 + + +class LZMAFile(io.BufferedIOBase): + + """A file object providing transparent LZMA (de)compression. + + An LZMAFile can act as a wrapper for an existing file object, or + refer directly to a named file on disk. 
+ + Note that LZMAFile provides a *binary* file interface - data read + is returned as bytes, and data to be written must be given as bytes. + """ + + def __init__(self, filename=None, mode="r", + fileobj=None, format=None, check=-1, + preset=None, filters=None): + """Open an LZMA-compressed file. + + If filename is given, open the named file. Otherwise, operate on + the file object given by fileobj. Exactly one of these two + parameters should be provided. + + mode can be "r" for reading (default), "w" for (over)writing, or + "a" for appending. + + format specifies the container format to use for the file. + If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the + default is FORMAT_XZ. + + check specifies the integrity check to use. This argument can + only be used when opening a file for writing. For FORMAT_XZ, + the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not + support integrity checks - for these formats, check must be + omitted, or be CHECK_NONE. + + When opening a file for reading, the *preset* argument is not + meaningful, and should be omitted. The *filters* argument should + also be omitted, except when format is FORMAT_RAW (in which case + it is required). + + When opening a file for writing, the settings used by the + compressor can be specified either as a preset compression + level (with the *preset* argument), or in detail as a custom + filter chain (with the *filters* argument). For FORMAT_XZ and + FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset + level. For FORMAT_RAW, the caller must always specify a filter + chain; the raw compressor does not support preset compression + levels. + + preset (if provided) should be an integer in the range 0-9, + optionally OR-ed with the constant PRESET_EXTREME. + + filters (if provided) should be a sequence of dicts. Each dict + should have an entry for "id" indicating ID of the filter, plus + additional entries for options to the filter. 
+ """ + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + self._pos = 0 + self._size = -1 + + if mode == "r": + if check != -1: + raise ValueError("Cannot specify an integrity check " + "when opening a file for reading") + if preset is not None: + raise ValueError("Cannot specify a preset compression " + "level when opening a file for reading") + if format is None: + format = FORMAT_AUTO + mode_code = _MODE_READ + # Save the args to pass to the LZMADecompressor initializer. + # If the file contains multiple compressed streams, each + # stream will need a separate decompressor object. + self._init_args = {"format":format, "filters":filters} + self._decompressor = LZMADecompressor(**self._init_args) + self._buffer = None + elif mode in ("w", "a"): + if format is None: + format = FORMAT_XZ + mode_code = _MODE_WRITE + self._compressor = LZMACompressor(format=format, check=check, + preset=preset, filters=filters) + else: + raise ValueError("Invalid mode: {!r}".format(mode)) + + if filename is not None and fileobj is None: + mode += "b" + self._fp = open(filename, mode) + self._closefp = True + self._mode = mode_code + elif fileobj is not None and filename is None: + self._fp = fileobj + self._mode = mode_code + else: + raise ValueError("Must give exactly one of filename and fileobj") + + def close(self): + """Flush and close the file. + + May be called more than once without error. Once the file is + closed, any other operation on it will raise a ValueError. 
+ """ + if self._mode == _MODE_CLOSED: + return + try: + if self._mode in (_MODE_READ, _MODE_READ_EOF): + self._decompressor = None + self._buffer = None + elif self._mode == _MODE_WRITE: + self._fp.write(self._compressor.flush()) + self._compressor = None + finally: + try: + if self._closefp: + self._fp.close() + finally: + self._fp = None + self._closefp = False + self._mode = _MODE_CLOSED + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode in (_MODE_READ, _MODE_READ_EOF) + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + # Mode-checking helper functions. + + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if not self.readable(): + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if not self.writable(): + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if not self.seekable(): + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + + # Fill the readahead buffer if it is empty. Returns False on EOF. 
+ def _fill_buffer(self): + if self._buffer: + return True + + if self._decompressor.unused_data: + rawblock = self._decompressor.unused_data + else: + rawblock = self._fp.read(_BUFFER_SIZE) + + if not rawblock: + if self._decompressor.eof: + self._mode = _MODE_READ_EOF + self._size = self._pos + return False + else: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + + # Continue to next stream. + if self._decompressor.eof: + self._decompressor = LZMADecompressor(**self._init_args) + + self._buffer = self._decompressor.decompress(rawblock) + return True + + # Read data until EOF. + # If return_data is false, consume the data without returning it. + def _read_all(self, return_data=True): + blocks = [] + while self._fill_buffer(): + if return_data: + blocks.append(self._buffer) + self._pos += len(self._buffer) + self._buffer = None + if return_data: + return b"".join(blocks) + + # Read a block of up to n bytes. + # If return_data is false, consume the data without returning it. + def _read_block(self, n, return_data=True): + blocks = [] + while n > 0 and self._fill_buffer(): + if n < len(self._buffer): + data = self._buffer[:n] + self._buffer = self._buffer[n:] + else: + data = self._buffer + self._buffer = None + if return_data: + blocks.append(data) + self._pos += len(data) + n -= len(data) + if return_data: + return b"".join(blocks) + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + self._check_can_read() + if self._mode == _MODE_READ_EOF or not self._fill_buffer(): + return b"" + return self._buffer + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b"" if the file is already at EOF. 
+ """ + self._check_can_read() + if self._mode == _MODE_READ_EOF or size == 0: + return b"" + elif size < 0: + return self._read_all() + else: + return self._read_block(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes with at most one read + from the underlying stream. + + Returns b"" if the file is at EOF. + """ + self._check_can_read() + if (size == 0 or self._mode == _MODE_READ_EOF or + not self._fill_buffer()): + return b"" + if 0 < size < len(self._buffer): + data = self._buffer[:size] + self._buffer = self._buffer[size:] + else: + data = self._buffer + self._buffer = None + self._pos += len(data) + return data + + def write(self, data): + """Write a bytes object to the file. + + Returns the number of uncompressed bytes written, which is + always len(data). Note that due to buffering, the file on disk + may not reflect the data written until close() is called. + """ + self._check_can_write() + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += len(data) + return len(data) + + # Rewind the file to the beginning of the data stream. + def _rewind(self): + self._fp.seek(0, 0) + self._mode = _MODE_READ + self._pos = 0 + self._decompressor = LZMADecompressor(**self._init_args) + self._buffer = None + + def seek(self, offset, whence=0): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Possible values for whence are: + + 0: start of stream (default): offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, sp depending on the parameters, + this operation may be extremely slow. + """ + self._check_can_seek() + + # Recalculate offset as an absolute file position. 
+ if whence == 0: + pass + elif whence == 1: + offset = self._pos + offset + elif whence == 2: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + self._read_all(return_data=False) + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + if self._mode != _MODE_READ_EOF: + self._read_block(offset, return_data=False) + + return self._pos + + def tell(self): + """Return the current file position.""" + self._check_not_closed() + return self._pos + + +def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None): + """Compress a block of data. + + Refer to LZMACompressor's docstring for a description of the + optional arguments *format*, *check*, *preset* and *filters*. + + For incremental compression, use an LZMACompressor object instead. + """ + comp = LZMACompressor(format, check, preset, filters) + return comp.compress(data) + comp.flush() + + +def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None): + """Decompress a block of data. + + Refer to LZMADecompressor's docstring for a description of the + optional arguments *format*, *check* and *filters*. + + For incremental decompression, use a LZMADecompressor object instead. + """ + results = [] + while True: + decomp = LZMADecompressor(format, memlimit, filters) + results.append(decomp.decompress(data)) + if not decomp.eof: + raise LZMAError("Compressed data ended before the " + "end-of-stream marker was reached") + if not decomp.unused_data: + return b"".join(results) + # There is unused data left over. Proceed to next stream. 
+ data = decomp.unused_data diff --git a/python/pakfire/packages/file.py b/python/pakfire/packages/file.py index 91c44382e..a585221e0 100644 --- a/python/pakfire/packages/file.py +++ b/python/pakfire/packages/file.py @@ -30,6 +30,7 @@ import logging log = logging.getLogger("pakfire") import pakfire.filelist +import pakfire.lzma as lzma import pakfire.util as util import pakfire.compress as compress from pakfire.constants import * @@ -38,11 +39,6 @@ from pakfire.i18n import _ from base import Package from lexer import FileLexer -# XXX need to add zlib and stuff here. -PAYLOAD_COMPRESSION_MAGIC = { - "xz" : "\xfd7zXZ", -} - class InnerTarFile(tarfile.TarFile): def __init__(self, *args, **kwargs): # Force the PAX format. @@ -109,6 +105,21 @@ class InnerTarFile(tarfile.TarFile): util.set_capabilities(target, caps) +class InnerTarFileXz(InnerTarFile): + @classmethod + def open(cls, name=None, mode="r", fileobj=None, **kwargs): + fileobj = lzma.LZMAFile(name, mode, fileobj=fileobj) + + try: + t = cls.taropen(name, mode, fileobj, **kwargs) + except lzma.LZMAError: + fileobj.close() + raise tarfile.ReadError("not an lzma file") + + t._extfileobj = False + return t + + class FilePackage(Package): """ This class is a wrapper that reads package data from the (outer) @@ -121,8 +132,9 @@ class FilePackage(Package): # Place to cache the metadata self._metadata = {} - # Place to cache the filelist + # Place to cache the filelist and payload compression algorithm. self._filelist = None + self.__payload_compression = None # Store the format of this package file. self.format = self.get_format() @@ -191,9 +203,6 @@ class FilePackage(Package): if prefix is None: prefix = "" - # A place to store temporary data. - tempf = None - # Open package data for read. archive = self.open_archive() @@ -201,31 +210,15 @@ class FilePackage(Package): payload = archive.extractfile("data.img") # Decompress the payload if needed. 
- log.debug("Compression: %s" % self.payload_compression) - - # Create a temporary file to store the decompressed output. - garbage, tempf = tempfile.mkstemp(prefix="pakfire") - - i = payload - o = open(tempf, "w") + if self.payload_compression == "xz": + payload_archive = InnerTarFileXz.open(fileobj=payload) - # Decompress the package payload. - if self.payload_compression: - compress.decompressobj(i, o, algo=self.payload_compression) + elif self.payload_compression == "none": + payload_archive = InnerTarFile.open(fileobj=payload) else: - buf = i.read(BUFFER_SIZE) - while buf: - o.write(buf) - buf = i.read(BUFFER_SIZE) - - i.close() - o.close() - - payload = open(tempf) - - # Open the tarball in the package. - payload_archive = InnerTarFile.open(fileobj=payload) + raise Exception, "Unhandled payload compression type: %s" \ + % self.payload_compression # Load progressbar. pb = None @@ -237,9 +230,6 @@ class FilePackage(Package): # the user. messages = [] - # Get a list of files in the archive. - members = payload_archive.getmembers() - name2file = {} for file in self.filelist: name = file.name @@ -250,7 +240,11 @@ name2file[name] = file i = 0 - for member in members: + while True: + member = payload_archive.next() + if not member: + break + file = name2file.get("/%s" % member.name, None) if not file: log.warning(_("File in archive is missing in file metadata: /%s. Skipping.") % member.name) @@ -343,9 +337,6 @@ class FilePackage(Package): payload.close() archive.close() - if tempf: - os.unlink(tempf) - if pb: pb.finish() @@ -511,23 +502,18 @@ class FilePackage(Package): """ Return the (guessed) compression type of the payload. """ - # Get the max. length of the magic values. - max_length = max([len(v) for v in PAYLOAD_COMPRESSION_MAGIC.values()]) - - a = self.open_archive() - f = a.extractfile("data.img") - - # Read magic bytes from file. - magic = f.read(max_length) + # We cache that because this is costly. 
+ if self.__payload_compression is None: + a = self.open_archive() + f = a.extractfile("data.img") - f.close() - a.close() + # Go and guess what we do have here. + self.__payload_compression = compress.guess_algo(fileobj=f) - for algo, m in PAYLOAD_COMPRESSION_MAGIC.items(): - if not magic.startswith(m): - continue + f.close() + a.close() - return algo + return self.__payload_compression or "none" @property def signature(self): diff --git a/python/pakfire/packages/packager.py b/python/pakfire/packages/packager.py index b361077bc..a0046d688 100644 --- a/python/pakfire/packages/packager.py +++ b/python/pakfire/packages/packager.py @@ -38,13 +38,12 @@ import zlib import logging log = logging.getLogger("pakfire") -import pakfire.compress import pakfire.util as util from pakfire.constants import * from pakfire.i18n import _ -from file import BinaryPackage, InnerTarFile, SourcePackage +from file import BinaryPackage, InnerTarFileXz, SourcePackage class Packager(object): def __init__(self, pakfire, pkg): @@ -137,7 +136,7 @@ class Packager(object): filelist = self.mktemp() f = open(filelist, "w") - datafile = InnerTarFile(datafile) + datafile = InnerTarFileXz.open(datafile) for m in datafile.getmembers(): log.debug(" %s %-8s %-8s %s %6s %s" % \ @@ -182,6 +181,23 @@ class Packager(object): def run(self): raise NotImplementedError + def getsize(self, filename): + if tarfile.is_tarfile(filename): + return os.path.getsize(filename) + + size = 0 + f = lzma.LZMAFile(filename) + + while True: + buf = f.read(BUFFER_SIZE) + if not buf: + break + + size += len(buf) + f.close() + + return size + class BinaryPackager(Packager): def __init__(self, pakfire, pkg, builder, buildroot): @@ -196,7 +212,7 @@ class BinaryPackager(Packager): # Extract datafile in temporary directory and scan for dependencies. 
tmpdir = self.mktemp(directory=True) - tarfile = InnerTarFile(datafile) + tarfile = InnerTarFileXz.open(datafile) tarfile.extractall(path=tmpdir) tarfile.close() @@ -243,7 +259,7 @@ class BinaryPackager(Packager): # Installed size (equals size of the uncompressed tarball). info.update({ - "inst_size" : os.path.getsize(datafile), + "inst_size" : self.getsize(datafile), }) metafile = self.mktemp() @@ -343,7 +359,7 @@ class BinaryPackager(Packager): pb = util.make_progress(message, len(files), eta=False) datafile = self.mktemp() - tar = InnerTarFile(datafile, mode="w") + tar = InnerTarFileXz.open(datafile, mode="w") # All files in the tarball are relative to this directory. basedir = self.buildroot @@ -450,7 +466,7 @@ class BinaryPackager(Packager): return scriptlets def create_configs(self, datafile): - datafile = InnerTarFile(datafile) + datafile = InnerTarFileXz.open(datafile) members = datafile.getmembers() @@ -496,19 +512,6 @@ class BinaryPackager(Packager): return configsfile - def compress_datafile(self, datafile, algo="xz"): - outputfile = self.mktemp() - - # Compress the datafile with the choosen algorithm. - pakfire.compress.compress_file(datafile, outputfile, algo=algo, - progress=True, message=_("Compressing %s") % self.pkg.friendly_name) - - # We do not need the uncompressed output anymore. - os.unlink(datafile) - - # The outputfile becomes out new datafile. - return outputfile - def run(self, resultdir): # Add all files to this package. datafile = self.create_datafile() @@ -522,9 +525,6 @@ class BinaryPackager(Packager): metafile = self.create_metafile(datafile) - # XXX make xz in variable - datafile = self.compress_datafile(datafile, algo="xz") - # Add files to the tar archive in correct order. self.add(metafile, "info") self.add(filelist, "filelist") @@ -569,7 +569,7 @@ class SourcePackager(Packager): info.update(self.pkg.info) # Size is the size of the (uncompressed) datafile. 
- info["inst_size"] = os.path.getsize(datafile) + info["inst_size"] = self.getsize(datafile) # Update package information for string formatting. requires = [PACKAGE_INFO_DEPENDENCY_LINE % r for r in self.pkg.requires] @@ -609,7 +609,7 @@ class SourcePackager(Packager): def create_datafile(self): filename = self.mktemp() - datafile = InnerTarFile(filename, mode="w") + datafile = InnerTarFileXz.open(filename, mode="w") # Add all downloaded files to the package. for file in self.pkg.download(): diff --git a/python/pakfire/repository/index.py b/python/pakfire/repository/index.py index de892f805..ee3baeced 100644 --- a/python/pakfire/repository/index.py +++ b/python/pakfire/repository/index.py @@ -288,19 +288,40 @@ class IndexSolv(Index): ) grabber = self.repo.mirrors.group(grabber) - data = grabber.urlread(filename) + # Open file on server. + print "OPENING FILE ON SERVER" + urlobj = fileobj = grabber.urlopen(filename) + print urlobj - with self.cache.open(filename, "w") as o: - o.write(data) - - # decompress the database if self.metadata.database_compression: - # Open input file and remove the file immediately. - # The fileobj is still open and the data will be removed - # when it is closed. - compress.decompress(self.cache.abspath(filename), + fileobj = compress.decompressobj(fileobj=fileobj, algo=self.metadata.database_compression) + # Make a new file in the cache. + cacheobj = self.cache.open(filename, "w") + + try: + while True: + buf = fileobj.read(BUFFER_SIZE) + if not buf: + break + cacheobj.write(buf) + except: + # XXX we should catch decompression errors + + # Close all file descriptors. 
+ cacheobj.close() + fileobj.close() + if not urlobj == fileobj: + urlobj.close() + + raise + + cacheobj.close() + fileobj.close() + + if not urlobj == fileobj: + urlobj.close() + # check the hashsum of the downloaded file if not util.calc_hash1(self.cache.abspath(filename)) == self.metadata.database_hash1: # XXX an exception is not a very good idea because this file could diff --git a/python/pakfire/repository/local.py b/python/pakfire/repository/local.py index fd4a9967f..800019e52 100644 --- a/python/pakfire/repository/local.py +++ b/python/pakfire/repository/local.py @@ -34,6 +34,7 @@ import pakfire.packages as packages import pakfire.util as util from pakfire.constants import * +from pakfire.i18n import _ class RepositoryDir(base.RepositoryFactory): def __init__(self, pakfire, name, description, path, type="binary"): @@ -140,12 +141,42 @@ class RepositoryDir(base.RepositoryFactory): # Compress the database. if algo: - compress.compress(db_path, algo=algo, progress=True) + # Open input file and get filesize of input file. + f = open(db_path) + filesize = os.path.getsize(db_path) + + # Make a nice progress bar. + p = util.make_progress(_("Compressing database..."), filesize) + + # Create compressing file handler. + c = compress.compressobj(db_path2) + + try: + size = 0 + while True: + buf = f.read(BUFFER_SIZE) + if not buf: + break + + if p: + size += len(buf) + p.update(size) + + c.write(buf) + except: + # XXX catch compression errors + raise + + finally: + f.close() + c.close() + if p: p.finish() + + # Remove old database. + os.unlink(db_path) - if not os.path.exists(db_path2): - shutil.move(db_path, db_path2) else: - os.unlink(db_path) + shutil.move(db_path, db_path2) # Create a new metadata object and add out information to it. 
md = metadata.Metadata(self.pakfire, self) diff --git a/python/src/Makefile b/python/src/Makefile index 190bfc847..583cc16af 100644 --- a/python/src/Makefile +++ b/python/src/Makefile @@ -2,28 +2,52 @@ include ../../Makeconfig # The name of the module. -MODULENAME = _pakfire.so +MODULE_PAKFIRE = _pakfire.so +MODULE_LZMA = _lzma.so +MODULES = $(MODULE_PAKFIRE) $(MODULE_LZMA) # Libs that are to be linked into the module. -MODULELIBS = -lcap -lpython$(PYTHON_VERSION) -lsolv -lsolvext - -SOURCES = $(wildcard *.c) -OBJECTS = $(patsubst %.c,%.o,$(SOURCES)) +MODULE_PAKFIRE_LIBS = -lcap -lpython$(PYTHON_VERSION) -lsolv -lsolvext +MODULE_LZMA_LIBS = -llzma + +SOURCES_LZMA = _lzmamodule.c +SOURCES_PAKFIRE = \ + capabilities.c \ + _pakfiremodule.c \ + problem.c \ + repo.c \ + solution.c \ + solver.c \ + transaction.c \ + pool.c \ + relation.c \ + request.c \ + solvable.c \ + step.c \ + util.c + +OBJECTS_LZMA = $(patsubst %.c,%.o,$(SOURCES_LZMA)) +OBJECTS_PAKFIRE = $(patsubst %.c,%.o,$(SOURCES_PAKFIRE)) +OBJECTS = $(OBJECTS_PAKFIRE) $(OBJECTS_LZMA) .PHONY: -all: $(MODULENAME) +all: $(MODULES) + +$(MODULE_PAKFIRE): $(OBJECTS_PAKFIRE) + $(PYTHON_CC) $(PYTHON_CFLAGS) -shared $^ $(MODULE_PAKFIRE_LIBS) -o $@ -$(MODULENAME): $(OBJECTS) - $(PYTHON_CC) $(PYTHON_CFLAGS) -shared $^ $(MODULELIBS) -o $@ +$(MODULE_LZMA): $(OBJECTS_LZMA) + $(PYTHON_CC) $(PYTHON_CFLAGS) -shared $^ $(MODULE_LZMA_LIBS) -o $@ %.o: %.c Makefile config.h $(PYTHON_CC) $(PYTHON_CFLAGS) -o $@ -c $< .PHONY: clean clean: - rm -f $(OBJECTS) $(MODULENAME) + rm -f $(OBJECTS) $(MODULES) .PHONY: install -install: $(MODULENAME) +install: $(MODULES) -mkdir -pv $(DESTDIR)$(PYTHON_DIR)/$(PACKAGE_NAME) - install -m 755 -v $< $(DESTDIR)$(PYTHON_DIR)/$(PACKAGE_NAME)/ + install -m 755 -v $(MODULE_PAKFIRE) $(DESTDIR)$(PYTHON_DIR)/$(PACKAGE_NAME) + install -m 755 -v $(MODULE_LZMA) $(DESTDIR)$(PYTHON_DIR)/$(PACKAGE_NAME) diff --git a/python/src/_lzmamodule.c b/python/src/_lzmamodule.c new file mode 100644 index 000000000..3a99714cf 
--- /dev/null +++ b/python/src/_lzmamodule.c @@ -0,0 +1,1098 @@ +/* _lzma - Low-level Python interface to liblzma. + + Initial implementation by Per Øyvind Karlsen. + Rewritten by Nadeem Vawda. + +*/ + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "structmember.h" +#ifdef WITH_THREAD +#include "pythread.h" +#endif + +#include +#include + +#include + + +#ifndef PY_LONG_LONG +#error "This module requires PY_LONG_LONG to be defined" +#endif + + +#ifdef WITH_THREAD +#define ACQUIRE_LOCK(obj) do { \ + if (!PyThread_acquire_lock((obj)->lock, 0)) { \ + Py_BEGIN_ALLOW_THREADS \ + PyThread_acquire_lock((obj)->lock, 1); \ + Py_END_ALLOW_THREADS \ + } } while (0) +#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock) +#else +#define ACQUIRE_LOCK(obj) +#define RELEASE_LOCK(obj) +#endif + + +/* Container formats: */ +enum { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_ALONE, + FORMAT_RAW, +}; + +#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1) + + +typedef struct { + PyObject_HEAD + lzma_stream lzs; + int flushed; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} Compressor; + +typedef struct { + PyObject_HEAD + lzma_stream lzs; + int check; + char eof; + PyObject *unused_data; +#ifdef WITH_THREAD + PyThread_type_lock lock; +#endif +} Decompressor; + +/* LZMAError class object. */ +static PyObject *Error; + +/* An empty tuple, used by the filter specifier parsing code. */ +static PyObject *empty_tuple; + + +/* Helper functions. 
*/ + +static int +catch_lzma_error(lzma_ret lzret) +{ + switch (lzret) { + case LZMA_OK: + case LZMA_GET_CHECK: + case LZMA_NO_CHECK: + case LZMA_STREAM_END: + return 0; + case LZMA_UNSUPPORTED_CHECK: + PyErr_SetString(Error, "Unsupported integrity check"); + return 1; + case LZMA_MEM_ERROR: + PyErr_NoMemory(); + return 1; + case LZMA_MEMLIMIT_ERROR: + PyErr_SetString(Error, "Memory usage limit exceeded"); + return 1; + case LZMA_FORMAT_ERROR: + PyErr_SetString(Error, "Input format not supported by decoder"); + return 1; + case LZMA_OPTIONS_ERROR: + PyErr_SetString(Error, "Invalid or unsupported options"); + return 1; + case LZMA_DATA_ERROR: + PyErr_SetString(Error, "Corrupt input data"); + return 1; + case LZMA_BUF_ERROR: + PyErr_SetString(Error, "Insufficient buffer space"); + return 1; + case LZMA_PROG_ERROR: + PyErr_SetString(Error, "Internal error"); + return 1; + default: + PyErr_Format(Error, "Unrecognized error from liblzma: %d", lzret); + return 1; + } +} + +#if BUFSIZ < 8192 +#define INITIAL_BUFFER_SIZE 8192 +#else +#define INITIAL_BUFFER_SIZE BUFSIZ +#endif + +static int +grow_buffer(PyObject **buf) +{ + size_t size = PyBytes_GET_SIZE(*buf); + return _PyBytes_Resize(buf, size + (size >> 3) + 6); +} + + +/* Some custom type conversions for PyArg_ParseTupleAndKeywords(), + since the predefined conversion specifiers do not suit our needs: + + uint32_t - the "I" (unsigned int) specifier is the right size, but + silently ignores overflows on conversion. + + lzma_mode and lzma_match_finder - these are enumeration types, and + so the size of each is implementation-defined. Worse, different + enum types can be of different sizes within the same program, so + to be strictly correct, we need to define two separate converters. 
+ */ + +#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \ + static int \ + FUNCNAME(PyObject *obj, void *ptr) \ + { \ + unsigned long val; \ + \ + val = PyLong_AsUnsignedLong(obj); \ + if (PyErr_Occurred()) \ + return 0; \ + if ((unsigned long)(TYPE)val != val) { \ + PyErr_SetString(PyExc_OverflowError, \ + "Value too large for " #TYPE " type"); \ + return 0; \ + } \ + *(TYPE *)ptr = val; \ + return 1; \ + } + +INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter) +INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter) +INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter) + +#undef INT_TYPE_CONVERTER_FUNC + + +/* Filter specifier parsing functions. */ + +static void * +parse_filter_spec_lzma(PyObject *spec) +{ + static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp", + "pb", "mode", "nice_len", "mf", "depth", NULL}; + PyObject *id; + PyObject *preset_obj; + uint32_t preset = LZMA_PRESET_DEFAULT; + lzma_options_lzma *options; + + /* First, fill in default values for all the options using a preset. + Then, override the defaults with any values given by the caller. 
*/ + + preset_obj = PyMapping_GetItemString(spec, "preset"); + if (preset_obj == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_Clear(); + else + return NULL; + } else { + int ok = uint32_converter(preset_obj, &preset); + Py_DECREF(preset_obj); + if (!ok) + return NULL; + } + + options = (lzma_options_lzma *)PyMem_Malloc(sizeof *options); + if (options == NULL) + return PyErr_NoMemory(); + memset(options, 0, sizeof *options); + + if (lzma_lzma_preset(options, preset)) { + PyMem_Free(options); + PyErr_Format(Error, "lzma_lzma_preset() failed for preset %#x", preset); + return NULL; + } + + if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, + "|OOO&O&O&O&O&O&O&O&", optnames, + &id, &preset_obj, + uint32_converter, &options->dict_size, + uint32_converter, &options->lc, + uint32_converter, &options->lp, + uint32_converter, &options->pb, + lzma_mode_converter, &options->mode, + uint32_converter, &options->nice_len, + lzma_mf_converter, &options->mf, + uint32_converter, &options->depth)) { + PyErr_SetString(PyExc_ValueError, + "Invalid filter specifier for LZMA filter"); + PyMem_Free(options); + options = NULL; + } + return options; +} + +static void * +parse_filter_spec_delta(PyObject *spec) +{ + static char *optnames[] = {"id", "dist", NULL}; + PyObject *id; + uint32_t dist = 1; + lzma_options_delta *options; + + if (!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames, + &id, uint32_converter, &dist)) { + PyErr_SetString(PyExc_ValueError, + "Invalid filter specifier for delta filter"); + return NULL; + } + + options = (lzma_options_delta *)PyMem_Malloc(sizeof *options); + if (options == NULL) + return PyErr_NoMemory(); + memset(options, 0, sizeof *options); + options->type = LZMA_DELTA_TYPE_BYTE; + options->dist = dist; + return options; +} + +static void * +parse_filter_spec_bcj(PyObject *spec) +{ + static char *optnames[] = {"id", "start_offset", NULL}; + PyObject *id; + uint32_t start_offset = 0; + lzma_options_bcj *options; + + if 
(!PyArg_ParseTupleAndKeywords(empty_tuple, spec, "|OO&", optnames, + &id, uint32_converter, &start_offset)) { + PyErr_SetString(PyExc_ValueError, + "Invalid filter specifier for BCJ filter"); + return NULL; + } + + options = (lzma_options_bcj *)PyMem_Malloc(sizeof *options); + if (options == NULL) + return PyErr_NoMemory(); + memset(options, 0, sizeof *options); + options->start_offset = start_offset; + return options; +} + +static void * +parse_filter_spec(lzma_filter *f, PyObject *spec) +{ + PyObject *id_obj; + + if (!PyMapping_Check(spec)) { + PyErr_SetString(PyExc_TypeError, + "Filter specifier must be a dict or dict-like object"); + return NULL; + } + id_obj = PyMapping_GetItemString(spec, "id"); + if (id_obj == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_SetString(PyExc_ValueError, + "Filter specifier must have an \"id\" entry"); + return NULL; + } + f->id = PyLong_AsUnsignedLongLong(id_obj); + Py_DECREF(id_obj); + if (PyErr_Occurred()) + return NULL; + + switch (f->id) { + case LZMA_FILTER_LZMA1: + case LZMA_FILTER_LZMA2: + f->options = parse_filter_spec_lzma(spec); + return f->options; + case LZMA_FILTER_DELTA: + f->options = parse_filter_spec_delta(spec); + return f->options; + case LZMA_FILTER_X86: + case LZMA_FILTER_POWERPC: + case LZMA_FILTER_IA64: + case LZMA_FILTER_ARM: + case LZMA_FILTER_ARMTHUMB: + case LZMA_FILTER_SPARC: + f->options = parse_filter_spec_bcj(spec); + return f->options; + default: + PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id); + return NULL; + } +} + +static void +free_filter_chain(lzma_filter filters[]) +{ + int i; + + for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) + PyMem_Free(filters[i].options); +} + +static int +parse_filter_chain_spec(lzma_filter filters[], PyObject *filterspecs) +{ + Py_ssize_t i, num_filters; + + num_filters = PySequence_Length(filterspecs); + if (num_filters == -1) + return -1; + if (num_filters > LZMA_FILTERS_MAX) { + PyErr_Format(PyExc_ValueError, + "Too many 
filters - liblzma supports a maximum of %d", + LZMA_FILTERS_MAX); + return -1; + } + + for (i = 0; i < num_filters; i++) { + int ok = 1; + PyObject *spec = PySequence_GetItem(filterspecs, i); + if (spec == NULL || parse_filter_spec(&filters[i], spec) == NULL) + ok = 0; + Py_XDECREF(spec); + if (!ok) { + filters[i].id = LZMA_VLI_UNKNOWN; + free_filter_chain(filters); + return -1; + } + } + filters[num_filters].id = LZMA_VLI_UNKNOWN; + return 0; +} + + +/* LZMACompressor class. */ + +static PyObject * +compress(Compressor *c, uint8_t *data, size_t len, lzma_action action) +{ + size_t data_size = 0; + PyObject *result; + + result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + if (result == NULL) + return NULL; + c->lzs.next_in = data; + c->lzs.avail_in = len; + c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result); + c->lzs.avail_out = PyBytes_GET_SIZE(result); + for (;;) { + lzma_ret lzret; + + Py_BEGIN_ALLOW_THREADS + lzret = lzma_code(&c->lzs, action); + data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result); + Py_END_ALLOW_THREADS + if (catch_lzma_error(lzret)) + goto error; + if ((action == LZMA_RUN && c->lzs.avail_in == 0) || + (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) { + break; + } else if (c->lzs.avail_out == 0) { + if (grow_buffer(&result) == -1) + goto error; + c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; + c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size; + } + } + if (data_size != PyBytes_GET_SIZE(result)) + if (_PyBytes_Resize(&result, data_size) == -1) + goto error; + return result; + +error: + Py_XDECREF(result); + return NULL; +} + +PyDoc_STRVAR(Compressor_compress_doc, +"compress(data) -> bytes\n" +"\n" +"Provide data to the compressor object. 
Returns a chunk of\n" +"compressed data if possible, or b\"\" otherwise.\n" +"\n" +"When you have finished providing data to the compressor, call the\n" +"flush() method to finish the conversion process.\n"); + +static PyObject * +Compressor_compress(Compressor *self, PyObject *args) +{ + Py_buffer buffer; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "s*:compress", &buffer)) + return NULL; + + ACQUIRE_LOCK(self); + if (self->flushed) + PyErr_SetString(PyExc_ValueError, "Compressor has been flushed"); + else + result = compress(self, buffer.buf, buffer.len, LZMA_RUN); + RELEASE_LOCK(self); + PyBuffer_Release(&buffer); + return result; +} + +PyDoc_STRVAR(Compressor_flush_doc, +"flush() -> bytes\n" +"\n" +"Finish the compression process. Returns the compressed data left\n" +"in internal buffers.\n" +"\n" +"The compressor object cannot be used after this method is called.\n"); + +static PyObject * +Compressor_flush(Compressor *self, PyObject *noargs) +{ + PyObject *result = NULL; + + ACQUIRE_LOCK(self); + if (self->flushed) { + PyErr_SetString(PyExc_ValueError, "Repeated call to flush()"); + } else { + self->flushed = 1; + result = compress(self, NULL, 0, LZMA_FINISH); + } + RELEASE_LOCK(self); + return result; +} + +static int +Compressor_init_xz(lzma_stream *lzs, int check, uint32_t preset, + PyObject *filterspecs) +{ + lzma_ret lzret; + + if (filterspecs == Py_None) { + lzret = lzma_easy_encoder(lzs, preset, check); + } else { + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + lzret = lzma_stream_encoder(lzs, filters, check); + free_filter_chain(filters); + } + if (catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Compressor_init_alone(lzma_stream *lzs, uint32_t preset, PyObject *filterspecs) +{ + lzma_ret lzret; + + if (filterspecs == Py_None) { + lzma_options_lzma options; + + if (lzma_lzma_preset(&options, preset)) { + PyErr_Format(Error, "Invalid 
compression preset: %#x", preset); + return -1; + } + lzret = lzma_alone_encoder(lzs, &options); + } else { + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + if (filters[0].id == LZMA_FILTER_LZMA1 && + filters[1].id == LZMA_VLI_UNKNOWN) { + lzret = lzma_alone_encoder(lzs, filters[0].options); + } else { + PyErr_SetString(PyExc_ValueError, + "Invalid filter chain for FORMAT_ALONE - " + "must be a single LZMA1 filter"); + lzret = LZMA_PROG_ERROR; + } + free_filter_chain(filters); + } + if (PyErr_Occurred() || catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Compressor_init_raw(lzma_stream *lzs, PyObject *filterspecs) +{ + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + lzma_ret lzret; + + if (filterspecs == Py_None) { + PyErr_SetString(PyExc_ValueError, + "Must specify filters for FORMAT_RAW"); + return -1; + } + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + lzret = lzma_raw_encoder(lzs, filters); + free_filter_chain(filters); + if (catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs) +{ + static char *arg_names[] = {"format", "check", "preset", "filters", NULL}; + int format = FORMAT_XZ; + int check = -1; + uint32_t preset = LZMA_PRESET_DEFAULT; + PyObject *preset_obj = Py_None; + PyObject *filterspecs = Py_None; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|iiOO:LZMACompressor", arg_names, + &format, &check, &preset_obj, + &filterspecs)) + return -1; + + if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) { + PyErr_SetString(PyExc_ValueError, + "Integrity checks are only supported by FORMAT_XZ"); + return -1; + } + + if (preset_obj != Py_None && filterspecs != Py_None) { + PyErr_SetString(PyExc_ValueError, + "Cannot specify both preset and filter chain"); + return -1; + } + + if (preset_obj != Py_None) + if 
(!uint32_converter(preset_obj, &preset)) + return -1; + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); + return -1; + } +#endif + + self->flushed = 0; + switch (format) { + case FORMAT_XZ: + if (check == -1) + check = LZMA_CHECK_CRC64; + if (Compressor_init_xz(&self->lzs, check, preset, filterspecs) != 0) + break; + return 0; + + case FORMAT_ALONE: + if (Compressor_init_alone(&self->lzs, preset, filterspecs) != 0) + break; + return 0; + + case FORMAT_RAW: + if (Compressor_init_raw(&self->lzs, filterspecs) != 0) + break; + return 0; + + default: + PyErr_Format(PyExc_ValueError, + "Invalid container format: %d", format); + break; + } + +#ifdef WITH_THREAD + PyThread_free_lock(self->lock); + self->lock = NULL; +#endif + return -1; +} + +static void +Compressor_dealloc(Compressor *self) +{ + lzma_end(&self->lzs); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyMethodDef Compressor_methods[] = { + {"compress", (PyCFunction)Compressor_compress, METH_VARARGS, + Compressor_compress_doc}, + {"flush", (PyCFunction)Compressor_flush, METH_NOARGS, + Compressor_flush_doc}, + {NULL} +}; + +PyDoc_STRVAR(Compressor_doc, +"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n" +"\n" +"Create a compressor object for compressing data incrementally.\n" +"\n" +"format specifies the container format to use for the output. This can\n" +"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n" +"\n" +"check specifies the integrity check to use. For FORMAT_XZ, the default\n" +"is CHECK_CRC64. 
FORMAT_ALONE and FORMAT_RAW do not support integrity\n" +"checks; for these formats, check must be omitted, or be CHECK_NONE.\n" +"\n" +"The settings used by the compressor can be specified either as a\n" +"preset compression level (with the 'preset' argument), or in detail\n" +"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n" +"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n" +"level. For FORMAT_RAW, the caller must always specify a filter chain;\n" +"the raw compressor does not support preset compression levels.\n" +"\n" +"preset (if provided) should be an integer in the range 0-9, optionally\n" +"OR-ed with the constant PRESET_EXTREME.\n" +"\n" +"filters (if provided) should be a sequence of dicts. Each dict should\n" +"have an entry for \"id\" indicating the ID of the filter, plus\n" +"additional entries for options to the filter.\n" +"\n" +"For one-shot compression, use the compress() function instead.\n"); + +static PyTypeObject Compressor_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_lzma.LZMACompressor", /* tp_name */ + sizeof(Compressor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Compressor_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + Compressor_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Compressor_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Compressor_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* 
tp_new */ +}; + + +/* LZMADecompressor class. */ + +static PyObject * +decompress(Decompressor *d, uint8_t *data, size_t len) +{ + size_t data_size = 0; + PyObject *result; + + result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + if (result == NULL) + return NULL; + d->lzs.next_in = data; + d->lzs.avail_in = len; + d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result); + d->lzs.avail_out = PyBytes_GET_SIZE(result); + for (;;) { + lzma_ret lzret; + + Py_BEGIN_ALLOW_THREADS + lzret = lzma_code(&d->lzs, LZMA_RUN); + data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result); + Py_END_ALLOW_THREADS + if (catch_lzma_error(lzret)) + goto error; + if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) + d->check = lzma_get_check(&d->lzs); + if (lzret == LZMA_STREAM_END) { + d->eof = 1; + if (d->lzs.avail_in > 0) { + Py_CLEAR(d->unused_data); + d->unused_data = PyBytes_FromStringAndSize( + (char *)d->lzs.next_in, d->lzs.avail_in); + if (d->unused_data == NULL) + goto error; + } + break; + } else if (d->lzs.avail_in == 0) { + break; + } else if (d->lzs.avail_out == 0) { + if (grow_buffer(&result) == -1) + goto error; + d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; + d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size; + } + } + if (data_size != PyBytes_GET_SIZE(result)) + if (_PyBytes_Resize(&result, data_size) == -1) + goto error; + return result; + +error: + Py_XDECREF(result); + return NULL; +} + +PyDoc_STRVAR(Decompressor_decompress_doc, +"decompress(data) -> bytes\n" +"\n" +"Provide data to the decompressor object. Returns a chunk of\n" +"decompressed data if possible, or b\"\" otherwise.\n" +"\n" +"Attempting to decompress data after the end of the stream is\n" +"reached raises an EOFError. 
Any data found after the end of the\n" +"stream is ignored, and saved in the unused_data attribute.\n"); + +static PyObject * +Decompressor_decompress(Decompressor *self, PyObject *args) +{ + Py_buffer buffer; + PyObject *result = NULL; + + if (!PyArg_ParseTuple(args, "s*:decompress", &buffer)) + return NULL; + + ACQUIRE_LOCK(self); + if (self->eof) + PyErr_SetString(PyExc_EOFError, "Already at end of stream"); + else + result = decompress(self, buffer.buf, buffer.len); + RELEASE_LOCK(self); + PyBuffer_Release(&buffer); + return result; +} + +static int +Decompressor_init_raw(lzma_stream *lzs, PyObject *filterspecs) +{ + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + lzma_ret lzret; + + if (parse_filter_chain_spec(filters, filterspecs) == -1) + return -1; + lzret = lzma_raw_decoder(lzs, filters); + free_filter_chain(filters); + if (catch_lzma_error(lzret)) + return -1; + else + return 0; +} + +static int +Decompressor_init(Decompressor *self, PyObject *args, PyObject *kwargs) +{ + static char *arg_names[] = {"format", "memlimit", "filters", NULL}; + const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK; + int format = FORMAT_AUTO; + uint64_t memlimit = UINT64_MAX; + PyObject *memlimit_obj = Py_None; + PyObject *filterspecs = Py_None; + lzma_ret lzret; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|iOO:LZMADecompressor", arg_names, + &format, &memlimit_obj, &filterspecs)) + return -1; + + if (memlimit_obj != Py_None) { + if (format == FORMAT_RAW) { + PyErr_SetString(PyExc_ValueError, + "Cannot specify memory limit with FORMAT_RAW"); + return -1; + } + memlimit = PyLong_AsUnsignedLongLong(memlimit_obj); + if (PyErr_Occurred()) + return -1; + } + + if (format == FORMAT_RAW && filterspecs == Py_None) { + PyErr_SetString(PyExc_ValueError, + "Must specify filters for FORMAT_RAW"); + return -1; + } else if (format != FORMAT_RAW && filterspecs != Py_None) { + PyErr_SetString(PyExc_ValueError, + "Cannot specify filters except with FORMAT_RAW"); + 
return -1; + } + +#ifdef WITH_THREAD + self->lock = PyThread_allocate_lock(); + if (self->lock == NULL) { + PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); + return -1; + } +#endif + + self->check = LZMA_CHECK_UNKNOWN; + self->unused_data = PyBytes_FromStringAndSize(NULL, 0); + if (self->unused_data == NULL) + goto error; + + switch (format) { + case FORMAT_AUTO: + lzret = lzma_auto_decoder(&self->lzs, memlimit, decoder_flags); + if (catch_lzma_error(lzret)) + break; + return 0; + + case FORMAT_XZ: + lzret = lzma_stream_decoder(&self->lzs, memlimit, decoder_flags); + if (catch_lzma_error(lzret)) + break; + return 0; + + case FORMAT_ALONE: + self->check = LZMA_CHECK_NONE; + lzret = lzma_alone_decoder(&self->lzs, memlimit); + if (catch_lzma_error(lzret)) + break; + return 0; + + case FORMAT_RAW: + self->check = LZMA_CHECK_NONE; + if (Decompressor_init_raw(&self->lzs, filterspecs) == -1) + break; + return 0; + + default: + PyErr_Format(PyExc_ValueError, + "Invalid container format: %d", format); + break; + } + +error: + Py_CLEAR(self->unused_data); +#ifdef WITH_THREAD + PyThread_free_lock(self->lock); + self->lock = NULL; +#endif + return -1; +} + +static void +Decompressor_dealloc(Decompressor *self) +{ + lzma_end(&self->lzs); + Py_CLEAR(self->unused_data); +#ifdef WITH_THREAD + if (self->lock != NULL) + PyThread_free_lock(self->lock); +#endif + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyMethodDef Decompressor_methods[] = { + {"decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS, + Decompressor_decompress_doc}, + {NULL} +}; + +PyDoc_STRVAR(Decompressor_check_doc, +"ID of the integrity check used by the input stream."); + +PyDoc_STRVAR(Decompressor_eof_doc, +"True if the end-of-stream marker has been reached."); + +PyDoc_STRVAR(Decompressor_unused_data_doc, +"Data found after the end of the compressed stream."); + +static PyMemberDef Decompressor_members[] = { + {"check", T_INT, offsetof(Decompressor, check), READONLY, + 
Decompressor_check_doc}, + {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY, + Decompressor_eof_doc}, + {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY, + Decompressor_unused_data_doc}, + {NULL} +}; + +PyDoc_STRVAR(Decompressor_doc, +"LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)\n" +"\n" +"Create a decompressor object for decompressing data incrementally.\n" +"\n" +"format specifies the container format of the input stream. If this is\n" +"FORMAT_AUTO (the default), the decompressor will automatically detect\n" +"whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with\n" +"FORMAT_RAW cannot be autodetected.\n" +"\n" +"memlimit can be specified to limit the amount of memory used by the\n" +"decompressor. This will cause decompression to fail if the input\n" +"cannot be decompressed within the given limit.\n" +"\n" +"filters specifies a custom filter chain. This argument is required for\n" +"FORMAT_RAW, and not accepted with any other format. 
When provided,\n" +"this should be a sequence of dicts, each indicating the ID and options\n" +"for a single filter.\n" +"\n" +"For one-shot decompression, use the decompress() function instead.\n"); + +static PyTypeObject Decompressor_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_lzma.LZMADecompressor", /* tp_name */ + sizeof(Decompressor), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Decompressor_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + Decompressor_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Decompressor_methods, /* tp_methods */ + Decompressor_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Decompressor_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + + +/* Module-level functions. */ + +PyDoc_STRVAR(check_is_supported_doc, +"check_is_supported(check_id) -> bool\n" +"\n" +"Test whether the given integrity check is supported.\n" +"\n" +"Always returns True for CHECK_NONE and CHECK_CRC32.\n"); + +static PyObject * +check_is_supported(PyObject *self, PyObject *args) +{ + int check_id; + + if (!PyArg_ParseTuple(args, "i:check_is_supported", &check_id)) + return NULL; + + return PyBool_FromLong(lzma_check_is_supported(check_id)); +} + + +/* Module initialization. 
*/ + +static PyMethodDef module_methods[] = { + {"check_is_supported", (PyCFunction)check_is_supported, + METH_VARARGS, check_is_supported_doc}, + {NULL} +}; + +/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant + would not work correctly on platforms with 32-bit longs. */ +static int +module_add_int_constant(PyObject *m, const char *name, PY_LONG_LONG value) +{ + PyObject *o = PyLong_FromLongLong(value); + if (o == NULL) + return -1; + if (PyModule_AddObject(m, name, o) == 0) + return 0; + Py_DECREF(o); + return -1; +} + +#define ADD_INT_PREFIX_MACRO(m, macro) \ + module_add_int_constant(m, #macro, LZMA_ ## macro) + +void init_lzma(void) +{ + PyObject *m; + + empty_tuple = PyTuple_New(0); + if (empty_tuple == NULL) + return; + + m = Py_InitModule("_lzma", module_methods); + if (m == NULL) + return; + + if (PyModule_AddIntMacro(m, FORMAT_AUTO) == -1 || + PyModule_AddIntMacro(m, FORMAT_XZ) == -1 || + PyModule_AddIntMacro(m, FORMAT_ALONE) == -1 || + PyModule_AddIntMacro(m, FORMAT_RAW) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_NONE) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_CRC32) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_CRC64) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_SHA256) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_ID_MAX) == -1 || + ADD_INT_PREFIX_MACRO(m, CHECK_UNKNOWN) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_LZMA1) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_LZMA2) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_DELTA) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_X86) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_IA64) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_ARM) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_ARMTHUMB) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_SPARC) == -1 || + ADD_INT_PREFIX_MACRO(m, FILTER_POWERPC) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_HC3) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_HC4) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_BT2) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_BT3) == -1 || + ADD_INT_PREFIX_MACRO(m, MF_BT4) == -1 || + 
ADD_INT_PREFIX_MACRO(m, MODE_FAST) == -1 || + ADD_INT_PREFIX_MACRO(m, MODE_NORMAL) == -1 || + ADD_INT_PREFIX_MACRO(m, PRESET_DEFAULT) == -1 || + ADD_INT_PREFIX_MACRO(m, PRESET_EXTREME) == -1) + return; + + Error = PyErr_NewExceptionWithDoc( + "_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL); + if (Error == NULL) + return; + Py_INCREF(Error); + if (PyModule_AddObject(m, "LZMAError", Error) == -1) + return; + + if (PyType_Ready(&Compressor_type) == -1) + return; + Py_INCREF(&Compressor_type); + if (PyModule_AddObject(m, "LZMACompressor", + (PyObject *)&Compressor_type) == -1) + return; + + if (PyType_Ready(&Decompressor_type) == -1) + return; + Py_INCREF(&Decompressor_type); + if (PyModule_AddObject(m, "LZMADecompressor", + (PyObject *)&Decompressor_type) == -1) + return; + + return m; +} -- 2.39.5