From: Lasse Collin Date: Sat, 24 May 2025 16:28:00 +0000 (+0300) Subject: xz, xzdec, lzmainfo: Add a hack to avoid memory leaks on MorphOS X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fmorphos;p=thirdparty%2Fxz.git xz, xzdec, lzmainfo: Add a hack to avoid memory leaks on MorphOS There is no functional change on other OSes. See the comment in my_allocator.h. Reported-by: Harry Sintonen Fixes: https://github.com/tukaani-project/xz/pull/181 --- diff --git a/src/common/my_allocator.h b/src/common/my_allocator.h new file mode 100644 index 00000000..dd08e334 --- /dev/null +++ b/src/common/my_allocator.h @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: 0BSD + +/////////////////////////////////////////////////////////////////////////////// +// +/// \file my_allocator.h +/// \brief lzma_allocator to use malloc() and free() from app's libc +/// +/// In the 1980s and early 1990s, some operating systems implemented shared +/// libraries so that not only the code but also the data is shared +/// between processes. If an application allocates memory by calling into +/// a shared library, it also needs to call into the library to free the +/// memory before the application terminates. Otherwise memory is leaked +/// in the shared library. +/// +/// The memory allocated by the application itself *is* freed on those +/// OSes when the application terminates. It's only the memory (and other +/// resources) allocated in shared libraries that can be a problem. If +/// a shared library is made to use application's malloc() and free(), +/// the problem is solved as long as the shared library doesn't also +/// allocate some other types of resources. Thus, this kind of leak +/// prevention is incompatible with threads. +/// +/// A related issue is that if the shared library allocates memory with +/// malloc() and the resulting pointer is passed to the application, the +/// application cannot free the memory using free(). 
This is because the +/// shared library and application have their own heaps. This too is +/// solved if the shared library is made to use application's malloc() +/// and free(). (This issue is possible on Windows even in modern times +/// if a DLL uses a different CRT than the application. However, it is +/// reasonable to assume that XZ Utils components all use the same CRT.) +/// +/// The allocator in this header is a hack that should only be enabled +/// on those OSes that really need it. liblzma uses not only malloc() and +/// free() but also calloc(). When a custom allocator is used, calloc() +/// is replaced with allocator->alloc() + memset() which is significantly +/// slower in certain situations where most of the allocated memory isn't +/// actually needed (compressing a tiny file with the LZMA2 dictionary +/// size set to a large value). +// +// Author: Lasse Collin +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef MY_ALLOCATOR_H +#define MY_ALLOCATOR_H + +#include "sysdefs.h" +#include "lzma.h" + + +#ifdef __MORPHOS__ + +static void * LZMA_API_CALL +my_alloc(void *opaque lzma_attribute((__unused__)), size_t nmemb, size_t size) +{ + // liblzma guarantees that this won't overflow. + return malloc(nmemb * size); +} + +static void LZMA_API_CALL +my_free(void *opaque lzma_attribute((__unused__)), void *ptr) +{ + free(ptr); +} + +static const lzma_allocator my_allocator = { &my_alloc, &my_free, NULL }; + +#define MY_ALLOCATOR (&my_allocator) +#define MY_ALLOCATOR_SET(strm) ((strm).allocator = &my_allocator) + +#else + +// OSes with modern shared library mechanism don't need the allocator hack. +// For example, this isn't needed on Windows 95. 
+#define MY_ALLOCATOR NULL +#define MY_ALLOCATOR_SET(strm) ((void)0) + +#endif + +#endif diff --git a/src/lzmainfo/lzmainfo.c b/src/lzmainfo/lzmainfo.c index 0b0b0d3d..ad5950de 100644 --- a/src/lzmainfo/lzmainfo.c +++ b/src/lzmainfo/lzmainfo.c @@ -20,6 +20,7 @@ #include "tuklib_mbstr_nonprint.h" #include "tuklib_mbstr_wrap.h" #include "tuklib_exit.h" +#include "my_allocator.h" #ifdef TUKLIB_DOSLIKE # include @@ -135,7 +136,7 @@ lzmainfo(const char *name, FILE *f) lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; // Parse the first five bytes. - switch (lzma_properties_decode(&filter, NULL, buf, 5)) { + switch (lzma_properties_decode(&filter, MY_ALLOCATOR, buf, 5)) { case LZMA_OK: break; diff --git a/src/xz/coder.c b/src/xz/coder.c index c28f874a..ba0ba15f 100644 --- a/src/xz/coder.c +++ b/src/xz/coder.c @@ -12,6 +12,7 @@ #include "private.h" #include "tuklib_integer.h" +#include "my_allocator.h" /// Return value type for coder_init(). @@ -102,7 +103,7 @@ forget_filter_chain(void) // Setting a preset or using --filters makes us forget // the earlier custom filter chain (if any). if (filters_count > 0) { - lzma_filters_free(chains[0], NULL); + lzma_filters_free(chains[0], MY_ALLOCATOR); filters_count = 0; } @@ -161,7 +162,7 @@ str_to_filters(const char *str, uint32_t index, uint32_t flags) { int error_pos; const char *err = lzma_str_to_filters(str, &error_pos, - chains[index], flags, NULL); + chains[index], flags, MY_ALLOCATOR); if (err != NULL) { char filter_num[2] = ""; @@ -208,7 +209,7 @@ coder_add_block_filters(const char *str, size_t slot) { // Free old filters first, if they were previously allocated. if (chains_used_mask & (1U << slot)) - lzma_filters_free(chains[slot], NULL); + lzma_filters_free(chains[slot], MY_ALLOCATOR); str_to_filters(str, slot, 0); @@ -747,7 +748,8 @@ is_format_lzma(void) // Decode the LZMA1 properties. 
lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; - if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) + if (lzma_properties_decode(&filter, MY_ALLOCATOR, in_buf.u8, 5) + != LZMA_OK) return false; // A hack to ditch tons of false positives: We allow only dictionary @@ -821,6 +823,9 @@ coder_init(file_pair *pair) ? chains[0] : chains[opt_block_list[0].chain_num]; + // See src/common/my_allocator.h. + MY_ALLOCATOR_SET(strm); + if (opt_mode == MODE_COMPRESS) { #ifdef HAVE_ENCODERS switch (opt_format) { @@ -1467,7 +1472,7 @@ coder_free(void) // don't worry about freeing it. for (uint32_t i = 1; i < ARRAY_SIZE(chains); i++) { if (chains_used_mask & (1U << i)) - lzma_filters_free(chains[i], NULL); + lzma_filters_free(chains[i], MY_ALLOCATOR); } lzma_end(&strm); diff --git a/src/xz/list.c b/src/xz/list.c index 210f23ad..ceb31e80 100644 --- a/src/xz/list.c +++ b/src/xz/list.c @@ -11,6 +11,7 @@ #include "private.h" #include "tuklib_integer.h" +#include "my_allocator.h" /// Information about a .xz file @@ -360,6 +361,7 @@ parse_indexes(xz_file_info *xfi, file_pair *pair) io_buf buf; lzma_stream strm = LZMA_STREAM_INIT; + MY_ALLOCATOR_SET(strm); lzma_index *idx = NULL; lzma_ret ret = lzma_file_info_decoder(&strm, &idx, @@ -472,7 +474,7 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter, goto data_error; // Decode the Block Header. - switch (lzma_block_header_decode(&block, NULL, buf.u8)) { + switch (lzma_block_header_decode(&block, MY_ALLOCATOR, buf.u8)) { case LZMA_OK: break; @@ -529,7 +531,7 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter, case LZMA_DATA_ERROR: // Free the memory allocated by lzma_block_header_decode(). - lzma_filters_free(filters, NULL); + lzma_filters_free(filters, MY_ALLOCATOR); goto data_error; default: @@ -584,10 +586,10 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter, // Convert the filter chain to human readable form. 
const lzma_ret str_ret = lzma_str_from_filters( &bhi->filter_chain, filters, - LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG, NULL); + LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG, MY_ALLOCATOR); // Free the memory allocated by lzma_block_header_decode(). - lzma_filters_free(filters, NULL); + lzma_filters_free(filters, MY_ALLOCATOR); // Check if the stringification succeeded. if (str_ret != LZMA_OK) { @@ -1349,7 +1351,7 @@ list_file(const char *filename) if (!fail) update_totals(&xfi); - lzma_index_end(xfi.idx, NULL); + lzma_index_end(xfi.idx, MY_ALLOCATOR); } io_close(pair, false); diff --git a/src/xz/message.c b/src/xz/message.c index 7657e856..2e519080 100644 --- a/src/xz/message.c +++ b/src/xz/message.c @@ -12,6 +12,7 @@ #include "private.h" #include "tuklib_mbstr_wrap.h" +#include "my_allocator.h" #include @@ -900,7 +901,7 @@ message_filters_show(enum message_verbosity v, const lzma_filter *filters) char *buf; const lzma_ret ret = lzma_str_from_filters(&buf, filters, - LZMA_STR_ENCODER | LZMA_STR_GETOPT_LONG, NULL); + LZMA_STR_ENCODER | LZMA_STR_GETOPT_LONG, MY_ALLOCATOR); if (ret != LZMA_OK) message_fatal("%s", message_strm(ret)); @@ -1303,7 +1304,7 @@ message_filters_help(void) char *encoder_options; if (lzma_str_list_filters(&encoder_options, LZMA_VLI_UNKNOWN, - LZMA_STR_ENCODER, NULL) != LZMA_OK) + LZMA_STR_ENCODER, MY_ALLOCATOR) != LZMA_OK) message_bug(); if (!opt_robot) { diff --git a/src/xzdec/xzdec.c b/src/xzdec/xzdec.c index e1e27449..d875b253 100644 --- a/src/xzdec/xzdec.c +++ b/src/xzdec/xzdec.c @@ -38,6 +38,7 @@ #include "tuklib_progname.h" #include "tuklib_mbstr_nonprint.h" #include "tuklib_exit.h" +#include "my_allocator.h" #ifdef TUKLIB_DOSLIKE # include @@ -429,6 +430,7 @@ main(int argc, char **argv) // we don't need to reallocate memory for every file if they use same // compression settings. lzma_stream strm = LZMA_STREAM_INIT; + MY_ALLOCATOR_SET(strm); // Some systems require setting stdin and stdout to binary mode. #ifdef TUKLIB_DOSLIKE