From 10a9214ac65e171f08589ef0ec94ffcbccbfcc49 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Thu, 17 Sep 2020 18:28:17 +0100 Subject: [PATCH] Avoid millions of rand() calls() when running tests MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Many tests use a loop calling rand() to fill buffers with test data. As these calls cannot be inlined, this adds up to noticeable overhead: For example, running on QEMU RISC-V the test_write_format_7zip_large_copy test took ~22 seconds before and with this change it's ~17 seconds. This change uses a simpler xorshift64 random number generator that can be inlined into the loop filling the data buffer. By default the seed for this RNG is rand(), but it can be overwritten by setting the TEST_RANDOM_SEED environment variable. For a native build the difference is much less noticeable, but it's still measurable: test_write_format_7zip_large_copy takes 314.9 ms ± 3.9 ms before and 227.8 ms ± 5.8 ms after (i.e. 38% faster for that test). --- libarchive/test/test_read_data_large.c | 5 +- libarchive/test/test_read_extract.c | 4 +- libarchive/test/test_read_large.c | 5 +- libarchive/test/test_read_pax_truncated.c | 5 +- libarchive/test/test_read_truncated.c | 4 +- libarchive/test/test_read_truncated_filter.c | 17 ++-- .../test/test_write_format_7zip_large.c | 5 +- test_utils/test_utils.c | 81 +++++++++++++++++++ test_utils/test_utils.h | 5 ++ 9 files changed, 110 insertions(+), 21 deletions(-) diff --git a/libarchive/test/test_read_data_large.c b/libarchive/test/test_read_data_large.c index 418020d11..22f46b75d 100644 --- a/libarchive/test/test_read_data_large.c +++ b/libarchive/test/test_read_data_large.c @@ -23,6 +23,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "test.h" +#include "test_utils.h" __FBSDID("$FreeBSD: head/lib/libarchive/test/test_read_data_large.c 201247 2009-12-30 05:59:21Z kientzle $"); /* @@ -49,7 +50,6 @@ DEFINE_TEST(test_read_data_large) char tmpfilename[] = "largefile"; int tmpfilefd; FILE *f; - unsigned int i; size_t used; /* Create a new archive in memory. */ @@ -64,8 +64,7 @@ DEFINE_TEST(test_read_data_large) assert((ae = archive_entry_new()) != NULL); archive_entry_copy_pathname(ae, "file"); archive_entry_set_mode(ae, S_IFREG | 0755); - for (i = 0; i < sizeof(buff2); i++) - buff2[i] = (unsigned char)rand(); + fill_with_pseudorandom_data(buff2, sizeof(buff2)); archive_entry_set_size(ae, sizeof(buff2)); assertA(0 == archive_write_header(a, ae)); archive_entry_free(ae); diff --git a/libarchive/test/test_read_extract.c b/libarchive/test/test_read_extract.c index 2b1a21e47..c3e0594e9 100644 --- a/libarchive/test/test_read_extract.c +++ b/libarchive/test/test_read_extract.c @@ -23,6 +23,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "test.h" +#include "test_utils.h" __FBSDID("$FreeBSD: head/lib/libarchive/test/test_read_extract.c 201247 2009-12-30 05:59:21Z kientzle $"); #define BUFF_SIZE 1000000 @@ -59,8 +60,7 @@ DEFINE_TEST(test_read_extract) assert((ae = archive_entry_new()) != NULL); archive_entry_copy_pathname(ae, "file"); archive_entry_set_mode(ae, S_IFREG | 0755); - for (i = 0; i < FILE_BUFF_SIZE; i++) - file_buff[i] = (unsigned char)rand(); + fill_with_pseudorandom_data(file_buff, FILE_BUFF_SIZE); archive_entry_set_size(ae, FILE_BUFF_SIZE); assertA(0 == archive_write_header(a, ae)); assertA(FILE_BUFF_SIZE == archive_write_data(a, file_buff, FILE_BUFF_SIZE)); diff --git a/libarchive/test/test_read_large.c b/libarchive/test/test_read_large.c index 6966ccbe1..2a4c8e867 100644 --- a/libarchive/test/test_read_large.c +++ b/libarchive/test/test_read_large.c @@ -23,6 +23,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "test.h" +#include "test_utils.h" __FBSDID("$FreeBSD: head/lib/libarchive/test/test_read_large.c 201247 2009-12-30 05:59:21Z kientzle $"); static unsigned char testdata[10 * 1024 * 1024]; @@ -37,7 +38,6 @@ static unsigned char buff[11 * 1024 * 1024]; /* Check correct behavior on large reads. */ DEFINE_TEST(test_read_large) { - unsigned int i; int tmpfilefd; char tmpfilename[] = "test-read_large.XXXXXX"; size_t used; @@ -45,8 +45,7 @@ DEFINE_TEST(test_read_large) struct archive_entry *entry; FILE *f; - for (i = 0; i < sizeof(testdata); i++) - testdata[i] = (unsigned char)(rand()); + fill_with_pseudorandom_data(testdata, sizeof(testdata)); assert(NULL != (a = archive_write_new())); assertA(0 == archive_write_set_format_ustar(a)); diff --git a/libarchive/test/test_read_pax_truncated.c b/libarchive/test/test_read_pax_truncated.c index 1f6e78ace..a6705fa9c 100644 --- a/libarchive/test/test_read_pax_truncated.c +++ b/libarchive/test/test_read_pax_truncated.c @@ -23,6 +23,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "test.h" +#include "test_utils.h" __FBSDID("$FreeBSD: head/lib/libarchive/test/test_read_pax_truncated.c 189483 2009-03-07 03:34:34Z kientzle $"); DEFINE_TEST(test_read_pax_truncated) @@ -48,8 +49,8 @@ DEFINE_TEST(test_read_pax_truncated) assert((ae = archive_entry_new()) != NULL); archive_entry_copy_pathname(ae, "file"); archive_entry_set_mode(ae, S_IFREG | 0755); - for (i = 0; i < filedata_size; i++) - filedata[i] = (unsigned char)rand(); + fill_with_pseudorandom_data(filedata, filedata_size); + archive_entry_set_atime(ae, 1, 2); archive_entry_set_ctime(ae, 3, 4); archive_entry_set_mtime(ae, 5, 6); diff --git a/libarchive/test/test_read_truncated.c b/libarchive/test/test_read_truncated.c index 3991ab2ba..117c12ade 100644 --- a/libarchive/test/test_read_truncated.c +++ b/libarchive/test/test_read_truncated.c @@ -23,6 +23,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "test.h" +#include "test_utils.h" __FBSDID("$FreeBSD: src/lib/libarchive/test/test_read_truncated.c,v 1.4 2008/09/01 05:38:33 kientzle Exp $"); static char buff[1000000]; @@ -47,8 +48,7 @@ DEFINE_TEST(test_read_truncated) assert((ae = archive_entry_new()) != NULL); archive_entry_copy_pathname(ae, "file"); archive_entry_set_mode(ae, S_IFREG | 0755); - for (i = 0; i < sizeof(buff2); i++) - buff2[i] = (unsigned char)rand(); + fill_with_pseudorandom_data(buff2, sizeof(buff2)); archive_entry_set_size(ae, sizeof(buff2)); assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, ae)); archive_entry_free(ae); diff --git a/libarchive/test/test_read_truncated_filter.c b/libarchive/test/test_read_truncated_filter.c index 4c409cc00..59a6dc852 100644 --- a/libarchive/test/test_read_truncated_filter.c +++ b/libarchive/test/test_read_truncated_filter.c @@ -25,6 +25,7 @@ */ #include "test.h" +#include "test_utils.h" __FBSDID("$FreeBSD$"); /* @@ -41,7 +42,7 @@ test_truncation(const char *compression, char path[16]; char *buff, *data; size_t buffsize, datasize, used1; - int i, j, r, use_prog; + int i, r, use_prog; buffsize = 2000000; assert(NULL != (buff = (char *)malloc(buffsize))); @@ -91,9 +92,7 @@ test_truncation(const char *compression, free(buff); return; } - for (j = 0; j < (int)datasize; ++j) { - data[j] = (char)(rand() % 256); - } + fill_with_pseudorandom_data(data, datasize); failure("%s", path); if (!assertEqualIntA(a, datasize, archive_write_data(a, data, datasize))) { @@ -111,8 +110,13 @@ test_truncation(const char *compression, assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); - assertEqualIntA(a, ARCHIVE_OK, - archive_read_open_memory(a, buff, used1 - used1/64)); + r = archive_read_open_memory(a, buff, used1 - used1/64); + if (r != ARCHIVE_OK) { + assertEqualStringA(a, "truncated bzip2 input", + archive_error_string(a)); + goto out; + } + for (i = 0; i < 100; i++) { if 
(ARCHIVE_OK != archive_read_next_header(a, &ae)) { failure("Should have non-NULL error message for %s", @@ -133,6 +137,7 @@ test_truncation(const char *compression, archive_read_close(a)); assertEqualInt(ARCHIVE_OK, archive_read_free(a)); +out: free(data); free(buff); } diff --git a/libarchive/test/test_write_format_7zip_large.c b/libarchive/test/test_write_format_7zip_large.c index 5c49f59a1..be344a3ef 100644 --- a/libarchive/test/test_write_format_7zip_large.c +++ b/libarchive/test/test_write_format_7zip_large.c @@ -25,6 +25,7 @@ #include "test.h" +#include "test_utils.h" __FBSDID("$FreeBSD$"); #define LARGE_SIZE (16*1024*1024) @@ -37,7 +38,6 @@ test_large(const char *compression_type) size_t buffsize = LARGE_SIZE + 1024 * 256; size_t datasize = LARGE_SIZE; char *buff, *filedata, *filedata2; - unsigned i; assert((buff = malloc(buffsize)) != NULL); assert((filedata = malloc(datasize)) != NULL); @@ -87,8 +87,7 @@ test_large(const char *compression_type) /* NOTE: PPMd cannot handle random data correctly.*/ memset(filedata, 'a', datasize); } else { - for (i = 0; i < datasize; i++) - filedata[i] = (char)rand(); + fill_with_pseudorandom_data(filedata, datasize); } assertEqualInt(datasize, archive_write_data(a, filedata, datasize)); diff --git a/test_utils/test_utils.c b/test_utils/test_utils.c index 8ea3d3c4b..db6c31b2a 100644 --- a/test_utils/test_utils.c +++ b/test_utils/test_utils.c @@ -26,8 +26,11 @@ #include "test_utils.h" +#include #include +#include #include +#include /* Filter tests against a glob pattern. Returns non-zero if test matches * pattern, zero otherwise. 
A '^' at the beginning of the pattern negates
@@ -122,3 +125,104 @@ int get_test_set(int *test_set, int limit, const char *test,
 	}
 	return ((idx == 0)?-1:idx);
 }
+
+/*
+ * Advance a xorshift64 generator by one step and return the new state.
+ * The state must be non-zero: zero is a fixed point of the recurrence, so a
+ * zero state would yield zeros forever.
+ */
+static inline uint64_t
+xorshift64(uint64_t *state)
+{
+	uint64_t x = *state;
+	x ^= x << 13;
+	x ^= x >> 7;
+	x ^= x << 17;
+	*state = x;
+	return (x);
+}
+
+/*
+ * Fill a buffer with reproducible pseudo-random data using a simple xorshift
+ * algorithm. Originally, most tests filled buffers with a loop that calls
+ * rand() once for each byte. However, this initialization can be extremely
+ * slow when running on emulated platforms such as QEMU where 16M calls to
+ * rand() take a long time: Before the test_write_format_7zip_large_copy test
+ * took ~22 seconds, whereas using a xorshift random number generator (that can
+ * be inlined) reduces it to ~17 seconds on QEMU RISC-V.
+ *
+ * A seed of zero is replaced by a fixed non-zero constant, because it would
+ * otherwise fill the buffer with zeros. Buffers that end before the first
+ * 8-byte aligned address only receive the 0xab prefix bytes.
+ */
+void
+fill_with_pseudorandom_data_seed(uint64_t seed, void *buffer, size_t size)
+{
+	unsigned char *bytes = buffer;
+	size_t num_values;
+	size_t i;
+	size_t unaligned_suffix;
+	size_t unaligned_prefix = 0;
+
+	if (seed == 0)
+		seed = UINT64_C(0x9e3779b97f4a7c15);
+	/*
+	 * To avoid unaligned stores we only fill the aligned part of the buffer
+	 * with pseudo-random data and fill the unaligned prefix with 0xab and
+	 * the suffix with 0xcd.
+	 */
+	if ((uintptr_t)buffer % sizeof(uint64_t)) {
+		unaligned_prefix =
+		    sizeof(uint64_t) - (uintptr_t)buffer % sizeof(uint64_t);
+		/* A short buffer may end before the next aligned address. */
+		if (unaligned_prefix > size)
+			unaligned_prefix = size;
+		memset(bytes, 0xab, unaligned_prefix);
+	}
+	num_values = (size - unaligned_prefix) / sizeof(uint64_t);
+	unaligned_suffix =
+	    size - unaligned_prefix - num_values * sizeof(uint64_t);
+	if (num_values > 0) {
+		uint64_t *aligned_buffer =
+		    (uint64_t *)(void *)(bytes + unaligned_prefix);
+		assert((uintptr_t)aligned_buffer % sizeof(uint64_t) == 0);
+		for (i = 0; i < num_values; i++) {
+			aligned_buffer[i] = xorshift64(&seed);
+		}
+	}
+	if (unaligned_suffix) {
+		memset(bytes + size - unaligned_suffix, 0xcd,
+		    unaligned_suffix);
+	}
+}
+
+/*
+ * Fill a buffer with pseudo-random data. The seed is taken from the
+ * TEST_RANDOM_SEED environment variable when it holds a valid decimal
+ * number; otherwise rand() provides the seed.
+ */
+void
+fill_with_pseudorandom_data(void *buffer, size_t size)
+{
+	uint64_t seed;
+	const char *seed_str;
+	char *end;
+
+	/*
+	 * Check if a seed has been specified in the environment, otherwise fall
+	 * back to using rand() as a seed.
+	 */
+	if ((seed_str = getenv("TEST_RANDOM_SEED")) != NULL) {
+		errno = 0;
+		seed = strtoull(seed_str, &end, 10);
+		if (errno != 0 || end == seed_str || *end != '\0') {
+			/* errno is 0 for non-numeric input. */
+			fprintf(stderr, "strtoull(%s) failed: %s\n", seed_str,
+			    errno != 0 ? strerror(errno) : "not a decimal number");
+			seed = (uint64_t)rand();
+		}
+	} else {
+		seed = (uint64_t)rand();
+	}
+	fill_with_pseudorandom_data_seed(seed, buffer, size);
+}
diff --git a/test_utils/test_utils.h b/test_utils/test_utils.h
index 164c528fc..3f61f6b2f 100644
--- a/test_utils/test_utils.h
+++ b/test_utils/test_utils.h
@@ -27,6 +27,9 @@
 #ifndef TEST_UTILS_H
 #define TEST_UTILS_H
 
+#include <stddef.h>
+#include <stdint.h>
+
 struct test_list_t
 {
 	void (*func)(void);
@@ -35,5 +38,7 @@ struct test_list_t
 };
 
 int get_test_set(int *, int, const char *, struct test_list_t *);
+void fill_with_pseudorandom_data(void* buffer, size_t size);
+void fill_with_pseudorandom_data_seed(uint64_t seed, void* buffer, size_t size);
 
 #endif /* TEST_UTILS_H */
-- 
2.47.2