asm volatile ("vmovq %0, %%xmm15": "=m" (buf1));
/* Test vmovdq style instructions. */
- /* For local and dynamic buffers, we can't guarantee they will be aligned.
+ /* For local and global buffers, we can't guarantee they will be aligned.
However, the aligned and unaligned versions seem to be encoded the same,
- so testing one is enough to validate both. */
+ so testing one is enough to validate both. For safety, though, the
+ dynamic buffers are forced to be 32-byte aligned so vmovdqa can be
+ explicitly tested at least once. */
/* Operations based on local buffers. */
asm volatile ("vmovdqu %0, %%ymm0": : "m"(buf0));
asm volatile ("vmovdqu %%ymm0, %0": "=m"(buf1));
/* Operations based on global buffers. */
- /* Global buffers seem to always be aligned, lets sanity check vmovdqa. */
- asm volatile ("vmovdqa %0, %%ymm15": : "m"(global_buf0));
- asm volatile ("vmovdqa %%ymm15, %0": "=m"(global_buf1));
asm volatile ("vmovdqu %0, %%ymm0": : "m"(global_buf0));
asm volatile ("vmovdqu %%ymm0, %0": "=m"(global_buf1));
/* Operations based on dynamic buffers. */
- /* The dynamic buffers are not aligned, so we skip vmovdqa. */
+ asm volatile ("vmovdqa %0, %%ymm15": : "m"(*dyn_buf0));
+ asm volatile ("vmovdqa %%ymm15, %0": "=m"(*dyn_buf1));
asm volatile ("vmovdqu %0, %%ymm0": : "m"(*dyn_buf0));
asm volatile ("vmovdqu %%ymm0, %0": "=m"(*dyn_buf1));
return 0; /* end vzeroupper_test */
}
+/* This include is used to allocate the dynamic buffers and to align
+ their pointers to a 32-byte boundary, so we can test instructions
+ that require aligned memory. */
+#include "precise-aligned-alloc.c"
+
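As a point of reference, here is a minimal sketch of what such an aligned-allocation helper could look like. This is an illustrative assumption only: the actual lib/precise-aligned-alloc.c in the GDB testsuite may be implemented differently, and the allocated_ptr out-parameter is inferred from the NULL argument passed to it in main below.

    #include <stdlib.h>
    #include <stdint.h>

    /* Hypothetical sketch, not the testsuite's actual implementation:
       return SIZE bytes aligned to ALIGNMENT by over-allocating and
       rounding the pointer up.  If ALLOCATED_PTR is non-NULL, it
       receives the raw malloc'd pointer so the caller can free it.  */
    static void *
    precise_aligned_alloc (size_t alignment, size_t size, void **allocated_ptr)
    {
      void *raw = malloc (size + alignment - 1);

      if (raw == NULL)
        return NULL;

      uintptr_t aligned = ((uintptr_t) raw + alignment - 1)
                          & ~(uintptr_t) (alignment - 1);

      if (allocated_ptr != NULL)
        *allocated_ptr = raw;

      return (void *) aligned;
    }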
int
main ()
{
- dyn_buf0 = (char *) malloc(sizeof(char) * 32);
- dyn_buf1 = (char *) malloc(sizeof(char) * 32);
+ dyn_buf0 = (char *) precise_aligned_alloc (32, sizeof (char) * 32, NULL);
+ dyn_buf1 = (char *) precise_aligned_alloc (32, sizeof (char) * 32, NULL);
for (int i = 0; i < 32; i++)
{
dyn_buf0[i] = 0x20 + (i % 16);
# Some targets have leading underscores on assembly symbols.
set additional_flags [gdb_target_symbol_prefix_flags]
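+# Make the aligned-allocation helper from lib/ available when compiling
+# the test program, which includes it to get 32-byte aligned dynamic
+# buffers.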
+lappend_include_file alloc_lib $srcdir/lib/precise-aligned-alloc.c
if {[prepare_for_testing "failed to prepare" $testfile $srcfile \
- [list debug $additional_flags]]} {
+ [list debug $additional_flags $alloc_lib]]} {
return -1
}
if {[record_full_function "vmov"] == true} {
# Now execute backwards, checking all instructions.
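+ # dyn_buf0 was filled with 0x20 + (i % 16), i.e. the byte pattern
+ # 0x20..0x2f repeated twice, so each 128-bit lane of a ymm register
+ # loaded from it prints (little-endian) as
+ # 0x2f2e2d2c2b2a29282726252423222120.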
test_one_register "vmovdqa" "ymm0" \
- "0x1f1e1d1c1b1a19181716151413121110, 0x1f1e1d1c1b1a19181716151413121110" \
+ "0x2f2e2d2c2b2a29282726252423222120, 0x2f2e2d2c2b2a29282726252423222120" \
"from register: "
test_one_register "vmovdqu" "ymm15" \
- "0x1f1e1d1c1b1a19181716151413121110, 0x1f1e1d1c1b1a19181716151413121110" \
+ "0x2f2e2d2c2b2a29282726252423222120, 0x2f2e2d2c2b2a29282726252423222120" \
"from register: "
test_one_register "vmovdqu" "ymm0" \
"0x2f2e2d2c2b2a29282726252423222120, 0x2f2e2d2c2b2a29282726252423222120" \
"from register: "
- test_one_memory "vmovdqu" "dyn_buf1" "0x0 .repeats 32 times" \
+ # Don't check the full buffer because that'd be too long.
+ test_one_memory "vmovdqu" "dyn_buf1" \
+ "0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29" \
true "dynamic buffer: "
test_one_register "vmovdqu" "ymm0" \
"0x1f1e1d1c1b1a19181716151413121110, 0x1f1e1d1c1b1a19181716151413121110" \
"dynamic buffer: "
+ test_one_memory "vmovdqa" "dyn_buf1" "0x0 .repeats 32 times" true
+ test_one_register "vmovdqa" "ymm15" "0x0, 0x0"
- # Don't check the full buffer because that'd be too long
test_one_memory "vmovdqu" "global_buf1" \
- "0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19" \
- "global buffer: "
+ "0x0 .repeats 32 times" \
+ false "global buffer: "
test_one_register "vmovdqu" "ymm0" \
"0x3f3e3d3c3b3a39383736353433323130, 0x3f3e3d3c3b3a39383736353433323130" \
"global buffer: "
- test_one_memory "vmovdqa" "global_buf1" "0x0 .repeats 32 times"
- test_one_register "vmovdqa" "ymm15" "0x0, 0x0"
test_one_memory "vmovdqu" "buf1" "0x0 .repeats 32 times"
test_one_register "vmovdqu" "ymm0" "0x2726252423222120, 0x0" "local buffer: "