From: Julian Seward
Date: Mon, 11 Jul 2011 11:46:52 +0000 (+0000)
Subject: Add / fix up test cases for {LD,ST}REX{,B,H,D} on ARM. Works for both
X-Git-Tag: svn/VALGRIND_3_7_0~372
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ed80c32ae5cf1aaea17a585935f202da7e9ddeed;p=thirdparty%2Fvalgrind.git

Add / fix up test cases for {LD,ST}REX{,B,H,D} on ARM. Works for both
ARM and Thumb encodings.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11865
---

diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c
index 97e1664018..80cf810fe7 100644
--- a/memcheck/tests/atomic_incs.c
+++ b/memcheck/tests/atomic_incs.c
@@ -5,6 +5,11 @@
    atomicity of the relevant instructions in the generated code; but
    the post-DCAS-merge versions of Valgrind do behave correctly. */
 
+/* On ARM, this can be compiled into either ARM or Thumb code, so as
+   to test both A and T encodings of LDREX/STREX et al. Also on ARM,
+   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
+   does on any other platform. */
+
 #include
 #include
 #include
@@ -75,7 +80,22 @@ __attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
       );
    } while (success != 1);
 #elif defined(VGA_arm)
-   *p += n;
+   unsigned int block[3]
+      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
+   do {
+      __asm__ __volatile__(
+         "mov    r5, %0"        "\n\t"
+         "ldr    r9, [r5, #0]"  "\n\t" // p
+         "ldr    r10, [r5, #4]" "\n\t" // n
+         "ldrexb r8, [r9]"      "\n\t"
+         "add    r8, r8, r10"   "\n\t"
+         "strexb r4, r8, [r9]"  "\n\t"
+         "str    r4, [r5, #8]"  "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
+      );
+   } while (block[2] != 0);
 #elif defined(VGA_s390x)
    int dummy;
    __asm__ __volatile__(
@@ -153,7 +173,22 @@ __attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
       );
    } while (success != 1);
 #elif defined(VGA_arm)
-   *p += n;
+   unsigned int block[3]
+      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
+   do {
+      __asm__ __volatile__(
+         "mov    r5, %0"        "\n\t"
+         "ldr    r9, [r5, #0]"  "\n\t" // p
+         "ldr    r10, [r5, #4]" "\n\t" // n
+         "ldrexh r8, [r9]"      "\n\t"
+         "add    r8, r8, r10"   "\n\t"
+         "strexh r4, r8, [r9]"  "\n\t"
+         "str    r4, [r5, #8]"  "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
+      );
+   } while (block[2] != 0);
 #elif defined(VGA_s390x)
    int dummy;
    __asm__ __volatile__(
@@ -237,11 +272,11 @@ __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
          "ldr   r10, [r5, #4]" "\n\t" // n
          "ldrex r8, [r9]"      "\n\t"
          "add   r8, r8, r10"   "\n\t"
-         "strex r11, r8, [r9]" "\n\t"
-         "str   r11, [r5, #8]" "\n\t"
+         "strex r4, r8, [r9]"  "\n\t"
+         "str   r4, [r5, #8]"  "\n\t"
          : /*out*/
          : /*in*/ "r"(&block[0])
-         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10"
+         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
       );
    } while (block[2] != 0);
 #elif defined(VGA_s390x)
@@ -261,7 +296,7 @@ __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
 
 __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
 {
-#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_arm)
+#if defined(VGA_x86) || defined(VGA_ppc32)
    /* do nothing; is not supported */
 #elif defined(VGA_amd64)
    // this is a bit subtle. It relies on the fact that, on a 64-bit platform,
@@ -290,6 +325,26 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
          : /*trash*/ "memory", "cc", "r15"
       );
    } while (success != 1);
+#elif defined(VGA_arm)
+   unsigned long long int block[3]
+      = { (unsigned long long int)(unsigned long)p,
+          (unsigned long long int)n,
+          0xFFFFFFFFFFFFFFFFULL };
+   do {
+      __asm__ __volatile__(
+         "mov    r5, %0"           "\n\t"
+         "ldr    r8, [r5, #0]"     "\n\t" // p
+         "ldrd   r2, r3, [r5, #8]" "\n\t" // n
+         "ldrexd r0, r1, [r8]"     "\n\t"
+         "adds   r2, r2, r0"       "\n\t"
+         "adc    r3, r3, r1"       "\n\t"
+         "strexd r1, r2, r3, [r8]" "\n\t"
+         "str    r1, [r5, #16]"    "\n\t"
+         : /*out*/
+         : /*in*/ "r"(&block[0])
+         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
+      );
+   } while (block[2] != 0xFFFFFFFF00000000ULL);
 #elif defined(VGA_s390x)
    __asm__ __volatile__(
      " lg 0,%0\n\t"
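
For reference, each of the new ARM sequences above is the standard
load-exclusive/store-exclusive retry loop: LDREX{B,H,D} loads the
operand and opens an exclusive monitor, STREX{B,H,D} attempts the
store and writes 0 to its status register on success or 1 if the
reservation was lost, and the enclosing do/while retries until the
store succeeds. A minimal sketch of the same idiom as a standalone
function, using ordinary asm operands instead of the patch's scratch
"block" array (the function name and constraint choices here are
illustrative, not part of the patch; assumes an ARMv6+ target and a
GCC-style toolchain):

   #include <stdio.h>

   /* Sketch only: atomic 32-bit add via an LDREX/STREX retry loop. */
   static void atomic_add_32bit_sketch ( int* p, int n )
   {
      int tmp;
      unsigned int failed;                      /* STREX status: 0 = ok */
      do {
         __asm__ __volatile__(
            "ldrex %0, [%2]"     "\n\t"   /* tmp = *p, open monitor */
            "add   %0, %0, %3"   "\n\t"   /* tmp += n */
            "strex %1, %0, [%2]" "\n\t"   /* try *p = tmp */
            : /*out*/ "=&r"(tmp), "=&r"(failed)
            : /*in*/ "r"(p), "r"(n)
            : /*trash*/ "memory", "cc"
         );
      } while (failed != 0);              /* reservation lost: retry */
   }

   int main ( void )
   {
      int x = 41;
      atomic_add_32bit_sketch(&x, 1);
      printf("%d\n", x);                  /* prints 42 */
      return 0;
   }

The patch instead routes operands through the "block" array and
hardcoded registers, presumably to keep the emitted instruction
sequences fixed regardless of register allocation; with a GCC
toolchain the two encodings can then be selected by building the test
with -marm and -mthumb respectively.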
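
One non-obvious detail in the new 64-bit sequence: STREXD's status
flag is only 32 bits wide, but the sentinel slot block[2] is 64 bits.
The "str r1, [r5, #16]" stores that flag over the low word of
block[2], which on a little-endian ARM (the only configuration this
code handles) leaves the high word's initial 0xFFFFFFFF intact:

   /* block[2] as seen by the loop condition, little-endian:
        initial value:     0xFFFFFFFFFFFFFFFF
        STREXD succeeded:  0xFFFFFFFF00000000   -> loop exits
        STREXD failed:     0xFFFFFFFF00000001   -> retry          */

so "while (block[2] != 0xFFFFFFFF00000000ULL)" spins exactly until
the exclusive store succeeds.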