--- /dev/null
+#include <stdlib.h>
+#include <stdio.h>
+
+typedef unsigned int UInt;
+typedef unsigned long long int ULong;
+/* Run one "lock cmpxchg8b" against a private 8-byte memory operand and
+ * hand back the register, memory and Z-flag state it leaves behind.
+ *
+ * IN:  rdxIn, raxIn, rcxIn and rbxIn are loaded unmodified into rdx, rax,
+ *      rcx and rbx.  memLoIn and memHiIn are narrowed to UInt and become
+ *      mem[0] and mem[1], the two halves of the memory operand.
+ * OUT: rdxOut and raxOut receive rdx and rax as they are after the
+ *      instruction; memLoOut and memHiOut receive mem[0] and mem[1]
+ *      widened back to ULong; zOut receives 1 if the instruction left
+ *      the Z flag set and 0 otherwise.
+ *
+ * Per the instruction's definition, edx:eax is compared with the memory
+ * operand; on a match Z is set and ecx:ebx is stored to memory, otherwise
+ * Z is cleared and the memory operand is loaded into edx:eax.
+ */
+void do_cmpxchg8b ( /*OUT*/
+ ULong* rdxOut, ULong* raxOut,
+ ULong* memHiOut, ULong* memLoOut,
+ ULong* zOut,
+ /*IN*/
+ ULong rdxIn, ULong raxIn,
+ ULong memHiIn, ULong memLoIn,
+ ULong rcxIn, ULong rbxIn )
+{
+ UInt mem[2]; /* the 8-byte memory operand: [0] low half, [1] high half */
+ ULong block[6]; /* parameter block shared with the asm:
+                    [0] rdx  [1] rax  [2] rcx  [3] rbx
+                    [4] address of mem  [5] Z result */
+ mem[0] = (UInt)memLoIn;
+ mem[1] = (UInt)memHiIn;
+ block[0] = rdxIn;
+ block[1] = raxIn;
+ block[2] = rcxIn;
+ block[3] = rbxIn;
+ block[4] = (ULong)&mem[0];
+ block[5] = ~(0ULL); /* placeholder; the asm below overwrites it with 0 or 1 */
+ __asm__ __volatile__(
+ "movq %0,%%r11\n" /* r11 = &block[0] */
+ "\tmovq 0(%%r11),%%rdx\n" /* rdx = block[0] */
+ "\tmovq 8(%%r11),%%rax\n" /* rax = block[1] */
+ "\tmovq 16(%%r11),%%rcx\n" /* rcx = block[2] */
+ "\tmovq 24(%%r11),%%rbx\n" /* rbx = block[3] */
+ "\tmovq 32(%%r11),%%r10\n" /* r10 = block[4], the address of mem */
+ "\tlock cmpxchg8b (%%r10)\n" /* the instruction being exercised */
+ "\tmovabsq $0,%%r10\n" /* zero r10 with a mov, which leaves the flags alone */
+ "\tsetz %%r10b\n" /* r10 = 1 if Z is set, else 0 */
+ "\tmovq %%r10,40(%%r11)\n" /* block[5] = Z result */
+ "\tmovq %%rdx,0(%%r11)\n" /* block[0] = rdx after the instruction */
+ "\tmovq %%rax,8(%%r11)\n" /* block[1] = rax after the instruction */
+ : /*out*/ /* none: results come back through block[] and mem[] */
+ : /*in*/ "r"(&block[0])
+ : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
+ "cc", "memory" ); /* "memory": the asm reads and writes block[] and mem[] */
+ *rdxOut = block[0];
+ *raxOut = block[1];
+ *memLoOut = (ULong)mem[0];
+ *memHiOut = (ULong)mem[1];
+ *zOut = block[5];
+}
+
+/* Run do_cmpxchg8b on one set of inputs and print a single " Q" line:
+ * the inputs (d:a, mem as mHi:mLo, c:b) followed by the resulting Z flag,
+ * d:a pair and memory contents.  All values are printed in hex except
+ * the Z flag, which is printed in decimal.
+ */
+void try8b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
+{
+   ULong dd, aa, mmHi, mmLo, zz;
+
+   do_cmpxchg8b( &dd, &aa, &mmHi, &mmLo, &zz,
+                 d, a, mHi, mLo, c, b );
+
+   /* zz is a ULong, so it gets an unsigned conversion.  do_cmpxchg8b only
+      ever stores 0 or 1 there, so the printed text is the same as it
+      would be with a signed conversion. */
+   printf(" Q d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
+          "-> z=%llu d:a=%llx:%llx mem=%llx:%llx\n",
+          d,a, mHi,mLo, c,b, zz, dd,aa, mmHi,mmLo );
+}
+
+/* Run one "lock cmpxchg16b" against a private 16-byte memory operand and
+ * hand back the register, memory and Z-flag state it leaves behind.
+ *
+ * IN:  rdxIn, raxIn, rcxIn and rbxIn are loaded unmodified into rdx, rax,
+ *      rcx and rbx.  memLoIn and memHiIn become mem[0] and mem[1], the
+ *      two halves of the memory operand.
+ * OUT: rdxOut and raxOut receive rdx and rax as they are after the
+ *      instruction; memLoOut and memHiOut receive mem[0] and mem[1];
+ *      zOut receives 1 if the instruction left the Z flag set and 0
+ *      otherwise.
+ *
+ * Per the instruction's definition, rdx:rax is compared with the memory
+ * operand; on a match Z is set and rcx:rbx is stored to memory, otherwise
+ * Z is cleared and the memory operand is loaded into rdx:rax.
+ */
+void do_cmpxchg16b ( /*OUT*/
+                     ULong* rdxOut, ULong* raxOut,
+                     ULong* memHiOut, ULong* memLoOut,
+                     ULong* zOut,
+                     /*IN*/
+                     ULong rdxIn, ULong raxIn,
+                     ULong memHiIn, ULong memLoIn,
+                     ULong rcxIn, ULong rbxIn )
+{
+   /* cmpxchg16b faults unless its memory operand is 16-byte aligned, so
+      ask for that alignment explicitly rather than depending on where
+      the compiler happens to place the array. */
+   ULong mem[2] __attribute__((aligned(16)));
+
+   /* Parameter block shared with the asm:
+      [0] rdx  [1] rax  [2] rcx  [3] rbx  [4] address of mem  [5] Z result */
+   ULong block[6];
+
+   mem[0]   = memLoIn;
+   mem[1]   = memHiIn;
+   block[0] = rdxIn;
+   block[1] = raxIn;
+   block[2] = rcxIn;
+   block[3] = rbxIn;
+   block[4] = (ULong)&mem[0];
+   block[5] = ~(0ULL);   /* placeholder; the asm overwrites it with 0 or 1 */
+
+   __asm__ __volatile__(
+      "movq %0,%%r11\n"              /* r11 = &block[0] */
+      "\tmovq 0(%%r11),%%rdx\n"      /* rdx = block[0] */
+      "\tmovq 8(%%r11),%%rax\n"      /* rax = block[1] */
+      "\tmovq 16(%%r11),%%rcx\n"     /* rcx = block[2] */
+      "\tmovq 24(%%r11),%%rbx\n"     /* rbx = block[3] */
+      "\tmovq 32(%%r11),%%r10\n"     /* r10 = block[4], the address of mem */
+      "\tlock cmpxchg16b (%%r10)\n"  /* the instruction being exercised */
+      "\tmovabsq $0,%%r10\n"         /* zero r10 with a mov: flags survive */
+      "\tsetz %%r10b\n"              /* r10 = 1 if Z is set, else 0 */
+      "\tmovq %%r10,40(%%r11)\n"     /* block[5] = Z result */
+      "\tmovq %%rdx,0(%%r11)\n"      /* block[0] = rdx afterwards */
+      "\tmovq %%rax,8(%%r11)\n"      /* block[1] = rax afterwards */
+      : /*out*/
+      : /*in*/ "r"(&block[0])
+      : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
+                  "cc", "memory" );
+
+   *rdxOut   = block[0];
+   *raxOut   = block[1];
+   *memLoOut = mem[0];
+   *memHiOut = mem[1];
+   *zOut     = block[5];
+}
+
+/* Run do_cmpxchg16b on one set of inputs and print a single "QQ" line:
+ * the inputs (d:a, mem as mHi:mLo, c:b) followed by the resulting Z flag,
+ * d:a pair and memory contents.  All values are printed in hex except
+ * the Z flag, which is printed in decimal.
+ */
+void try16b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
+{
+   ULong dd, aa, mmHi, mmLo, zz;
+
+   do_cmpxchg16b( &dd, &aa, &mmHi, &mmLo, &zz,
+                  d, a, mHi, mLo, c, b );
+
+   /* zz is a ULong, so it gets an unsigned conversion.  do_cmpxchg16b only
+      ever stores 0 or 1 there, so the printed text is the same as it
+      would be with a signed conversion. */
+   printf("QQ d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
+          "-> z=%llu d:a=%llx:%llx mem=%llx:%llx\n",
+          d,a, mHi,mLo, c,b, zz, dd,aa, mmHi,mmLo );
+}
+
+/* Drive both instructions through the same six input patterns, first
+ * with the small values as given and then with 0xDEADBEEF00000000 added
+ * to every input.  All try8b lines are printed before any try16b line.
+ */
+int main(void)
+{
+   /* One row per call, in argument order: { d, a, mHi, mLo, c, b }. */
+   static const ULong pattern[6][6] = {
+      { 0,1, 5,4, 3,2 },
+      { 0,1, 0,1, 3,2 },
+      { 0,1, 0,4, 3,2 },
+      { 0,1, 0,0, 3,2 },
+      { 0,1, 5,0, 3,2 },
+      { 0,1, 1,1, 3,2 }
+   };
+   /* Value added to all six inputs of a row. */
+   static const ULong offset[2] = { 0, 0xDEADBEEF00000000ULL };
+   void (*const runner[2])( ULong, ULong, ULong, ULong, ULong, ULong )
+      = { try8b, try16b };
+   unsigned int r, o, p;
+
+   for (r = 0; r < 2; r++) {
+      for (o = 0; o < 2; o++) {
+         for (p = 0; p < 6; p++) {
+            const ULong* in = pattern[p];
+            const ULong  k  = offset[o];
+            runner[r]( in[0]+k, in[1]+k, in[2]+k, in[3]+k, in[4]+k, in[5]+k );
+         }
+      }
+   }
+
+   return 0;
+}
+