]> git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Add a test which checks the ISTRI-vs-ESTRI-vs-ISTRM-vs-ESTRM
authorJulian Seward <jseward@acm.org>
Tue, 17 Aug 2010 22:43:21 +0000 (22:43 +0000)
committerJulian Seward <jseward@acm.org>
Tue, 17 Aug 2010 22:43:21 +0000 (22:43 +0000)
aspects of PCMP{I,E}STR{I,M}.  Doesn't check the actual arithmetic
very much -- that's done by pcmpstr64.c.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11267

none/tests/amd64/pcmpxstrx64.c [new file with mode: 0644]

diff --git a/none/tests/amd64/pcmpxstrx64.c b/none/tests/amd64/pcmpxstrx64.c
new file mode 100644 (file)
index 0000000..6d67aa7
--- /dev/null
@@ -0,0 +1,334 @@
+
+/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}.  Does not
+   check the core arithmetic in any detail.  */
+
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+typedef  unsigned char  V128[16];
+typedef  unsigned int   UInt;
+typedef  signed int     Int;
+typedef  unsigned char  UChar;
+typedef  unsigned long long int ULong;
+typedef  UChar          Bool;
+#define False ((Bool)0)
+#define True  ((Bool)1)
+
+void show_V128 ( V128* vec )
+{
+   Int i;
+   for (i = 15; i >= 0; i--)
+      printf("%02x", (UInt)( (*vec)[i] ));
+}
+
+void expand ( V128* dst, char* summary )
+{
+   Int i;
+   assert( strlen(summary) == 16 );
+   for (i = 0; i < 16; i++) {
+      UChar xx = 0;
+      UChar x = summary[15-i];
+      if      (x >= '0' && x <= '9') { xx = x - '0'; }
+      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
+      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
+      else assert(0);
+
+      assert(xx < 16);
+      xx = (xx << 4) | xx;
+      assert(xx < 256);
+      (*dst)[i] = xx;
+   }
+}
+
+void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
+{
+   V128 argL, argR;
+   expand( &argL, summL );
+   expand( &argR, summR );
+   printf("\n");
+   printf("rdx %016llx  argL ", rdxIN);
+   show_V128(&argL);
+   printf("  rax %016llx  argR ", raxIN);
+   show_V128(&argR);
+   printf("\n");
+
+   ULong block[ 2/*in:argL*/          // 0  0
+                + 2/*in:argR*/        // 2  16
+                + 1/*in:rdx*/         // 4  32
+                + 1/*in:rax*/         // 5  40
+                + 2/*inout:xmm0*/     // 6  48
+                + 1/*inout:rcx*/      // 8  64
+                + 1/*out:rflags*/ ];  // 9  72
+   assert(sizeof(block) == 80);
+
+   UChar* blockC = (UChar*)&block[0];
+
+   /* ---------------- ISTRI_4A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpistri $0x4A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  istri $0x4A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+   /* ---------------- ISTRI_0A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpistri $0x0A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  istri $0x0A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+   /* ---------------- ISTRM_4A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpistrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  istrm $0x4A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+   /* ---------------- ISTRM_0A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpistrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  istrm $0x0A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+   /* ---------------- ESTRI_4A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpestri $0x4A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  estri $0x4A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+   /* ---------------- ESTRI_0A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpestri $0x0A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  estri $0x0A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+   /* ---------------- ESTRM_4A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpestrm $0x4A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  estrm $0x4A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+   /* ---------------- ESTRM_0A ---------------- */
+   memset(blockC, 0x55, 80);
+   memcpy(blockC + 0,  &argL,  16);
+   memcpy(blockC + 16, &argR,  16);
+   memcpy(blockC + 24, &rdxIN, 8);
+   memcpy(blockC + 32, &raxIN, 8);
+   memcpy(blockC + 40, &rdxIN, 8);
+   __asm__ __volatile__(
+      "movupd    0(%0), %%xmm2"           "\n\t"
+      "movupd    16(%0), %%xmm13"         "\n\t"
+      "movq      32(%0), %%rdx"           "\n\t"
+      "movq      40(%0), %%rax"           "\n\t"
+      "movupd    48(%0), %%xmm0"          "\n\t"
+      "movw      64(%0), %%rcx"           "\n\t"
+      "pcmpestrm $0x0A, %%xmm2, %%xmm13"  "\n\t"
+      "movupd    %%xmm0, 48(%0)"          "\n\t"
+      "movw      %%rcx, 64(%0)"           "\n\t"
+      "pushfq"                            "\n\t"
+      "popq      %%r15"                   "\n\t"
+      "movq      %%r15, 72(%0)"           "\n\t"
+      : /*out*/ 
+      : /*in*/"r"(blockC) 
+      : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+   );
+   printf("  estrm $0x0A:  ");
+   printf("    xmm0 ");
+   show_V128( (V128*)(blockC+48) );
+   printf("  rcx %016llx  flags %08llx\n", block[8], block[9] & 0x8D5);
+
+
+
+
+}
+
+int main ( void )
+{
+   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
+   one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
+
+   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
+   one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
+
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
+
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
+   one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
+
+   one_test("aaaaaaaaaaaaaaaa", 5,  "aaaaaaaaaaaaaaaa", 6 );
+   one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
+   one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
+   one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
+
+   one_test("aaaaaaaaaaaaaaaa", -5,  "aaaaaaaaaaaaaaaa", 6 );
+   one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
+   one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
+   one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
+
+   return 0;
+}