--- /dev/null
+
+/* Tests e-vs-i or i-vs-m aspects for pcmp{e,i}str{i,m}. Does not
+ check the core arithmetic in any detail. */
+
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+typedef unsigned char V128[16];
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned char UChar;
+typedef unsigned long long int ULong;
+typedef UChar Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+void show_V128 ( V128* vec )
+{
+ Int i;
+ for (i = 15; i >= 0; i--)
+ printf("%02x", (UInt)( (*vec)[i] ));
+}
+
+void expand ( V128* dst, char* summary )
+{
+ Int i;
+ assert( strlen(summary) == 16 );
+ for (i = 0; i < 16; i++) {
+ UChar xx = 0;
+ UChar x = summary[15-i];
+ if (x >= '0' && x <= '9') { xx = x - '0'; }
+ else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
+ else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
+ else assert(0);
+
+ assert(xx < 16);
+ xx = (xx << 4) | xx;
+ assert(xx < 256);
+ (*dst)[i] = xx;
+ }
+}
+
+void one_test ( char* summL, ULong rdxIN, char* summR, ULong raxIN )
+{
+ V128 argL, argR;
+ expand( &argL, summL );
+ expand( &argR, summR );
+ printf("\n");
+ printf("rdx %016llx argL ", rdxIN);
+ show_V128(&argL);
+ printf(" rax %016llx argR ", raxIN);
+ show_V128(&argR);
+ printf("\n");
+
+ ULong block[ 2/*in:argL*/ // 0 0
+ + 2/*in:argR*/ // 2 16
+ + 1/*in:rdx*/ // 4 32
+ + 1/*in:rax*/ // 5 40
+ + 2/*inout:xmm0*/ // 6 48
+ + 1/*inout:rcx*/ // 8 64
+ + 1/*out:rflags*/ ]; // 9 72
+ assert(sizeof(block) == 80);
+
+ UChar* blockC = (UChar*)&block[0];
+
+ /* ---------------- ISTRI_4A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistri $0x4A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istri $0x4A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ISTRI_0A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistri $0x0A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istri $0x0A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ISTRM_4A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistrm $0x4A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istrm $0x4A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ISTRM_0A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpistrm $0x0A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" istrm $0x0A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRI_4A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestri $0x4A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estri $0x4A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRI_0A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestri $0x0A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estri $0x0A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRM_4A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestrm $0x4A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estrm $0x4A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+ /* ---------------- ESTRM_0A ---------------- */
+ memset(blockC, 0x55, 80);
+ memcpy(blockC + 0, &argL, 16);
+ memcpy(blockC + 16, &argR, 16);
+ memcpy(blockC + 24, &rdxIN, 8);
+ memcpy(blockC + 32, &raxIN, 8);
+ memcpy(blockC + 40, &rdxIN, 8);
+ __asm__ __volatile__(
+ "movupd 0(%0), %%xmm2" "\n\t"
+ "movupd 16(%0), %%xmm13" "\n\t"
+ "movq 32(%0), %%rdx" "\n\t"
+ "movq 40(%0), %%rax" "\n\t"
+ "movupd 48(%0), %%xmm0" "\n\t"
+ "movw 64(%0), %%rcx" "\n\t"
+ "pcmpestrm $0x0A, %%xmm2, %%xmm13" "\n\t"
+ "movupd %%xmm0, 48(%0)" "\n\t"
+ "movw %%rcx, 64(%0)" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%r15" "\n\t"
+ "movq %%r15, 72(%0)" "\n\t"
+ : /*out*/
+ : /*in*/"r"(blockC)
+ : /*trash*/"memory","cc","xmm2","xmm13","xmm0","rdx","rax","rcx","r15"
+ );
+ printf(" estrm $0x0A: ");
+ printf(" xmm0 ");
+ show_V128( (V128*)(blockC+48) );
+ printf(" rcx %016llx flags %08llx\n", block[8], block[9] & 0x8D5);
+
+
+
+
+}
+
+int main ( void )
+{
+ one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaa0aaaaaaa", 0 );
+ one_test("0000000000000000", 0, "aaaaaaaa0aaaaaaa", 0 );
+
+ one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 0 );
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 0 );
+ one_test("aaaaaaaaaaaaaaaa", 0, "aaaaaaaaaaaaaaaa", 6 );
+
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 15 );
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 16 );
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 17 );
+
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -6 );
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -15 );
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -16 );
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", -17 );
+
+ one_test("aaaaaaaaaaaaaaaa", 5, "aaaaaaaaaaaaaaaa", 6 );
+ one_test("aaaaaaaaaaaaaaaa", 15, "aaaaaaaaaaaaaaaa", 6 );
+ one_test("aaaaaaaaaaaaaaaa", 16, "aaaaaaaaaaaaaaaa", 6 );
+ one_test("aaaaaaaaaaaaaaaa", 17, "aaaaaaaaaaaaaaaa", 6 );
+
+ one_test("aaaaaaaaaaaaaaaa", -5, "aaaaaaaaaaaaaaaa", 6 );
+ one_test("aaaaaaaaaaaaaaaa", -15, "aaaaaaaaaaaaaaaa", 6 );
+ one_test("aaaaaaaaaaaaaaaa", -16, "aaaaaaaaaaaaaaaa", 6 );
+ one_test("aaaaaaaaaaaaaaaa", -17, "aaaaaaaaaaaaaaaa", 6 );
+
+ return 0;
+}