/* Test program for PCMPISTRI on amd64 (not yet hooked up).

   Recovered from the new-file patch for none/tests/amd64/pcmpstr64.c
   (Julian Seward, Fri, 6 Aug 2010; commit
   366f1bd8be29c2d21a5bb6a275bd2594eca280b9,
   git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11250).

   For every tested imm8 value XX there are three functions:

     h_pcmpistri_XX  executes the real instruction through inline asm
     s_pcmpistri_XX  computes the same result in plain C
     istri_XX        feeds operand pairs to both through try_istri()

   Both the h_ and the s_ variants return (flags << 16) | ecx, where
   'flags' contains only the O, S, Z, A, C and P bits of rflags.
   try_istri() prints both values and appends "!!!!" when they differ.
*/

#include <assert.h>
#include <stdio.h>
#include <string.h>

typedef  unsigned char           V128[16];
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;

/* Bit positions of the condition codes inside rflags. */
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1ULL << SHIFT_O)
#define MASK_S (1ULL << SHIFT_S)
#define MASK_Z (1ULL << SHIFT_Z)
#define MASK_A (1ULL << SHIFT_A)
#define MASK_C (1ULL << SHIFT_C)
#define MASK_P (1ULL << SHIFT_P)

/* All six condition-code bits together (== 0x8D5). */
#define MASK_OSZACP (MASK_O | MASK_S | MASK_Z | MASK_A | MASK_C | MASK_P)


/* Number of leading zero bits in x; returns 32 for x == 0.
   Assumes UInt is 32 bits wide.  Uses only unsigned arithmetic, so it
   does not depend on how the compiler shifts negative values. */
UInt clz32 ( UInt x )
{
   UInt n = 0;
   if (x == 0)
      return 32;
   if ((x & 0xFFFF0000u) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000u) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000u) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000u) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000u) == 0) { n +=  1; }
   return n;
}

/* Number of trailing zero bits in x; returns 32 for x == 0.
   (~x) & (x-1) has a 1 exactly in every position below the lowest set
   bit of x, so counting those bits gives the answer. */
UInt ctz32 ( UInt x )
{
   return 32 - clz32((~x) & (x-1));
}

/* Build a 16-byte vector from a 16-character hex summary.  Each hex
   digit d becomes the byte 0xdd; the LAST character of the summary is
   stored in byte 0 of *dst, the first character in byte 15.
   Asserts on a summary of the wrong length or with a non-hex digit. */
void expand ( V128* dst, const char* summary )
{
   Int i;
   assert( strlen(summary) == 16 );
   for (i = 0; i < 16; i++) {
      UChar xx = 0;
      UChar x  = summary[15-i];
      if      (x >= '0' && x <= '9') { xx = x - '0'; }
      else if (x >= 'A' && x <= 'F') { xx = x - 'A' + 10; }
      else if (x >= 'a' && x <= 'f') { xx = x - 'a' + 10; }
      else assert(0);

      assert(xx < 16);
      xx = (xx << 4) | xx;   // replicate the nibble into both halves
      (*dst)[i] = xx;
   }
}

/* Run one test case: expand both summaries, run the hardware (h_fn)
   and software (s_fn) implementations on them, and print one line with
   both results.  The line ends in "!!!!" when the results differ.
   'which' is the two-character imm8 name, e.g. "4A". */
void try_istri ( const char* which,
                 UInt(*h_fn)(V128*,V128*),
                 UInt(*s_fn)(V128*,V128*),
                 const char* summL, const char* summR )
{
   V128 argL, argR;
   UInt h_res, s_res;
   assert(strlen(which) == 2);
   expand(&argL, summL);
   expand(&argR, summR);
   h_res = h_fn(&argL, &argR);
   s_res = s_fn(&argL, &argR);
   // NOTE(review): this file was recovered from a whitespace-collapsed
   // copy; confirm the spacing inside the format string against the
   // expected-output file before hooking the test up.
   printf("istri %s %s %s -> %08x %08x %s\n",
          which, summL, summR, h_res, s_res, h_res == s_res ? "" : "!!!!");
}


//////////////////////////////////////////////////////////
//                                                      //
//                Hardware side (h_)                    //
//                                                      //
//////////////////////////////////////////////////////////

/* Defines h_pcmpistri_<IMM>, which executes
      pcmpistri $0x<IMM>, %xmm2, %xmm11
   with xmm2 = *argL and xmm11 = *argR and returns
      ((rflags & MASK_OSZACP) << 16) | (ecx & 0xFFFF).

   Notes on the asm:
   - the operands are loaded with movdqu because 'block' is a plain
     byte array and is not guaranteed to be 16-byte aligned;
   - %rsp is moved down by 1024 around the sequence so that pushfq
     cannot overwrite anything the compiler keeps just below %rsp
     (the ABI red zone).  The flags are captured before the final
     addq, which itself changes them. */
#define DEFINE_H_PCMPISTRI(IMM)                                      \
   __attribute__((noinline))                                         \
   UInt h_pcmpistri_##IMM ( V128* argL, V128* argR )                 \
   {                                                                 \
      V128  block[2];                                                \
      ULong res = 0, flags = 0;                                      \
      memcpy(&block[0], argL, sizeof(V128));                         \
      memcpy(&block[1], argR, sizeof(V128));                         \
      __asm__ __volatile__(                                          \
         "subq      $1024, %%rsp"                      "\n\t"        \
         "movdqu    0(%2), %%xmm2"                     "\n\t"        \
         "movdqu    16(%2), %%xmm11"                   "\n\t"        \
         "pcmpistri $0x" #IMM ", %%xmm2, %%xmm11"      "\n\t"        \
         "pushfq"                                      "\n\t"        \
         "popq      %%rdx"                             "\n\t"        \
         "movq      %%rcx, %0"                         "\n\t"        \
         "movq      %%rdx, %1"                         "\n\t"        \
         "addq      $1024, %%rsp"                      "\n\t"        \
         : /*out*/ "=r"(res), "=r"(flags)                            \
         : /*in*/  "r"(&block[0])                                    \
         : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"           \
      );                                                             \
      return ((flags & MASK_OSZACP) << 16) | (res & 0xFFFF);         \
   }

DEFINE_H_PCMPISTRI(4A)
DEFINE_H_PCMPISTRI(3A)
DEFINE_H_PCMPISTRI(0C)
DEFINE_H_PCMPISTRI(08)


//////////////////////////////////////////////////////////
//                                                      //
//          Software side: shared helpers               //
//                                                      //
//////////////////////////////////////////////////////////

/* Bit i of the result is 1 iff byte i of 'arg' is zero. */
static UInt zero_mask ( const UChar* arg )
{
   UInt mask = 0;
   Int  i;
   for (i = 0; i < 16; i++) {
      if (arg[i] == 0)
         mask |= 1u << i;
   }
   return mask;
}

/* not(left(zmask)): 1 bits strictly below the lowest set bit of zmask,
   i.e. the positions that come before the first zero byte.  All ones
   when zmask == 0.  Only the low 16 bits are meaningful to callers. */
static UInt valid_mask ( UInt zmask )
{
   return ~(zmask | -zmask);
}

/* IntRes1 for the "equal each" aggregation: per-byte equality followed
   by the invalidation step common to all equal-each cases. */
static UInt equal_each_intres1 ( const UChar* argL, const UChar* argR,
                                 UInt zmaskL, UInt zmaskR )
{
   UInt validL    = valid_mask(zmaskL);
   UInt validR    = valid_mask(zmaskR);
   UInt boolResII = 0;
   UInt intRes1;
   Int  i;
   for (i = 0; i < 16; i++) {
      if (argL[i] == argR[i])
         boolResII |= 1u << i;
   }
   intRes1
      = (boolResII & validL & validR)   // if both valid, use cmpres
        | (~ (validL | validR));        // if both invalid, force 1
                                        // else force 0
   return intRes1 & 0xFFFF;
}

/* ecx value, common to all index-of-ms-1-bit cases. */
static UInt index_of_ms_1_bit ( UInt intRes2 )
{
   return intRes2 == 0 ? 16 : (31 - clz32(intRes2));
}

/* ecx value, common to all index-of-ls-1-bit cases. */
static UInt index_of_ls_1_bit ( UInt intRes2 )
{
   return intRes2 == 0 ? 16 : ctz32(intRes2);
}

/* Generate the new flags (common to all ISTRI and ISTRM cases) and
   combine them with the new ecx value as (flags << 16) | ecx. */
static UInt pack_flags_and_ecx ( UInt intRes2, UInt zmaskL, UInt zmaskR,
                                 UInt newECX )
{
   UInt newFlags    // A, P are zero
      = ((intRes2 == 0) ? 0 : MASK_C)   // C == 0 iff intRes2 == 0
        | ((zmaskL == 0) ? 0 : MASK_Z)  // Z == 1 iff any in argL is 0
        | ((zmaskR == 0) ? 0 : MASK_S)  // S == 1 iff any in argR is 0
        | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]

   return (newFlags << 16) | newECX;
}


//////////////////////////////////////////////////////////
//                                                      //
//        Operand pairs for the equal-each cases        //
//                                                      //
//////////////////////////////////////////////////////////

/* { summL, summR } pairs used, in this order, by istri_4A, istri_3A
   and istri_08. */
static const char* const equal_each_cases[][2] = {
   { "0000000000000000", "0000000000000000" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaa2aaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaa2aaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaa2aa", "aaaaaaaaaaaaaaaa" },

   { "aaaaaaaaaaaaaaaa", "aaaa2aaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaa2aaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaa2a" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "baaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9aaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaaaaaa7aaa" },
   { "b9baaaaaaaaaaaaa", "aaaaaaaa2aaa4aaa" },

   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa" },

   { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
   { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaa0aaa" },

   { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaa0aaa" },
   { "aaaaaaaa0aaaaaaa", "aaaaaaaaaaaa0aaa" },

   { "aaaaaaaaaaaa0aaa", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "aaaaaaaa0aaaaaaa" },
   { "aaaaaaaaaaaa0aaa", "aaaaaaaa0aaaaaaa" },

   { "0000000000000000", "aaaaaaaa0aaaaaaa" },
   { "8000000000000000", "aaaaaaaa0aaaaaaa" },
   { "0000000000000001", "aaaaaaaa0aaaaaaa" },

   { "0000000000000000", "aaaaaaaaaaaaaaaa" },
   { "aaaaaaaaaaaaaaaa", "0000000000000000" }
};

/* Feed every pair of equal_each_cases to try_istri(). */
static void run_equal_each_cases ( const char* which,
                                   UInt(*h)(V128*,V128*),
                                   UInt(*s)(V128*,V128*) )
{
   unsigned long i;
   unsigned long n = sizeof(equal_each_cases) / sizeof(equal_each_cases[0]);
   for (i = 0; i < n; i++)
      try_istri(which, h, s, equal_each_cases[i][0], equal_each_cases[i][1]);
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_4A                       //
//                                                      //
//////////////////////////////////////////////////////////

UInt s_pcmpistri_4A ( V128* argLU, V128* argRU )
{
   /* signed bytes      (also works for unsigned)
      equal each        (straightforward parallel compare)
      polarity +        (IntRes2 = IntRes1)
      index 1           (want index of ms 1 bit)
   */
   const UChar* argL = (const UChar*)argLU;
   const UChar* argR = (const UChar*)argRU;
   UInt zmaskL  = zero_mask(argL);
   UInt zmaskR  = zero_mask(argR);
   UInt intRes1 = equal_each_intres1(argL, argR, zmaskL, zmaskR);

   // polarity: +
   UInt intRes2 = intRes1;

   return pack_flags_and_ecx(intRes2, zmaskL, zmaskR,
                             index_of_ms_1_bit(intRes2));
}

void istri_4A ( void )
{
   run_equal_each_cases("4A", h_pcmpistri_4A, s_pcmpistri_4A);
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_3A                       //
//                                                      //
//////////////////////////////////////////////////////////

UInt s_pcmpistri_3A ( V128* argLU, V128* argRU )
{
   /* signed bytes      (also works for unsigned)
      equal each        (straightforward parallel compare)
      polarity Masked-  (IntRes2 = IntRes1 ^ validL)
      index 0           (want index of ls 1 bit)
   */
   const UChar* argL = (const UChar*)argLU;
   const UChar* argR = (const UChar*)argRU;
   UInt zmaskL  = zero_mask(argL);
   UInt zmaskR  = zero_mask(argR);
   UInt intRes1 = equal_each_intres1(argL, argR, zmaskL, zmaskR);

   // polarity: Masked-
   UInt intRes2 = (intRes1 ^ valid_mask(zmaskL)) & 0xFFFF;

   return pack_flags_and_ecx(intRes2, zmaskL, zmaskR,
                             index_of_ls_1_bit(intRes2));
}

void istri_3A ( void )
{
   run_equal_each_cases("3A", h_pcmpistri_3A, s_pcmpistri_3A);
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_0C                       //
//                                                      //
//////////////////////////////////////////////////////////

UInt s_pcmpistri_0C ( V128* argLU, V128* argRU )
{
   /* unsigned bytes
      equal ordered     (substring search)
      polarity +        (IntRes2 = IntRes1)
      index 0           (want index of ls 1 bit)

      argL: haystack, argR: needle
   */
   const UChar* argL = (const UChar*)argLU;
   const UChar* argR = (const UChar*)argRU;
   UInt zmaskL        = zero_mask(argL);
   UInt zmaskR        = zero_mask(argR);
   UInt boolRes       = 0;
   UInt keepSearching = 1;
   UInt i, ni, hi;

   for (i = 0; i < 16; i++) {
      UInt m = 1;

      if (argL[i] == 0) {
         // run off the end of the haystack.
         keepSearching = 0;
      }

      if (keepSearching) {
         // Does the needle match at haystack offset i?  Reaching the
         // end of the needle or the end of the vector counts as a match.
         for (ni = 0; ni < 16; ni++) {
            if (argR[ni] == 0) break;
            hi = ni + i;
            if (hi >= 16) break;
            if (argL[hi] != argR[ni]) { m = 0; break; }
         }
      } else {
         m = 0;
      }
      boolRes |= m << i;
   }

   {
      // boolRes is "pre-invalidated"
      UInt intRes1 = boolRes & 0xFFFF;

      // polarity: +
      UInt intRes2 = intRes1;

      return pack_flags_and_ecx(intRes2, zmaskL, zmaskR,
                                index_of_ls_1_bit(intRes2));
   }
}

void istri_0C ( void )
{
   /* { haystack, needle } summaries. */
   static const char* const cases[][2] = {
      { "111111111abcde11", "00000000000abcde" },

      { "111111111abcde11", "0000abcde00abcde" },

      { "1111111111abcde1", "00000000000abcde" },
      { "11111111111abcde", "00000000000abcde" },
      { "111111111111abcd", "00000000000abcde" },

      { "111abcde1abcde11", "00000000000abcde" },

      { "11abcde11abcde11", "00000000000abcde" },
      { "1abcde111abcde11", "00000000000abcde" },
      { "abcde1111abcde11", "00000000000abcde" },
      { "bcde11111abcde11", "00000000000abcde" },
      { "cde111111abcde11", "00000000000abcde" },

      { "01abcde11abcde11", "00000000000abcde" },
      { "00abcde11abcde11", "00000000000abcde" },
      { "000bcde11abcde11", "00000000000abcde" },

      { "00abcde10abcde11", "00000000000abcde" },
      { "00abcde100bcde11", "00000000000abcde" },

      { "1111111111111234", "0000000000000000" },
      { "1111111111111234", "0000000000000001" },
      { "1111111111111234", "0000000000000011" },

      { "1111111111111234", "1111111111111234" },
      { "a111111111111111", "000000000000000a" },
      { "b111111111111111", "000000000000000a" }
   };
   unsigned long i;
   unsigned long n = sizeof(cases) / sizeof(cases[0]);
   for (i = 0; i < n; i++)
      try_istri("0C", h_pcmpistri_0C, s_pcmpistri_0C,
                cases[i][0], cases[i][1]);
}


//////////////////////////////////////////////////////////
//                                                      //
//                       ISTRI_08                       //
//                                                      //
//////////////////////////////////////////////////////////

UInt s_pcmpistri_08 ( V128* argLU, V128* argRU )
{
   /* unsigned bytes    (also works for signed)
      equal each        (straightforward parallel compare)
      polarity +        (IntRes2 = IntRes1)
      index 0           (want index of ls 1 bit)
   */
   const UChar* argL = (const UChar*)argLU;
   const UChar* argR = (const UChar*)argRU;
   UInt zmaskL  = zero_mask(argL);
   UInt zmaskR  = zero_mask(argR);
   UInt intRes1 = equal_each_intres1(argL, argR, zmaskL, zmaskR);

   // polarity: +
   UInt intRes2 = intRes1;

   return pack_flags_and_ecx(intRes2, zmaskL, zmaskR,
                             index_of_ls_1_bit(intRes2));
}

void istri_08 ( void )
{
   run_equal_each_cases("08", h_pcmpistri_08, s_pcmpistri_08);
}


//////////////////////////////////////////////////////////
//                                                      //
//                         main                         //
//                                                      //
//////////////////////////////////////////////////////////

int main ( void )
{
   istri_4A();
   istri_3A();
   istri_08();
   istri_0C();
   return 0;
}