return res;
}
+static UInt zmask_from_V128_wide ( V128* arg )
+{
+ UInt i, res = 0;
+ for (i = 0; i < 8; i++) {
+ res |= ((arg->w16[i] == 0) ? 1 : 0) << i;
+ }
+ return res;
+}
+
/* Helps with PCMP{I,E}STR{I,M}.
CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really,
HWord isISTRx = opc4 & 2;
HWord isxSTRM = (opc4 & 1) ^ 1;
vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
- vassert((imm8 & 1) == 0); /* we support byte-size cases only */
+ HWord wide = (imm8 & 1);
// where the args are
V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
// FIXME: this is only right for the 8-bit data cases.
// At least that is asserted above.
UInt zmaskL, zmaskR;
- if (isISTRx) {
- zmaskL = zmask_from_V128(argL);
- zmaskR = zmask_from_V128(argR);
- } else {
- Int tmp;
- tmp = edxIN & 0xFFFFFFFF;
- if (tmp < -16) tmp = -16;
- if (tmp > 16) tmp = 16;
- if (tmp < 0) tmp = -tmp;
- vassert(tmp >= 0 && tmp <= 16);
- zmaskL = (1 << tmp) & 0xFFFF;
- tmp = eaxIN & 0xFFFFFFFF;
- if (tmp < -16) tmp = -16;
- if (tmp > 16) tmp = 16;
- if (tmp < 0) tmp = -tmp;
- vassert(tmp >= 0 && tmp <= 16);
- zmaskR = (1 << tmp) & 0xFFFF;
- }
// temp spot for the resulting flags and vector.
V128 resV;
UInt resOSZACP;
- // do the meyaath
- Bool ok = compute_PCMPxSTRx (
- &resV, &resOSZACP, argL, argR,
- zmaskL, zmaskR, imm8, (Bool)isxSTRM
- );
+ // for checking whether case was handled
+ Bool ok = False;
+
+ if (wide) {
+ if (isISTRx) {
+ zmaskL = zmask_from_V128_wide(argL);
+ zmaskR = zmask_from_V128_wide(argR);
+ } else {
+ Int tmp;
+ tmp = edxIN & 0xFFFFFFFF;
+ if (tmp < -8) tmp = -8;
+ if (tmp > 8) tmp = 8;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 8);
+ zmaskL = (1 << tmp) & 0xFF;
+ tmp = eaxIN & 0xFFFFFFFF;
+ if (tmp < -8) tmp = -8;
+ if (tmp > 8) tmp = 8;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 8);
+ zmaskR = (1 << tmp) & 0xFF;
+ }
+ // do the meyaath
+ ok = compute_PCMPxSTRx_wide (
+ &resV, &resOSZACP, argL, argR,
+ zmaskL, zmaskR, imm8, (Bool)isxSTRM
+ );
+ } else {
+ if (isISTRx) {
+ zmaskL = zmask_from_V128(argL);
+ zmaskR = zmask_from_V128(argR);
+ } else {
+ Int tmp;
+ tmp = edxIN & 0xFFFFFFFF;
+ if (tmp < -16) tmp = -16;
+ if (tmp > 16) tmp = 16;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 16);
+ zmaskL = (1 << tmp) & 0xFFFF;
+ tmp = eaxIN & 0xFFFFFFFF;
+ if (tmp < -16) tmp = -16;
+ if (tmp > 16) tmp = 16;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 16);
+ zmaskR = (1 << tmp) & 0xFFFF;
+ }
+ // do the meyaath
+ ok = compute_PCMPxSTRx (
+ &resV, &resOSZACP, argL, argR,
+ zmaskL, zmaskR, imm8, (Bool)isxSTRM
+ );
+ }
// front end shouldn't pass us any imm8 variants we can't
// handle. Hence:
}
+/* Convert a 2-bit value to a 32-bit value by cloning each bit 16
+ times. There's surely a better way to do this, but I don't know
+ what it is. */
+static UInt bits2_to_bytes4 ( UInt bits2 )
+{
+ UInt r = 0;
+ r |= (bits2 & 1) ? 0x0000FFFF : 0;
+ r |= (bits2 & 2) ? 0xFFFF0000 : 0;
+ return r;
+}
+
+
/* Given partial results from a pcmpXstrX operation (intRes1,
basically), generate an I- or M-format output value, also the new
OSZACP flags. */
}
+/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
+ basically), generate an I- or M-format output value, also the new
+ OSZACP flags. */
+static
+void compute_PCMPxSTRx_gen_output_wide (/*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ UInt intRes1,
+ UInt zmaskL, UInt zmaskR,
+ UInt validL,
+ UInt pol, UInt idx,
+ Bool isxSTRM )
+{
+ vassert((pol >> 2) == 0);
+ vassert((idx >> 1) == 0);
+
+ UInt intRes2 = 0;
+ switch (pol) {
+ case 0: intRes2 = intRes1; break; // pol +
+ case 1: intRes2 = ~intRes1; break; // pol -
+ case 2: intRes2 = intRes1; break; // pol m+
+ case 3: intRes2 = intRes1 ^ validL; break; // pol m-
+ }
+ intRes2 &= 0xFF;
+
+ if (isxSTRM) {
+
+ // generate M-format output (a bit or byte mask in XMM0)
+ if (idx) {
+ resV->w32[0] = bits2_to_bytes4( (intRes2 >> 0) & 0x3 );
+ resV->w32[1] = bits2_to_bytes4( (intRes2 >> 2) & 0x3 );
+ resV->w32[2] = bits2_to_bytes4( (intRes2 >> 4) & 0x3 );
+ resV->w32[3] = bits2_to_bytes4( (intRes2 >> 6) & 0x3 );
+ } else {
+ resV->w32[0] = intRes2 & 0xFF;
+ resV->w32[1] = 0;
+ resV->w32[2] = 0;
+ resV->w32[3] = 0;
+ }
+
+ } else {
+
+ // generate I-format output (an index in ECX)
+ // generate ecx value
+ UInt newECX = 0;
+ if (idx) {
+ // index of ms-1-bit
+ newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
+ } else {
+ // index of ls-1-bit
+ newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
+ }
+
+ resV->w32[0] = newECX;
+ resV->w32[1] = 0;
+ resV->w32[2] = 0;
+ resV->w32[3] = 0;
+
+ }
+
+ // generate new flags, common to all ISTRI and ISTRM cases
+ *resOSZACP // A, P are zero
+ = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
+ | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
+ | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
+ | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
+}
+
+
/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
- variants.
+ variants on 8-bit data.
For xSTRI variants, the new ECX value is placed in the 32 bits
pointed to by *resV, and the top 96 bits are zeroed. For xSTRM
}
+/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
+ variants on 16-bit characters.
+
+ For xSTRI variants, the new ECX value is placed in the 32 bits
+ pointed to by *resV, and the top 96 bits are zeroed. For xSTRM
+ variants, the result is a 128 bit value and is placed at *resV in
+ the obvious way.
+
+ For all variants, the new OSZACP value is placed at *resOSZACP.
+
+ argLV and argRV are the vector args. The caller must prepare a
+ 8-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
+ must be 1 for each zero byte of of the respective arg. For ESTRx
+ variants this is derived from the explicit length indication, and
+ must be 0 in all places except at the bit index corresponding to
+ the valid length (0 .. 8). If the valid length is 8 then the
+ mask must be all zeroes. In all cases, bits 31:8 must be zero.
+
+ imm8 is the original immediate from the instruction. isSTRM
+ indicates whether this is a xSTRM or xSTRI variant, which controls
+ how much of *res is written.
+
+ If the given imm8 case can be handled, the return value is True.
+ If not, False is returned, and neither *res not *resOSZACP are
+ altered.
+*/
+
+Bool compute_PCMPxSTRx_wide ( /*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ V128* argLV, V128* argRV,
+ UInt zmaskL, UInt zmaskR,
+ UInt imm8, Bool isxSTRM )
+{
+ vassert(imm8 < 0x80);
+ vassert((zmaskL >> 8) == 0);
+ vassert((zmaskR >> 8) == 0);
+
+ /* Explicitly reject any imm8 values that haven't been validated,
+ even if they would probably work. Life is too short to have
+ unvalidated cases in the code base. */
+ switch (imm8) {
+ case 0x01:
+ case 0x03: case 0x09: case 0x0B: case 0x0D: case 0x13:
+ case 0x1B: case 0x39: case 0x3B: case 0x45: case 0x4B:
+ break;
+ default:
+ return False;
+ }
+
+ UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
+ UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
+ UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
+ UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
+
+ /*----------------------------------------*/
+ /*-- strcmp on wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 2/*equal each, aka strcmp*/
+ && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
+ Int i;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolResII = 0;
+ for (i = 7; i >= 0; i--) {
+ UShort cL = argL[i];
+ UShort cR = argR[i];
+ boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
+ }
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+
+ // do invalidation, common to all equal-each cases
+ UInt intRes1
+ = (boolResII & validL & validR) // if both valid, use cmpres
+ | (~ (validL | validR)); // if both invalid, force 1
+ // else force 0
+ intRes1 &= 0xFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- set membership on wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 0/*equal any, aka find chars in a set*/
+ && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
+ /* argL: the string, argR: charset */
+ UInt si, ci;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+
+ for (si = 0; si < 8; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string.
+ break;
+ UInt m = 0;
+ for (ci = 0; ci < 8; ci++) {
+ if ((validR & (1 << ci)) == 0) break;
+ if (argR[ci] == argL[si]) { m = 1; break; }
+ }
+ boolRes |= (m << si);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- substring search on wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 3/*equal ordered, aka substring search*/
+ && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
+
+ /* argL: haystack, argR: needle */
+ UInt ni, hi;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (hi = 0; hi < 8; hi++) {
+ if ((validL & (1 << hi)) == 0)
+ // run off the end of the haystack
+ break;
+ UInt m = 1;
+ for (ni = 0; ni < 8; ni++) {
+ if ((validR & (1 << ni)) == 0) break;
+ UInt i = ni + hi;
+ if (i >= 8) break;
+ if (argL[i] != argR[ni]) { m = 0; break; }
+ }
+ boolRes |= (m << hi);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- ranges, unsigned wide data --*/
+ /*----------------------------------------*/
+
+ if (agg == 1/*ranges*/
+ && fmt == 1/*uw*/) {
+
+ /* argL: string, argR: range-pairs */
+ UInt ri, si;
+ UShort* argL = (UShort*)argLV;
+ UShort* argR = (UShort*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (si = 0; si < 8; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string
+ break;
+ UInt m = 0;
+ for (ri = 0; ri < 8; ri += 2) {
+ if ((validR & (3 << ri)) != (3 << ri)) break;
+ if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
+ m = 1; break;
+ }
+ }
+ boolRes |= (m << si);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output_wide(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ return False;
+}
+
+
/*---------------------------------------------------------------*/
/*--- end guest_generic_x87.c ---*/
/*---------------------------------------------------------------*/