than mailing the developers (or mailing lists) directly -- bugs that
are not entered into bugzilla tend to get forgotten about or ignored.
+271615 Unhandled instruction "popcnt" on x86
295974 Add SSE4.1 PEXTRD instruction for x86 32 bit
391311 [Patch] Fix arm64 valgrind tests compilation with clang
519223 Recognize ioctl(UFFDIO_*) operations
return delta;
}
+/* Generate an IR sequence that computes the population count (number
+   of 1 bits) of the value held in 'src', and return a fresh IRTemp
+   holding the result.  'ty' is the type of 'src' and of the result,
+   and must be Ity_I16 or Ity_I32.
+
+   The count is built in stages.  Stage k takes the running value x
+   and computes
+      (x & mask[k]) + ((x >> (1 << k)) & mask[k])
+   into a new temporary.  A 16-bit value takes 4 stages, a 32-bit
+   value takes 5. */
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
+{
+   /* Stage masks for the 32-bit case.  The 16-bit case uses the low
+      16 bits of the first four entries. */
+   const Int fieldMask[5]
+      = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF };
+   IRTemp maskTmp[5];
+   IRTemp acc = src;
+   Int    is16, nStages, k;
+
+   vassert(ty == Ity_I16 || ty == Ity_I32);
+   is16    = (ty == Ity_I16);
+   nStages = is16 ? 4 : 5;
+
+   /* Give every stage's mask constant a temporary of its own.  All
+      mask temporaries are created before any stage temporary. */
+   for (k = 0; k < nStages; k++) {
+      maskTmp[k] = newTemp(ty);
+      assign(maskTmp[k], is16 ? mkU16(fieldMask[k] & 0xFFFF)
+                              : mkU32(fieldMask[k]));
+   }
+
+   /* Run the stages; 'acc' names the most recent partial result. */
+   for (k = 0; k < nStages; k++) {
+      IRTemp next = newTemp(ty);
+      if (is16) {
+         assign(next,
+                binop(Iop_Add16,
+                      binop(Iop_And16,
+                            mkexpr(acc),
+                            mkexpr(maskTmp[k])),
+                      binop(Iop_And16,
+                            binop(Iop_Shr16, mkexpr(acc), mkU8(1 << k)),
+                            mkexpr(maskTmp[k]))));
+      } else {
+         assign(next,
+                binop(Iop_Add32,
+                      binop(Iop_And32,
+                            mkexpr(acc),
+                            mkexpr(maskTmp[k])),
+                      binop(Iop_And32,
+                            binop(Iop_Shr32, mkexpr(acc), mkU8(1 << k)),
+                            mkexpr(maskTmp[k]))));
+      }
+      acc = next;
+   }
+   return acc;
+}
/* Generate an IR sequence to do a count-leading-zeroes operation on
the supplied IRTemp, and return a new IRTemp holding the result.
goto decode_success;
}
+ /* F3 0F B8 = POPCNT{W,L}
+ Count the number of 1 bits in the source operand -- the E part of
+ the modrm byte, which is either a register or a memory location --
+ and write the count to the register named by the G part. Only
+ operand sizes 2 and 4 are accepted here.
+ */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xB8
+ && (sz == 2 || sz == 4)) {
+ /*IRType*/ ty = szToITy(sz);
+ IRTemp src = newTemp(ty);
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign(src, getIReg(sz, eregOfRM(modrm)));
+ delta += 3+1; /* the three opcode bytes plus the modrm byte */
+ DIP("popcnt%c %s, %s\n", nameISize(sz),
+ nameIReg(sz, eregOfRM(modrm)),
+ nameIReg(sz, gregOfRM(modrm)));
+ } else {
+ addr = disAMode( &alen, sorb, delta+3, dis_buf );
+ assign(src, loadLE(ty, mkexpr(addr)));
+ delta += 3+alen; /* the three opcode bytes plus alen, as set by disAMode */
+ DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
+ nameIReg(sz, gregOfRM(modrm)));
+ }
+
+ IRTemp result = gen_POPCOUNT(ty, src);
+ putIReg(sz, gregOfRM(modrm), mkexpr(result));
+
+ // Update flags. This is pretty lame .. perhaps can do better
+ // if this turns out to be performance critical.
+ // O S A C P are cleared. Z is set if SRC == 0.
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, /* (SRC == 0), as 0 or 1, shifted to the Z position */
+ binop(Iop_Shl32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32,
+ widenUto32(mkexpr(src)),
+ mkU32(0))),
+ mkU8(X86G_CC_SHIFT_Z))));
+
+ goto decode_success;
+ }
+
/* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
which we can only decode if we're sure this is an AMD cpu that
supports LZCNT, since otherwise it's BSR, which behaves
return r;
}
+/* Build a 32-bit value from four successive randUChar() results.  The
+   first result ends up in the most significant byte, the last in the
+   least significant byte. */
+static UInt randUInt ( void )
+{
+   UInt acc   = 0;
+   Int  nLeft = 4;
+   while (nLeft-- > 0) {
+      acc <<= 8;
+      acc |= (UInt)(0xFF & randUChar());
+   }
+   return acc;
+}
+
static void randV128 ( V128* v )
{
Int i;
}
}
+/* Exercise the 32-bit POPCNT instruction, ten rounds, each round
+   running the register-source form ("r") and then the memory-source
+   form ("m").
+
+   Each case works on a four-word block:
+      block[0]  source operand: zero in the first round, so the
+                zero-source case is always covered; random afterwards
+      block[1]  value loaded into the destination register beforehand
+      block[2]  destination register after the instruction
+      block[3]  EFLAGS after the instruction
+   All four words are printed, block[3] masked with oszacp_mask.
+
+   block[2] and block[3] are filled from randUInt() even though the
+   asm overwrites them.  Those calls must stay: removing them would
+   change the random sequence and therefore the printed values. */
+void test_POPCNTL_x86 ( void )
+{
+   UInt block[4];
+   Int  i;
+   /* EFLAGS bits 11, 7, 6, 4, 2 and 0: O, S, Z, A, P and C. */
+   UInt oszacp_mask = 0x8D5;
+   for (i = 0; i < 10; i++) {
+      /* Register-source form. */
+      block[0] = i == 0 ? 0 : (UInt)randUInt();
+      block[1] = (UInt)randUInt();
+      block[2] = (UInt)randUInt();
+      block[3] = (UInt)randUInt();
+      __asm__ __volatile__(
+         "movl %0, %%eax"         "\n\t"
+         "movl 0(%%eax), %%edi"   "\n\t"
+         "movl 4(%%eax), %%ecx"   "\n\t"
+         "popcntl %%edi, %%ecx"   "\n\t"
+         "movl %%ecx, 8(%%eax)"   "\n\t"
+         "pushf"                  "\n\t"
+         "popl %%edx"             "\n\t"
+         "movl %%edx, 12(%%eax)"  "\n"
+         : /*out*/
+         : /*in*/"r"(&block[0])
+           /* %eax is overwritten by the first instruction, so it has
+              to be listed here along with the other scratch regs. */
+         : /*trash*/ "cc", "memory", "eax", "edi", "ecx", "edx"
+      );
+      printf("r popcntl %08x %08x %08x %08x\n",
+             block[0], block[1], block[2], block[3] & oszacp_mask);
+
+      /* Memory-source form. */
+      block[0] = i == 0 ? 0 : (UInt)randUInt();
+      block[1] = (UInt)randUInt();
+      block[2] = (UInt)randUInt();
+      block[3] = (UInt)randUInt();
+      __asm__ __volatile__(
+         "movl %0, %%eax"           "\n\t"
+         "movl 4(%%eax), %%ecx"     "\n\t"
+         "popcntl 0(%%eax), %%ecx"  "\n\t"
+         "movl %%ecx, 8(%%eax)"     "\n\t"
+         "pushf"                    "\n\t"
+         "popl %%edx"               "\n\t"
+         "movl %%edx, 12(%%eax)"    "\n"
+         : /*out*/
+         : /*in*/"r"(&block[0])
+           /* %eax is overwritten by the first instruction. */
+         : /*trash*/ "cc", "memory", "eax", "ecx", "edx"
+      );
+      printf("m popcntl %08x %08x %08x %08x\n",
+             block[0], block[1], block[2], block[3] & oszacp_mask);
+   }
+}
+
+/* Exercise the 16-bit POPCNT instruction, ten rounds, each round
+   running the register-source form ("r") and then the memory-source
+   form ("m").
+
+   Each case works on a four-word block:
+      block[0]  source operand (only its low 16 bits are counted):
+                zero in the first round, so the zero-source case is
+                always covered; random afterwards
+      block[1]  value loaded into %ecx beforehand
+      block[2]  %ecx after the instruction; only %cx is written, so
+                the upper half is still the upper half of block[1]
+      block[3]  EFLAGS after the instruction
+   All four words are printed, block[3] masked with oszacp_mask.
+
+   block[2] and block[3] are filled from randUInt() even though the
+   asm overwrites them.  Those calls must stay: removing them would
+   change the random sequence and therefore the printed values. */
+void test_POPCNTW_x86 ( void )
+{
+   UInt block[4];
+   Int  i;
+   /* EFLAGS bits 11, 7, 6, 4, 2 and 0: O, S, Z, A, P and C. */
+   UInt oszacp_mask = 0x8D5;
+   for (i = 0; i < 10; i++) {
+      /* Register-source form. */
+      block[0] = i == 0 ? 0 : (UInt)randUInt();
+      block[1] = (UInt)randUInt();
+      block[2] = (UInt)randUInt();
+      block[3] = (UInt)randUInt();
+      __asm__ __volatile__(
+         "movl %0, %%eax"         "\n\t"
+         "movl 0(%%eax), %%edi"   "\n\t"
+         "movl 4(%%eax), %%ecx"   "\n\t"
+         "popcntw %%di, %%cx"     "\n\t"
+         "movl %%ecx, 8(%%eax)"   "\n\t"
+         "pushf"                  "\n\t"
+         "popl %%edx"             "\n\t"
+         "movl %%edx, 12(%%eax)"  "\n"
+         : /*out*/
+         : /*in*/"r"(&block[0])
+           /* %eax is overwritten by the first instruction, so it has
+              to be listed here along with the other scratch regs. */
+         : /*trash*/ "cc", "memory", "eax", "edi", "ecx", "edx"
+      );
+      printf("r popcntw %08x %08x %08x %08x\n",
+             block[0], block[1], block[2], block[3] & oszacp_mask);
+
+      /* Memory-source form. */
+      block[0] = i == 0 ? 0 : (UInt)randUInt();
+      block[1] = (UInt)randUInt();
+      block[2] = (UInt)randUInt();
+      block[3] = (UInt)randUInt();
+      __asm__ __volatile__(
+         "movl %0, %%eax"          "\n\t"
+         "movl 4(%%eax), %%ecx"    "\n\t"
+         "popcntw 0(%%eax), %%cx"  "\n\t"
+         "movl %%ecx, 8(%%eax)"    "\n\t"
+         "pushf"                   "\n\t"
+         "popl %%edx"              "\n\t"
+         "movl %%edx, 12(%%eax)"   "\n"
+         : /*out*/
+         : /*in*/"r"(&block[0])
+           /* %eax is overwritten by the first instruction. */
+         : /*trash*/ "cc", "memory", "eax", "ecx", "edx"
+      );
+      printf("m popcntw %08x %08x %08x %08x\n",
+             block[0], block[1], block[2], block[3] & oszacp_mask);
+   }
+}
+
/* ------------ main ------------ */
int main(void)
test_ROUNDSS_w_mxcsr_rounding();
test_PEXTRD();
test_PACKUSDW();
+ test_POPCNTL_x86();
+ test_POPCNTW_x86();
return 0;
}
m packusdw 5682a3b8d4b76745cdd370da388f06c9 8226fb36fbcd8ee17ddf8c850d83549f ffff00000000ffff00000000ffffffff
r packusdw 00008877000066550000443300002211 0000b2a10000ffee0000ddcc0000bbaa 8877665544332211b2a1ffeeddccbbaa
m packusdw 00008877000066550000443300002211 0000b2a10000ffee0000ddcc0000bbaa 8877665544332211b2a1ffeeddccbbaa
+r popcntl 00000000 9be26277 00000000 00000040
+m popcntl 00000000 520ad0fe 00000000 00000040
+r popcntl d1182f88 29619ba4 0000000d 00000000
+m popcntl 108ae014 3a6fff31 00000009 00000000
+r popcntl 659a1c5f 6283d740 00000011 00000000
+m popcntl 357f852b decd5b4c 00000012 00000000
+r popcntl 48501d1c 4c5c25b2 0000000b 00000000
+m popcntl c10546b6 a9202fac 0000000d 00000000
+r popcntl 2676c05c 52e8d255 0000000e 00000000
+m popcntl 5e59ae52 0664c8ab 00000011 00000000
+r popcntl ad4790bb e2242b87 00000011 00000000
+m popcntl b9b7499a 639673a6 00000012 00000000
+r popcntl 87001ad4 670a7aa3 0000000b 00000000
+m popcntl 7d5aa42b 2bb17af9 00000011 00000000
+r popcntl 60dde943 4d980c04 00000010 00000000
+m popcntl 577f4aa0 cab12900 00000011 00000000
+r popcntl e51989a5 00c92c08 0000000f 00000000
+m popcntl f262c795 ad92ce17 00000011 00000000
+r popcntl c2f18695 ed9a280a 0000000f 00000000
+m popcntl fb3ea8a7 3e50b49a 00000014 00000000
+r popcntw 00000000 a2a16db0 a2a10000 00000040
+m popcntw 00000000 ab99c6e4 ab990000 00000040
+r popcntw 05e44ed5 13fb5873 13fb0009 00000000
+m popcntw be8982a6 f7359876 f7350006 00000000
+r popcntw 6d59520d 85a83116 85a80006 00000000
+m popcntw c79d315f 4a932c60 4a930009 00000000
+r popcntw e27cf1cd 3516f23f 3516000a 00000000
+m popcntw 33fdc26d 92304d7f 92300008 00000000
+r popcntw 90073631 0fc067cb 0fc00007 00000000
+m popcntw 2e633eec ba86c9af ba86000a 00000000
+r popcntw a2b72d53 00225c97 00220008 00000000
+m popcntw e38bb3f9 ae136acd ae13000b 00000000
+r popcntw 4548e252 f2b99d7d f2b90007 00000000
+m popcntw 7e332cb0 5a52fdb3 5a520006 00000000
+r popcntw a4756248 d3fff55a d3ff0005 00000000
+m popcntw 2d15b62c aabf4e3e aabf0008 00000000
+r popcntw ecfbb851 8d72310a 8d720007 00000000
+m popcntw 19ee5c8a 8ad7294a 8ad70007 00000000
+r popcntw 4996f18a 0e8d1e69 0e8d0008 00000000
+m popcntw 717a2ae4 e71459b3 e7140007 00000000