/* Bitwise OR and XOR are assembled by asm_intarith, selected by the
** XOg_OR / XOg_XOR arithmetic group passed as the third argument.
*/
#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
/* Assemble a shift or rotate IR instruction.
**
** NOTE: this is a patch excerpt. Lines starting with '-' are removed, lines
** starting with '+' are added, and parts of the body (including the opening
** test of the constant-count branch and most of the final cl-based branch)
** are not shown here.
**
** as  Assembler state; as->flags is tested for JIT_F_BMI2.
** ir  IR instruction: op1 is the value operand, op2 the shift count.
** xs  x86 shift group (XOg_*) used for the non-VEX encodings.
** xv  VEX opcode for a BMI2 variable-count shift, or 0. The callers visible
**     in this excerpt pass 0 for rotates.
**
** Paths visible in this excerpt:
** - Constant count with BMI2, xv == 0 and a nonzero masked count: RORX with
**   an immediate, but only if the fused left operand is not dest.
** - Constant count otherwise: nothing for 0, XO_SHIFT1 for 1, else
**   emit_shifti with the masked count.
** - Variable count with BMI2 and xv != 0: three-operand VEX shift with the
**   count in any GPR.
** - Variable count otherwise: dest is allocated outside of ecx.
*/
-static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
{
IRRef rref = ir->op2;
IRIns *irr = IR(rref);
int shift;
dest = ra_dest(as, ir, RSET_GPR);
/* Only the low 6 bits (64 bit IR types) or 5 bits of the count are kept. */
shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
/* xv == 0 selects the rotate case; a zero count skips RORX entirely. */
+ if (!xv && shift && (as->flags & JIT_F_BMI2)) {
+ Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
+ if (left != dest) { /* BMI2 rotate right by constant. */
/* A left rotate is expressed as a right rotate by the negated count.
** emit_i8 stores below the current as->mcp, so the immediate is written
** before the opcode bytes (presumably emit_mrm also emits downwards).
*/
+ emit_i8(as, xs == XOg_ROL ? -shift : shift);
+ emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
+ return;
+ }
+ }
/* Non-VEX encoding; also reached when the RORX path found left == dest. */
switch (shift) {
case 0: break;
case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
}
+ } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
+ Reg left, right;
+ dest = ra_dest(as, ir, RSET_GPR);
+ right = ra_alloc1(as, rref, RSET_GPR);
/* The value operand must not be placed in the count register. */
+ left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
+ irt_is64(ir->t));
/* The count register number is XORed into the opcode word starting at
** bit 19 (presumably the inverted VEX.vvvv field -- confirm against the
** VEX prefix layout).
*/
+ emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
+ return;
} else { /* Variable shifts implicitly use register cl (i.e. ecx). */
Reg right;
dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
*/
}
/* Per-opcode entry points for shifts and rotates. Each one forwards to
** asm_bitshift with its x86 shift group (XOg_*). The added fourth argument
** is the VEX opcode for the BMI2 variable-count form (XV_SHLX, XV_SHRX,
** XV_SARX). Rotates pass 0, so asm_bitshift only considers BMI2 for them on
** its constant-count RORX path.
*/
-#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL)
-#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR)
-#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR)
-#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL)
-#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR)
+#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
+#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
+#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
/* -- Comparisons --------------------------------------------------------- */
if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX 0x200
#define REX_64 (FORCE_REX|0x080000)
+#define VEX_64 0x800000
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
#define REX_64 0
+#define VEX_64 0
#endif
/* Emit an immediate: pre-decrement as->mcp and store i, converted to MCode,
** at the new position. The expression yields the stored value. Presumably
** MCode is a single byte, as the i8 suffix suggests -- confirm.
*/
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
MCode *p, int delta)
{
int n = (int8_t)xo;
+ if (n == -60) { /* VEX-encoded instruction */
+#if LJ_64
+ xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
+#endif
+ *(uint32_t *)(p+delta-5) = (uint32_t)xo;
+ return p+delta-5;
+ }
#if defined(__GNUC__)
if (__builtin_constant_p(xo) && n == -2)
p[delta-2] = (MCode)(xo >> 24);
/* Use 64 bit operations to handle 64 bit IR types.
**
** REX_64IR(ir, r) adds REX_64 to r, and VEX_64IR(ir, r) adds VEX_64 to r,
** when irt_is64() holds for the type of ir. Without LJ_64 both macros expand
** to r unchanged and ir is not evaluated.
*/
#if LJ_64
#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
+#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
#else
#define REX_64IR(ir, r) (r)
+#define VEX_64IR(ir, r) (r)
#endif
/* Generic move between two regs. */
/* Opcode word constructors. The two hex digits o are pasted into a hex
** literal and placed in bits 24..31; the low 24 bits are a fixed pattern.
**
** XO_f20f / XO_f30f: low bytes 0xfc, then 0xf2 or 0xf3, then 0x0f.
**
** XV_*: low byte 0xc4, which as int8_t is -60, the value emit_op tests for
** to recognize a VEX-encoded instruction. Read against the Intel VEX prefix
** layout, the second byte (0xe2 / 0xe3) selects the 0f38 / 0f3a opcode map
** and the third byte (0x79 / 0x7a / 0x7b) carries the 66 / f3 / f2 prefix
** selector named in each macro.
*/
#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
+#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24)))
+#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24)))
+#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24)))
+#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24)))
+
/* This list of x86 opcodes is not intended to be complete. Opcodes are only
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
** whole mess.
XI_FSCALE = 0xfdd9,
XI_FYL2X = 0xf1d9,
+ /* VEX-encoded instructions. XV_* prefix. */
+ XV_RORX = XV_f20f3a(f0),
+ XV_SARX = XV_f30f38(f7),
+ XV_SHLX = XV_660f38(f7),
+ XV_SHRX = XV_f20f38(f7),
+
/* Variable-length opcodes. XO_* prefix. */
XO_MOV = XO_(8b),
XO_MOVto = XO_(89),