/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
- RegSet allow)
+ RegSet allow, int32_t ofs)
{
IRIns *ir = IR(ref);
- int32_t ofs = 0;
Reg base;
if (ra_noreg(ir->r) && mayfuse(as, ref)) {
int32_t lim = (ai & 0x04000000) ? 4096 : 256;
if (ir->o == IR_ADD) {
- if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i) > -lim && ofs < lim) {
+ int32_t ofs2;
+ if (irref_isk(ir->op2) &&
+ (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim) {
+ ofs = ofs2;
ref = ir->op1;
- } else {
+ } else if (ofs == 0) {
IRRef lref = ir->op1, rref = ir->op2;
Reg rn, rm;
if ((ai & 0x04000000)) {
return;
}
} else if (ir->o == IR_STRREF) {
+ lua_assert(ofs == 0);
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs += IR(ir->op2)->i;
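
Taken together, this ARM hunk threads a caller-supplied displacement through the fusion: a hiword store can pass ofs = 4 and still fold a constant ADD on top of it, as long as the combined offset stays inside the addressing-mode range picked by the ai & 0x04000000 test (exclusive limit 4096 for word-sized ops, 256 otherwise). A standalone sketch of that folding check, in plain C with illustrative names (fold_ofs is not a LuaJIT function):

#include <stdint.h>
#include <stdio.h>

/* Combine a base displacement with a constant addend; accept it only if
** the result still fits the load/store addressing mode (exclusive lim).
*/
static int fold_ofs(int32_t ofs, int32_t k, int32_t lim, int32_t *out)
{
  int32_t ofs2 = ofs + k;
  if (ofs2 > -lim && ofs2 < lim) { *out = ofs2; return 1; }
  return 0;
}

int main(void)
{
  int32_t r;
  printf("%d\n", fold_ofs(4, 16, 4096, &r));    /* 1: 20 fits a word op. */
  printf("%d\n", fold_ofs(4, 4092, 4096, &r));  /* 0: 4096 is out of range. */
  printf("%d\n", fold_ofs(4, 250, 256, &r));    /* 1: 254 fits a narrow op. */
  return 0;
}
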
static void asm_fstore(ASMState *as, IRIns *ir)
{
- Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
- IRIns *irf = IR(ir->op1);
- Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
- int32_t ofs = field_ofs[irf->op2];
- ARMIns ai = asm_fxstoreins(ir);
- if ((ai & 0x04000000))
- emit_lso(as, ai, src, idx, ofs);
- else
- emit_lsox(as, ai, src, idx, ofs);
+ if (ir->r == RID_SINK) { /* Sink store. */
+ asm_snap_prep(as);
+ } else {
+ Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+ ARMIns ai = asm_fxstoreins(ir);
+ if ((ai & 0x04000000))
+ emit_lso(as, ai, src, idx, ofs);
+ else
+ emit_lsox(as, ai, src, idx, ofs);
+ }
}
static void asm_xload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}
-static void asm_xstore(ASMState *as, IRIns *ir)
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
- rset_exclude(RSET_GPR, src));
+ rset_exclude(RSET_GPR, src), ofs);
}
static void asm_ahuvload(ASMState *as, IRIns *ir)
if (uselo || usehi)
asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
return;
+ } else if ((ir-1)->o == IR_XSTORE) {
+ asm_xstore(as, ir, 4);
+ return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
- case IR_XSTORE: asm_xstore(as, ir); break;
+ case IR_XSTORE: asm_xstore(as, ir, 0); break;
/* Allocations. */
case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
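
The new HIOP branch relies on how a 64 bit value sits in memory: on the little-endian targets this ARM backend supports, the loword lives at offset 0 and the hiword at offset 4, so asm_hiop emits only the hiword store at ofs 4 and the XSTORE dispatch case above still emits the loword at ofs 0. A self-contained check of that equivalence (assumes a little-endian host):

#include <stdint.h>
#include <string.h>
#include <assert.h>

int main(void)
{
  uint64_t v = 0x1122334455667788ULL;
  uint8_t one[8], two[8];
  memcpy(one, &v, 8);                /* A single 64 bit store. */
  uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
  memcpy(two + 0, &lo, 4);           /* Loword XSTORE at ofs 0. */
  memcpy(two + 4, &hi, 4);           /* Hiword (HIOP) store at ofs 4. */
  assert(memcmp(one, two, 8) == 0);  /* Same bytes on little-endian. */
  return 0;
}
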
/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
- RegSet allow)
+ RegSet allow, int32_t ofs)
{
IRIns *ir = IR(ref);
- int32_t ofs = 0;
Reg base;
if (ra_noreg(ir->r) && mayfuse(as, ref)) {
if (ir->o == IR_ADD) {
int32_t ofs2;
- if (irref_isk(ir->op2) && (ofs2 = IR(ir->op2)->i, checki16(ofs2))) {
+ if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
ref = ir->op1;
ofs = ofs2;
}
} else if (ir->o == IR_STRREF) {
int32_t ofs2 = 65536;
+ lua_assert(ofs == 0);
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs2 = ofs + IR(ir->op2)->i;
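
On MIPS the displacement must fit the signed 16 bit immediate of the load/store encodings, which is what checki16() tests in the sources; folding the incoming ofs into the constant can push the sum out of range, hence the recheck against ofs2 rather than the addend alone. A minimal stand-in for the macro and the recheck:

#include <stdint.h>
#include <stdio.h>

/* Mirrors LuaJIT's checki16(): does x fit a signed 16 bit immediate? */
static int fits_i16(int32_t x) { return x == (int32_t)(int16_t)x; }

int main(void)
{
  printf("%d\n", fits_i16(4 + 32760));  /* 1: 32764 still fits. */
  printf("%d\n", fits_i16(4 + 32766));  /* 0: 32770 no longer fits. */
  return 0;
}
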
static void asm_fstore(ASMState *as, IRIns *ir)
{
- Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
- IRIns *irf = IR(ir->op1);
- Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
- int32_t ofs = field_ofs[irf->op2];
- MIPSIns mi = asm_fxstoreins(ir);
- lua_assert(!irt_isfp(ir->t));
- emit_tsi(as, mi, src, idx, ofs);
+ if (ir->r == RID_SINK) { /* Sink store. */
+ asm_snap_prep(as);
+ } else {
+ Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+ MIPSIns mi = asm_fxstoreins(ir);
+ lua_assert(!irt_isfp(ir->t));
+ emit_tsi(as, mi, src, idx, ofs);
+ }
}
static void asm_xload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}
-static void asm_xstore(ASMState *as, IRIns *ir)
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
- rset_exclude(RSET_GPR, src));
+ rset_exclude(RSET_GPR, src), ofs);
}
static void asm_ahuvload(ASMState *as, IRIns *ir)
as->curins--; /* Always skip the loword comparison. */
asm_comp64eq(as, ir);
return;
+ } else if ((ir-1)->o == IR_XSTORE) {
+ as->curins--; /* Handle both stores here. */
+ asm_xstore(as, ir, LJ_LE ? 4 : 0);
+ asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+ return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
- case IR_XSTORE: asm_xstore(as, ir); break;
+ case IR_XSTORE: asm_xstore(as, ir, 0); break;
/* Allocations. */
case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
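
Unlike the ARM branch, the MIPS hiop code is written for either endianness, so it skips the loword instruction and places both halves itself: the hiword goes to +4 on little-endian and +0 on big-endian. In the sources LJ_LE is a compile-time constant; the sketch below just derives the same offsets at runtime:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  union { uint32_t u; uint8_t b[4]; } probe = { 1 };
  int le = (probe.b[0] == 1);              /* Runtime stand-in for LJ_LE. */
  int lo_ofs = le ? 0 : 4, hi_ofs = le ? 4 : 0;
  printf("loword at +%d, hiword at +%d\n", lo_ofs, hi_ofs);
  return 0;
}
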
/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
- RegSet allow)
+ RegSet allow, int32_t ofs)
{
IRIns *ir = IR(ref);
- int32_t ofs = 0;
Reg base;
if (ra_noreg(ir->r) && mayfuse(as, ref)) {
if (ir->o == IR_ADD) {
- if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i, checki16(ofs))) {
+ int32_t ofs2;
+ if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
+ ofs = ofs2;
ref = ir->op1;
- } else {
+ } else if (ofs == 0) {
Reg right, left = ra_alloc2(as, ir, allow);
right = (left >> 8); left &= 255;
emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
return;
}
} else if (ir->o == IR_STRREF) {
+ lua_assert(ofs == 0);
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs += IR(ir->op2)->i;
lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
if (irt_isi8(ir->t))
emit_as(as, PPCI_EXTSB, dest, dest);
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}
-static void asm_xstore(ASMState *as, IRIns *ir)
+static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
IRIns *irb;
- if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
+ if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
/* Fuse BSWAP with XSTORE to stwbrx. */
Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
} else {
Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
- rset_exclude(RSET_GPR, src));
+ rset_exclude(RSET_GPR, src), ofs);
}
}
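
The added ofs == 0 guard matters because the fused form uses stwbrx, an indexed store with no displacement field, so a pending hiword offset rules the fusion out. What the fusion buys is folding the byte swap into the store itself: a byte-reversing store of v equals swapping v first and storing it in native big-endian order. A portable demonstration (store32_bswap and bswap32 are illustrative stand-ins, not LuaJIT functions):

#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Stand-in for PPC stwbrx: store a 32 bit value byte-reversed relative
** to big-endian order, i.e. the low byte goes to the lowest address.
*/
static void store32_bswap(uint8_t *p, uint32_t v)
{
  p[0] = (uint8_t)v; p[1] = (uint8_t)(v >> 8);
  p[2] = (uint8_t)(v >> 16); p[3] = (uint8_t)(v >> 24);
}

static uint32_t bswap32(uint32_t v)
{
  return (v >> 24) | ((v >> 8) & 0xff00u) | ((v & 0xff00u) << 8) | (v << 24);
}

int main(void)
{
  uint8_t a[4], b[4];
  uint32_t v = 0x11223344u;
  store32_bswap(a, v);          /* Fused BSWAP+XSTORE. */
  uint32_t s = bswap32(v);      /* Separate swap... */
  b[0] = (uint8_t)(s >> 24); b[1] = (uint8_t)(s >> 16);  /* ...then a plain */
  b[2] = (uint8_t)(s >> 8);  b[3] = (uint8_t)s;          /* big-endian store. */
  assert(memcmp(a, b, 4) == 0);
  return 0;
}
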
as->curins--; /* Always skip the loword comparison. */
asm_comp64(as, ir);
return;
+ } else if ((ir-1)->o == IR_XSTORE) {
+ as->curins--; /* Handle both stores here. */
+ asm_xstore(as, ir, 0);
+ asm_xstore(as, ir-1, 4);
+ return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
- case IR_XSTORE: asm_xstore(as, ir); break;
+ case IR_XSTORE: asm_xstore(as, ir, 0); break;
/* Allocations. */
case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
}
rset_clear(allow, src);
}
- if (ir->o == IR_FSTORE)
+ if (ir->o == IR_FSTORE) {
asm_fusefref(as, IR(ir->op1), allow);
- else
+ } else {
asm_fusexref(as, ir->op1, allow);
- /* ir->op2 is ignored -- unaligned stores are ok on x86. */
+ if (LJ_32 && ir->o == IR_HIOP) as->mrm.ofs += 4;
+ }
if (ra_hasreg(src)) {
x86Op xo;
switch (irt_type(ir->t)) {
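
On x86 the fused address is kept in as->mrm, so the hiword store needs no second fusion pass: when the instruction is the HIOP twin of a 32 bit XSTORE, the code reuses the operand and bumps the displacement by 4, which is where the [esi+0x8]/[esi+0xc] pair in the comment below comes from. A sketch of that reuse, with MemOp as a hypothetical stand-in for the ModRM state:

#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Hypothetical stand-in for the fused x86 memory operand (base + disp). */
typedef struct { uint8_t *base; int32_t ofs; } MemOp;

static void store_word(MemOp m, uint32_t v) { memcpy(m.base + m.ofs, &v, 4); }

int main(void)
{
  uint8_t buf[16] = {0};
  MemOp m = { buf, 8 };        /* Address fused once, e.g. [esi+0x8]. */
  store_word(m, 0x55667788u);  /* Loword XSTORE. */
  m.ofs += 4;                  /* HIOP: same operand, displacement +4. */
  store_word(m, 0x11223344u);  /* Hiword store, e.g. [esi+0xc]. */
  uint32_t lo, hi;
  memcpy(&lo, buf + 8, 4); memcpy(&hi, buf + 12, 4);
  assert(lo == 0x55667788u && hi == 0x11223344u);
  return 0;
}
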
} else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
asm_comp_int64(as, ir);
return;
+ } else if ((ir-1)->o == IR_XSTORE) {
+ asm_fxstore(as, ir);
+ return;
}
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
** 0105 int HIOP 0103 +0
** 0106 p32 ADD base +16
** 0107 int XSTORE 0106 0104
-** 0108 p32 ADD base +20
-** 0109 int XSTORE 0108 0105
+** 0108 int HIOP 0106 0105
**
** mov eax, [esi+0x8]
** mov ecx, [esi+0xc]
#endif
break;
}
- case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
break;
- case IR_XSTORE: {
-#if LJ_LE
- IRRef hiref = hisubst[ir->op2];
-#else
- IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
-#endif
- split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
- split_ptr(J, oir, ir->op1), hiref);
- break;
- }
case IR_CONV: { /* Conversion to number. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
UNUSED(st);
#endif
break;
case IR_XSTORE:
-#if LJ_LE
- hiref = hisubst[ir->op2];
-#else
- hiref = nir->op2; nir->op2 = hisubst[ir->op2];
-#endif
- split_emit(J, IRTI(IR_XSTORE), split_ptr(J, oir, ir->op1), hiref);
+ split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
break;
case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
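
After the SPLIT changes, both the soft-float path and the 64 bit integer path emit the hiword of an XSTORE as a HIOP that shares the loword's address operand (nir->op1) and carries the hiword substitute; the endian-dependent #if blocks disappear because the backends now pick the offsets themselves. A tiny sketch of the lo/hi decomposition that hisubst represents (the printed mnemonics are illustrative, not lj_opt_split.c output):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint64_t v = 0x1122334455667788ULL;
  uint32_t lo = (uint32_t)v;          /* Kept by the split loword XSTORE. */
  uint32_t hi = (uint32_t)(v >> 32);  /* Carried by hisubst into the HIOP. */
  printf("XSTORE addr+lo_ofs <- 0x%08x\n", (unsigned)lo);
  printf("HIOP   addr+hi_ofs <- 0x%08x\n", (unsigned)hi);
  return 0;
}
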