/* Rename register allocation and emit move. */
IRRef ren;
lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
ren = tref_ref(lj_ir_emit(as->J));
- as->ir = as->T->ir; /* The IR may have been reallocated. */
- IR(ren)->r = (uint8_t)down;
- IR(ren)->s = SPS_NONE;
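+ /* lj_ir_emit() appends to J->cur.ir, which may just have been reallocated.
+ ** Patch the RENAME via J->cur.ir directly: as->ir points at the immovable
+ ** IR copy in J->curfinal, which doesn't hold this instruction yet. */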
+ as->J->cur.ir[ren].r = (uint8_t)down;
+ as->J->cur.ir[ren].s = SPS_NONE;
}
} else {
/* Process any renames above the highwater mark. */
for (; as->snaprename < as->T->nins; as->snaprename++) {
- IRIns *ir = IR(as->snaprename);
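+ /* RENAMEs are emitted into J->cur.ir (as->T->ir), not into the IR copy
+ ** that as->ir points at, so the IR() macro must not be used here. */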
+ IRIns *ir = &as->T->ir[as->snaprename];
if (asm_snap_checkrename(as, ir->op1))
ir->op2 = REF_BIAS-1; /* Kill rename. */
}
ir = IR(nins-1);
if (ir->o == IR_RENAME) {
+ /* Remove any renames left over from ASM restart due to LJ_TRERR_MCODELM. */
do { ir--; nins--; } while (ir->o == IR_RENAME);
- T->nins = nins; /* Remove any renames left over from ASM restart. */
+ T->nins = nins;
}
as->snaprename = nins;
as->snapref = nins;
MCode *origtop;
/* Ensure an initialized instruction beyond the last one for HIOP checks. */
- J->cur.nins = lj_ir_nextins(J);
- J->cur.ir[J->cur.nins].o = IR_NOP;
+ /* This also allows one RENAME to be added without reallocating curfinal. */
+ as->orignins = lj_ir_nextins(J);
+ J->cur.ir[as->orignins].o = IR_NOP;
/* Setup initial state. Copy some fields to reduce indirections. */
as->J = J;
as->T = T;
- as->ir = T->ir;
+ J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
as->flags = J->flags;
as->loopref = J->loopref;
as->realign = NULL;
as->mclim = as->mcbot + MCLIM_REDZONE;
asm_setup_target(as);
- do {
+ /*
+ ** This is a loop, because the MCode may have to be (re-)assembled
+ ** multiple times:
+ **
+ ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+ ** backend wants the MCode to be aligned differently.
+ **
+ ** This is currently only the case on x86/x64, where small loops get
+ ** an aligned loop body plus a short branch. Not much effort is wasted,
+ ** because the abort happens very quickly and only once.
+ **
+ ** 2. The IR is immovable, since the MCode embeds pointers to various
+ ** constants inside the IR. But RENAMEs may need to be added to the IR
+ ** during assembly, which might grow and reallocate the IR. We check
+ ** at the end if the IR (in J->cur.ir) has actually grown, resize the
+ ** copy (in J->curfinal.ir) and try again.
+ **
+ ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+ ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
+ ** always have one spare slot in the IR (see above), which means we
+ ** have to redo the assembly for only ~2% of all traces.
+ **
+ ** Very, very rarely, this needs to be done repeatedly, since the
+ ** location of constants inside the IR (actually, reachability from
+ ** a global pointer) may affect register allocation and thus the
+ ** number of RENAMEs.
+ */
+ for (;;) {
as->mcp = as->mctop;
#ifdef LUA_USE_ASSERT
as->mcp_prev = as->mcp;
#endif
- as->curins = T->nins;
+ as->ir = J->curfinal->ir; /* Use the copied IR. */
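+ /* Restart at the pristine instruction count. This discards any RENAMEs
+ ** emitted into J->cur.ir by a previous assembly attempt. */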
+ as->curins = J->cur.nins = as->orignins;
+
RA_DBG_START();
RA_DBGX((as, "===== STOP ====="));
checkmclim(as);
asm_ir(as, ir);
}
- } while (as->realign); /* Retry in case the MCode needs to be realigned. */
- /* Emit head of trace. */
- RA_DBG_REF();
- checkmclim(as);
- if (as->gcsteps > 0) {
- as->curins = as->T->snap[0].ref;
- asm_snap_prep(as); /* The GC check is a guard. */
- asm_gc_check(as);
- }
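+ /* If the IR grew, fall through instead, so the bigger copy in curfinal
+ ** is reallocated at the bottom of the loop before retrying. */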
+ if (as->realign && J->curfinal->nins >= T->nins)
+ continue; /* Retry in case only the MCode needs to be realigned. */
+
+ /* Emit head of trace. */
+ RA_DBG_REF();
+ checkmclim(as);
+ if (as->gcsteps > 0) {
+ as->curins = as->T->snap[0].ref;
+ asm_snap_prep(as); /* The GC check is a guard. */
+ asm_gc_check(as);
+ }
+ ra_evictk(as);
+ if (as->parent)
+ asm_head_side(as);
+ else
+ asm_head_root(as);
+ asm_phi_fixup(as);
+
+ if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
+ lua_assert(J->curfinal->nk == T->nk);
+ memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+ (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
+ T->nins = J->curfinal->nins;
+ break; /* Done. */
+ }
+
+ /* Otherwise try again with a bigger IR. */
+ lj_trace_free(J2G(J), J->curfinal);
+ J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
+ J->curfinal = lj_trace_alloc(J->L, T);
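+ /* Clear any pending realignment request; the retry sets it again if the
+ ** backend still wants realigned MCode. */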
+ as->realign = NULL;
+ }
- ra_evictk(as);
- if (as->parent)
- asm_head_side(as);
- else
- asm_head_root(as);
- asm_phi_fixup(as);
RA_DBGX((as, "===== START ===="));
RA_DBG_FLUSH();
}
#endif
-/* Allocate space for copy of trace. */
-static GCtrace *trace_save_alloc(jit_State *J)
+/* Allocate space for copy of T. */
+GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
{
size_t sztr = ((sizeof(GCtrace)+7)&~7);
- size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns);
+ size_t szins = (T->nins-T->nk)*sizeof(IRIns);
size_t sz = sztr + szins +
- J->cur.nsnap*sizeof(SnapShot) +
- J->cur.nsnapmap*sizeof(SnapEntry);
- return lj_mem_newt(J->L, (MSize)sz, GCtrace);
+ T->nsnap*sizeof(SnapShot) +
+ T->nsnapmap*sizeof(SnapEntry);
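+ /* A single allocation holds the GCtrace header, the IR copy and space
+ ** for the snapshot and snapshot map vectors. */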
+ GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
+ char *p = (char *)T2 + sztr;
+ T2->gct = ~LJ_TTRACE;
+ T2->marked = 0;
+ T2->traceno = 0;
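+ /* Pre-bias the IR pointer, so T2->ir[ref] works for refs nk..nins-1. */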
+ T2->ir = (IRIns *)p - T->nk;
+ T2->nins = T->nins;
+ T2->nk = T->nk;
+ T2->nsnap = T->nsnap;
+ T2->nsnapmap = T->nsnapmap;
+ memcpy(p, T->ir + T->nk, szins);
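+ /* Only the IR is copied here; trace_save() appends the snapshots later. */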
+ return T2;
}
/* Save current trace by copying and compacting it. */
setgcrefp(J2G(J)->gc.root, T);
newwhite(J2G(J), T);
T->gct = ~LJ_TTRACE;
- T->ir = (IRIns *)p - J->cur.nk;
- memcpy(p, J->cur.ir+J->cur.nk, szins);
+ T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
p += szins;
TRACE_APPENDVEC(snap, nsnap, SnapShot)
TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
J->cur.traceno = 0;
+ J->curfinal = NULL;
setgcrefp(J->trace[T->traceno], T);
lj_gc_barriertrace(J2G(J), T->traceno);
lj_gdbjit_addtrace(J, T);
BCOp op = bc_op(J->cur.startins);
GCproto *pt = &gcref(J->cur.startpt)->pt;
TraceNo traceno = J->cur.traceno;
- GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */
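+ /* Allocated by lj_asm_trace() before assembly, so trace_stop() can no
+ ** longer run out of memory here. */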
+ GCtrace *T = J->curfinal;
lua_State *L;
switch (op) {
J->postproc = LJ_POST_NONE;
lj_mcode_abort(J);
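+ /* Free the speculatively allocated trace copy after an abort. */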
+ if (J->curfinal) {
+ lj_trace_free(J2G(J), J->curfinal);
+ J->curfinal = NULL;
+ }
if (tvisnumber(L->top-1))
e = (TraceError)numberVint(L->top-1);
if (e == LJ_TRERR_MCODELM) {