<tr class="even">
<td class="param_name">recunroll</td><td class="param_default">2</td><td class="param_desc">Min. unroll factor for true recursion</td></tr>
<tr class="odd separate">
-<td class="param_name">sizemcode</td><td class="param_default">32</td><td class="param_desc">Size of each machine code area in KBytes (Windows: 64K)</td></tr>
+<td class="param_name">sizemcode</td><td class="param_default">64</td><td class="param_desc">Size of each machine code area in KBytes</td></tr>
<tr class="even">
-<td class="param_name">maxmcode</td><td class="param_default">512</td><td class="param_desc">Max. total size of all machine code areas in KBytes</td></tr>
+<td class="param_name">maxmcode</td><td class="param_default">2048</td><td class="param_desc">Max. total size of all machine code areas in KBytes</td></tr>
</table>
<br class="flush">
</div>
size_t len = *(const uint8_t *)lst;
lj_assertJ(len != 0, "bad JIT_P_STRING");
if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
- int32_t n = 0;
+ uint32_t n = 0;
const char *p = &str[len+1];
while (*p >= '0' && *p <= '9')
n = n*10 + (*p++ - '0');
- if (*p) return 0; /* Malformed number. */
- J->param[i] = n;
+ if (*p || (int32_t)n < 0) return 0; /* Malformed number. */
+ if (i == JIT_P_sizemcode) { /* Adjust to required range here. */
+#if LJ_TARGET_JUMPRANGE
+ /* Jump-range window minus the 64K corners, in KBytes. */
+ uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64);
+#else
+ uint32_t maxkb = ((1 << (31 - 10)) - 64); /* +-2GB relative addressing. */
+#endif
+ /* Round up to the page size (in KBytes). */
+ n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1);
+ if (n > maxkb) n = maxkb;
+ }
+ J->param[i] = (int32_t)n;
if (i == JIT_P_hotloop)
lj_dispatch_init_hotcount(J2G(J));
return 1; /* Ok. */
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_TARGET_GC64 1
+#define LJ_PAGESIZE 16384
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#define LJ_ARCH_VERSION 80
#define LJ_TARGET_MIPS 1
#define LJ_TARGET_EHRETREG 4
#define LJ_TARGET_EHRAREG 31
-#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
+#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
MCode *invmcp; /* Points to invertible loop branch (or NULL). */
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */
+ MCode *mctail; /* Tail of trace before stack adjust + jmp. */
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
+ MCode *mcexit; /* Pointer to exit stubs. */
+#endif
#ifdef LUAJIT_RANDOM_RA
/* Randomize register allocation. OK for fuzz testing, not for production. */
RA_DBGX((as, "===== STOP ====="));
/* General trace setup. Emit tail of trace. */
- asm_tail_prep(as);
+ asm_tail_prep(as, T->link);
as->mcloop = NULL;
as->flagmcp = NULL;
as->topslot = 0;
/* Generate an exit stub group at the bottom of the reserved MCode memory. */
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
+ ExitNo i;
+ int ind = 0;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mcbot;
- int i;
- if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
+ if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop)
asm_mclimit(as);
- /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */
- *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP);
- *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu);
- mxp++;
+ if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) {
+ /* str lr, [sp]; bl ->vm_exit_handler;
+ ** .long DISPATCH_address, group.
+ */
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++;
+ } else if ((as->flags & JIT_F_ARMV6T2)) {
+ /* str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr;
+ ** .long DISPATCH_address, group.
+ */
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
+ ind = 2;
+ } else {
+ /* .long vm_exit_handler;
+ ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr;
+ ** .long DISPATCH_address, group.
+ */
+ *mxp++ = (MCode)target;
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16;
+ *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
+ ind = 1;
+ }
*mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */
*mxp++ = group*EXITSTUBS_PER_GROUP;
for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
- *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
+ *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu);
lj_mcode_sync(as->mcbot, mxp);
lj_mcode_commitbot(as->J, mxp);
as->mcbot = mxp;
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
- MCode *target;
+ MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- as->mctop = --p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
uint32_t k = emit_isk12(ARMI_ADD, spadj);
lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
- p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ }
+ /* Emit exit branch. */
+ if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) {
+ *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++;
+ } else if ((as->flags & JIT_F_ARMV6T2)) {
+ *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mcp++ = ARMI_BX | ARMF_M(RID_LR);
+ } else {
+ *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0;
+ *mcp++ = ARMI_BX | ARMF_M(RID_LR);
+ *mcp++ = (MCode)target;
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu);
+ while (as->mctop > mcp) *--as->mctop = ARMI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ MCode *target = (MCode *)(void *)lj_vm_exit_interp;
+ if ((((target - p - 2) + 0x00800000u) >> 24) ||
+ (((target - p - 1) + 0x00800000u) >> 24)) p -= 2;
+ }
+ p--; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
*p = 0; /* Prevent load/store merging. */
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
ExitNo i;
+ int ind;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
asm_mclimit(as);
- /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+ ind = !A64F_S_OK(target - (mxp - nexits - 2), 26);
+ /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno;
+ ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno;
+ ** bl <1; bl <1; ...
+ */
for (i = nexits-1; (int32_t)i >= 0; i--)
- *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
+ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i));
+ as->mcexit = mxp;
*--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
- mxp--;
- *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
+ if (ind) {
+ *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR));
+ *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3));
+ } else {
+ mxp--;
+ *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp));
+ }
*--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
as->mctop = mxp;
}
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
/* Keep this in-sync with exitstub_trace_addr(). */
- return as->mctop + exitno + 3;
+ return as->mcexit + exitno;
}
/* Emit conditional branch to exit for guard. */
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
+ MCode *mcp = as->mctail;
MCode *target;
/* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
- if (spadj == 0) {
- *--p = A64I_LE(A64I_NOP);
- as->mctop = p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
uint32_t k = emit_isk12(spadj);
lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
- p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
+ *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = A64I_B | A64F_S26((target-p)+1);
+ /* Emit exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ if (lnk || A64F_S_OK(target - mcp, 26)) {
+ *mcp = A64I_B | A64F_S26(target - mcp); mcp++;
+ } else {
+ *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3);
+ *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR);
+ }
+ while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP);
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ MCode *target = (MCode *)(void *)lj_vm_exit_interp;
+ if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--;
+ }
+ p--; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
*p = 0; /* Prevent load/store merging. */
}
/* Setup exit stub after the end of each trace. */
static void asm_exitstub_setup(ASMState *as)
{
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
- *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
- *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
- lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0,
- "branch target out of range");
- *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
+ *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno;
+ if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) {
+ /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
+ *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu);
+ } else {
+ /* sw TMP, 0(sp); lw/ld TMP, K*_VXH(jgl); jr TMP; li TMP, traceno */
+ *--mxp = MIPSI_JR | MIPSF_S(RID_TMP);
+ *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
+#if LJ_64
+ jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]);
+#else
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
+#endif
+ }
+ *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0;
as->mctop = mxp;
}
{
/* The modified regs must match with the *.dasc implementation. */
RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
- RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
+ RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)|
+ RID2RSET(RID_CFUNCADDR)
#if LJ_TARGET_MIPSR6
|RID2RSET(RID_F21)
#endif
{
/* The modified regs must match with the *.dasc implementation. */
RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
- RID2RSET(RID_R1)|RID2RSET(RID_R12);
+ RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR);
if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
ra_evictset(as, drop);
/* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
+ uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- MCode *p = as->mctop-1;
- *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
- p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
+ if (((uintptr_t)mcp ^ target) >> 28 == 0) {
+ *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu);
+ } else {
+ *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
+#if LJ_64
+ jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]);
+#else
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
+#endif
+ *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP);
+ }
+ *mcp++ = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
- as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */
- as->invmcp = as->loopref ? as->mcp : NULL;
+ as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. */
+ if (as->loopref) {
+ as->invmcp = as->mcp;
+ } else {
+ if (!lnk) {
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
+ if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--;
+ }
+ as->invmcp = NULL;
+ }
+ as->mctail = as->mcp;
}
/* -- Trace setup --------------------------------------------------------- */
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
ExitNo i;
+ int ind;
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
asm_mclimit(as);
- /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
+ ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0;
+ /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno;
+ ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno;
+ ** bl <1; bl <1; ...
+ */
for (i = nexits-1; (int32_t)i >= 0; i--)
- *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
+ *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2);
+ as->mcexit = mxp;
*--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */
- mxp--;
- *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
- *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);
+ if (ind) {
+ *--mxp = PPCI_BCTRL;
+ *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
+ *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP);
+ *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
+ } else {
+ mxp--;
+ *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu);
+ *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
+ }
as->mctop = mxp;
}
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
/* Keep this in-sync with exitstub_trace_addr(). */
- return as->mctop + exitno + 3;
+ return as->mcexit + exitno;
}
/* Emit conditional branch to exit for guard. */
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
- MCode *target;
+ uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- *--p = PPCI_NOP;
- *--p = PPCI_NOP;
- as->mctop = p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
- p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
- p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
+ *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
+ *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
+ /* Emit exit branch. */
+ if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) {
+ *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++;
+ } else {
+ *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
+ *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP);
+ *mcp++ = PPCI_BCTR;
+ }
+ while (as->mctop > mcp) *--as->mctop = PPCI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-2; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
+ if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) ||
+ (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2;
+ }
+ p -= 2; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mcbot;
MCode *mxpstart = mxp;
- if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop)
+ if (mxp + ((2+2)*EXITSTUBS_PER_GROUP +
+ (LJ_GC64 ? 0 : 8) +
+ (LJ_64 ? 6 : 5)) >= as->mctop)
asm_mclimit(as);
/* Push low byte of exitno for each exit stub. */
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
*(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
#endif
/* Jump to exit handler which fills in the ExitState. */
- *mxp++ = XI_JMP; mxp += 4;
- *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler);
+ if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */
+ *mxp++ = XI_JMP; mxp += 4;
+ *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target);
+ } else { /* RIP-relative indirect jump. */
+ *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4;
+ *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp);
+ }
/* Commit the code for this group (even if assembly fails later on). */
lj_mcode_commitbot(as->J, mxp);
as->mcbot = mxp;
ExitNo i;
if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+#if LJ_64
+ if (as->J->exitstubgroup[0] == NULL) {
+ /* Store the two potentially out-of-range targets below group 0. */
+ MCode *mxp = as->mcbot;
+ while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3;
+ *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8;
+ *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8;
+ as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. */
+ }
+#endif
for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
if (as->J->exitstubgroup[i] == NULL)
as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
"bad interned 64 bit constant");
} else {
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
- *(uint64_t*)as->mcbot = *k;
+ *(uint64_t *)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
p = (MCode *)(void *)ir_k64(irf)->u64;
else
p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
- if (p - as->mcp == (int32_t)(p - as->mcp))
+ if (jmprel_ok(p, as->mcp))
return p; /* Call target is still in +-2GB range. */
/* Avoid the indirect case of emit_call(). Try to hoist func addr. */
}
emit_rr(as, XO_TEST, RID_RET, RID_RET);
args[0] = ASMREF_TMP1; /* global_State *g */
args[1] = ASMREF_TMP2; /* MSize steps */
+ /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. */
+ if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP;
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
#if LJ_GC64
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
/* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
- MCode *p = as->mctop;
- MCode *target, *q;
+ MCode *mcp = as->mctail;
+ MCode *target;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- p -= LJ_64 ? 7 : 6;
- } else {
- MCode *p1;
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
+ if (LJ_64) *mcp++ = 0x48;
if (checki8(spadj)) {
- p -= 3;
- p1 = p-6;
- *p1 = (MCode)spadj;
+ *mcp++ = XI_ARITHi8;
+ *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
+ *mcp++ = (MCode)spadj;
} else {
- p1 = p-9;
- *(int32_t *)p1 = spadj;
+ *mcp++ = XI_ARITHi;
+ *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
+ *(int32_t *)mcp = spadj; mcp += 4;
}
-#if LJ_64
- p1[-3] = 0x48;
-#endif
- p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
- p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- *(int32_t *)(p-4) = jmprel(as->J, p, target);
- p[-5] = XI_JMP;
+ /* Emit exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */
+ *mcp++ = XI_JMP; mcp += 4;
+ *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target);
+ } else { /* RIP-relative indirect jump. */
+ *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4;
+ *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp);
+ }
/* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
- for (q = as->mctop-1; q >= p; q--)
- *q = XI_NOP;
- as->mctop = p;
+ while (as->mctop > mcp) *--as->mctop = XI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop;
/* Realign and leave room for backwards loop branch or exit branch. */
as->mctop = p;
p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */
} else {
- p -= 5; /* Space for exit branch (near jmp). */
+ p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. */
}
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
- as->mcp = p - (LJ_64 ? 7 : 6);
+ /* Leave room for ESP adjustment: add esp, imm */
+ p -= LJ_64 ? 7 : 6;
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
} else if (*p == XI_CALL &&
(void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
pgc = p+7; /* Do not patch GC check exit. */
+ } else if (LJ_64 && *p == 0xff &&
+ p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) &&
+ p[2] == XI_NOP) {
+ pgc = p+5; /* Do not patch GC check exit. */
}
}
lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
return 0; /* Failed. */
}
+#define emit_movw_k(k) \
+ (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4))
+#define emit_movt_k(k) \
+ (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4))
+
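+/* Example: for k = 0x12345678, emit_movw_k(k) encodes the low halfword
+** (movw rd, #0x5678) and emit_movt_k(k) the high halfword (movt rd, #0x1234),
+** so the pair loads the full 32 bit constant into rd.
+*/
+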
/* Load a 32 bit constant into a GPR. */
static void emit_loadi(ASMState *as, Reg rd, int32_t i)
{
emit_d(as, ARMI_MOV^k, rd);
} else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
/* 16 bit loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ emit_d(as, emit_movw_k(i), rd);
} else if (emit_kdelta1(as, rd, i)) {
/* One step delta relative to another constant. */
} else if ((as->flags & JIT_F_ARMV6T2)) {
/* 32 bit hiword/loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ emit_d(as, emit_movt_k(i), rd);
+ emit_d(as, emit_movw_k(i), rd);
} else if (emit_kdelta2(as, rd, i)) {
/* Two step delta relative to another constant. */
} else {
/* -- Emit loads/stores --------------------------------------------------- */
+#define jglofs(as, k) \
+ (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
+
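+/* Note: jglofs() assumes the JGL base register points 32768 bytes past the
+** start of global_State (the usual MIPS/PPC dispatch setup), so fields in
+** the first 64KB of the GG state area, e.g. the J->k32/k64 constants read
+** by the exit stubs, are reachable with a signed 16 bit displacement.
+*/
+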
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* -- Emit loads/stores --------------------------------------------------- */
+#define jglofs(as, k) \
+ (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
+
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* Return label pointing to current PC. */
#define emit_label(as) ((as)->mcp)
+/* Check if two addresses are within relative jump range. */
+static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b)
+{
+#if LJ_64
+ return a - b == (int32_t)(a - b);
+#else
+ UNUSED(a); UNUSED(b);
+ return 1;
+#endif
+}
+
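+/* E.g. the x64 exit stub and tail code check jmprel_ok(p + 5, target),
+** since the rel32 displacement of a 5 byte jmp/call is relative to the end
+** of the instruction; if the check fails, a RIP-relative indirect jump
+** through a nearby 64 bit address is emitted instead.
+*/
+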
/* Compute relative 32 bit offset for jump and call instructions. */
static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
{
{
MCode *p = as->mcp;
#if LJ_64
- if (target-p != (int32_t)(target-p)) {
+ if (!jmprel_ok(target, p)) {
/* Assumes RID_RET is never an argument to calls and always clobbered. */
emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
emit_loadu64(as, RID_RET, (uint64_t)target);
/* -- JIT engine parameters ----------------------------------------------- */
-#if LJ_TARGET_WINDOWS || LJ_64
-/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */
-#define JIT_P_sizemcode_DEFAULT 64
-#else
-/* Could go as low as 4K, but the mmap() overhead would be rather high. */
-#define JIT_P_sizemcode_DEFAULT 32
-#endif
-
/* Optimization parameters and their defaults. Length is a char in octal! */
#define JIT_PARAMDEF(_) \
_(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
_(\011, recunroll, 2) /* Min. unroll for true recursion. */ \
\
/* Size of each machine code area (in KBytes). */ \
- _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
+ _(\011, sizemcode, 64) \
/* Max. total size of all machine code areas (in KBytes). */ \
- _(\010, maxmcode, 512) \
+ _(\010, maxmcode, 2048) \
/* End of list. */
enum {
LJ_K64_2P63, /* 2^63 */
LJ_K64_M2P64, /* -2^64 */
#endif
+#endif
+#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
+ LJ_K64_VM_EXIT_HANDLER,
+ LJ_K64_VM_EXIT_INTERP,
#endif
LJ_K64__MAX,
};
-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
+#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
enum {
#if LJ_TARGET_X86ORX64
#if LJ_TARGET_MIPS64
LJ_K32_2P63, /* 2^63 */
LJ_K32_M2P64, /* -2^64 */
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
+ LJ_K32_VM_EXIT_HANDLER,
+ LJ_K32_VM_EXIT_INTERP,
#endif
LJ_K32__MAX
};
MCode *mcbot; /* Bottom of current mcode area. */
size_t szmcarea; /* Size of current mcode area. */
size_t szallmcarea; /* Total size of all allocated mcode areas. */
+ uintptr_t mcmin, mcmax; /* Mcode allocation range. */
TValue errinfo; /* Additional info element for trace errors. */
#if LJ_HASJIT
+#if LUAJIT_SECURITY_MCODE != 0
+/* Protection twiddling failed. Probably due to kernel security. */
+static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
+{
+ lua_CFunction panic = J2G(J)->panic;
+ if (panic) {
+ lua_State *L = J->L;
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
+ panic(L);
+ }
+ exit(EXIT_FAILURE);
+}
+#endif
+
#if LJ_TARGET_WINDOWS
#define MCPROT_RW PAGE_READWRITE
#define MCPROT_RX PAGE_EXECUTE_READ
#define MCPROT_RWX PAGE_EXECUTE_READWRITE
-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
+static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot)
{
- void *p = LJ_WIN_VALLOC((void *)hint, sz,
- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
- if (!p && !hint)
- lj_trace_err(J, LJ_TRERR_MCODEAL);
- return p;
+ return LJ_WIN_VALLOC((void *)hint, sz,
+ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
}
-static void mcode_free(jit_State *J, void *p, size_t sz)
+static void mcode_free(void *p, size_t sz)
{
- UNUSED(J); UNUSED(sz);
+ UNUSED(sz);
VirtualFree(p, 0, MEM_RELEASE);
}
-static int mcode_setprot(void *p, size_t sz, DWORD prot)
+static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot)
{
+#if LUAJIT_SECURITY_MCODE != 0
DWORD oprot;
- return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
+ if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J);
+#else
+ UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
+#endif
}
#elif LJ_TARGET_POSIX
#define MCPROT_CREATE 0
#endif
-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
+static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot)
{
void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0);
- if (p == MAP_FAILED) {
- if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
- p = NULL;
+ if (p == MAP_FAILED) return NULL;
#if MCMAP_CREATE
- } else {
- pthread_jit_write_protect_np(0);
+ pthread_jit_write_protect_np(0);
#endif
- }
return p;
}
-static void mcode_free(jit_State *J, void *p, size_t sz)
+static void mcode_free(void *p, size_t sz)
{
- UNUSED(J);
munmap(p, sz);
}
-static int mcode_setprot(void *p, size_t sz, int prot)
+static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot)
{
+#if LUAJIT_SECURITY_MCODE != 0
#if MCMAP_CREATE
+ UNUSED(J); UNUSED(p); UNUSED(sz);
pthread_jit_write_protect_np((prot & PROT_EXEC));
- return 0;
#else
- return mprotect(p, sz, prot);
+ if (mprotect(p, sz, prot)) mcode_protfail(J);
+#endif
+#else
+ UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
#endif
}
#endif
+#ifdef LUAJIT_MCODE_TEST
+/* Test wrapper for mcode allocation. DO NOT ENABLE in production! Try:
+** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua
+** LUAJIT_MCODE_TEST=F luajit -jv main.lua
+*/
+static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot)
+{
+ static int test_ofs = 0;
+ static const char *test_str;
+ if (!test_str) {
+ test_str = getenv("LUAJIT_MCODE_TEST");
+ if (!test_str) test_str = "";
+ }
+ switch (test_str[test_ofs]) {
+ case 'a': /* OK for one allocation. */
+ test_ofs++;
+ /* fallthrough */
+ case '\0': /* EOS: OK for any further allocations. */
+ break;
+ case 'h': /* Ignore one hint. */
+ test_ofs++;
+ /* fallthrough */
+ case 'H': /* Ignore any further hints. */
+ hint = 0u;
+ break;
+ case 'r': /* Randomize one hint. */
+ test_ofs++;
+ /* fallthrough */
+ case 'R': /* Randomize any further hints. */
+ hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu;
+ hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1;
+ break;
+ case 'f': /* Fail one allocation. */
+ test_ofs++;
+ /* fallthrough */
+ default: /* 'F' or unknown: Fail any further allocations. */
+ return NULL;
+ }
+ return mcode_alloc_at(hint, sz, prot);
+}
+#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot)
+#endif
+
/* -- MCode area protection ----------------------------------------------- */
#if LUAJIT_SECURITY_MCODE == 0
static void mcode_protect(jit_State *J, int prot)
{
- UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot);
+ UNUSED(J); UNUSED(prot);
}
#else
#define MCPROT_GEN MCPROT_RW
#define MCPROT_RUN MCPROT_RX
-/* Protection twiddling failed. Probably due to kernel security. */
-static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
-{
- lua_CFunction panic = J2G(J)->panic;
- if (panic) {
- lua_State *L = J->L;
- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
- panic(L);
- }
- exit(EXIT_FAILURE);
-}
-
/* Change protection of MCode area. */
static void mcode_protect(jit_State *J, int prot)
{
if (J->mcprot != prot) {
- if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot)))
- mcode_protfail(J);
+ mcode_setprot(J, J->mcarea, J->szmcarea, prot);
J->mcprot = prot;
}
}
/* -- MCode area allocation ----------------------------------------------- */
-#if LJ_64
-#define mcode_validptr(p) (p)
-#else
-#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000)
-#endif
-
#ifdef LJ_TARGET_JUMPRANGE
-/* Get memory within relative jump distance of our code in 64 bit mode. */
-static void *mcode_alloc(jit_State *J, size_t sz)
+#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u)
+
+/* Set a memory range for mcode allocation with addr in the middle. */
+static void mcode_setrange(jit_State *J, uintptr_t addr)
{
- /* Target an address in the static assembler code (64K aligned).
- ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB.
- ** Use half the jump range so every address in the range can reach any other.
- */
#if LJ_TARGET_MIPS
- /* Use the middle of the 256MB-aligned region. */
- uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
- ~(uintptr_t)0x0fffffffu) + 0x08000000u;
+ /* Use the whole 256MB-aligned region. */
+ J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1);
+ J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE);
#else
- uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
+ /* Every address in the 64KB-aligned range should be able to reach
+ ** any other, so MCODE_RANGE64 is only half the (signed) branch range.
+ */
+ J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu;
+ J->mcmax = J->mcmin + MCODE_RANGE64;
#endif
- const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21);
- /* First try a contiguous area below the last one. */
- uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0;
- int i;
- /* Limit probing iterations, depending on the available pool size. */
- for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) {
- if (mcode_validptr(hint)) {
- void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN);
-
- if (mcode_validptr(p) &&
- ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range))
- return p;
- if (p) mcode_free(J, p, sz); /* Free badly placed area. */
- }
- /* Next try probing 64K-aligned pseudo-random addresses. */
+ /* Avoid wrap-around and the 64KB corners. */
+ if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u;
+ if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu;
+}
+
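+/* Worked example for the relative-branch case: assuming LJ_TARGET_JUMPRANGE
+** is 27 (+-128MB branches), MCODE_RANGE64 is 128MB - 64KB, so any two
+** addresses inside the [mcmin, mcmax) window are less than 128MB apart and
+** every mcode area can directly branch to every other one.
+*/
+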
+/* Check if an address range is fully inside the mcode allocation range. */
+static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz)
+{
+ /* Take care of unsigned wrap-around of addr + sz, too. */
+ return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax;
+}
+
+/* Get memory within a specific jump range in 64 bit mode. */
+static void *mcode_alloc(jit_State *J, size_t sz)
+{
+ uintptr_t hint;
+ int i = 0, j;
+ if (!J->mcmin) /* Place initial range near the interpreter code. */
+ mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler);
+ else if (!J->mcmax) /* Switch to a new range (already flushed). */
+ goto newrange;
+ /* First try a contiguous area below the last one (if in range). */
+ hint = (uintptr_t)J->mcarea - sz;
+ if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */
+ goto probe;
+ for (; i < 16; i++) {
+ void *p = mcode_alloc_at(hint, sz, MCPROT_GEN);
+ if (mcode_inrange(J, (uintptr_t)p, sz))
+ return p; /* Success. */
+ else if (p)
+ mcode_free(p, sz); /* Free badly placed area. */
+ probe:
+ /* Next try probing 64KB-aligned pseudo-random addresses. */
+ j = 0;
do {
- hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
- } while (!(hint + sz < range+range));
- hint = target + hint - range;
+ hint = J->mcmin + (lj_prng_u64(&J2G(J)->prng) & MCODE_RANGE64);
+ if (++j > 15) goto fail;
+ } while (!mcode_inrange(J, hint, sz));
+ }
+fail:
+ if (!J->mcarea) { /* Switch to a new range now. */
+ void *p;
+ newrange:
+ p = mcode_alloc_at(0, sz, MCPROT_GEN);
+ if (p) {
+ mcode_setrange(J, (uintptr_t)p + (sz >> 1));
+ return p; /* Success. */
+ }
+ } else {
+ J->mcmax = 0; /* Switch to a new range after the flush. */
}
lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */
return NULL;
{
#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
/* Allow better executable memory allocation for OpenBSD W^X mode. */
- void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
- if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
- mcode_free(J, p, sz);
- return NULL;
- }
- return p;
+ void *p = mcode_alloc_at(0, sz, MCPROT_RUN);
+ if (p) mcode_setprot(J, p, sz, MCPROT_GEN);
#else
- return mcode_alloc_at(J, 0, sz, MCPROT_GEN);
+ void *p = mcode_alloc_at(0, sz, MCPROT_GEN);
#endif
+ if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL);
+ return p;
}
#endif
{
MCode *oldarea = J->mcarea;
size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10;
- sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
J->mcarea = (MCode *)mcode_alloc(J, sz);
J->szmcarea = sz;
J->mcprot = MCPROT_GEN;
MCode *next = ((MCLink *)mc)->next;
size_t sz = ((MCLink *)mc)->size;
lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
- mcode_free(J, mc, sz);
+ mcode_free(mc, sz);
mc = next;
}
}
MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
{
if (finish) {
-#if LUAJIT_SECURITY_MCODE
if (J->mcarea == ptr)
mcode_protect(J, MCPROT_RUN);
- else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
- mcode_protfail(J);
-#endif
+ else
+ mcode_setprot(J, ptr, ((MCLink *)ptr)->size, MCPROT_RUN);
return NULL;
} else {
- MCode *mc = J->mcarea;
+ uintptr_t base = (uintptr_t)J->mcarea, addr = (uintptr_t)ptr;
/* Try current area first to use the protection cache. */
- if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
-#if LUAJIT_SECURITY_MCODE
+ if (addr >= base && addr < base + J->szmcarea) {
mcode_protect(J, MCPROT_GEN);
-#endif
- return mc;
+ return (MCode *)base;
}
/* Otherwise search through the list of MCode areas. */
for (;;) {
- mc = ((MCLink *)mc)->next;
- lj_assertJ(mc != NULL, "broken MCode area chain");
- if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
-#if LUAJIT_SECURITY_MCODE
- if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
- mcode_protfail(J);
-#endif
- return mc;
+ base = (uintptr_t)(((MCLink *)base)->next);
+ lj_assertJ(base != 0, "broken MCode area chain");
+ if (addr >= base && addr < base + ((MCLink *)base)->size) {
+ mcode_setprot(J, (MCode *)base, ((MCLink *)base)->size, MCPROT_GEN);
+ return (MCode *)base;
}
}
}
size_t sizemcode, maxmcode;
lj_mcode_abort(J);
sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
- sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
if (need * sizeof(MCode) > sizemcode)
lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */
ARMI_LDRSB = 0xe01000d0,
ARMI_LDRSH = 0xe01000f0,
ARMI_LDRD = 0xe00000d0,
+ ARMI_LDRL = 0xe51f0000,
ARMI_STR = 0xe4000000,
ARMI_STRB = 0xe4400000,
ARMI_STRH = 0xe00000b0,
ARMI_BL = 0xeb000000,
ARMI_BLX = 0xfa000000,
ARMI_BLXr = 0xe12fff30,
+ ARMI_BX = 0xe12fff10,
/* ARMv6 */
ARMI_REV = 0xe6bf0f30,
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
+ if ((LJ_LE ? p[1] >> 28 : p[1] & 0xf) == 0xf) p++; /* Skip A64I_LDRx. */
return p + 3 + exitno;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */
+ if (p[3] == 0x4e800421) p += 2; /* Indirect branch PPCI_BCTRL. */
return p + 3 + exitno;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
J->k32[LJ_K32_M2P64] = 0xdf800000;
#endif
#endif
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
+ J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler;
+ J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp;
+#endif
+#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
+ J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0);
+ J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0);
+#endif
}
/* Free everything associated with the JIT compiler state. */
J->cur.traceno = 0;
}
L->top--; /* Remove error object */
- if (e == LJ_TRERR_DOWNREC)
+ if (e == LJ_TRERR_DOWNREC) {
return trace_downrec(J);
- else if (e == LJ_TRERR_MCODEAL)
+ } else if (e == LJ_TRERR_MCODEAL) {
+ if (!J->mcarea) { /* Disable JIT compiler if first mcode alloc fails. */
+ J->flags &= ~JIT_F_ON;
+ lj_dispatch_update(J2G(J));
+ }
lj_trace_flushall(L);
+ }
return 0;
}