From: Mike Pall
Date: Wed, 5 Nov 2025 23:42:02 +0000 (+0100)
Subject: Allow mcode allocations outside of the jump range to the support code.
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fv2.1;p=thirdparty%2FLuaJIT.git

Allow mcode allocations outside of the jump range to the support code.

Thank you for your patience. #285
---

diff --git a/doc/running.html b/doc/running.html
index f71eee42..56d4c7bf 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -299,9 +299,9 @@ Here are the parameters and their default settings:
 recunroll   2     Min. unroll factor for true recursion
-sizemcode   32    Size of each machine code area in KBytes (Windows: 64K)
+sizemcode   64    Size of each machine code area in KBytes
-maxmcode    512   Max. total size of all machine code areas in KBytes
+maxmcode    2048  Max. total size of all machine code areas in KBytes
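The new defaults can still be overridden per process, e.g. with luajit -Osizemcode=128 -Omaxmcode=4096 script.lua, or from Lua via jit.opt.start("sizemcode=128", "maxmcode=4096"); the lib_jit.c change below additionally rounds the requested value up to a whole LJ_PAGESIZE multiple and clamps it to what the target's jump range can cover.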
diff --git a/src/lib_jit.c b/src/lib_jit.c index fd8e585b..1b74d957 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -479,12 +479,21 @@ static int jitopt_param(jit_State *J, const char *str) size_t len = *(const uint8_t *)lst; lj_assertJ(len != 0, "bad JIT_P_STRING"); if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { - int32_t n = 0; + uint32_t n = 0; const char *p = &str[len+1]; while (*p >= '0' && *p <= '9') n = n*10 + (*p++ - '0'); - if (*p) return 0; /* Malformed number. */ - J->param[i] = n; + if (*p || (int32_t)n < 0) return 0; /* Malformed number. */ + if (i == JIT_P_sizemcode) { /* Adjust to required range here. */ +#if LJ_TARGET_JUMPRANGE + uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64); +#else + uint32_t maxkb = ((1 << (31 - 10)) - 64); +#endif + n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1); + if (n > maxkb) n = maxkb; + } + J->param[i] = (int32_t)n; if (i == JIT_P_hotloop) lj_dispatch_init_hotcount(J2G(J)); return 1; /* Ok. */ diff --git a/src/lj_arch.h b/src/lj_arch.h index 6d1a9271..799f9c6c 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -301,6 +301,7 @@ #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_GC64 1 +#define LJ_PAGESIZE 16384 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #define LJ_ARCH_VERSION 80 @@ -456,7 +457,7 @@ #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 #define LJ_TARGET_EHRAREG 31 -#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ +#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ diff --git a/src/lj_asm.c b/src/lj_asm.c index 8f558a03..0e888c29 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -93,6 +93,10 @@ typedef struct ASMState { MCode *invmcp; /* Points to invertible loop branch (or NULL). */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ MCode *realign; /* Realign loop if not NULL. */ + MCode *mctail; /* Tail of trace before stack adjust + jmp. */ +#if LJ_TARGET_PPC || LJ_TARGET_ARM64 + MCode *mcexit; /* Pointer to exit stubs. */ +#endif #ifdef LUAJIT_RANDOM_RA /* Randomize register allocation. OK for fuzz testing, not for production. */ @@ -2541,7 +2545,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) RA_DBGX((as, "===== STOP =====")); /* General trace setup. Emit tail of trace. */ - asm_tail_prep(as); + asm_tail_prep(as, T->link); as->mcloop = NULL; as->flagmcp = NULL; as->topslot = 0; diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 24deaeae..406360d2 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -79,18 +79,43 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) /* Generate an exit stub group at the bottom of the reserved MCode memory. */ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { + ExitNo i; + int ind = 0; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; - int i; - if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) + if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop) asm_mclimit(as); - /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ - *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); - *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); - mxp++; + if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) { + /* str lr, [sp]; bl ->vm_exit_handler; + ** .long DISPATCH_address, group. 
+ */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + /* + ** str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 2; + } else { + /* .long vm_exit_handler; + ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = (MCode)target; + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16; + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 1; + } *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ *mxp++ = group*EXITSTUBS_PER_GROUP; for (i = 0; i < EXITSTUBS_PER_GROUP; i++) - *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); + *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu); lj_mcode_sync(as->mcbot, mxp); lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -2210,33 +2235,46 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - as->mctop = --p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ uint32_t k = emit_isk12(ARMI_ADD, spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + } + if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) { + *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + } else { + *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0; + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + *mcp++ = (MCode)target; } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu); + while (as->mctop > mcp) *--as->mctop = ARMI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if ((((target - p - 2) + 0x00800000u) >> 24) || + (((target - p - 1) + 0x00800000u) >> 24)) p -= 2; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } *p = 0; /* Prevent load/store merging. 
*/ + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 4feaa3b0..085f9357 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -51,15 +51,27 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + ind = !A64F_S_OK(target - (mxp - nexits - 2), 26); + /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; + ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno; + ** bl <1; bl <1; ... + */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); + *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i)); + as->mcexit = mxp; *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); - mxp--; - *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); + if (ind) { + *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR)); + *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3)); + } else { + mxp--; + *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp)); + } *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); as->mctop = mxp; } @@ -67,7 +79,7 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -1917,34 +1929,42 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; + MCode *mcp = as->mctail; MCode *target; /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); - if (spadj == 0) { - *--p = A64I_LE(A64I_NOP); - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ uint32_t k = emit_isk12(spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | A64F_S26((target-p)+1); + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || A64F_S_OK(target - mcp, 26)) { + *mcp = A64I_B | A64F_S26(target - mcp); mcp++; + } else { + *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3); + *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR); + } + while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP); } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. 
*/ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; *p = 0; /* Prevent load/store merging. */ } diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index af0e714f..8dadabe4 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -92,13 +92,23 @@ static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) /* Setup exit stub after the end of each trace. */ static void asm_exitstub_setup(ASMState *as) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ - *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; - *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, - "branch target out of range"); - *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; + *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno; + if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) { + /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ + *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + /* sw TMP, 0(sp); li TMP, K*_VXH(jgl); jr TMP ; li TMP, traceno */ + *--mxp = MIPSI_JR | MIPSF_S(RID_TMP); + *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); +#endif + } + *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0; as->mctop = mxp; } @@ -428,7 +438,8 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) + RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)| + RID2RSET(RID_CFUNCADDR) #if LJ_TARGET_MIPSR6 |RID2RSET(RID_F21) #endif @@ -514,7 +525,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg r) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| - RID2RSET(RID_R1)|RID2RSET(RID_R12); + RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR); if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ @@ -2699,18 +2710,37 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; - p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); + if (((uintptr_t)mcp ^ target) >> 28 == 0) { + *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); +#endif + *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP); + } + *mcp++ = spadj ? 
(MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { - as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ - as->invmcp = as->loopref ? as->mcp : NULL; + as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. */ + if (as->loopref) { + as->invmcp = as->mcp; + } else { + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--; + } + as->invmcp = NULL; + } + as->mctail = as->mcp; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index df1ac42f..d77c45ce 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -48,23 +48,38 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ + ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0; + /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; + ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno; + ** bl <1; bl <1; ... + */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); + *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2); + as->mcexit = mxp; *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ - mxp--; - *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); - *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); + if (ind) { + *--mxp = PPCI_BCTRL; + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP); + *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); + } else { + mxp--; + *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu); + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + } as->mctop = mxp; } static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -2218,34 +2233,43 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - *--p = PPCI_NOP; - *--p = PPCI_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); - p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); - p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; + *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); + *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; } - /* Patch exit branch. */ - target = lnk ? 
traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); + /* Emit exit branch. */ + if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) { + *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++; + } else { + *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); + *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP); + *mcp++ = PPCI_BCTR; + } + while (as->mctop > mcp) *--as->mctop = PPCI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-2; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) || + (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2; + } + p -= 2; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 774e77b4..f3c2238a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -9,9 +9,12 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; MCode *mxpstart = mxp; - if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) + if (mxp + ((2+2)*EXITSTUBS_PER_GROUP + + (LJ_GC64 ? 0 : 8) + + (LJ_64 ? 6 : 5)) >= as->mctop) asm_mclimit(as); /* Push low byte of exitno for each exit stub. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; @@ -30,8 +33,13 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; #endif /* Jump to exit handler which fills in the ExitState. */ - *mxp++ = XI_JMP; mxp += 4; - *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); + if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */ + *mxp++ = XI_JMP; mxp += 4; + *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target); + } else { /* RIP-relative indirect jump. */ + *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4; + *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp); + } /* Commit the code for this group (even if assembly fails later on). */ lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -45,6 +53,16 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) ExitNo i; if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) lj_trace_err(as->J, LJ_TRERR_SNAPOV); +#if LJ_64 + if (as->J->exitstubgroup[0] == NULL) { + /* Store the two potentially out-of-range targets below group 0. */ + MCode *mxp = as->mcbot; + while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3; + *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8; + *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8; + as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. 
*/ + } +#endif for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) if (as->J->exitstubgroup[i] == NULL) as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); @@ -396,7 +414,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) "bad interned 64 bit constant"); } else { while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; - *(uint64_t*)as->mcbot = *k; + *(uint64_t *)as->mcbot = *k; ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; @@ -728,7 +746,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) p = (MCode *)(void *)ir_k64(irf)->u64; else p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i; - if (p - as->mcp == (int32_t)(p - as->mcp)) + if (jmprel_ok(p, as->mcp)) return p; /* Call target is still in +-2GB range. */ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */ } @@ -2806,6 +2824,8 @@ static void asm_gc_check(ASMState *as) emit_rr(as, XO_TEST, RID_RET, RID_RET); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ + /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. */ + if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP; asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); #if LJ_GC64 @@ -2919,40 +2939,36 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) static void asm_tail_fixup(ASMState *as, TraceNo lnk) { /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ - MCode *p = as->mctop; - MCode *target, *q; + MCode *mcp = as->mctail; + MCode *target; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - p -= LJ_64 ? 7 : 6; - } else { - MCode *p1; - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ + if (LJ_64) *mcp++ = 0x48; if (checki8(spadj)) { - p -= 3; - p1 = p-6; - *p1 = (MCode)spadj; + *mcp++ = XI_ARITHi8; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *mcp++ = (MCode)spadj; } else { - p1 = p-9; - *(int32_t *)p1 = spadj; + *mcp++ = XI_ARITHi; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *(int32_t *)mcp = spadj; mcp += 4; } -#if LJ_64 - p1[-3] = 0x48; -#endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - *(int32_t *)(p-4) = jmprel(as->J, p, target); - p[-5] = XI_JMP; + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */ + *mcp++ = XI_JMP; mcp += 4; + *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target); + } else { /* RIP-relative indirect jump. */ + *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4; + *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp); + } /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ - for (q = as->mctop-1; q >= p; q--) - *q = XI_NOP; - as->mctop = p; + while (as->mctop > mcp) *--as->mctop = XI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop; /* Realign and leave room for backwards loop branch or exit branch. */ @@ -2964,15 +2980,17 @@ static void asm_tail_prep(ASMState *as) as->mctop = p; p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ } else { - p -= 5; /* Space for exit branch (near jmp). */ + p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. 
*/ } if (as->loopref) { as->invmcp = as->mcp = p; } else { - /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (LJ_64 ? 7 : 6); + /* Leave room for ESP adjustment: add esp, imm */ + p -= LJ_64 ? 7 : 6; + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ @@ -3132,6 +3150,10 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if (*p == XI_CALL && (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { pgc = p+7; /* Do not patch GC check exit. */ + } else if (LJ_64 && *p == 0xff && + p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) && + p[2] == XI_NOP) { + pgc = p+5; /* Do not patch GC check exit. */ } } lj_mcode_sync(T->mcode, T->mcode + T->szmcode); diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index c60e7d75..3e1eb64b 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -173,6 +173,11 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) return 0; /* Failed. */ } +#define emit_movw_k(k) \ + (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4)) +#define emit_movt_k(k) \ + (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4)) + /* Load a 32 bit constant into a GPR. */ static void emit_loadi(ASMState *as, Reg rd, int32_t i) { @@ -184,13 +189,13 @@ static void emit_loadi(ASMState *as, Reg rd, int32_t i) emit_d(as, ARMI_MOV^k, rd); } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta1(as, rd, i)) { /* One step delta relative to another constant. */ } else if ((as->flags & JIT_F_ARMV6T2)) { /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movt_k(i), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta2(as, rd, i)) { /* Two step delta relative to another constant. */ } else { diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index d8104959..d65b1c57 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -80,6 +80,9 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index b13f00fe..56928e42 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -53,6 +53,9 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 5fd6cfa7..858fe753 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -478,6 +478,17 @@ static void emit_sfixup(ASMState *as, MCLabel source) /* Return label pointing to current PC. */ #define emit_label(as) ((as)->mcp) +/* Check if two adresses are in relative jump range. 
*/ +static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b) +{ +#if LJ_64 + return a - b == (int32_t)(a - b); +#else + UNUSED(a); UNUSED(b); + return 1; +#endif +} + /* Compute relative 32 bit offset for jump and call instructions. */ static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) { @@ -511,7 +522,7 @@ static void emit_call_(ASMState *as, MCode *target) { MCode *p = as->mcp; #if LJ_64 - if (target-p != (int32_t)(target-p)) { + if (!jmprel_ok(target, p)) { /* Assumes RID_RET is never an argument to calls and always clobbered. */ emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET); emit_loadu64(as, RID_RET, (uint64_t)target); diff --git a/src/lj_jit.h b/src/lj_jit.h index 102ba0b4..05a8e9bb 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -104,14 +104,6 @@ /* -- JIT engine parameters ----------------------------------------------- */ -#if LJ_TARGET_WINDOWS || LJ_64 -/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */ -#define JIT_P_sizemcode_DEFAULT 64 -#else -/* Could go as low as 4K, but the mmap() overhead would be rather high. */ -#define JIT_P_sizemcode_DEFAULT 32 -#endif - /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ @@ -131,9 +123,9 @@ _(\011, recunroll, 2) /* Min. unroll for true recursion. */ \ \ /* Size of each machine code area (in KBytes). */ \ - _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ + _(\011, sizemcode, 64) \ /* Max. total size of all machine code areas (in KBytes). */ \ - _(\010, maxmcode, 512) \ + _(\010, maxmcode, 2048) \ /* End of list. */ enum { @@ -374,10 +366,14 @@ enum { LJ_K64_2P63, /* 2^63 */ LJ_K64_M2P64, /* -2^64 */ #endif +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + LJ_K64_VM_EXIT_HANDLER, + LJ_K64_VM_EXIT_INTERP, #endif LJ_K64__MAX, }; -#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) +#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) enum { #if LJ_TARGET_X86ORX64 @@ -393,6 +389,10 @@ enum { #if LJ_TARGET_MIPS64 LJ_K32_2P63, /* 2^63 */ LJ_K32_M2P64, /* -2^64 */ +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + LJ_K32_VM_EXIT_HANDLER, + LJ_K32_VM_EXIT_INTERP, #endif LJ_K32__MAX }; @@ -513,6 +513,7 @@ typedef struct jit_State { MCode *mcbot; /* Bottom of current mcode area. */ size_t szmcarea; /* Size of current mcode area. */ size_t szallmcarea; /* Total size of all allocated mcode areas. */ + uintptr_t mcmin, mcmax; /* Mcode allocation range. */ TValue errinfo; /* Additional info element for trace errors. */ diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 2b8ac2df..c3032f4e 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -63,31 +63,46 @@ void lj_mcode_sync(void *start, void *end) #if LJ_HASJIT +#if LUAJIT_SECURITY_MCODE != 0 +/* Protection twiddling failed. Probably due to kernel security. 
*/ +static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) +{ + lua_CFunction panic = J2G(J)->panic; + if (panic) { + lua_State *L = J->L; + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); + panic(L); + } + exit(EXIT_FAILURE); +} +#endif + #if LJ_TARGET_WINDOWS #define MCPROT_RW PAGE_READWRITE #define MCPROT_RX PAGE_EXECUTE_READ #define MCPROT_RWX PAGE_EXECUTE_READWRITE -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot) { - void *p = LJ_WIN_VALLOC((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); - if (!p && !hint) - lj_trace_err(J, LJ_TRERR_MCODEAL); - return p; + return LJ_WIN_VALLOC((void *)hint, sz, + MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); } -static void mcode_free(jit_State *J, void *p, size_t sz) +static void mcode_free(void *p, size_t sz) { - UNUSED(J); UNUSED(sz); + UNUSED(sz); VirtualFree(p, 0, MEM_RELEASE); } -static int mcode_setprot(void *p, size_t sz, DWORD prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot) { +#if LUAJIT_SECURITY_MCODE != 0 DWORD oprot; - return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); + if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J); +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); +#endif } #elif LJ_TARGET_POSIX @@ -117,33 +132,33 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_CREATE 0 #endif -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot) { void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0); - if (p == MAP_FAILED) { - if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); - p = NULL; + if (p == MAP_FAILED) return NULL; #if MCMAP_CREATE - } else { - pthread_jit_write_protect_np(0); + pthread_jit_write_protect_np(0); #endif - } return p; } -static void mcode_free(jit_State *J, void *p, size_t sz) +static void mcode_free(void *p, size_t sz) { - UNUSED(J); munmap(p, sz); } -static int mcode_setprot(void *p, size_t sz, int prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot) { +#if LUAJIT_SECURITY_MCODE != 0 #if MCMAP_CREATE + UNUSED(J); UNUSED(p); UNUSED(sz); pthread_jit_write_protect_np((prot & PROT_EXEC)); return 0; #else - return mprotect(p, sz, prot); + if (mprotect(p, sz, prot)) mcode_protfail(J); +#endif +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); #endif } @@ -153,6 +168,49 @@ static int mcode_setprot(void *p, size_t sz, int prot) #endif +#ifdef LUAJIT_MCODE_TEST +/* Test wrapper for mcode allocation. DO NOT ENABLE in production! Try: +** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua +** LUAJIT_MCODE_TEST=F luajit -jv main.lua +*/ +static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot) +{ + static int test_ofs = 0; + static const char *test_str; + if (!test_str) { + test_str = getenv("LUAJIT_MCODE_TEST"); + if (!test_str) test_str = ""; + } + switch (test_str[test_ofs]) { + case 'a': /* OK for one allocation. */ + test_ofs++; + /* fallthrough */ + case '\0': /* EOS: OK for any further allocations. */ + break; + case 'h': /* Ignore one hint. */ + test_ofs++; + /* fallthrough */ + case 'H': /* Ignore any further hints. */ + hint = 0u; + break; + case 'r': /* Randomize one hint. */ + test_ofs++; + /* fallthrough */ + case 'R': /* Randomize any further hints. 
*/ + hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu; + hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1; + break; + case 'f': /* Fail one allocation. */ + test_ofs++; + /* fallthrough */ + default: /* 'F' or unknown: Fail any further allocations. */ + return NULL; + } + return mcode_alloc_at(hint, sz, prot); +} +#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot) +#endif + /* -- MCode area protection ----------------------------------------------- */ #if LUAJIT_SECURITY_MCODE == 0 @@ -174,7 +232,7 @@ static int mcode_setprot(void *p, size_t sz, int prot) static void mcode_protect(jit_State *J, int prot) { - UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); + UNUSED(J); UNUSED(prot); } #else @@ -190,24 +248,11 @@ static void mcode_protect(jit_State *J, int prot) #define MCPROT_GEN MCPROT_RW #define MCPROT_RUN MCPROT_RX -/* Protection twiddling failed. Probably due to kernel security. */ -static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) -{ - lua_CFunction panic = J2G(J)->panic; - if (panic) { - lua_State *L = J->L; - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); - panic(L); - } - exit(EXIT_FAILURE); -} - /* Change protection of MCode area. */ static void mcode_protect(jit_State *J, int prot) { if (J->mcprot != prot) { - if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot))) - mcode_protfail(J); + mcode_setprot(J, J->mcarea, J->szmcarea, prot); J->mcprot = prot; } } @@ -216,47 +261,74 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_64 -#define mcode_validptr(p) (p) -#else -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) -#endif - #ifdef LJ_TARGET_JUMPRANGE -/* Get memory within relative jump distance of our code in 64 bit mode. */ -static void *mcode_alloc(jit_State *J, size_t sz) +#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u) + +/* Set a memory range for mcode allocation with addr in the middle. */ +static void mcode_setrange(jit_State *J, uintptr_t addr) { - /* Target an address in the static assembler code (64K aligned). - ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB. - ** Use half the jump range so every address in the range can reach any other. - */ #if LJ_TARGET_MIPS - /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & - ~(uintptr_t)0x0fffffffu) + 0x08000000u; + /* Use the whole 256MB-aligned region. */ + J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1); + J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE); #else - uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; + /* Every address in the 64KB-aligned range should be able to reach + ** any other, so MCODE_RANGE64 is only half the (signed) branch range. + */ + J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu; + J->mcmax = J->mcmin + MCODE_RANGE64; #endif - const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); - /* First try a contiguous area below the last one. */ - uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; - int i; - /* Limit probing iterations, depending on the available pool size. 
*/ - for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { - if (mcode_validptr(hint)) { - void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); - - if (mcode_validptr(p) && - ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range)) - return p; - if (p) mcode_free(J, p, sz); /* Free badly placed area. */ - } - /* Next try probing 64K-aligned pseudo-random addresses. */ + /* Avoid wrap-around and the 64KB corners. */ + if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u; + if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu; +} + +/* Check if an address is in range of the mcode allocation range. */ +static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz) +{ + /* Take care of unsigned wrap-around of addr + sz, too. */ + return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax; +} + +/* Get memory within a specific jump range in 64 bit mode. */ +static void *mcode_alloc(jit_State *J, size_t sz) +{ + uintptr_t hint; + int i = 0, j; + if (!J->mcmin) /* Place initial range near the interpreter code. */ + mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler); + else if (!J->mcmax) /* Switch to a new range (already flushed). */ + goto newrange; + /* First try a contiguous area below the last one (if in range). */ + hint = (uintptr_t)J->mcarea - sz; + if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */ + goto probe; + for (; i < 16; i++) { + void *p = mcode_alloc_at(hint, sz, MCPROT_GEN); + if (mcode_inrange(J, (uintptr_t)p, sz)) + return p; /* Success. */ + else if (p) + mcode_free(p, sz); /* Free badly placed area. */ + probe: + /* Next try probing 64KB-aligned pseudo-random addresses. */ + j = 0; do { - hint = lj_prng_u64(&J2G(J)->prng) & ((1u<mcmin + (lj_prng_u64(&J2G(J)->prng) & MCODE_RANGE64); + if (++j > 15) goto fail; + } while (!mcode_inrange(J, hint, sz)); + } +fail: + if (!J->mcarea) { /* Switch to a new range now. */ + void *p; + newrange: + p = mcode_alloc_at(0, sz, MCPROT_GEN); + if (p) { + mcode_setrange(J, (uintptr_t)p + (sz >> 1)); + return p; /* Success. */ + } + } else { + J->mcmax = 0; /* Switch to a new range after the flush. */ } lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ return NULL; @@ -269,15 +341,13 @@ static void *mcode_alloc(jit_State *J, size_t sz) { #if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP /* Allow better executable memory allocation for OpenBSD W^X mode. 
*/ - void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); - if (p && mcode_setprot(p, sz, MCPROT_GEN)) { - mcode_free(J, p, sz); - return NULL; - } - return p; + void *p = mcode_alloc_at(0, sz, MCPROT_RUN); + if (p) mcode_setprot(J, p, sz, MCPROT_GEN); #else - return mcode_alloc_at(J, 0, sz, MCPROT_GEN); + void *p = mcode_alloc_at(0, sz, MCPROT_GEN); #endif + if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL); + return p; } #endif @@ -289,7 +359,6 @@ static void mcode_allocarea(jit_State *J) { MCode *oldarea = J->mcarea; size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10; - sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); J->mcarea = (MCode *)mcode_alloc(J, sz); J->szmcarea = sz; J->mcprot = MCPROT_GEN; @@ -311,7 +380,7 @@ void lj_mcode_free(jit_State *J) MCode *next = ((MCLink *)mc)->next; size_t sz = ((MCLink *)mc)->size; lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); - mcode_free(J, mc, sz); + mcode_free(mc, sz); mc = next; } } @@ -347,32 +416,25 @@ void lj_mcode_abort(jit_State *J) MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) { if (finish) { -#if LUAJIT_SECURITY_MCODE if (J->mcarea == ptr) mcode_protect(J, MCPROT_RUN); - else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) - mcode_protfail(J); -#endif + else + mcode_setprot(J, ptr, ((MCLink *)ptr)->size, MCPROT_RUN); return NULL; } else { - MCode *mc = J->mcarea; + uintptr_t base = (uintptr_t)J->mcarea, addr = (uintptr_t)ptr; /* Try current area first to use the protection cache. */ - if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { -#if LUAJIT_SECURITY_MCODE + if (addr >= base && addr < base + J->szmcarea) { mcode_protect(J, MCPROT_GEN); -#endif - return mc; + return (MCode *)base; } /* Otherwise search through the list of MCode areas. */ for (;;) { - mc = ((MCLink *)mc)->next; - lj_assertJ(mc != NULL, "broken MCode area chain"); - if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { -#if LUAJIT_SECURITY_MCODE - if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) - mcode_protfail(J); -#endif - return mc; + base = (uintptr_t)(((MCLink *)base)->next); + lj_assertJ(base != 0, "broken MCode area chain"); + if (addr >= base && addr < base + ((MCLink *)base)->size) { + mcode_setprot(J, (MCode *)base, ((MCLink *)base)->size, MCPROT_GEN); + return (MCode *)base; } } } @@ -384,7 +446,6 @@ void lj_mcode_limiterr(jit_State *J, size_t need) size_t sizemcode, maxmcode; lj_mcode_abort(J); sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; - sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; if (need * sizeof(MCode) > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. 
*/ diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index d0bbc5a5..947545f8 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h @@ -190,6 +190,7 @@ typedef enum ARMIns { ARMI_LDRSB = 0xe01000d0, ARMI_LDRSH = 0xe01000f0, ARMI_LDRD = 0xe00000d0, + ARMI_LDRL = 0xe51f0000, ARMI_STR = 0xe4000000, ARMI_STRB = 0xe4400000, ARMI_STRH = 0xe00000b0, @@ -200,6 +201,7 @@ typedef enum ARMIns { ARMI_BL = 0xeb000000, ARMI_BLX = 0xfa000000, ARMI_BLXr = 0xe12fff30, + ARMI_BX = 0xe12fff10, /* ARMv6 */ ARMI_REV = 0xe6bf0f30, diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 30aff478..3113d141 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -110,6 +110,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ + if ((LJ_LE ? p[1] >> 28 : p[1] & 0xf) == 0xf) p++; /* Skip A64I_LDRx. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. */ diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index 5a1b5a7c..58f31188 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -115,6 +115,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */ + if (p[3] == 0x4e800421) p += 2; /* Indirect branch PPCI_BCTRL. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. */ diff --git a/src/lj_trace.c b/src/lj_trace.c index 0e948e8d..3e2cd0b3 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -343,6 +343,14 @@ void lj_trace_initstate(global_State *g) J->k32[LJ_K32_M2P64] = 0xdf800000; #endif #endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; + J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0); + J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0); +#endif } /* Free everything associated with the JIT compiler state. */ @@ -637,10 +645,15 @@ static int trace_abort(jit_State *J) J->cur.traceno = 0; } L->top--; /* Remove error object */ - if (e == LJ_TRERR_DOWNREC) + if (e == LJ_TRERR_DOWNREC) { return trace_downrec(J); - else if (e == LJ_TRERR_MCODEAL) + } else if (e == LJ_TRERR_MCODEAL) { + if (!J->mcarea) { /* Disable JIT compiler if first mcode alloc fails. */ + J->flags &= ~JIT_F_ON; + lj_dispatch_update(J2G(J)); + } lj_trace_flushall(L); + } return 0; }
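A few of the mechanisms above, sketched in isolation. The sizemcode adjustment in src/lib_jit.c rounds the requested area size up to a whole page and clamps it so a single area always fits inside the target's branch range. A minimal compilable sketch of just that arithmetic, with PAGESIZE and JUMPRANGE as stand-ins for the real LJ_PAGESIZE and LJ_TARGET_JUMPRANGE values:

#include <stdint.h>
#include <stdio.h>

#define PAGESIZE  16384  /* bytes; stand-in for LJ_PAGESIZE */
#define JUMPRANGE 28     /* 2^28-byte region; stand-in for LJ_TARGET_JUMPRANGE */

static uint32_t adjust_sizemcode(uint32_t kb)
{
  uint32_t pagekb = PAGESIZE >> 10;                /* page size in KB */
  uint32_t maxkb = (1u << (JUMPRANGE - 10)) - 64;  /* range in KB, minus corners */
  kb = (kb + pagekb - 1) & ~(pagekb - 1);          /* round up to page multiple */
  return kb > maxkb ? maxkb : kb;
}

int main(void)
{
  printf("%u\n", adjust_sizemcode(65));        /* 80: rounded up to 16K pages */
  printf("%u\n", adjust_sizemcode(1u << 30));  /* 262080: clamped to the range */
  return 0;
}

Doing the rounding once at option-parsing time is also why the two LJ_PAGESIZE roundings in src/lj_mcode.c (mcode_allocarea and lj_mcode_limiterr) are deleted.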
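Several hunks above (lj_asm_arm.h, lj_asm_ppc.h, and A64F_S_OK in lj_asm_arm64.h) test whether a direct branch reaches its target with the pattern ((delta + 2^(n-1)) >> n) == 0, where n is the width of the signed displacement field: 24 word-offset bits for ARM B/BL, 26 byte-offset bits for PPC B, 26 word-offset bits on AArch64. A sketch of the AArch64 variant, assuming byte addresses as inputs:

#include <stdint.h>

/* Does an A64 B/BL at `branch` reach `target`? The instruction encodes a
** signed 26-bit word offset, i.e. +-2^25 words = +-128MB. */
static int bl_reachable(uintptr_t target, uintptr_t branch)
{
  uint64_t delta = (uint64_t)(((intptr_t)target - (intptr_t)branch) >> 2);
  return ((delta + 0x02000000u) >> 26) == 0;
}

Adding half the range biases the signed delta into an unsigned window, so a single shift checks both bounds. When the test fails, the exit stubs and trace tails above fall back to an indirect branch through a register loaded from a constant.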
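On 32 bit ARM that constant load uses the new emit_movw_k()/emit_movt_k() helpers from src/lj_emit_arm.h. Expanded into a standalone sketch — the ARMI_MOVW/ARMI_MOVT opcode bases are assumed values matching lj_target_arm.h:

#include <stdint.h>

#define ARMI_MOVW 0xe3000000u  /* assumed opcode bases from lj_target_arm.h */
#define ARMI_MOVT 0xe3400000u

/* Encode "movw rd, #lo16; movt rd, #hi16" to materialize a 32-bit address.
** Each instruction packs its 16-bit immediate as imm4:imm12 (bits 19:16
** and 11:0); rd goes in bits 15:12. */
static void emit_load32(uint32_t out[2], uint32_t rd, uint32_t k)
{
  out[0] = ARMI_MOVW | (rd << 12) | (k & 0x0fffu) | ((k & 0xf000u) << 4);
  k >>= 16;
  out[1] = ARMI_MOVT | (rd << 12) | (k & 0x0fffu) | ((k & 0xf000u) << 4);
}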
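On PPC and MIPS the fallback instead loads the handler address from the constant table inside global_State via the dedicated JGL base register, which the jglofs() bias implies points 32KB past the start of the structure, so signed 16-bit displacements cover its first 64KB. A sketch of the bias with a simplified stand-in for global_State:

#include <stdint.h>
#include <assert.h>

typedef struct global_State {  /* simplified stand-in */
  char pad[1024];
  uint64_t k64_vm_exit_handler;
} global_State;

/* Offset of field k from the biased JGL base, as a 16-bit encoding. */
static uint32_t jglofs(global_State *g, void *k)
{
  return ((uintptr_t)k - (uintptr_t)g - 32768) & 0xffff;
}

int main(void)
{
  static global_State g;
  /* 1024 - 32768 = -31744 = 0x8400 as a 16-bit two's complement value. */
  assert(jglofs(&g, &g.k64_vm_exit_handler) == 0x8400u);
  return 0;
}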
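On x86-64, jmprel_ok() in src/lj_emit_x86.h checks that a rel32 displacement survives truncation to int32_t (on x86-32 everything is reachable). Essentially:

#include <stdint.h>

static int rel32_reachable(const uint8_t *target, const uint8_t *next_ins)
{
  intptr_t d = target - next_ins;  /* displacement from end of instruction */
  return d == (int32_t)d;          /* fits in +-2GB? */
}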
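When that test fails, asm_exitstub_gen() and asm_tail_fixup() in src/lj_asm_x86.h emit jmp qword [rip+disp32] instead, pointing at one of the two 8-byte absolute-address slots stored just below exit stub group 0. A byte-level sketch of the encoding; the slot address here is a caller-provided stand-in:

#include <stdint.h>
#include <string.h>

/* Emit FF 25 disp32: an indirect jump through an 8-byte slot holding the
** absolute target. disp32 is relative to the end of the instruction. */
static uint8_t *emit_jmp_rip_indirect(uint8_t *p, const void *slot)
{
  int32_t disp;
  *p++ = 0xff;  /* Group 5 */
  *p++ = 0x25;  /* ModRM: mod=00, reg=/4 (JMP), rm=101 -> [rip+disp32] */
  disp = (int32_t)((const uint8_t *)slot - (p + 4));
  memcpy(p, &disp, 4);
  return p + 4;
}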
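The core of the change is the rework of mcode_alloc() in src/lj_mcode.c: instead of measuring every candidate area against lj_vm_exit_handler, the allocator now maintains an explicit [mcmin, mcmax) window, probes mmap() hints inside it, and only gives up on the window (forcing a flush and a re-centered window) when probing fails. A compilable, POSIX-only sketch of that loop; splitmix64() stands in for LuaJIT's PRNG and the bookkeeping is much simplified:

#include <stdint.h>
#include <stddef.h>
#include <sys/mman.h>

#define RANGE (((uintptr_t)1 << 28) - 0x10000u)  /* window span minus 64K corners */

static uintptr_t mcmin, mcmax;  /* current allocation window */

static uint64_t splitmix64(uint64_t *s)
{
  uint64_t z = (*s += 0x9e3779b97f4a7c15ull);
  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ull;
  z = (z ^ (z >> 27)) * 0x94d049bb133111ebull;
  return z ^ (z >> 31);
}

/* In-window test; also rejects unsigned wrap-around of addr + sz. */
static int inrange(uintptr_t addr, size_t sz)
{
  return addr >= mcmin && addr + sz >= mcmin && addr + sz <= mcmax;
}

/* Center a 64KB-aligned window on anchor, avoiding address-space corners. */
static void set_window(uintptr_t anchor)
{
  mcmin = (anchor - RANGE/2 + 0xffffu) & ~(uintptr_t)0xffffu;
  mcmax = mcmin + RANGE;
  if (anchor < mcmin || !mcmin) mcmin = 0x10000u;
  if (anchor > mcmax) mcmax = ~(uintptr_t)0xffffu;
}

static void *alloc_mcode(size_t sz, uintptr_t prev, uint64_t *seed)
{
  uintptr_t hint = prev - sz;  /* First try contiguously below the last area. */
  int i;
  if (!mcmin) set_window((uintptr_t)(void *)&alloc_mcode);  /* near our code */
  for (i = 0; i < 16; i++) {
    if (inrange(hint, sz)) {  /* Also rejects the prev == 0 wrap-around. */
      void *p = mmap((void *)hint, sz, PROT_READ|PROT_WRITE,
                     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
      if (p != MAP_FAILED) {
        if (inrange((uintptr_t)p, sz)) return p;  /* Hint was honored. */
        munmap(p, sz);  /* Badly placed: free it and probe elsewhere. */
      }
    }
    /* Probe a 64KB-aligned pseudo-random hint inside the window. */
    hint = mcmin + (splitmix64(seed) & RANGE & ~(uintptr_t)0xffffu);
  }
  return NULL;  /* Real code: flush traces, then re-center via set_window(). */
}

Keeping every area inside one shared window guarantees that any trace can still branch directly to any other; only exits to the interpreter and the VM support code may now be out of range, which is exactly what the new indirect exit paths in the per-architecture backends handle. And if even the very first area cannot be allocated, trace_abort() above now turns the JIT off entirely instead of retrying forever.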