DASM_AFLAGS+= -D PAUTH
TARGET_ARCH+= -DLJ_ABI_PAUTH=1
endif
-ifneq (,$(findstring LJ_CET_BR 1,$(TARGET_TESTARCH)))
- DASM_AFLAGS+= -D CET_BR
- TARGET_ARCH+= -DLJ_CET_BR=1
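+# TARGET_TESTARCH is the target preprocessor's macro dump of lj_arch.h, so an
+# enabled feature appears verbatim as "LJ_ABI_BRANCH_TRACK 1" and can be
+# matched with findstring.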
+ifneq (,$(findstring LJ_ABI_BRANCH_TRACK 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D BRANCH_TRACK
+ TARGET_ARCH+= -DLJ_ABI_BRANCH_TRACK=1
+endif
+ifneq (,$(findstring LJ_ABI_SHADOW_STACK 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D SHADOW_STACK
+ TARGET_ARCH+= -DLJ_ABI_SHADOW_STACK=1
endif
DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
ifeq (Windows,$(TARGET_SYS))
#error "macOS requires GC64 -- don't disable it"
#endif
-#if (__CET__ & 1) && defined(LUAJIT_ENABLE_CET_BR)
+#if !defined(LJ_ABI_BRANCH_TRACK) && (__CET__ & 1) && \
+ LJ_TARGET_GC64 && defined(LUAJIT_ENABLE_CET_BR)
/*
** Control-Flow Enforcement Technology (CET) indirect branch tracking (IBT).
** This is not enabled by default because it causes a notable slowdown of
** the interpreter on all x64 CPUs, whether they have CET enabled or not.
** If your toolchain enables -fcf-protection=branch by default, you need
-** to build with: make XCFLAGS=-DLUAJIT_ENABLE_CET_BR
+** to build with: make amalg XCFLAGS=-DLUAJIT_ENABLE_CET_BR
*/
-#define LJ_CET_BR 1
+#define LJ_ABI_BRANCH_TRACK 1
+#endif
+
+#if !defined(LJ_ABI_SHADOW_STACK) && (__CET__ & 2)
+/*
+** Control-Flow Enforcement Technology (CET) shadow stack (CET-SS).
+** It has no code overhead and doesn't cause any slowdowns when unused.
+** It can also be unconditionally enabled since all code already follows
+** a strict CALL to RET correspondence for performance reasons (all modern
+** CPUs use a (non-enforcing) shadow stack for return branch prediction).
+*/
+#define LJ_ABI_SHADOW_STACK 1
#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
asm_head_side(as);
else
asm_head_root(as);
-#if LJ_CET_BR
- emit_endbr(as);
+#if LJ_ABI_BRANCH_TRACK
+ emit_branch_track(as);
#endif
asm_phi_fixup(as);
#elif LJ_TARGET_X86ORX64
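+/* With branch tracking each callback slot needs its own endbr64 landing pad,
+** so a slot grows from 4 bytes (mov al, slot; jmp) to 8 bytes and a 128-byte
+** group holds 16 slots instead of 32.
+*/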
+#if LJ_ABI_BRANCH_TRACK
+#define CALLBACK_MCODE_SLOTSZ 8
+#else
+#define CALLBACK_MCODE_SLOTSZ 4
+#endif
+#define CALLBACK_MCODE_NSLOT (128 / CALLBACK_MCODE_SLOTSZ)
+
#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
#define CALLBACK_SLOT2OFS(slot) \
- (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
+ (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/CALLBACK_MCODE_NSLOT) + CALLBACK_MCODE_SLOTSZ*(slot))
static MSize CALLBACK_OFS2SLOT(MSize ofs)
{
MSize group;
ofs -= CALLBACK_MCODE_HEAD;
- group = ofs / (32*4 + CALLBACK_MCODE_GROUP);
- return (ofs % (32*4 + CALLBACK_MCODE_GROUP))/4 + group*32;
+ group = ofs / (128 + CALLBACK_MCODE_GROUP);
+ return (ofs % (128 + CALLBACK_MCODE_GROUP))/CALLBACK_MCODE_SLOTSZ + group*CALLBACK_MCODE_NSLOT;
}
#define CALLBACK_MAX_SLOT \
- (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32)
+ (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+128))*CALLBACK_MCODE_NSLOT)
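+/* Worked example, assuming LJ_ABI_BRANCH_TRACK (SLOTSZ = 8, NSLOT = 16):
+** slot 17 is the second slot of group 1, so CALLBACK_SLOT2OFS(17) yields
+** HEAD + GROUP + 8*17 (HEAD/GROUP short for CALLBACK_MCODE_HEAD/_GROUP),
+** and CALLBACK_OFS2SLOT maps that offset back to slot 17, since each group
+** spans 128 + GROUP bytes of machine code.
+*/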
#elif LJ_TARGET_ARM
*(void **)p = target; p += 8;
#endif
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+#if LJ_ABI_BRANCH_TRACK
+ *(uint32_t *)p = XI_ENDBR64; p += 4;
+#endif
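+    /* With branch tracking a regular slot is 8 bytes:
+    **   f3 0f 1e fa  endbr64
+    **   b0 ib        mov al, slot
+    **   eb rel8      jmp short to the group tail
+    ** The last slot of a group omits the short jmp and falls through into
+    ** the tail code emitted below.
+    */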
/* mov al, slot; jmp group */
*p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot;
- if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) {
+ if ((slot & (CALLBACK_MCODE_NSLOT-1)) == (CALLBACK_MCODE_NSLOT-1) ||
+ slot == CALLBACK_MAX_SLOT-1) {
/* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
*p++ = XI_PUSH + RID_EBP;
*p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
*p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4;
#endif
} else {
- *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
+ *p++ = XI_JMPs;
+ *p++ = (uint8_t)(CALLBACK_MCODE_SLOTSZ*(CALLBACK_MCODE_NSLOT-1-(slot&(CALLBACK_MCODE_NSLOT-1))) - 2);
}
}
return p;
}
-#if LJ_CET_BR
-static void emit_endbr(ASMState *as)
+#if LJ_ABI_BRANCH_TRACK
+static void emit_branch_track(ASMState *as)
{
emit_u32(as, XI_ENDBR64);
}
|
|//-- Control-Flow Enforcement Technology (CET) --------------------------
|
-|.if CET_BR
+|.if BRANCH_TRACK
|.macro endbr; endbr64; .endmacro
|.else
|.macro endbr; .endmacro
|//-----------------------------------------------------------------------
|
|// Instruction headers.
-|.macro ins_A; endbr; .endmacro
-|.macro ins_AD; endbr; .endmacro
-|.macro ins_AJ; endbr; .endmacro
-|.macro ins_ABC; endbr; movzx RBd, RCH; movzx RCd, RCL; .endmacro
-|.macro ins_AB_; endbr; movzx RBd, RCH; .endmacro
-|.macro ins_A_C; endbr; movzx RCd, RCL; .endmacro
-|.macro ins_AND; endbr; not RD; .endmacro
+|.macro ins_A; .endmacro
+|.macro ins_AD; .endmacro
+|.macro ins_AJ; .endmacro
+|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
+|.macro ins_AB_; movzx RBd, RCH; .endmacro
+|.macro ins_A_C; movzx RCd, RCL; .endmacro
+|.macro ins_AND; not RD; .endmacro
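+|// Note: the endbr landing pad for each bytecode handler is emitted per
+|// opcode from build_ins() below, not from the instruction-header macros.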
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|.macro ins_NEXT
| jmp <3
|
|->vm_unwind_yield:
- | endbr
| mov al, LUA_YIELD
| jmp ->vm_unwind_c_eh
|
|->vm_unwind_c: // Unwind C stack, return from vm_pcall.
- | endbr
| // (void *cframe, int errcode)
+ | endbr
| mov eax, CARG2d // Error return status for vm_pcall.
| mov rsp, CARG1
|->vm_unwind_c_eh: // Landing pad for external unwinder.
|.endif
|
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
- | endbr
| // (void *cframe)
+ | endbr
| and CARG1, CFRAME_RAWMASK
| mov rsp, CARG1
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
|//-- Continuation dispatch ----------------------------------------------
|
|->cont_dispatch:
- | endbr
| // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
| add RA, BASE
| and PC, -8
|
|.macro .ffunc, name
|->ff_ .. name:
- | endbr
+ | endbr
|.endmacro
|
|.macro .ffunc_1, name
|
|->cont_stitch: // Trace stitching.
|.if JIT
- | endbr
| // BASE = base, RC = result, RB = mbase
+ | endbr
| mov TRACE:ITYPE, [RB-40] // Save previous trace.
| cleartp TRACE:ITYPE
| mov TMPRd, MULTRES
| jmp >1
|.endif
|->vm_exit_interp:
- | endbr
| // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
+ | endbr
|.if JIT
| // Restore additional callee-save registers only used in compiled code.
|.if X64WIN
|=>defop:
switch (op) {
+#if !LJ_HASJIT
+ case BC_FORL:
+ case BC_JFORI:
+ case BC_JFORL:
+ case BC_ITERL:
+ case BC_JITERL:
+ case BC_LOOP:
+ case BC_JLOOP:
+ case BC_FUNCF:
+ case BC_JFUNCF:
+ case BC_JFUNCV:
+#endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ break; /* Avoid redundant endbr instructions. */
+ default:
+ | endbr
+ break;
+ }
+
+ switch (op) {
/* -- Comparison ops ---------------------------------------------------- */
case BC_ITERN:
|.if JIT
- | endbr
| hotloop RBd
|.endif
|->vm_IITERN:
| jnz >7 // Not returning to a fixarg Lua func?
switch (op) {
case BC_RET:
- | endbr
|->BC_RET_Z:
| mov KBASE, BASE // Use KBASE for result move.
| sub RDd, 1
| ja >6
break;
case BC_RET1:
- | endbr
| mov RB, [BASE+RA]
| mov [BASE-16], RB
/* fallthrough */
case BC_RET0:
- | endbr
|5:
| cmp PC_RB, RDL // More results expected?
| ja >6
case BC_FORL:
|.if JIT
- | endbr
| hotloop RBd
|.endif
| // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
case BC_ITERL:
|.if JIT
- | endbr
| hotloop RBd
|.endif
| // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
case BC_FUNCF:
|.if JIT
- | endbr
| hotcall RBd
|.endif
case BC_FUNCV: /* NYI: compiled vararg functions. */
"\t.align 8\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
#endif
+#endif
+#if LJ_TARGET_LINUX && (LJ_ABI_BRANCH_TRACK || LJ_ABI_SHADOW_STACK)
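+  /* Mark the generated object with its IBT/SHSTK support via a GNU property
+  ** note: namesz=4, descsz=16, type=5 (NT_GNU_PROPERTY_TYPE_0), name "GNU\0",
+  ** then one GNU_PROPERTY_X86_FEATURE_1_AND (0xc0000002) property of size 4
+  ** whose value ORs together IBT (1) and SHSTK (2), padded to 8 bytes.
+  */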
+ fprintf(ctx->fp,
+ "\t.section .note.gnu.property,\"a\"\n"
+ "\t.align 8\n"
+ "\t.long 4\n"
+ "\t.long 16\n"
+ "\t.long 5\n"
+ "\t.long 0x00554e47\n"
+ "\t.long 0xc0000002\n"
+ "\t.long 4\n"
+ "\t.long %d\n"
+ "\t.long 0\n",
+#if LJ_ABI_BRANCH_TRACK
+ 1|
+#else
+ 0|
+#endif
+#if LJ_ABI_SHADOW_STACK
+ 2
+#else
+ 0
+#endif
+ );
#endif
break;
#if !LJ_NO_UNWIND