"u32",
"i64",
"u64",
+ "sfp",
}
local colortype_ansi = {
"\027[35m%s\027[m",
"\027[35m%s\027[m",
"\027[35m%s\027[m",
+ "\027[35m%s\027[m",
}
local function colorize_text(s, t)
if ref < 0 then
out:write(formatk(tr, ref))
else
- local m, ot, op1, op2 = traceir(tr, ref)
- out:write(colorize(format("%04d", ref), band(ot, 31)))
if band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
- local m, ot, op1, op2 = traceir(tr, ref+1)
- out:write(colorize(format("/%04d", ref+1), band(ot, 31)))
+ out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
+ else
+ local m, ot, op1, op2 = traceir(tr, ref)
+ out:write(colorize(format("%04d", ref), band(ot, 31)))
end
end
out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
- lj_lib.h
+ lj_vm.h lj_lib.h
lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
lj_state.h lj_lex.h lj_parse.h lj_char.h
IRRef1 phireg[RID_MAX]; /* PHI register references. */
uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */
+#if LJ_SOFTFP
+ uint16_t parentmaphi[LJ_MAX_JSLOTS]; /* Parent slot to hi RegSP map. */
+#endif
} ASMState;
#define IR(ref) (&as->ir[(ref)])
ra_modified(as, r);
ir->r = RID_INIT; /* Do not keep any hint. */
RA_DBGX((as, "remat $i $r", ir, r));
+#if !LJ_SOFTFP
if (ir->o == IR_KNUM) {
emit_loadn(as, r, ir_knum(ir));
- } else if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
+ } else
+#endif
+ if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
emit_getgl(as, r, jit_base);
} else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
return 0;
}
-/* Allocate registers or spill slots for refs escaping to a snapshot. */
+/* Allocate register or spill slot for a ref that escapes to a snapshot.
+** Nothing is done if ra_used() reports the ref as already allocated.
+** Otherwise the ref gets a weakly referenced register when one is free
+** (or, for FPRs, when asm_snap_canremat() says so), else a spill slot.
+*/
+static void asm_snap_alloc1(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (!ra_used(ir)) {
+ RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR; /* Soft-float: always GPR. */
+ /* Get a weak register if we have a free one or can rematerialize. */
+ if ((as->freeset & allow) ||
+ (allow == RSET_FPR && asm_snap_canremat(as))) {
+ Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
+ if (!irt_isphi(ir->t)) /* PHI refs are not marked weak. */
+ ra_weak(as, r); /* But mark it as weakly referenced. */
+ checkmclim(as);
+ RA_DBGX((as, "snapreg $f $r", ref, ir->r));
+ } else {
+ ra_spill(as, ir); /* Otherwise force a spill slot. */
+ RA_DBGX((as, "snapspill $f $s", ref, ir->s));
+ }
+ }
+}
+
+/* Allocate refs escaping to a snapshot. Constant refs are skipped. */
static void asm_snap_alloc(ASMState *as)
{
SnapShot *snap = &as->T->snap[as->snapno];
SnapEntry *map = &as->T->snapmap[snap->mapofs];
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
- IRRef ref = snap_ref(map[n]);
+ SnapEntry sn = map[n];
+ IRRef ref = snap_ref(sn);
if (!irref_isk(ref)) {
- IRIns *ir = IR(ref);
- if (!ra_used(ir)) {
- RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
- /* Get a weak register if we have a free one or can rematerialize. */
- if ((as->freeset & allow) ||
- (allow == RSET_FPR && asm_snap_canremat(as))) {
- Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
- if (!irt_isphi(ir->t))
- ra_weak(as, r); /* But mark it as weakly referenced. */
- checkmclim(as);
- RA_DBGX((as, "snapreg $f $r", ref, ir->r));
- } else {
- ra_spill(as, ir); /* Otherwise force a spill slot. */
- RA_DBGX((as, "snapspill $f $s", ref, ir->s));
- }
- }
+ asm_snap_alloc1(as, ref);
+ if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) /* Soft-float number entry: */
+ asm_snap_alloc1(as, ref+1); /* also allocate the following ref (ref+1). */
}
}
}
as->T->topslot = gcref(as->T->startpt)->pt.framesize;
}
+/* Get RegSP for parent slot.
+** On soft-float targets a HIOP takes its entry from the separate hiword
+** map, indexed by the op1 (slot) of the instruction directly preceding it.
+** Everything else is looked up in the regular map by its own op1.
+*/
+static LJ_AINLINE RegSP asm_head_parentrs(ASMState *as, IRIns *ir)
+{
+#if LJ_SOFTFP
+ if (ir->o == IR_HIOP) return as->parentmaphi[(ir-1)->op1];
+#endif
+ return as->parentmap[ir->op1];
+}
+
/* Head of a side trace.
**
** The current simplistic algorithm requires that all slots inherited
for (i = as->stopins; i > REF_BASE; i--) {
IRIns *ir = IR(i);
RegSP rs;
- lua_assert(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT));
- rs = as->parentmap[ir->op1];
+ lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+ (LJ_SOFTFP && ir->o == IR_HIOP));
+ rs = asm_head_parentrs(as, ir);
if (ra_hasreg(ir->r)) {
rset_clear(allow, ir->r);
if (ra_hasspill(ir->s))
}
as->T->spadjust = (uint16_t)spadj;
+#if !LJ_TARGET_X86ORX64
+ /* Restore BASE register from parent spill slot. */
+ if (ra_hasspill(irp->s))
+ emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, spdelta + sps_scale(irp->s));
+#endif
+
/* Reload spilled target registers. */
if (pass2) {
for (i = as->stopins; i > REF_BASE; i--) {
Reg r;
RegSP rs;
irt_clearmark(ir->t);
- rs = as->parentmap[ir->op1];
+ rs = asm_head_parentrs(as, ir);
if (!ra_hasspill(regsp_spill(rs)))
ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */
else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
continue; /* Same spill slot, do nothing. */
- mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow;
+ mask = ((!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
if (mask == RSET_EMPTY)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
r = ra_allocref(as, i, mask);
while (work) {
Reg r = rset_pickbot(work);
IRIns *ir = IR(regcost_ref(as->cost[r]));
- RegSP rs = as->parentmap[ir->op1];
+ RegSP rs = asm_head_parentrs(as, ir);
rset_clear(work, r);
if (ra_hasspill(regsp_spill(rs))) {
int32_t ofs = sps_scale(regsp_spill(rs));
(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
continue;
}
-#if LJ_32 && LJ_HASFFI
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
case IR_HIOP:
- if ((ir-1)->o == IR_CALLN) {
+ switch ((ir-1)->o) {
+#if LJ_SOFTFP
+ case IR_SLOAD:
+ if (((ir-1)->op2 & IRSLOAD_PARENT)) {
+ RegSP rs = as->parentmaphi[(ir-1)->op1];
+ lua_assert(regsp_used(rs));
+ as->stopins = i;
+ if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) {
+ ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
+ continue;
+ }
+ }
+ break;
+#endif
+ case IR_CALLN: case IR_CALLXS:
+#if LJ_SOFTFP
+ case IR_MIN: case IR_MAX:
+#endif
ir->prev = REGSP_HINT(RID_RETHI);
continue;
+ default:
+ break;
}
break;
+#endif
+#if LJ_SOFTFP
+ case IR_MIN: case IR_MAX:
+ if ((ir+1)->o != IR_HIOP) break;
+ /* fallthrough */
#endif
/* C calls evict all scratch regs and return results in RID_RET. */
case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
as->loopinv = 0;
if (J->parent) {
as->parent = traceref(J, J->parent);
- lj_snap_regspmap(as->parentmap, as->parent, J->exitno);
+ lj_snap_regspmap(as->parentmap, as->parent, J->exitno, 0);
+#if LJ_SOFTFP
+ lj_snap_regspmap(as->parentmaphi, as->parent, J->exitno, 1);
+#endif
} else {
as->parent = NULL;
}
static void LJ_FASTCALL recff_math_binary(jit_State *J, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, J->base[0]);
- J->base[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, J->base[1]));
+#if LJ_TARGET_X86ORX64
+ /* x86/x64: both operands stay numbers for every opcode. */
+ TRef tr2 = lj_ir_tonum(J, J->base[1]);
+#else
+ /* The opcode comes from rd->data, so this recorder is shared by several
+ ** binary math ops. Only IR_LDEXP is lowered to a call with an integer
+ ** 2nd argument; narrowing the 2nd operand of any other op (e.g. IR_ATAN2,
+ ** which takes two FP arguments) would silently truncate it.
+ */
+ TRef tr2 = (rd->data == IR_LDEXP) ? lj_opt_narrow_toint(J, J->base[1]) :
+ lj_ir_tonum(J, J->base[1]);
+#endif
+ /* Emit the number-typed binary instruction selected by rd->data. */
+ J->base[0] = emitir(IRTN(rd->data), tr, tr2);
}
/* Record math.asin, math.acos, math.atan. */
#include "lj_cdata.h"
#include "lj_carith.h"
#endif
+#include "lj_vm.h"
#include "lj_lib.h"
/* Some local macros to save typing. Undef'd at the end. */
_(NIL) _(FALSE) _(TRUE) _(LIGHTUD) _(STR) _(P32) _(THREAD) \
_(PROTO) _(FUNC) _(P64) _(CDATA) _(TAB) _(UDATA) \
_(FLOAT) _(NUM) _(I8) _(U8) _(I16) _(U16) _(INT) _(U32) _(I64) _(U64) \
- /* There is room for 10 more types. */
+ _(SOFTFP) /* There is room for 9 more types. */
/* IR result type and flags (8 bit). */
typedef enum {
#define CCI_FASTCALL 0x0800 /* Fastcall convention. */
/* Function definitions for CALL* instructions. */
+#if LJ_SOFTFP
+#if LJ_HASFFI
+#define IRCALLDEF_SOFTFP_FFI(_) \
+ _(softfp_ui2d, 1, N, NUM, 0) \
+ _(softfp_l2d, 2, N, NUM, 0) \
+ _(softfp_ul2d, 2, N, NUM, 0) \
+ _(softfp_f2d, 1, N, NUM, 0) \
+ _(softfp_d2ui, 2, N, INT, 0) \
+ _(softfp_d2l, 2, N, I64, 0) \
+ _(softfp_d2ul, 2, N, U64, 0) \
+ _(softfp_d2f, 2, N, FLOAT, 0) \
+ _(softfp_i2f, 1, N, FLOAT, 0) \
+ _(softfp_ui2f, 1, N, FLOAT, 0) \
+ _(softfp_l2f, 2, N, FLOAT, 0) \
+ _(softfp_ul2f, 2, N, FLOAT, 0) \
+ _(softfp_f2i, 1, N, INT, 0) \
+ _(softfp_f2ui, 1, N, INT, 0) \
+ _(softfp_f2l, 1, N, I64, 0) \
+ _(softfp_f2ul, 1, N, U64, 0)
+#else
+#define IRCALLDEF_SOFTFP_FFI(_)
+#endif
+#define IRCALLDEF_SOFTFP(_) \
+ _(lj_vm_tobit, 2, N, INT, 0) \
+ _(softfp_add, 4, N, NUM, 0) \
+ _(softfp_sub, 4, N, NUM, 0) \
+ _(softfp_mul, 4, N, NUM, 0) \
+ _(softfp_div, 4, N, NUM, 0) \
+ _(softfp_cmp, 4, N, NIL, 0) \
+ _(softfp_i2d, 1, N, NUM, 0) \
+ _(softfp_d2i, 2, N, INT, 0) \
+ IRCALLDEF_SOFTFP_FFI(_)
+#else
+#define IRCALLDEF_SOFTFP(_)
+#endif
+
+#if LJ_TARGET_X86ORX64
+/* Use lj_vm_* helpers and x87 ops. */
+#define IRCALLDEF_FPMATH(_)
+#else
+/* Use standard math library calls. */
+#if LJ_SOFTFP
+#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */
+#else
+#define ARG1_FP 1
+#endif
+/* ORDER FPM: SPLIT picks the call as IRCALL_lj_vm_floor + op2 of FPMATH. */
+#define IRCALLDEF_FPMATH(_) \
+ _(lj_vm_floor, ARG1_FP, N, NUM, 0) \
+ _(lj_vm_ceil, ARG1_FP, N, NUM, 0) \
+ _(lj_vm_trunc, ARG1_FP, N, NUM, 0) \
+ _(sqrt, ARG1_FP, N, NUM, 0) \
+ _(exp, ARG1_FP, N, NUM, 0) \
+ _(exp2, ARG1_FP, N, NUM, 0) \
+ _(log, ARG1_FP, N, NUM, 0) \
+ _(log2, ARG1_FP, N, NUM, 0) \
+ _(log10, ARG1_FP, N, NUM, 0) \
+ _(sin, ARG1_FP, N, NUM, 0) \
+ _(cos, ARG1_FP, N, NUM, 0) \
+ _(tan, ARG1_FP, N, NUM, 0) \
+ _(lj_vm_powi, ARG1_FP+1, N, NUM, 0) \
+ _(pow, ARG1_FP*2, N, NUM, 0) \
+ _(atan2, ARG1_FP*2, N, NUM, 0) \
+ _(ldexp, ARG1_FP+1, N, NUM, 0)
+#endif
+
#if LJ_HASFFI
#if LJ_32
#define ARG2_64 4 /* Treat as 4 32 bit arguments. */
#else
#define IRCALLDEF_FFI(_)
#endif
+
#define IRCALLDEF(_) \
_(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
_(lj_str_new, 3, S, STR, CCI_L) \
_(lj_gc_barrieruv, 2, FS, NIL, 0) \
_(lj_mem_newgco, 2, FS, P32, CCI_L) \
_(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
+ IRCALLDEF_SOFTFP(_) \
+ IRCALLDEF_FPMATH(_) \
IRCALLDEF_FFI(_) \
_(sinh, 1, N, NUM, 0) \
_(cosh, 1, N, NUM, 0) \
LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
+/* Soft-float declarations. */
+#if LJ_SOFTFP
+#if LJ_TARGET_ARM
+#define softfp_add __aeabi_dadd
+#define softfp_sub __aeabi_dsub
+#define softfp_mul __aeabi_dmul
+#define softfp_div __aeabi_ddiv
+#define softfp_cmp __aeabi_cdcmple
+#define softfp_i2d __aeabi_i2d
+#define softfp_ui2d __aeabi_ui2d
+#define softfp_l2d __aeabi_l2d
+#define softfp_ul2d __aeabi_ul2d
+#define softfp_f2d __aeabi_f2d
+#define softfp_d2i __aeabi_d2iz
+#define softfp_d2ui __aeabi_d2uiz
+#define softfp_d2l __aeabi_d2lz
+#define softfp_d2ul __aeabi_d2ulz
+#define softfp_d2f __aeabi_d2f
+#define softfp_i2f __aeabi_i2f
+#define softfp_ui2f __aeabi_ui2f
+#define softfp_l2f __aeabi_l2f
+#define softfp_ul2f __aeabi_ul2f
+#define softfp_f2i __aeabi_f2iz
+#define softfp_f2ui __aeabi_f2uiz
+#define softfp_f2l __aeabi_f2lz
+#define softfp_f2ul __aeabi_f2ulz
+#else
+#error "Missing soft-float definitions for target architecture"
+#endif
+extern double softfp_add(double a, double b);
+extern double softfp_sub(double a, double b);
+extern double softfp_mul(double a, double b);
+extern double softfp_div(double a, double b);
+extern void softfp_cmp(double a, double b);
+extern double softfp_i2d(int32_t a);
+extern double softfp_ui2d(uint32_t a);
+extern double softfp_l2d(int64_t a);
+extern double softfp_ul2d(uint64_t a);
+extern double softfp_f2d(float a);
+extern int32_t softfp_d2i(double a);
+extern uint32_t softfp_d2ui(double a);
+extern int64_t softfp_d2l(double a);
+extern uint64_t softfp_d2ul(double a);
+extern float softfp_d2f(double a);
+extern float softfp_i2f(int32_t a);
+extern float softfp_ui2f(uint32_t a);
+extern float softfp_l2f(int64_t a);
+extern float softfp_ul2f(uint64_t a);
+extern int32_t softfp_f2i(float a);
+extern uint32_t softfp_f2ui(float a);
+extern int64_t softfp_f2l(float a);
+extern uint64_t softfp_f2ul(float a);
+#endif
+
#endif
/* Optimization passes. */
LJ_FUNC void lj_opt_dce(jit_State *J);
LJ_FUNC int lj_opt_loop(jit_State *J);
-#if LJ_HASFFI && LJ_32
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
LJ_FUNC void lj_opt_split(jit_State *J);
#else
#define lj_opt_split(J) UNUSED(J)
((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
/* Set/reset flag to activate the SPLIT pass for the current trace. */
-#if LJ_32 && LJ_HASFFI
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
#define lj_needsplit(J) (J->needsplit = 1)
#define lj_resetsplit(J) (J->needsplit = 0)
#else
MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
PostProc postproc; /* Required post-processing after execution. */
-#if LJ_32 && LJ_HASFFI
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
int needsplit; /* Need SPLIT pass. */
#endif
#include "lj_obj.h"
-#if LJ_HASJIT && LJ_HASFFI && LJ_32
+#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
#include "lj_err.h"
#include "lj_str.h"
/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
-** instructions. It's only active for 32 bit CPUs which lack native 64 bit
-** operations. The FFI is currently the only emitter for 64 bit
-** instructions, so this pass is disabled if the FFI is disabled.
+** instructions. It's only active for soft-float targets or for 32 bit CPUs
+** which lack native 64 bit integer operations (the FFI is currently the
+** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
-** instruction independent of any other instruction, which improves code
+** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
-** when zero-extending a 32 bit value to 64 bits.
+** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
+** are split up into two 32 bit arguments each.
+**
+** On soft-float targets, floating-point instructions are directly converted
+** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
+** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
return nref;
}
-/* Emit a CALLN with two split 64 bit arguments. */
-static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
+#if LJ_SOFTFP
+/* Emit a CALLN with one split 64 bit argument.
+** The loword and hiword refs of op1 are paired in a CARG (word order
+** depends on endianness) and passed to call 'id'. The CALLN ref is stored
+** in ir->prev; the returned ref is a HIOP of type IRT_SOFTFP on that call.
+*/
+static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
IRIns *ir, IRCallID id)
+{
+ IRRef tmp, op1 = ir->op1;
+ J->cur.nins--; /* Drop the last emitted instruction; the call replaces it. */
+#if LJ_LE
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
+#else
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
+#endif
+ ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
+ return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
+}
+
+/* Emit a CALLN with one split 64 bit argument and a 32 bit argument.
+** The loword and hiword refs of op1 are paired in a CARG (word order
+** depends on endianness), then the substituted ref of op2 is appended as
+** a further argument. The CALLN ref is stored in ir->prev; the returned
+** ref is a HIOP of type IRT_SOFTFP on that call.
+*/
+static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
+ IRIns *ir, IRCallID id)
+{
+ IRRef tmp, op1 = ir->op1, op2 = ir->op2;
+ J->cur.nins--; /* Drop the last emitted instruction; the call replaces it. */
+#if LJ_LE
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
+#else
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
+#endif
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); /* 32 bit arg. */
+ ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
+ return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
+}
+#endif
+
+/* Emit a CALLN with two split 64 bit arguments. */
+static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
+ IRIns *ir, IRCallID id)
{
IRRef tmp, op1 = ir->op1, op2 = ir->op2;
J->cur.nins--;
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
- return split_emit(J, IRTI(IR_HIOP), tmp, tmp);
+ return split_emit(J,
+ IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
+ tmp, tmp);
}
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
/* Process constants and fixed references. */
for (ref = nk; ref <= REF_BASE; ref++) {
IRIns *ir = &oir[ref];
- if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */
+ if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
+ /* Split up 64 bit constant. */
TValue tv = *ir_k64(ir);
ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
hisubst[ref] = 0;
/* Split 64 bit instructions. */
+#if LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
+ /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
+ switch (ir->o) {
+ case IR_ADD:
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
+ break;
+ case IR_SUB:
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
+ break;
+ case IR_MUL:
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
+ break;
+ case IR_DIV:
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
+ break;
+ case IR_POW:
+ hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
+ break;
+ case IR_FPMATH:
+ hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
+ break;
+ case IR_ATAN2:
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
+ break;
+ case IR_LDEXP:
+ hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
+ break;
+ case IR_NEG: case IR_ABS:
+ nir->o = IR_CONV; /* Pass through loword. */
+ nir->op2 = (IRT_INT << 5) | IRT_INT;
+ hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
+ hisubst[ir->op1], hisubst[ir->op2]);
+ break;
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+ case IR_MIN: case IR_MAX:
+ hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
+ break;
+ case IR_XLOAD:
+ hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP),
+ split_ptr(J, nir->op1), ir->op2);
+#if LJ_BE
+ ir->prev = hi; hi = nref;
+#endif
+ break;
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
+ split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
+ break;
+ case IR_XSTORE: {
+#if LJ_LE
+ IRRef hiref = hisubst[ir->op2];
+#else
+ IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
+#endif
+ split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
+ split_ptr(J, nir->op1), hiref);
+ break;
+ }
+ case IR_CONV: { /* Conversion to number. Others handled below. */
+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if LJ_32 && LJ_HASFFI
+ if (st == IRT_I64 || st == IRT_U64) {
+ hi = split_call_l(J, hisubst, oir, ir,
+ st == IRT_I64 ? IRCALL_softfp_l2d : IRCALL_softfp_ul2d);
+ break;
+ }
+#endif
+ lua_assert(st == IRT_INT ||
+ (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
+ nir->o = IR_CALLN;
+#if LJ_32 && LJ_HASFFI
+ nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
+ st == IRT_FLOAT ? IRCALL_softfp_f2d :
+ IRCALL_softfp_ui2d;
+#else
+ nir->op2 = IRCALL_softfp_i2d;
+#endif
+ hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
+ break;
+ }
+ case IR_CALLS:
+ case IR_CALLXS:
+ goto split_call;
+ case IR_PHI:
+ if (nir->op1 == nir->op2)
+ J->cur.nins--; /* Drop useless PHIs. */
+ if (hisubst[ir->op1] != hisubst[ir->op2])
+ split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
+ hisubst[ir->op1], hisubst[ir->op2]);
+ break;
+ default:
+ lua_assert(ir->o <= IR_NE);
+ split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
+ hisubst[ir->op1], hisubst[ir->op2]);
+ break;
+ }
+ } else
+#endif
+#if LJ_32 && LJ_HASFFI
if (irt_isint64(ir->t)) {
IRRef hiref = hisubst[ir->op1];
nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
break;
case IR_MUL:
- hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
break;
case IR_DIV:
- hi = split_call64(J, hisubst, oir, ir,
- irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
- IRCALL_lj_carith_divu64);
+ hi = split_call_ll(J, hisubst, oir, ir,
+ irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+ IRCALL_lj_carith_divu64);
break;
case IR_MOD:
- hi = split_call64(J, hisubst, oir, ir,
- irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
- IRCALL_lj_carith_modu64);
+ hi = split_call_ll(J, hisubst, oir, ir,
+ irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+ IRCALL_lj_carith_modu64);
break;
case IR_POW:
- hi = split_call64(J, hisubst, oir, ir,
- irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
- IRCALL_lj_carith_powu64);
+ hi = split_call_ll(J, hisubst, oir, ir,
+ irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+ IRCALL_lj_carith_powu64);
break;
case IR_FLOAD:
lua_assert(ir->op2 == IRFL_CDATA_INT64);
break;
case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if LJ_SOFTFP
+ if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
+ split_call_l(J, hisubst, oir, ir,
+ irt_isi64(ir->t) ? IRCALL_softfp_d2l : IRCALL_softfp_d2ul);
+ } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
+ nir->o = IR_CALLN;
+ nir->op2 = irt_isi64(ir->t) ? IRCALL_softfp_f2l : IRCALL_softfp_f2ul;
+ hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
+ }
+#else
if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
- } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
+ }
+#endif
+ else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
/* Drop cast, since assembler doesn't care. */
goto fwdlo;
} else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
break;
}
- } else if (ir->o == IR_CONV) { /* See above, too. */
+ } else
+#endif
+#if LJ_SOFTFP
+ if (ir->o == IR_TOBIT) {
+ IRRef tmp, op1 = ir->op1;
+ J->cur.nins--;
+#if LJ_LE
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
+#else
+ tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
+#endif
+ ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
+ } else
+#endif
+ if (ir->o == IR_CONV) { /* See above, too. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if LJ_32 && LJ_HASFFI
if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */
+#if LJ_SOFTFP
+ if (irt_isfloat(ir->t)) {
+ split_call_l(J, hisubst, oir, ir,
+ st == IRT_I64 ? IRCALL_softfp_l2f : IRCALL_softfp_ul2f);
+ J->cur.nins--; /* Drop unused HIOP. */
+ }
+#else
if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */
ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
hisubst[ir->op1], nref);
- } else { /* Truncate to lower 32 bits. */
+ }
+#endif
+ else { /* Truncate to lower 32 bits. */
fwdlo:
ir->prev = nir->op1; /* Forward loword. */
/* Replace with NOP to avoid messing up the snapshot logic. */
nir->op1 = nir->op2 = 0;
}
}
+#endif
+#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
+ else if (irt_isfloat(ir->t)) {
+ if (st == IRT_NUM) {
+ split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
+ J->cur.nins--; /* Drop unused HIOP. */
+ } else {
+ nir->o = IR_CALLN;
+ nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
+ }
+ } else if (st == IRT_FLOAT) {
+ nir->o = IR_CALLN;
+ nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
+ } else
+#endif
+#if LJ_SOFTFP
+ if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
+ if (irt_isguard(ir->t)) {
+ lua_assert(0); /* NYI: missing check. */
+ }
+ split_call_l(J, hisubst, oir, ir,
+#if LJ_32 && LJ_HASFFI
+ st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
+#else
+ IRCALL_softfp_d2i
+#endif
+ );
+ J->cur.nins--; /* Drop unused HIOP. */
+ }
+#endif
} else if (ir->o == IR_CALLXS) {
IRRef hiref;
split_call:
#endif
ir->prev = nref = split_emit(J, ot, nref, op2);
}
- if (irt_isint64(ir->t))
- hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
+ if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
+ hi = split_emit(J,
+ IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
+ nref, nref);
} else if (ir->o == IR_CARG) {
IRRef hiref = hisubst[ir->op1];
if (hiref) {
return NULL;
}
-#ifdef LUA_USE_ASSERT
+#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
IRIns *ir, *irend;
IRRef ref;
for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
- if (irt_isint64(ir->t))
+ if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
return 1;
for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev)
- if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
+ if ((LJ_SOFTFP && (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_NUM) ||
+ (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64)
return 1;
return 0; /* Nope. */
/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
+#if LJ_SOFTFP
+ if (!J->needsplit)
+ J->needsplit = split_needsplit(J);
+#else
lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */
+#endif
if (J->needsplit) {
int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
if (errcode) {
IRRef ref = snap_ref(sn);
BCReg s = snap_slot(sn);
IRIns *ir = &T->ir[ref];
+ IRType t = irt_type(ir->t);
TRef tr;
/* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
if (bloomtest(seen, ref)) {
bloomset(seen, ref);
switch ((IROp)ir->o) {
/* Only have to deal with constants that can occur in stack slots. */
- case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
+ case IR_KPRI: tr = TREF_PRI(t); break;
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break;
case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break; /* Continuation. */
/* Inherited SLOADs don't need a guard or type check. */
case IR_SLOAD:
- tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
+ if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
+ tr = emitir_raw(IRT(IR_SLOAD, t), s,
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
/* Parent refs are already typed and don't need a guard. */
default:
- tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
- IRSLOAD_INHERIT|IRSLOAD_PARENT);
+ if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
+ tr = emitir_raw(IRT(IR_SLOAD, t), s, IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
}
setslot:
/* Convert a snapshot into a linear slot -> RegSP map.
** Note: unused slots are not initialized!
*/
-void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno)
+void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, int hi)
{
SnapShot *snap = &T->snap[snapno];
MSize n, nent = snap->nent;
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
IRRef ref = snap_ref(sn);
- if (!irref_isk(ref)) {
+ if ((LJ_SOFTFP && hi) ? (ref++, (sn & SNAP_SOFTFPNUM)) : !irref_isk(ref)) {
IRIns *ir = &T->ir[ref];
uint32_t rs = ir->prev;
if (bloomtest(rfilt, ref))
LJ_FUNC void lj_snap_add(jit_State *J);
LJ_FUNC void lj_snap_purge(jit_State *J);
LJ_FUNC void lj_snap_shrink(jit_State *J);
-LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno);
+LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno,
+ int hi);
LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);