From: Mike Pall Date: Thu, 27 Nov 2025 16:45:17 +0000 (+0100) Subject: Unify Lua number to FFI integer conversions. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f80b349d5490aa289b2925d297f3f3c618977570;p=thirdparty%2FLuaJIT.git Unify Lua number to FFI integer conversions. Phew. #1411 --- diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index cd533e8c..cd72da21 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -338,42 +338,44 @@ pointer or type compatibility: Integer→rounddouble, float -double, floattrunc int32_tnarrow(u)int8_t, (u)int16_t +double, floattrunc int64_tnarrow *(u)int8_t, (u)int16_t, (u)int32_t -double, floattrunc(u)int32_t, (u)int64_t +double, floattruncint64_t +double, floattrunc uint64_t ∪ int64_t →reinterpret *uint64_t + double, floatroundfloat, double - + Numbern == 0 → 0, otherwise 1bool - + boolfalse → 0, true → 1Number - + Complex numberconvert real partNumber - -Numberconvert real part, imag = 0Complex number +Numberconvert real part, imag = 0Complex number + Complex numberconvert real and imag partComplex number - + Numberconvert scalar and replicateVector - + Vectorcopy (same size)Vector - + struct/uniontake base address (compat)Pointer - -Arraytake base address (compat)Pointer +Arraytake base address (compat)Pointer + Functiontake function addressFunction pointer - + Numberconvert via uintptr_t (cast)Pointer - -Pointerconvert address (compat/cast)Pointer -Pointerconvert address (cast)Integer +Pointerconvert address (compat/cast)Pointer +Pointerconvert address (cast)Integer + Arrayconvert base address (cast)Integer - + Arraycopy (compat)Array - + struct/unioncopy (identical type)struct/union

@@ -384,6 +386,24 @@ type. Conversions not listed above will raise an error. E.g. it's not possible to convert a pointer to a complex number or vice versa.

+

+* Some conversions from double have a larger defined range to
+allow for mixed-signedness conversions, which are common in C code.
+E.g. initializing an int32_t field with 0xffffffff
+or initializing a uint32_t or uint64_t field with
+-1. Under strict conversion rules, these assignments would
+give undefined results, since Lua numbers are doubles. The extended
+ranges make these conversions defined. Lua numbers even outside
+the extended ranges give an architecture-specific result.
+

+

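To make the extended-range rule concrete, here is a minimal C sketch of the
"trunc int64_t, narrow" path from the table above (num_to_i32 is a
hypothetical helper for illustration, not the shipped code):

#include <stdint.h>

/* Sketch of the unified rule: truncate the double to a signed 64 bit
** intermediate first, then narrow to the destination field width.
*/
static int32_t num_to_i32(double n)
{
  int64_t i = (int64_t)n;  /* trunc to int64_t (n is in range here) */
  return (int32_t)i;       /* narrow with 2^32 wrap-around, as LuaJIT
                           ** defines it; plain C leaves this
                           ** implementation-defined. */
}

/* num_to_i32(4294967295.0) == -1 on the usual two's-complement targets,
** so an int32_t field initialized with 0xffffffff holds the expected
** bit pattern; likewise a uint64_t field initialized with -1 ends up
** as 2^64-1.
*/
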
+Please note that doubles do not have the precision to represent the +whole signed or unsigned 64 bit integer range. Beware of large hex +constants in particular: e.g. 0xffffffffffffffff is a double +rounded up to 0x1p64 during parsing. This will not +convert to a defined 64 bit integer value. Use the 64 bit literal +syntax instead, i.e. 0xffffffffffffffffULL. +

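A short C sketch of the rounding pitfall (the literal below stands in for
what the Lua parser produces from the hex constant):

/* 2^64-1 is not representable as a double, so it rounds up to 2^64. */
double d = 18446744073709551615.0;  /* parses as 0x1p64 == 2^64 exactly */
/* d == 18446744073709551616.0 is true; any conversion of d to uint64_t
** is out of range, hence the arch-specific result noted above. The
** cdata literal 0xffffffffffffffffULL keeps the exact value instead.
*/
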
Conversions for vararg C function arguments

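The library and core changes below all route through the new checked
conversion declared in lj_obj.h further down. Its documented contract can
be sketched portably like this (num2int_check_ref is an illustrative
stand-in for the assembler routine lj_vm_num2int_check, not the shipped
implementation):

#include <stdint.h>

/* Reference semantics: exact int32_t results come back zero-extended
** (always >= 0); anything else returns the sentinel
** 0x8000000080000000LL (< 0), which simplifies range checks.
*/
static int64_t num2int_check_ref(double x)
{
  if (x >= -2147483648.0 && x < 2147483648.0) {
    int32_t i = (int32_t)x;         /* in-range truncation is defined */
    if ((double)i == x)             /* exact? (-0 passes, NaN cannot) */
      return (int64_t)(uint32_t)i;  /* zero-extended success value */
  }
  return (int64_t)0x8000000080000000ULL;  /* failure sentinel */
}

/* Callers then narrow on success, mirroring the lj_num2int_check macro:
**   int64_t i64 = num2int_check_ref(n);
**   if (i64 >= 0) { int32_t i = (int32_t)i64; ... }
*/
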
diff --git a/src/lib_io.c b/src/lib_io.c index 5659ff51..ec7d2545 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -127,8 +127,9 @@ static int io_file_readnum(lua_State *L, FILE *fp) lua_Number d; if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { if (LJ_DUALNUM) { - int32_t i = lj_num2int(d); - if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) { + int64_t i64; + int32_t i; + if (lj_num2int_check(d, i64, i) && !tvismzero((cTValue *)&d)) { setintV(L->top++, i); return 1; } @@ -335,7 +336,7 @@ LJLIB_CF(io_method_seek) if (tvisint(o)) ofs = (int64_t)intV(o); else if (tvisnum(o)) - ofs = (int64_t)numV(o); + ofs = lj_num2i64(numV(o)); else if (!tvisnil(o)) lj_err_argt(L, 3, LUA_TNUMBER); } diff --git a/src/lib_os.c b/src/lib_os.c index ae3fc857..fffc923e 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -171,7 +171,8 @@ static int getfield(lua_State *L, const char *key, int d) LJLIB_CF(os_date) { const char *s = luaL_optstring(L, 1, "%c"); - time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); + time_t t = lua_isnoneornil(L, 2) ? time(NULL) : + lj_num2int_type(luaL_checknumber(L, 2), time_t); struct tm *stm; #if LJ_TARGET_POSIX struct tm rtm; @@ -253,8 +254,9 @@ LJLIB_CF(os_time) LJLIB_CF(os_difftime) { - lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), - (time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); + lua_pushnumber(L, + difftime(lj_num2int_type(luaL_checknumber(L, 1), time_t), + lj_num2int_type(luaL_optnumber(L, 2, (lua_Number)0), time_t))); return 1; } diff --git a/src/lj_api.c b/src/lj_api.c index e9fc25b4..94d8bc7e 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -416,11 +416,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) return intV(&tmp); n = numV(&tmp); } -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) @@ -445,11 +441,7 @@ LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) n = numV(&tmp); } if (ok) *ok = 1; -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) @@ -468,11 +460,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) @@ -493,11 +481,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) return (lua_Integer)intV(&tmp); n = numV(&tmp); } -#if LJ_64 - return (lua_Integer)n; -#else - return lj_num2int(n); -#endif + return lj_num2int_type(n, lua_Integer); } LUA_API int lua_toboolean(lua_State *L, int idx) diff --git a/src/lj_asm.c b/src/lj_asm.c index 0e888c29..8f7ae9a3 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1329,27 +1329,32 @@ static void asm_conv64(ASMState *as, IRIns *ir) IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); IRCallID id; + const CCallInfo *ci; +#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP + CCallInfo cim; +#endif IRRef args[2]; lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); args[LJ_BE] = (ir-1)->op1; args[LJ_LE] = ir->op1; - if (st == IRT_NUM || st == IRT_FLOAT) { - id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 
2 : 0) + (dt - IRT_I64); + lj_assertA(st != IRT_FLOAT, "bad CONV *64.float emitted"); + if (st == IRT_NUM) { + id = IRCALL_lj_vm_num2u64; ir--; + ci = &lj_ir_callinfo[id]; } else { id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); - } - { #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP - CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; + cim = lj_ir_callinfo[id]; cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ + ci = &cim; #else - const CCallInfo *ci = &lj_ir_callinfo[id]; + ci = &lj_ir_callinfo[id]; #endif - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); } + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); } #endif diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 406360d2..1ddd2b3e 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -624,10 +624,9 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg dest = ra_dest(as, ir, RSET_GPR); ARMIns ai; + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); - ai = irt_isint(ir->t) ? - (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : - (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); + ai = st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32; emit_dm(as, ai, (tmp & 15), (left & 15)); } } else diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index fdcff1db..507fc084 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -648,14 +648,18 @@ static void asm_conv(ASMState *as, IRIns *ir) } else { Reg left = ra_alloc1(as, lref, RSET_FPR); Reg dest = ra_dest(as, ir, RSET_GPR); - A64Ins ai = irt_is64(ir->t) ? - (st == IRT_NUM ? - (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : - (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : - (st == IRT_NUM ? - (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : - (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); - emit_dn(as, ai, dest, (left & 31)); + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); + if (irt_isu64(ir->t)) { + emit_dnm(as, A64I_CSELx | A64F_CC(CC_VC), dest, dest, RID_TMP); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), dest); + emit_dn(as, st == IRT_NUM ? A64I_FCVT_U64_F64 : A64I_FCVT_U64_F32, RID_TMP, (left & 31)); + emit_dn(as, st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32, dest, (left & 31)); + } else { + A64Ins ai = irt_is64(ir->t) ? + (st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32) : + (st == IRT_NUM ? A64I_FCVT_S32_F64 : A64I_FCVT_S32_F32); + emit_dn(as, ai, dest, (left & 31)); + } } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg dest = ra_dest(as, ir, RSET_GPR); diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 8dadabe4..36ed5de4 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -635,64 +635,38 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, lref, RSET_FPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ - /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ - emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); - emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); - emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D, - tmp, tmp); - emit_fgh(as, st == IRT_FLOAT ? 
MIPSI_SUB_S : MIPSI_SUB_D, - tmp, left, tmp); - if (st == IRT_FLOAT) - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - else - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); #if LJ_64 - } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ - MCLabel l_end; + if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ + MCLabel l_end = emit_label(as); emit_tg(as, MIPSI_DMFC1, dest, tmp); - l_end = emit_label(as); - /* For inputs >= 2^63 add -2^64 and convert again. */ + /* For result == INT64_MAX add -2^64 and convert again. */ if (st == IRT_NUM) { emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ -#if !LJ_TARGET_MIPSR6 - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); -#else - emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); - emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp); -#endif - emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)&as->J->k64[LJ_K64_2P63], - rset_exclude(RSET_GPR, dest)); + rset_exclude(RSET_GPR, dest)); /* Delay slot. */ + emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */ + emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1); + emit_ti(as, MIPSI_LI, RID_TMP, -1); + emit_tg(as, MIPSI_DMFC1, dest, tmp); + emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); } else { emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_M2P64], - rset_exclude(RSET_GPR, dest)); - emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ -#if !LJ_TARGET_MIPSR6 - emit_branch(as, MIPSI_BC1T, 0, 0, l_end); - emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); -#else - emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end); - emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp); -#endif - emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P63], - rset_exclude(RSET_GPR, dest)); + rset_exclude(RSET_GPR, dest)); /* Delay slot. */ + emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */ + emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1); + emit_ti(as, MIPSI_LI, RID_TMP, -1); + emit_tg(as, MIPSI_DMFC1, dest, tmp); + emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); } + } else #endif - } else { + { #if LJ_32 emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, @@ -733,13 +707,11 @@ static void asm_conv(ASMState *as, IRIns *ir) "bad type for checked CONV"); asm_tointg(as, ir, RID_NONE); } else { - IRCallID cid = irt_is64(ir->t) ? - ((st == IRT_NUM) ? - (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : - (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : - ((st == IRT_NUM) ? - (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : - (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); + IRCallID cid; + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); + lj_assertA(!(irt_is64(ir->t) && st != IRT_NUM), "bad CONV *64.float emitted"); + cid = irt_is64(ir->t) ? IRCALL_lj_vm_num2u64 : + (st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i); asm_callid(as, ir, cid); } } else @@ -780,7 +752,10 @@ static void asm_conv(ASMState *as, IRIns *ir) } } } else { - if (st64 && !(ir->op2 & IRCONV_NONE)) { + if (!irt_isu32(ir->t)) { /* Implicit sign extension. 
*/ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dta(as, MIPSI_SLL, dest, left, 0); + } else if (st64 && !(ir->op2 & IRCONV_NONE)) { /* This is either a 32 bit reg/reg mov which zeroes the hiword ** or a load of the loword from a 64 bit address. */ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index d77c45ce..9e2af414 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -512,29 +512,10 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, lref, RSET_FPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { - /* Convert both x and x-2^31 to int and merge results. */ - Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); - emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ - emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP); - emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP); - emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */ - emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */ - emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */ - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_tai(as, PPCI_LWZ, dest, - RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */ - emit_fb(as, PPCI_FCTIWZ, tmp, left); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, tmp); - emit_fab(as, PPCI_FSUB, tmp, left, tmp); - emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); - } else { - emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); - emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); - emit_fb(as, PPCI_FCTIWZ, tmp, left); - } + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); + emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); + emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); + emit_fb(as, PPCI_FCTIWZ, tmp, left); } } else #endif diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index f3c2238a..bdbce116 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -905,29 +905,28 @@ static void asm_conv(ASMState *as, IRIns *ir) } else { Reg dest = ra_dest(as, ir, RSET_GPR); x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; - if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { - /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ - /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ + lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); +#if LJ_64 + if (irt_isu64(ir->t)) { + /* For the indefinite result -2^63, add -2^64 and convert again. */ Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : ra_scratch(as, RSET_FPR); MCLabel l_end = emit_label(as); - if (LJ_32) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); emit_rr(as, op, dest|REX_64, tmp); if (st == IRT_NUM) - emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); + emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64]); else - emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); - emit_sjcc(as, CC_NS, l_end); - emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ + emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64]); + emit_sjcc(as, CC_NO, l_end); + emit_gmrmi(as, XG_ARITHi(XOg_CMP), dest|REX_64, 1); emit_rr(as, op, dest|REX_64, tmp); ra_left(as, tmp, lref); - } else { - if (LJ_64 && irt_isu32(ir->t)) - emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ + + } else +#endif + { emit_mrm(as, op, - dest|((LJ_64 && - (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), + dest|((LJ_64 && irt_is64(ir->t)) ? 
REX_64 : 0), asm_fuseload(as, lref, RSET_FPR)); } } @@ -1020,6 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); Reg lo, hi; + int usehi = ra_used(ir); lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); hi = ra_dest(as, ir, RSET_GPR); @@ -1032,21 +1032,24 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); } if (dt == IRT_U64) { - /* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ + /* For the indefinite result -2^63, add -2^64 and convert again. */ MCLabel l_pop, l_end = emit_label(as); emit_x87op(as, XI_FPOP); l_pop = emit_label(as); emit_sjmp(as, l_end); - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); + if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4); if ((as->flags & JIT_F_SSE3)) emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); else emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); - emit_sjcc(as, CC_NS, l_pop); - emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ - } - emit_rmro(as, XO_MOV, hi, RID_ESP, 4); + emit_rma(as, XO_FADDd, XOg_FADDd, &as->J->k32[LJ_K32_M2P64]); + emit_sjcc(as, CC_NE, l_pop); + emit_gmroi(as, XG_ARITHi(XOg_CMP), RID_ESP, 0, 0); + emit_sjcc(as, CC_NO, l_pop); + emit_gmrmi(as, XG_ARITHi(XOg_CMP), hi, 1); + usehi = 1; + } + if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4); if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); } else { /* Otherwise set FPU rounding mode to truncate before the store. */ diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index ec6f13c8..cd7ae942 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -59,9 +59,9 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) p = lj_strfmt_wuleb128(p, intV(o)); } else if (tvisnum(o)) { if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ - lua_Number num = numV(o); - int32_t k = lj_num2int(num); - if (num == (lua_Number)k) { /* -0 is never a constant. */ + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */ *p++ = BCDUMP_KTAB_INT; p = lj_strfmt_wuleb128(p, k); ctx->sb.w = p; @@ -270,9 +270,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { /* Narrow number constants to integers. */ - lua_Number num = numV(o); - k = lj_num2int(num); - if (num == (lua_Number)k) { /* -0 is never a constant. */ + int64_t i64; + if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */ save_int: p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); if (k < 0) diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 854b51db..2b9349cd 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -197,18 +197,16 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, else goto err_conv; /* NYI: long double. */ /* Then convert double to integer. */ /* The conversion must exactly match the semantics of JIT-compiled code! */ - if (dsize < 4 || (dsize == 4 && !(dinfo & CTF_UNSIGNED))) { - int32_t i = (int32_t)n; + if (dsize < 8) { + int64_t i = lj_num2i64(n); /* Always convert via int64_t. 
*/ if (dsize == 4) *(int32_t *)dp = i; else if (dsize == 2) *(int16_t *)dp = (int16_t)i; else *(int8_t *)dp = (int8_t)i; - } else if (dsize == 4) { - *(uint32_t *)dp = (uint32_t)n; } else if (dsize == 8) { - if (!(dinfo & CTF_UNSIGNED)) - *(int64_t *)dp = (int64_t)n; - else + if ((dinfo & CTF_UNSIGNED)) *(uint64_t *)dp = lj_num2u64(n); + else + *(int64_t *)dp = lj_num2i64(n); } else { goto err_conv; /* NYI: conversion to >64 bit integers. */ } diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 3b48f76c..2dc56a80 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -133,12 +133,7 @@ collect_attrib: idx = (ptrdiff_t)intV(key); goto integer_key; } else if (tvisnum(key)) { /* Numeric key. */ -#ifdef _MSC_VER - /* Workaround for MSVC bug. */ - volatile -#endif - lua_Number n = numV(key); - idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); + idx = lj_num2int_type(numV(key), ptrdiff_t); integer_key: if (ctype_ispointer(ct->info)) { CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 27f2c1dd..45c559cf 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -445,7 +445,20 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, /* fallthrough */ case CCX(I, F): if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; - sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); + conv_I_F: +#if LJ_SOFTFP || LJ_32 + if (st == IRT_FLOAT) { /* Uncommon. Simplify split backends. */ + sp = emitconv(sp, IRT_NUM, IRT_FLOAT, 0); + st = IRT_NUM; + } +#endif + if (dsize < 8) { + lj_needsplit(J); + sp = emitconv(sp, IRT_I64, st, IRCONV_ANY); + sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, IRT_I64, 0); + } else { + sp = emitconv(sp, dt, st, IRCONV_ANY); + } goto xstore; case CCX(I, P): case CCX(I, A): @@ -523,10 +536,9 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, goto xstore; case CCX(P, F): if (st == IRT_CDATA) goto err_nyi; - /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ - sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, - st, IRCONV_ANY); - goto xstore; + /* The signed 64 bit conversion is cheaper. */ + dt = (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32; + goto conv_I_F; /* Destination is an array. */ case CCX(A, A): @@ -1878,7 +1890,7 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) if (J->base[0] && tref_iscdata(J->base[1])) { tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), J->base[1], &rd->argv[1]); - if (!tref_isinteger(tsh)) + if (LJ_32 && !tref_isinteger(tsh)) tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); J->base[1] = tsh; } @@ -1886,15 +1898,17 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) if (id) { TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); uint32_t op = rd->data; + IRType t; if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); + t = tref_isinteger(tsh) ? IRT_INT : tref_type(tsh); if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && !tref_isk(tsh)) - tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); + tsh = emitir(IRT(IR_BAND, t), tsh, lj_ir_kint(J, 63)); #ifdef LJ_TARGET_UNIFYROT - if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { - op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; - tsh = emitir(IRTI(IR_NEG), tsh, tsh); - } + if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { + op = LJ_TARGET_UNIFYROT == 1 ? 
IR_BROL : IR_BROR; + tsh = emitir(IRT(IR_NEG, t), tsh, tsh); + } #endif tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); diff --git a/src/lj_def.h b/src/lj_def.h index a9e23729..f34b1a39 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -127,6 +127,7 @@ typedef uintptr_t BloomFilter; #define LJ_INLINE inline #define LJ_AINLINE inline __attribute__((always_inline)) #define LJ_NOINLINE __attribute__((noinline)) +#define LJ_CONSTF __attribute__((nothrow,const)) #if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) #if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) @@ -245,6 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p) #define LJ_INLINE __inline #define LJ_AINLINE __forceinline #define LJ_NOINLINE __declspec(noinline) +#define LJ_CONSTF __declspec(nothrow noalias) #if defined(_M_IX86) #define LJ_FASTCALL __fastcall #endif diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 527b6c06..290986f6 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -70,7 +70,7 @@ static int32_t argv2int(jit_State *J, TValue *o) { if (!lj_strscan_numberobj(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); - return tvisint(o) ? intV(o) : lj_num2int(numV(o)); + return numberVint(o); } /* Get runtime value of string argument. */ @@ -586,7 +586,7 @@ static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd) /* Result is integral (or NaN/Inf), but may not fit an int32_t. */ if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); - if (n == (lua_Number)lj_num2int(n)) + if (lj_num2int_ok(n)) tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); } J->base[0] = tr; diff --git a/src/lj_ir.c b/src/lj_ir.c index e7a5e8bc..e24fead4 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -248,28 +248,15 @@ TRef lj_ir_kint64(jit_State *J, uint64_t u64) return lj_ir_k64(J, IR_KINT64, u64); } -/* Check whether a number is int and return it. -0 is NOT considered an int. */ -static int numistrueint(lua_Number n, int32_t *kp) -{ - int32_t k = lj_num2int(n); - if (n == (lua_Number)k) { - if (kp) *kp = k; - if (k == 0) { /* Special check for -0. */ - TValue tv; - setnumV(&tv, n); - if (tv.u32.hi != 0) - return 0; - } - return 1; - } - return 0; -} - /* Intern number as int32_t constant if possible, otherwise as FP constant. */ TRef lj_ir_knumint(jit_State *J, lua_Number n) { + int64_t i64; int32_t k; - if (numistrueint(n, &k)) + TValue tv; + setnumV(&tv, n); + /* -0 is NOT considered an int. 
*/ + if (lj_num2int_check(n, i64, k) && !tvismzero(&tv)) return lj_ir_kint(J, k); else return lj_ir_knum(J, n); diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 5196144e..60b196c6 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -233,20 +233,15 @@ typedef struct CCallInfo { _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ - _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ - _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ - _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ - _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \ - _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ - _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ + _(FP64_FFI, lj_vm_num2u64, 1, N, U64, XA_FP) \ _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ @@ -291,27 +286,14 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; #define softfp_d2i __aeabi_d2iz #define softfp_ui2d __aeabi_ui2d #define softfp_f2d __aeabi_f2d -#define softfp_d2ui __aeabi_d2uiz #define softfp_d2f __aeabi_d2f #define softfp_i2f __aeabi_i2f #define softfp_ui2f __aeabi_ui2f #define softfp_f2i __aeabi_f2iz -#define softfp_f2ui __aeabi_f2uiz #define fp64_l2d __aeabi_l2d #define fp64_ul2d __aeabi_ul2d #define fp64_l2f __aeabi_l2f #define fp64_ul2f __aeabi_ul2f -#if LJ_TARGET_IOS -#define fp64_d2l __fixdfdi -#define fp64_d2ul __fixunsdfdi -#define fp64_f2l __fixsfdi -#define fp64_f2ul __fixunssfdi -#else -#define fp64_d2l __aeabi_d2lz -#define fp64_d2ul __aeabi_d2ulz -#define fp64_f2l __aeabi_f2lz -#define fp64_f2ul __aeabi_f2ulz -#endif #elif LJ_TARGET_MIPS || LJ_TARGET_PPC #define softfp_add __adddf3 #define softfp_sub __subdf3 @@ -322,12 +304,10 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; #define softfp_d2i __fixdfsi #define softfp_ui2d __floatunsidf #define softfp_f2d __extendsfdf2 -#define softfp_d2ui __fixunsdfsi #define softfp_d2f __truncdfsf2 #define softfp_i2f __floatsisf #define softfp_ui2f __floatunsisf #define softfp_f2i __fixsfsi -#define softfp_f2ui __fixunssfsi #else #error "Missing soft-float definitions for target architecture" #endif @@ -341,12 +321,10 @@ extern int32_t softfp_d2i(double a); #if LJ_HASFFI extern double softfp_ui2d(uint32_t a); extern double softfp_f2d(float a); -extern uint32_t softfp_d2ui(double a); extern float softfp_d2f(double a); extern float softfp_i2f(int32_t a); extern float softfp_ui2f(uint32_t a); extern int32_t softfp_f2i(float a); -extern uint32_t softfp_f2ui(float a); #endif #if LJ_TARGET_MIPS extern double lj_vm_sfmin(double a, double b); @@ -360,10 +338,6 @@ extern double lj_vm_sfmax(double a, double b); #define fp64_ul2d __floatundidf #define fp64_l2f __floatdisf #define fp64_ul2f __floatundisf -#define fp64_d2l __fixdfdi -#define fp64_d2ul __fixunsdfdi -#define fp64_f2l __fixsfdi -#define fp64_f2ul __fixunssfdi #else #error "Missing fp64 helper definitions for this compiler" #endif @@ -374,10 +348,6 @@ extern double fp64_l2d(int64_t a); extern double fp64_ul2d(uint64_t a); extern float fp64_l2f(int64_t a); extern float fp64_ul2f(uint64_t a); -extern 
int64_t fp64_d2l(double a); -extern uint64_t fp64_d2ul(double a); -extern int64_t fp64_f2l(float a); -extern uint64_t fp64_f2ul(float a); #endif #endif diff --git a/src/lj_jit.h b/src/lj_jit.h index 05a8e9bb..c0523457 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -350,22 +350,18 @@ enum { }; enum { +#if LJ_TARGET_X64 || LJ_TARGET_MIPS64 + LJ_K64_M2P64, /* -2^64 */ +#endif #if LJ_TARGET_X86ORX64 LJ_K64_TOBIT, /* 2^52 + 2^51 */ LJ_K64_2P64, /* 2^64 */ - LJ_K64_M2P64, /* -2^64 */ -#if LJ_32 - LJ_K64_M2P64_31, /* -2^64 or -2^31 */ -#else - LJ_K64_M2P64_31 = LJ_K64_M2P64, #endif +#if LJ_TARGET_MIPS64 + LJ_K64_2P63, /* 2^63 */ #endif #if LJ_TARGET_MIPS LJ_K64_2P31, /* 2^31 */ -#if LJ_64 - LJ_K64_2P63, /* 2^63 */ - LJ_K64_M2P64, /* -2^64 */ -#endif #endif #if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 LJ_K64_VM_EXIT_HANDLER, @@ -376,20 +372,19 @@ enum { #define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) enum { -#if LJ_TARGET_X86ORX64 - LJ_K32_M2P64_31, /* -2^64 or -2^31 */ +#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 + LJ_K32_M2P64, /* -2^64 */ +#endif +#if LJ_TARGET_MIPS64 + LJ_K32_2P63, /* 2^63 */ #endif #if LJ_TARGET_PPC LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ LJ_K32_2P52, /* 2^52 */ #endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC LJ_K32_2P31, /* 2^31 */ #endif -#if LJ_TARGET_MIPS64 - LJ_K32_2P63, /* 2^63 */ - LJ_K32_M2P64, /* -2^64 */ -#endif #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 LJ_K32_VM_EXIT_HANDLER, LJ_K32_VM_EXIT_INTERP, diff --git a/src/lj_lib.c b/src/lj_lib.c index 88cb2bdd..d51351b8 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -349,7 +349,7 @@ int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) ** integer overflow. Overflow detection still works, since all FPUs ** return either MININT or MAXINT, which is then out of range. */ - int32_t i = (int32_t)numV(o); + int32_t i = lj_num2int(numV(o)); if (i >= a && i <= b) return i; #if LJ_HASFFI } else if (tviscdata(o)) { diff --git a/src/lj_meta.c b/src/lj_meta.c index c9307615..3f30fafb 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -465,7 +465,8 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) if (tvisint(o+i)) { k[i] = intV(o+i); nint++; } else { - k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); + int64_t i64; + if (lj_num2int_check(numV(o+i), i64, k[i])) nint++; } } if (nint == 3) { /* Narrow to integers. */ diff --git a/src/lj_obj.h b/src/lj_obj.h index 73b186e2..58e5049c 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -981,43 +981,68 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) /* -- Number to integer conversion ---------------------------------------- */ -#if LJ_SOFTFP -LJ_ASMF int32_t lj_vm_tobit(double x); -#if LJ_TARGET_MIPS64 -LJ_ASMF int32_t lj_vm_tointg(double x); -#endif -#endif +/* +** The C standard leaves many aspects of FP to integer conversions as +** undefined behavior. Portability is a mess, hardware support varies, +** and modern C compilers are like a box of chocolates -- you never know +** what you're gonna get. +** +** However, we need 100% matching behavior between the interpreter (asm + C), +** optimizations (C) and the code generated by the JIT compiler (asm). +** Mixing Lua numbers with FFI numbers creates some extra requirements. +** +** These conversions have been moved to assembler code, even if they seem +** trivial, to foil unanticipated C compiler 'optimizations' with the +** surrounding code. 
Only the unchecked double to int32_t conversion +** is still in C, because it ought to be pretty safe -- we'll see. +** +** These macros also serve to document all places where FP to integer +** conversions happen. +*/ -static LJ_AINLINE int32_t lj_num2bit(lua_Number n) -{ -#if LJ_SOFTFP - return lj_vm_tobit(n); -#else - TValue o; - o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */ - return (int32_t)o.u32.lo; -#endif -} +/* Unchecked double to int32_t conversion. */ +#define lj_num2int(n) ((int32_t)(n)) -#define lj_num2int(n) ((int32_t)(n)) +/* Unchecked double to arch/os-dependent signed integer type conversion. +** This assumes the 32/64-bit signed conversions are NOT range-extended. +*/ +#define lj_num2int_type(n, tp) ((tp)(n)) -/* -** This must match the JIT backend behavior. In particular for archs -** that don't have a common hardware instruction for this conversion. -** Note that signed FP to unsigned int conversions have an undefined -** result and should never be relied upon in portable FFI code. -** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). +/* Convert a double to int32_t and check for exact conversion. +** Returns the zero-extended int32_t on success. -0 is OK, too. +** Returns 0x8000000080000000LL on failure (simplifies range checks). */ -static LJ_AINLINE uint64_t lj_num2u64(lua_Number n) -{ -#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS - int64_t i = (int64_t)n; - if (i < 0) i = (int64_t)(n - 18446744073709551616.0); - return (uint64_t)i; -#else - return (uint64_t)n; -#endif -} +LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x); + +/* Check for exact conversion only, without storing the result. */ +#define lj_num2int_ok(x) (lj_vm_num2int_check((x)) >= 0) + +/* Check for exact conversion and conditionally store result. +** Note: conditions that fail for 0x80000000 may check only the lower +** 32 bits. This generates good code for both 32 and 64 bit archs. +*/ +#define lj_num2int_cond(x, i64, i, cond) \ + (i64 = lj_vm_num2int_check((x)), cond ? (i = (int32_t)i64, 1) : 0) + +/* This is the generic check for a full-range int32_t result. */ +#define lj_num2int_check(x, i64, i) \ + lj_num2int_cond((x), i64, i, i64 >= 0) + +/* Predictable conversion from double to int64_t or uint64_t. +** Truncates towards zero. Out-of-range values, NaN and +-Inf return +** an arch-dependent result, but do not cause C undefined behavior. +** The uint64_t conversion accepts the union of the unsigned + signed range. +*/ +LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x); +LJ_ASMF LJ_CONSTF int64_t lj_vm_num2u64(double x); + +#define lj_num2i64(x) (lj_vm_num2i64((x))) +#define lj_num2u64(x) (lj_vm_num2u64((x))) + +/* Lua BitOp conversion semantics use the 2^52 + 2^51 trick. */ +LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x); + +#define lj_num2bit(x) lj_vm_tobit((x)) static LJ_AINLINE int32_t numberVint(cTValue *o) { diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 6fdf4566..456c04b2 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -303,17 +303,18 @@ LJFOLDF(kfold_intarith) return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); } +/* Forward declaration. 
*/ +static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, + IROp op); + LJFOLD(ADDOV KINT KINT) LJFOLD(SUBOV KINT KINT) LJFOLD(MULOV KINT KINT) LJFOLDF(kfold_intovarith) { - lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i, - fins->o - IR_ADDOV); - int32_t k = lj_num2int(n); - if (n != (lua_Number)k) - return FAILFOLD; - return INTFOLD(k); + int64_t k = kfold_int64arith(J, (int64_t)fleft->i, (int64_t)fright->i, + (IROp)((int)fins->o - (int)IR_ADDOV + (int)IR_ADD)); + return checki32(k) ? INTFOLD(k) : FAILFOLD; } LJFOLD(BNOT KINT) @@ -368,11 +369,11 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, IROp op) { UNUSED(J); -#if LJ_HASFFI switch (op) { case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; +#if LJ_HASFFI case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; @@ -382,11 +383,8 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; default: lj_assertJ(0, "bad IR op %d", op); break; - } -#else - UNUSED(k2); UNUSED(op); - lj_assertJ(0, "FFI IR op without FFI"); #endif + } return k1; } @@ -883,8 +881,11 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) LJFOLDF(kfold_conv_knum_int_num) { lua_Number n = knumleft; - int32_t k = lj_num2int(n); - if (irt_isguard(fins->t) && n != (lua_Number)k) { + if (irt_isguard(fins->t)) { + int64_t i64; + int32_t k; + if (lj_num2int_check(n, i64, k)) + return INTFOLD(k); /* We're about to create a guard which always fails, like CONV +1.5. ** Some pathological loops cause this during LICM, e.g.: ** local x,k,t = 0,1.5,{1,[1.5]=2} @@ -892,27 +893,15 @@ LJFOLDF(kfold_conv_knum_int_num) ** assert(x == 300) */ return FAILFOLD; + } else { + return INTFOLD(lj_num2int(n)); } - return INTFOLD(k); -} - -LJFOLD(CONV KNUM IRCONV_U32_NUM) -LJFOLDF(kfold_conv_knum_u32_num) -{ -#ifdef _MSC_VER - { /* Workaround for MSVC bug. */ - volatile uint32_t u = (uint32_t)knumleft; - return INTFOLD((int32_t)u); - } -#else - return INTFOLD((int32_t)(uint32_t)knumleft); -#endif } LJFOLD(CONV KNUM IRCONV_I64_NUM) LJFOLDF(kfold_conv_knum_i64_num) { - return INT64FOLD((uint64_t)(int64_t)knumleft); + return INT64FOLD((uint64_t)lj_num2i64(knumleft)); } LJFOLD(CONV KNUM IRCONV_U64_NUM) @@ -1135,7 +1124,6 @@ LJFOLDF(shortcut_conv_num_int) } LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ -LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */ LJFOLDF(simplify_conv_int_num) { /* Fold even across PHI to avoid expensive num->int conversions in loop. */ @@ -1334,6 +1322,24 @@ LJFOLDF(narrow_convert) return lj_opt_narrow_convert(J); } +LJFOLD(XSTORE any CONV) +LJFOLDF(xstore_conv) +{ +#if LJ_64 + PHIBARRIER(fright); + if (!irt_is64(fins->t) && + irt_type(fins->t) == (IRType)((fright->op2&IRCONV_DSTMASK)>>IRCONV_DSH) && + ((fright->op2&IRCONV_SRCMASK) == IRT_I64 || + (fright->op2&IRCONV_SRCMASK) == IRT_U64)) { + fins->op2 = fright->op1; + return RETRYFOLD; + } +#else + UNUSED(J); +#endif + return NEXTFOLD; +} + /* -- Integer algebraic simplifications ----------------------------------- */ LJFOLD(ADD any KINT) diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 01b5833d..3085c837 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -281,22 +281,20 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth) return 0; } else if (ir->o == IR_KNUM) { /* Narrow FP constant. 
*/ lua_Number n = ir_knum(ir)->n; + int64_t i64; + int32_t k; if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { - /* Allows a wider range of constants. */ - int64_t k64 = (int64_t)n; - if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */ - *nc->sp++ = NARROWINS(NARROW_INT, 0); - *nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */ - return 0; - } - } else { - int32_t k = lj_num2int(n); - /* Only if constant is a small integer. */ - if (checki16(k) && n == (lua_Number)k) { + /* Allows a wider range of constants, if const doesn't lose precision. */ + if (lj_num2int_check(n, i64, k)) { *nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = (NarrowIns)k; return 0; } + } else if (lj_num2int_cond(n, i64, k, checki16((int32_t)i64))) { + /* Only if constant is a small integer. */ + *nc->sp++ = NARROWINS(NARROW_INT, 0); + *nc->sp++ = (NarrowIns)k; + return 0; } return 10; /* Never narrow other FP constants (this is rare). */ } @@ -512,12 +510,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) /* -- Narrowing of arithmetic operators ----------------------------------- */ -/* Check whether a number fits into an int32_t (-0 is ok, too). */ -static int numisint(lua_Number n) -{ - return (n == (lua_Number)lj_num2int(n)); -} - /* Convert string to number. Error out for non-numeric string values. */ static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) { @@ -539,8 +531,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && tref_isinteger(rb) && tref_isinteger(rc) && - numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), - (int)op - (int)IR_ADD))) + lj_num2int_ok(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), + (int)op - (int)IR_ADD))) return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); @@ -591,7 +583,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) static int narrow_forl(jit_State *J, cTValue *o) { if (tvisint(o)) return 1; - if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); + if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return lj_num2int_ok(numV(o)); return 0; } diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 8d025911..d29d1eab 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -573,13 +573,9 @@ static void split_ir(jit_State *J) case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); #if LJ_SOFTFP + lj_assertJ(st != IRT_FLOAT, "bad CONV *64.float emitted"); if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ - hi = split_call_l(J, hisubst, oir, ir, - irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); - } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ - nir->o = IR_CALLN; - nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; - hi = split_emit(J, IRTI(IR_HIOP), nref, nref); + hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_num2u64); } #else if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ @@ -692,8 +688,9 @@ static void split_ir(jit_State *J) nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; } } else if (st == IRT_FLOAT) { + lj_assertJ(!irt_isu32(ir->t), "bad CONV u32.fp emitted"); nir->o = IR_CALLN; - nir->op2 = irt_isint(ir->t) ? 
IRCALL_softfp_f2i : IRCALL_softfp_f2ui; + nir->op2 = IRCALL_softfp_f2i; } else #endif #if LJ_SOFTFP @@ -705,9 +702,7 @@ static void split_ir(jit_State *J) } else { split_call_l(J, hisubst, oir, ir, #if LJ_32 && LJ_HASFFI - st == IRT_NUM ? - (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : - (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) + st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i #else IRCALL_softfp_d2i #endif diff --git a/src/lj_parse.c b/src/lj_parse.c index 181ce4d7..832f6bf4 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -522,9 +522,9 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); else #else - lua_Number n = expr_numberV(e); - int32_t k = lj_num2int(n); - if (checki16(k) && n == (lua_Number)k) + int64_t i64; + int32_t k; + if (lj_num2int_cond(expr_numberV(e), i64, k, checki16((int32_t)i64))) ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); else #endif @@ -782,8 +782,9 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2) setnumV(&o, n); if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */ if (LJ_DUALNUM) { - int32_t k = lj_num2int(n); - if ((lua_Number)k == n) { + int64_t i64; + int32_t k; + if (lj_num2int_check(n, i64, k)) { setintV(&e1->u.nval, k); return 1; } @@ -1386,10 +1387,10 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) if (tvisnum(&n->key)) { TValue *tv = &((TValue *)kptr)[kidx]; if (LJ_DUALNUM) { - lua_Number nn = numV(&n->key); - int32_t k = lj_num2int(nn); + int64_t i64; + int32_t k; lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); - if ((lua_Number)k == nn) + if (lj_num2int_check(numV(&n->key), i64, k)) setintV(tv, k); else *tv = n->key; @@ -1656,9 +1657,9 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e) } } #else - lua_Number n = expr_numberV(e); - int32_t k = lj_num2int(n); - if (checku8(k) && n == (lua_Number)k) { + int64_t i64; + int32_t k; + if (lj_num2int_cond(expr_numberV(e), i64, k, checku8((int32_t)i64))) { t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ return; } diff --git a/src/lj_record.c b/src/lj_record.c index 6543f274..536d7171 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -351,9 +351,14 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t) } else { cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); if (t == IRT_INT) { - int32_t k = numberVint(tv); - if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ - return lj_ir_kint(J, k); + if (tvisint(tv)) { + return lj_ir_kint(J, intV(tv)); + } else { + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(tv), i64, k)) /* -0 is ok here. */ + return lj_ir_kint(J, k); + } return 0; /* Type mismatch. */ } else { return lj_ir_knum(J, numberVnum(tv)); @@ -1426,9 +1431,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, /* Integer keys are looked up in the array part first. */ key = ix->key; if (tref_isnumber(key)) { - int32_t k = numberVint(&ix->keyv); - if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) - k = LJ_MAX_ASIZE; + int32_t k; + if (tvisint(&ix->keyv)) { + k = intV(&ix->keyv); + } else { + int64_t i64; + if (!lj_num2int_check(numV(&ix->keyv), i64, k)) k = LJ_MAX_ASIZE; + } if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? 
*/ TRef ikey = lj_opt_narrow_index(J, key); TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index bb649fc8..0936298d 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -351,7 +351,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) /* Add number formatted as signed integer to buffer. */ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) { - int64_t k = (int64_t)n; + int64_t k = lj_num2i64(n); if (checki32(k) && sf == STRFMT_INT) return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ else @@ -361,12 +361,7 @@ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) /* Add number formatted as unsigned integer to buffer. */ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) { - int64_t k; - if (n >= 9223372036854775808.0) - k = (int64_t)(n - 18446744073709551616.0); - else - k = (int64_t)n; - return lj_strfmt_putfxint(sb, sf, (uint64_t)k); + return lj_strfmt_putfxint(sb, sf, lj_num2u64(n)); } /* Format stack arguments to buffer. */ diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 502c78e9..fbb959c5 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c @@ -523,10 +523,10 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); /* Try to convert number to integer, if requested. */ - if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) { - double n = o->n; - int32_t i = lj_num2int(n); - if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } + if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) { + int64_t tmp; + if (lj_num2int_check(o->n, tmp, o->i) && !tvismzero(o)) + return STRSCAN_INT; } return fmt; } diff --git a/src/lj_tab.c b/src/lj_tab.c index 62e33611..2959fadb 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -295,9 +295,9 @@ static uint32_t countint(cTValue *key, uint32_t *bins) { lj_assertX(!tvisint(key), "bad integer key"); if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) { + int64_t i64; + int32_t k; + if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < LJ_MAX_ASIZE)) { bins[(k > 2 ? 
lj_fls((uint32_t)(k-1)) : 0)]++; return 1; } @@ -409,9 +409,9 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key) if (tv) return tv; } else if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if (nk == (lua_Number)k) { + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(key), i64, k)) { cTValue *tv = lj_tab_getint(t, k); if (tv) return tv; @@ -542,9 +542,9 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) } else if (tvisint(key)) { return lj_tab_setint(L, t, intV(key)); } else if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if (nk == (lua_Number)k) + int64_t i64; + int32_t k; + if (lj_num2int_check(numV(key), i64, k)) return lj_tab_setint(L, t, k); if (tvisnan(key)) lj_err_msg(L, LJ_ERR_NANIDX); @@ -580,9 +580,9 @@ uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key) setnumV(&tmp, (lua_Number)k); key = &tmp; } else if (tvisnum(key)) { - lua_Number nk = numV(key); - int32_t k = lj_num2int(nk); - if ((uint32_t)k < t->asize && nk == (lua_Number)k) + int64_t i64; + int32_t k; + if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < t->asize)) return (uint32_t)k + 1; } if (!tvisnil(key)) { diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index fa32a5d4..193102ee 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -314,6 +314,7 @@ typedef enum { XO_FSTPq = XO_(dd), XOg_FSTPq = 3, XO_FISTPq = XO_(df), XOg_FISTPq = 7, XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, + XO_FADDd = XO_(d8), XOg_FADDd = 0, XO_FADDq = XO_(dc), XOg_FADDq = 0, XO_FLDCW = XO_(d9), XOg_FLDCW = 5, XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 diff --git a/src/lj_trace.c b/src/lj_trace.c index 47d7faa5..ad329540 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -317,32 +317,34 @@ void lj_trace_initstate(global_State *g) tv[1].u64 = U64x(80000000,00000000); /* Initialize 32/64 bit constants. */ +#if LJ_TARGET_X64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); +#endif #if LJ_TARGET_X86ORX64 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); -#if LJ_32 - J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); -#endif J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); - J->k32[LJ_K32_M2P64_31] = LJ_64 ? 
0xdf800000 : 0xcf000000; #endif +#if LJ_TARGET_MIPS64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); +#endif +#if LJ_TARGET_MIPS + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); +#endif + #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 - J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); + J->k32[LJ_K32_M2P64] = 0xdf800000; +#endif +#if LJ_TARGET_MIPS64 + J->k32[LJ_K32_2P63] = 0x5f000000; #endif #if LJ_TARGET_PPC J->k32[LJ_K32_2P52_2P31] = 0x59800004; J->k32[LJ_K32_2P52] = 0x59800000; #endif -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC J->k32[LJ_K32_2P31] = 0x4f000000; #endif -#if LJ_TARGET_MIPS - J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); -#if LJ_64 - J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); - J->k32[LJ_K32_2P63] = 0x5f000000; - J->k32[LJ_K32_M2P64] = 0xdf800000; -#endif -#endif + #if LJ_TARGET_PPC || LJ_TARGET_MIPS32 J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; diff --git a/src/lj_vm.h b/src/lj_vm.h index 9cc42613..96ad2d07 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -37,13 +37,19 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); #if LJ_TARGET_PPC void lj_vm_cachesync(void *start, void *end); #endif -LJ_ASMF double lj_vm_foldarith(double x, double y, int op); +LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op); #if LJ_HASJIT -LJ_ASMF double lj_vm_foldfpm(double x, int op); +LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op); #endif -#if !LJ_ARCH_HASFPU -/* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ +#if LJ_SOFTFP && LJ_TARGET_MIPS64 +LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x); #endif +/* Declared in lj_obj.h: +** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x); +** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x); +** LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x); +** LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x); +*/ /* Dispatch targets for recording and hooks. 
*/ LJ_ASMF void lj_vm_record(void); @@ -62,15 +68,15 @@ LJ_ASMF char lj_vm_exit_interp[]; #define lj_vm_floor floor #define lj_vm_ceil ceil #else -LJ_ASMF double lj_vm_floor(double); -LJ_ASMF double lj_vm_ceil(double); +LJ_ASMF LJ_CONSTF double lj_vm_floor(double); +LJ_ASMF LJ_CONSTF double lj_vm_ceil(double); #if LJ_TARGET_ARM -LJ_ASMF double lj_vm_floor_sf(double); -LJ_ASMF double lj_vm_ceil_sf(double); +LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double); +LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double); #endif #endif #ifdef LUAJIT_NO_LOG2 -LJ_ASMF double lj_vm_log2(double); +LJ_ASMF LJ_CONSTF double lj_vm_log2(double); #else #define lj_vm_log2 log2 #endif @@ -80,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); #if LJ_HASJIT #if LJ_TARGET_X86ORX64 -LJ_ASMF void lj_vm_floor_sse(void); -LJ_ASMF void lj_vm_ceil_sse(void); -LJ_ASMF void lj_vm_trunc_sse(void); +LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void); +LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void); +LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void); #endif #if LJ_TARGET_PPC || LJ_TARGET_ARM64 #define lj_vm_trunc trunc #else -LJ_ASMF double lj_vm_trunc(double); +LJ_ASMF LJ_CONSTF double lj_vm_trunc(double); #if LJ_TARGET_ARM -LJ_ASMF double lj_vm_trunc_sf(double); +LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double); #endif #endif #if LJ_HASFFI diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 2c9b96cc..1495102f 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -59,7 +59,7 @@ double lj_vm_foldarith(double x, double y, int op) case IR_NEG - IR_ADD: return -x; break; case IR_ABS - IR_ADD: return fabs(x); break; #if LJ_HASJIT - case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; + case IR_LDEXP - IR_ADD: return ldexp(x, lj_num2int(y)); break; case IR_MIN - IR_ADD: return x < y ? x : y; break; case IR_MAX - IR_ADD: return x > y ? x : y; break; #endif diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 86bef0cf..2cd7eedb 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2452,6 +2452,118 @@ static void build_subroutines(BuildCtx *ctx) | bx lr | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if FPU + |.if not HFABI + | vmov d0, CARG1, CARG2 + |.endif + | vcvt.s32.f64 s4, d0 + | vcvt.f64.s32 d1, s4 + | vcmp.f64 d0, d1 + | vmrs + | bne >1 + | vmov CRET1, s4 + | mov CRET2, #0 + | bx lr + | + |.else + | + | asr CARG4, CARG2, #31 // sign = 0 or -1. + | lsl CARG2, CARG2, #1 + | orrs RB, CARG2, CARG1 + | bxeq lr // Return 0 for +-0. + | mov RB, #1024 + | add RB, RB, #30 + | sub RB, RB, CARG2, lsr #21 + | cmp RB, #32 + | bhs >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. + | lsr CARG3, CARG1, #21 + | orr CARG2, CARG3, CARG2, lsl #10 // Left-aligned mantissa. + | rsb CARG3, RB, #32 + | lsl CARG3, CARG2, CARG3 + | orr CARG2, CARG2, #0x80000000 // Merge leading 1. + | orrs CARG3, CARG3, CARG1, lsl #11 + | lsr CARG1, CARG2, RB // lo = right-aligned absolute value. + | bne >1 // Fail if fractional part != 0. + | adds CRET1, CARG1, CARG4 + | bmi >1 // Fail if lo+sign >= 0x80000000. + | eor CRET1, CRET1, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign. + | mov CRET2, #0 + | bx lr + |.endif + |1: + | mov CRET1, #0x80000000 + | mov CRET2, #0x80000000 + | bx lr + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |// fallthrough, same as lj_vm_num2u64. 
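|// Note: lj_vm_num2u64 accepts the union of the signed and unsigned
|// 64 bit ranges (see lj_obj.h), so one routine serves both entries.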
+ | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if HFABI + | vmov CARG1, CARG2, d0 + |.endif + | lsl RB, CARG2, #1 + | lsr RB, RB, #21 + | sub RB, RB, #1020 + | sub RB, RB, #3 + | cmp RB, #116 + | bhs >3 // Exponent out of range. + | asr CARG4, CARG2, #31 // sign = 0 or -1. + | lsl CARG2, CARG2, #12 + | lsr CARG2, CARG2, #12 + | rsbs RB, RB, #52 + | orr CARG2, CARG2, #0x00100000 + | bmi >2 // Shift mantissa left or right? + | lsr CARG1, CARG1, RB // 64 bit right shift. + | lsr CARG3, CARG2, RB + | rsb RB, RB, #32 + | orr CARG1, CARG1, CARG2, lsl RB + | rsb RB, RB, #0 + | orr CARG1, CARG1, CARG2, lsr RB + | adds CRET1, CARG1, CARG4 // m = sign?-m:m = (m+sign)^sign. + | adc CRET2, CARG3, CARG4 + |1: + | eor CRET1, CRET1, CARG4 + | eor CRET2, CRET2, CARG4 + | bx lr + |2: + | rsb RB, RB, #0 + | lsl CARG2, CARG2, RB // 64 bit left shift. + | lsl CARG3, CARG1, RB + | sub RB, RB, #32 + | orr CARG2, CARG2, CARG1, lsl RB + | rsb RB, RB, #0 + | orr CARG2, CARG2, CARG1, lsr RB + | adds CRET1, CARG3, CARG4 + | adc CRET2, CARG2, CARG4 + | b <1 + |3: + | mov CRET1, #0 + | mov CRET2, #0 + | bx lr + | + |// int32_t lj_vm_tobit(double x) + |.if FPU + |->vm_tobit: + | vldr d1, >9 + |.if not HFABI + | vmov d0, CARG1, CARG2 + |.endif + | vadd.f64 d0, d0, d1 + | vmov CARG1, s0 + | bx lr + |9: + | .long 0, 0x43380000 // (double)(2^52 + 2^51). + |.endif + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | @@ -4097,7 +4209,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] | // Subsumes ins_next1 and ins_next2. | ldr INS, TRACE:CARG1->startins - | bfi INS, OP, #0, #8 + | bic INS, INS, #0xff + | orr INS, INS, OP | str INS, [PC], #4 | b <1 |.endif diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index a437b657..eb6d0c2f 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -2156,6 +2156,42 @@ static void build_subroutines(BuildCtx *ctx) | ret | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + | fcvtzs CRET1w, FARG1 + | scvtf FARG2, CRET1w + | fcmp FARG2, FARG1 + | bne >1 + | ret + |1: + | mov CRET1, #0x8000000080000000 + | ret + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + | fcvtzs CRET1, FARG1 + | ret + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + | fcvtzs CRET1, FARG1 + | fcvtzu CARG2, FARG1 + | cmn CRET1, #1 // Set overflow if CRET1 == INT64_MAX. + | csel CRET1, CRET1, CARG2, vc // No overflow ? i64 : u64. + | ret + | + |// int32_t lj_vm_tobit(double x) + |->vm_tobit: + | movz CRET1, #0x4338, lsl #48 // 2^52 + 2^51. 
+ | fmov FARG2, CRET1 + | fadd FARG1, FARG1, FARG2 + | fmov CRET1w, s0 + | ret + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 02e588ee..8a6b8270 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -85,6 +85,7 @@ | |.if FPU |.define FARG1, f12 +|.define FARG1HI, f13 |.define FARG2, f14 | |.define FRET1, f0 @@ -2560,7 +2561,7 @@ static void build_subroutines(BuildCtx *ctx) | mtc1 r0, f4 | mtc1 TMP0, f5 | abs.d FRET2, FARG1 // |x| - | mfc1 AT, f13 + | mfc1 AT, FARG1HI | c.olt.d 0, FRET2, f4 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | bc1f 0, >1 // Truncate only if |x| < 2^52. @@ -2822,6 +2823,122 @@ static void build_subroutines(BuildCtx *ctx) | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if FPU + | trunc.w.d FARG2, FARG1 + | mfc1 SFRETLO, FARG2 + | cvt.d.w FARG2, FARG2 + | c.eq.d FARG1, FARG2 + | bc1f 0, >2 + |. nop + | jr ra + |. move SFRETHI, r0 + | + |.else + | + | sll SFRETLO, SFARG1HI, 1 + | or SFRETHI, SFRETLO, SFARG1LO + | beqz SFRETHI, >1 // Return 0 for +-0. + |. li TMP0, 1054 + | srl AT, SFRETLO, 21 + | subu TMP0, TMP0, AT + | sltiu AT, TMP0, 32 + | beqz AT, >2 // Fail if |x| < 0x1p0 || |x| >= 0x1p32. + |. sll SFRETLO, SFARG1HI, 11 + | srl SFRETHI, SFARG1LO, 21 + | negu TMP1, TMP0 + | or SFRETLO, SFRETLO, SFRETHI // Left-aligned mantissa. + | sllv TMP2, SFRETLO, TMP1 + | lui AT, 0x8000 + | sll SFRETHI, SFARG1LO, 11 + | or SFRETLO, SFRETLO, AT // Merge leading 1. + | or TMP2, TMP2, SFRETHI + | srlv SFRETLO, SFRETLO, TMP0 // lo = right-aligned absolute value. + | bnez TMP2, >2 // Fail if fractional part != 0. + |. sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1. + | addu SFRETLO, SFRETLO, SFARG1HI + | bltz SFRETLO, >2 // Fail if lo+sign >= 0x80000000. + |. xor SFRETLO, SFRETLO, SFARG1HI // lo = sign?-lo:lo = (lo+sign)^sign. + |1: + | jr ra + |. move SFRETHI, r0 + |.endif + |2: // Not an integer, return 0x8000000080000000LL. + | lui SFRETHI, 0x8000 + | jr ra + |. lui SFRETLO, 0x8000 + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |// fallthrough, same as lj_vm_num2u64. + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if FPU + | mfc1 SFARG1HI, FARG1HI + | mfc1 SFARG1LO, FARG1 + |.endif + | srl TMP0, SFARG1HI, 20 + | andi TMP0, TMP0, 0x7ff + | addiu SFRETLO, TMP0, -1023 + | sltiu SFRETLO, SFRETLO, 116 + | beqz SFRETLO, >3 // Exponent out of range. + |. sll SFRETHI, SFARG1HI, 12 + | lui AT, 0x0010 + | srl SFRETHI, SFRETHI, 12 + | addiu TMP0, TMP0, -1075 + | sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1. + | bgez TMP0, >2 // Shift mantissa left or right? + |. or SFRETHI, SFRETHI, AT // Merge leading 1 into masked mantissa. + | subu TMP1, r0, TMP0 + | sll AT, SFRETHI, 1 + | nor TMP0, r0, TMP1 + | srlv SFRETHI, SFRETHI, TMP1 // Shift hi mantissa right for low exp. + | sllv AT, AT, TMP0 // Shifted-out hi mantissa. + | srlv SFRETLO, SFARG1LO, TMP1 // Shift lo mantissa right for low exp. + | andi TMP1, TMP1, 0x20 // Conditional right shift by 32. + | or AT, AT, SFRETLO // Merge into lo mantissa. 
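+ |// For shift amounts >= 32 the conditional moves below route the
+ |// shifted hi word into the lo word and clear the hi word.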
+ | movn AT, SFRETHI, TMP1 + | movn SFRETHI, r0, TMP1 + |1: + | addu SFRETLO, AT, SFARG1HI // m = sign?-m:m = (m+sign)^sign. + | addu SFRETHI, SFRETHI, SFARG1HI + | sltu TMP0, SFRETLO, AT // Carry + | addu SFRETHI, SFRETHI, TMP0 + | xor SFRETLO, SFRETLO, SFARG1HI + | jr ra + |. xor SFRETHI, SFRETHI, SFARG1HI + |2: + | srl TMP2, SFARG1LO, 1 + | nor AT, r0, TMP0 + | sllv SFRETHI, SFRETHI, TMP0 // Shift hi mantissa left for high exp. + | srlv TMP2, TMP2, AT // Shifted-out lo mantissa. + | sllv AT, SFARG1LO, TMP0 // Shift lo mantissa left for high exp. + | andi TMP0, TMP0, 0x20 // Conditional left shift by 32. + | or SFRETHI, SFRETHI, TMP2 // Merge into hi mantissa. + | movn SFRETHI, AT, TMP0 + | b <1 + |. movn AT, r0, TMP0 + |3: + | jr ra + |. li SFRETHI, 0 + | + |// int32_t lj_vm_tobit(double x) + |.if FPU + |->vm_tobit: + | lui AT, 0x59c0 // 2^52 + 2^51 (float). + | mtc1 AT, FARG2 + | cvt.d.s FARG2, FARG2 + | add.d FARG1, FARG1, FARG2 + | jr ra + |. mfc1 CRET1, FARG1 + |.endif + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 859c0aee..4dc40d8a 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -2113,7 +2113,7 @@ static void build_subroutines(BuildCtx *ctx) | dinsu CRET2, AT, 21, 21 | slt AT, CARG1, r0 | dsrlv CRET1, CRET2, TMP0 - | dsubu CARG1, r0, CRET1 + | negu CARG1, CRET1 |.if MIPSR6 | seleqz CRET1, CRET1, AT | selnez CARG1, CARG1, AT @@ -2121,20 +2121,12 @@ static void build_subroutines(BuildCtx *ctx) |.else | movn CRET1, CARG1, AT |.endif - | li CARG1, 64 - | subu TMP0, CARG1, TMP0 + | negu TMP0, TMP0 | dsllv CRET2, CRET2, TMP0 // Integer check. | sextw AT, CRET1 | xor AT, CRET1, AT // Range check. - |.if MIPSR6 - | seleqz AT, AT, CRET2 - | selnez CRET2, CRET2, CRET2 | jr ra |. or CRET2, AT, CRET2 - |.else - | jr ra - |. movz CRET2, AT, CRET2 - |.endif |1: | jr ra |. li CRET2, 1 @@ -2929,6 +2921,136 @@ static void build_subroutines(BuildCtx *ctx) | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if FPU + | trunc.w.d FARG2, FARG1 + | mfc1 CRET1, FARG2 + | cvt.d.w FARG2, FARG2 + |.if MIPSR6 + | cmp.eq.d FARG2, FARG1, FARG2 + | bc1eqz FARG2, >2 + |.else + | c.eq.d FARG1, FARG2 + | bc1f 0, >2 + |.endif + |. nop + | jr ra + |. zextw CRET1, CRET1 + | + |.else + | + | dsll CRET2, CARG1, 1 + | beqz CRET2, >1 + |. li TMP0, 1076 + | dsrl AT, CRET2, 53 + | dsubu TMP0, TMP0, AT + | sltiu AT, TMP0, 54 + | beqz AT, >2 + |. dextm CRET2, CRET2, 0, 20 + | dinsu CRET2, AT, 21, 21 + | slt AT, CARG1, r0 + | dsrlv CRET1, CRET2, TMP0 + | negu CARG1, CRET1 + |.if MIPSR6 + | seleqz CRET1, CRET1, AT + | selnez CARG1, CARG1, AT + | or CRET1, CRET1, CARG1 + |.else + | movn CRET1, CARG1, AT + |.endif + | negu TMP0, TMP0 + | dsllv CRET2, CRET2, TMP0 // Integer check. + | sextw AT, CRET1 + | xor AT, CRET1, AT // Range check. + | or AT, AT, CRET2 + | bnez AT, >2 + |. nop + | jr ra + |. zextw CRET1, CRET1 + |1: + | jr ra + |. move CRET1, r0 + |.endif + |2: + | lui CRET1, 0x8000 + | dsll CRET1, CRET1, 16 + | ori CRET1, CRET1, 0x8000 + | jr ra + |. 
dsll CRET1, CRET1, 16
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ |.if FPU
+ | trunc.l.d FARG1, FARG1
+ | jr ra
+ |. dmfc1 CRET1, FARG1
+ |.else
+ |// fallthrough, same as lj_vm_num2u64 for soft-float.
+ |.endif
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ |.if FPU
+ | trunc.l.d FARG2, FARG1
+ | dmfc1 CRET1, FARG2
+ | li AT, -1
+ | dsrl AT, AT, 1
+ | beq CRET1, AT, >1
+ |. lui AT, 0xdf80 // -2^64 (float).
+ | jr ra
+ |. nop
+ |1:
+ | mtc1 AT, FARG2
+ | cvt.d.s FARG2, FARG2
+ | add.d FARG1, FARG1, FARG2
+ | trunc.l.d FARG2, FARG1
+ | jr ra
+ |. dmfc1 CRET1, FARG2
+ |
+ |.else
+ |
+ | dextu CARG2, CARG1, 20, 10
+ | addiu AT, CARG2, -1023
+ | sltiu AT, AT, 116
+ | beqz AT, >2 // Exponent out of range.
+ |. addiu CARG2, CARG2, -1075
+ | dextm CRET1, CARG1, 0, 19
+ | dsll AT, AT, 52
+ | dsra CARG1, CARG1, 63 // sign = 0 or -1.
+ | bgez CARG2, >1 // Shift mantissa left or right?
+ |. or CRET1, CRET1, AT // Merge leading 1 into masked mantissa.
+ | subu CARG2, r0, CARG2
+ | dsrlv CRET1, CRET1, CARG2 // Shift mantissa right for low exp.
+ | daddu CRET1, CRET1, CARG1
+ | jr ra
+ |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
+ |1:
+ | dsllv CRET1, CRET1, CARG2 // Shift mantissa left for high exp.
+ | daddu CRET1, CRET1, CARG1
+ | jr ra
+ |. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
+ |2:
+ | jr ra
+ |. move CRET1, r0
+ |.endif
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |.if FPU
+ |->vm_tobit:
+ | lui AT, 0x59c0 // 2^52 + 2^51 (float).
+ | mtc1 AT, FARG2
+ | cvt.d.s FARG2, FARG2
+ | add.d FARG1, FARG1, FARG2
+ | mfc1 CRET1, FARG1
+ | jr ra
+ |. sextw CRET1, CRET1
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
 |//-- Miscellaneous functions --------------------------------------------
 |//-----------------------------------------------------------------------
 |
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 2ddeefbf..1761e39b 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -3160,6 +3160,152 @@ static void build_subroutines(BuildCtx *ctx)
 | blr
 |
 |//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ |.if FPU
+ | subi sp, sp, 16
+ | stfd FARG1, 0(sp)
+ | lwz CARG1, 0(sp)
+ | lwz CARG2, 4(sp)
+ |.endif
+ | slwi TMP1, CARG1, 1
+ |.if PPE
+ | or TMP1, TMP1, CARG2
+ | cmpwi TMP1, 0
+ |.else
+ | or. TMP1, TMP1, CARG2
+ |.endif
+ | beq >2 // Return 0 for +-0.
+ | rlwinm RB, CARG1, 12, 21, 31
+ | subfic RB, RB, 1054
+ | cmplwi RB, 32
+ | bge >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
+ | slwi CARG3, CARG1, 11
+ | rlwimi CARG3, CARG2, 11, 21, 31 // Left-aligned mantissa.
+ | subfic TMP1, RB, 32
+ | slw TMP1, CARG3, TMP1
+ | slwi TMP2, CARG2, 11
+ |.if PPE
+ | or TMP1, TMP1, TMP2
+ | cmpwi TMP1, 0
+ |.else
+ | or. TMP1, TMP1, TMP2
+ |.endif
+ | bne >1 // Fail if fractional part != 0.
+ | oris CARG3, CARG3, 0x8000 // Merge leading 1.
+ | srw CRET2, CARG3, RB // lo = right-aligned absolute value.
+ | srawi CARG4, CARG1, 31 // sign = 0 or -1.
+ |.if GPR64
+ | add CRET2, CRET2, CARG4
+ | cmpwi CRET2, 0
+ |.else
+ | add. CRET2, CRET2, CARG4
+ |.endif
+ | blt >1 // Fail if lo+sign >= 0x80000000.
+ | xor CRET2, CRET2, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
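+ |// Fall through to the common exit below, shared with the +-0 case:
+ |// lo is zero-extended to the 64 bit result.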
+ |2:
+ |.if GPR64
+ | rldicl CRET1, CRET2, 0, 32
+ |.else
+ | li CRET1, 0
+ |.endif
+ |.if FPU
+ | addi sp, sp, 16
+ |.endif
+ | blr
+ |1:
+ |.if GPR64
+ | lus CRET1, 0x8000
+ | rldicr CRET1, CRET1, 32, 32
+ |.else
+ | lus CRET1, 0x8000
+ | lus CRET2, 0x8000
+ |.endif
+ |.if FPU
+ | addi sp, sp, 16
+ |.endif
+ | blr
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ |// fallthrough, same as lj_vm_num2u64.
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ |.if FPU
+ | subi sp, sp, 16
+ | stfd FARG1, 0(sp)
+ | lwz CARG1, 0(sp)
+ | lwz CARG2, 4(sp)
+ |.endif
+ | rlwinm RB, CARG1, 12, 21, 31
+ | addi RB, RB, -1023
+ | cmplwi RB, 116
+ | bge >3 // Exponent out of range.
+ | srawi CARG4, CARG1, 31 // sign = 0 or -1.
+ | clrlwi CARG1, CARG1, 12
+ | subfic RB, RB, 52
+ | oris CARG1, CARG1, 0x0010
+ | cmpwi RB, 0
+ | blt >2 // Shift mantissa left or right?
+ | subfic TMP1, RB, 32 // 64 bit right shift.
+ | srw CARG2, CARG2, RB
+ | slw TMP2, CARG1, TMP1
+ | addi TMP1, RB, -32
+ | or CARG2, CARG2, TMP2
+ | srw TMP2, CARG1, TMP1
+ | or CARG2, CARG2, TMP2
+ | srw CARG1, CARG1, RB
+ |1:
+ | addc CARG2, CARG2, CARG4
+ | adde CARG1, CARG1, CARG4
+ | xor CRET2, CARG2, CARG4
+ | xor CRET1, CARG1, CARG4
+ |.if GPR64
+ | rldimi CRET2, CRET1, 32, 0
+ | mr CRET1, CRET2
+ |.endif
+ |.if FPU
+ | addi sp, sp, 16
+ |.endif
+ | blr
+ |2:
+ | subfic TMP1, RB, 0 // 64 bit left shift.
+ | addi RB, RB, -32
+ | slw CARG1, CARG1, TMP1
+ | srw TMP2, CARG2, RB
+ | addi RB, TMP1, -32
+ | or CARG1, CARG1, TMP2
+ | slw TMP2, CARG2, RB
+ | or CARG1, CARG1, TMP2
+ | slw CARG2, CARG2, TMP1
+ | b <1
+ |3:
+ | li CRET1, 0
+ |.if not GPR64
+ | li CRET2, 0
+ |.endif
+ |.if FPU
+ | addi sp, sp, 16
+ |.endif
+ | blr
+ |
+ |// int32_t lj_vm_tobit(double x)
+ |.if FPU
+ |->vm_tobit:
+ | lus TMP0, 0x59c0 // 2^52 + 2^51 (float).
+ | subi sp, sp, 16
+ | stw TMP0, 0(sp)
+ | lfs FARG2, 0(sp)
+ | fadd FARG1, FARG1, FARG2
+ | stfd FARG1, 0(sp)
+ | lwz CRET1, 4(sp)
+ | addi sp, sp, 16
+ | blr
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
 |//-- Miscellaneous functions --------------------------------------------
 |//-----------------------------------------------------------------------
 |
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 4cfb7b6a..970e8e43 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -2625,6 +2625,49 @@ static void build_subroutines(BuildCtx *ctx)
 | ret
 |
 |//-----------------------------------------------------------------------
+ |//-- Number conversion functions ----------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int64_t lj_vm_num2int_check(double x)
+ |->vm_num2int_check:
+ | cvttsd2si eax, xmm0
+ | xorps xmm1, xmm1
+ | cvtsi2sd xmm1, eax
+ | ucomisd xmm1, xmm0
+ | jp >1
+ | jne >1
+ | ret
+ |1:
+ | mov64 rax, U64x(80000000,80000000)
+ | ret
+ |
+ |// int64_t lj_vm_num2i64(double x)
+ |->vm_num2i64:
+ | cvttsd2si rax, xmm0
+ | ret
+ |
+ |// uint64_t lj_vm_num2u64(double x)
+ |->vm_num2u64:
+ | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
+ | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
+ | jo >1
+ | ret
+ |1:
+ | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
+ | movd xmm1, rdx
+ | addsd xmm0, xmm1
+ | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
+ | // Note that -0x1p63 converts to -0x8000000000000000LL either way.
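+ | // The -0x1p64 bias wraps [2^63..2^64) inputs to negative doubles,
+ | // so the signed conversion yields the intended unsigned bit pattern.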
+ | ret + | + |// int32_t lj_vm_tobit(double x) + |->vm_tobit: + | sseconst_tobit xmm1, RC + | addsd xmm0, xmm1 + | movd eax, xmm0 + | ret + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 77c4069d..485ed809 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -3059,6 +3059,98 @@ static void build_subroutines(BuildCtx *ctx) | ret | |//----------------------------------------------------------------------- + |//-- Number conversion functions ---------------------------------------- + |//----------------------------------------------------------------------- + | + |// int64_t lj_vm_num2int_check(double x) + |->vm_num2int_check: + |.if not X64 + | movsd xmm0, qword [esp+4] + |.endif + | cvttsd2si eax, xmm0 + | xorps xmm1, xmm1 + | cvtsi2sd xmm1, eax + | ucomisd xmm1, xmm0 + | jp >1 + | jne >1 + |.if not X64 + | xor edx, edx + |.endif + | ret + |1: + |.if X64 + | mov64 rax, U64x(80000000,80000000) + |.else + | mov eax, 0x80000000 + | mov edx, eax + |.endif + | ret + | + |// int64_t lj_vm_num2i64(double x) + |->vm_num2i64: + |.if X64 + | cvttsd2si rax, xmm0 + | ret + |.else + | sub esp, 12 + | fld qword [esp+16] + | fisttp qword [esp] + | mov eax, dword [esp] + | mov edx, dword [esp+4] + | add esp, 12 + | ret + |.endif + | + |// uint64_t lj_vm_num2u64(double x) + |->vm_num2u64: + |.if X64 + | cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range. + | cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow. + | jo >1 + | ret + |1: + | mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double). + | movd xmm1, rdx + | addsd xmm0, xmm1 + | cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range. + | // Note that -0x1p63 converts to -0x8000000000000000LL either way. + | ret + |.else + | sub esp, 12 + | fld qword [esp+16] + | fld st0 + | fisttp qword [esp] + | mov edx, dword [esp+4] + | mov eax, dword [esp] + | cmp edx, 1 + | jo >2 + |1: + | fpop + | add esp, 12 + | ret + |2: + | cmp eax, 0 + | jne <1 + | mov dword [esp+8], 0xdf800000 // -0x1p64 (float). + | fadd dword [esp+8] + | fisttp qword [esp] + | mov eax, dword [esp] + | mov edx, dword [esp+4] + | add esp, 12 + | ret + |.endif + | + |// int32_t lj_vm_tobit(double x) + |->vm_tobit: + |.if not X64 + | movsd xmm0, qword [esp+4] + |.endif + | sseconst_tobit xmm1, RCa + | addsd xmm0, xmm1 + | movd eax, xmm0 + | ret + | + |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- |
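
For reference, all of the per-architecture routines added above implement
the same conversion semantics. The following portable C sketch is
illustrative only and not part of the patch: the names mirror the asm
labels, and NaNs or far out-of-range inputs, which the asm variants handle
with architecture-specific results, are only partly covered here.

#include <stdint.h>

/* Sketch of vm_num2int_check: return the zero-extended int32 result, or
** the failure marker 0x8000000080000000 if x is not an exact int32.
** +-0 both yield 0; DUALNUM callers test for -0 separately.
*/
static uint64_t num2int_check(double x)
{
  int32_t i;
  if (!(x >= -2147483648.0 && x <= 2147483647.0))  /* Also rejects NaN. */
    return 0x8000000080000000ULL;
  i = (int32_t)x;  /* Truncates toward zero. */
  if ((double)i != x)  /* Reject non-integral values. */
    return 0x8000000080000000ULL;
  return (uint64_t)(uint32_t)i;
}

/* Sketch of vm_num2u64: truncate into the union of the int64_t and
** uint64_t ranges, i.e. [-2^63..2^64), mirroring the x64 bias-and-
** reconvert path. Both 2^63 and 2^64 are exactly representable, and the
** subtraction below is exact for inputs in [2^63..2^64+2^63).
*/
static uint64_t num2u64(double x)
{
  if (x >= 9223372036854775808.0)  /* x >= 2^63: bias into signed range. */
    return (uint64_t)(int64_t)(x - 18446744073709551616.0);  /* x - 2^64 */
  return (uint64_t)(int64_t)x;  /* vm_num2i64 is just this cast. */
}

/* Sketch of the vm_tobit magic constant (cf. lj_num2bit): adding
** 2^52 + 2^51 pins the exponent so the low 32 bits of the mantissa
** hold the integer value of x modulo 2^32.
*/
static int32_t tobit(double x)
{
  union { double d; uint64_t u; } u;
  u.d = x + 6755399441055744.0;  /* 2^52 + 2^51 */
  return (int32_t)(uint32_t)u.u;
}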