|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
|
-|// Instruction decode+dispatch.
-|.macro ins_NEXT
+|// Instruction fetch: load the word at PC into INS, then advance PC by 4.
+|.macro ins_NEXT1
| lwz INS, 0(PC)
| addi PC, PC, 4
-| decode_OP4 TMP0, INS
+|.endmacro
+|// Instruction decode+dispatch: decode operands from INS, jump via CTR. Clobbers TMP0 and TMP1.
+|.macro ins_NEXT2
+| decode_OP4 TMP1, INS // TMP1 = value used to index the DISPATCH table.
| decode_RB8 RB, INS
-| lwzx TMP0, DISPATCH, TMP0
| decode_RD8 RD, INS
+| lwzx TMP0, DISPATCH, TMP1 // TMP0 = branch target loaded from DISPATCH (moved to CTR below).
+| decode_RA8 RA, INS
| decode_RC8 RC, INS
| mtctr TMP0
-| decode_RA8 RA, INS
| bctr
|.endmacro
+|.macro ins_NEXT // Combined form: ins_NEXT1 (fetch) followed by ins_NEXT2 (decode+dispatch).
+| ins_NEXT1
+| ins_NEXT2
+|.endmacro
|
|// Instruction footer.
|.if 1
| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
| .define ins_next, ins_NEXT
| .define ins_next_, ins_NEXT
+| .define ins_next1, ins_NEXT1
+| .define ins_next2, ins_NEXT2
|.else
| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
| // Affects only certain kinds of benchmarks (and only with -j off).
| .macro ins_next
| b ->ins_next
| .endmacro
+| .macro ins_next1 // Common dispatch: emits nothing here; the fetch happens at ->ins_next.
+| .endmacro
+| .macro ins_next2 // Common dispatch: hand over to the single shared dispatch sequence.
+| b ->ins_next // The code at ->ins_next expands ins_NEXT (fetch + decode + dispatch).
+| .endmacro
| .macro ins_next_
| ->ins_next:
| ins_NEXT
| lwz PC, LFUNC:RB->pc
| lwz INS, 0(PC)
| addi PC, PC, 4
-| decode_OP4 TMP0, INS
+| decode_OP4 TMP1, INS
| decode_RA8 RA, INS
-| lwzx TMP0, DISPATCH, TMP0
+| lwzx TMP0, DISPATCH, TMP1
| add RA, RA, BASE
| mtctr TMP0
| bctr
case BC_MOV:
| // RA = dst*8, RD = src*8
+ | ins_next1 // Fetch next INS first; ins_NEXT1 writes only INS and PC, so RA/RD stay valid.
| evlddx TMP0, BASE, RD
| evstddx TMP0, BASE, RA
- | ins_next_
+ | ins_next2 // Decode+dispatch; RA/RD/TMP0 are overwritten only after the store above.
break;
case BC_NOT:
| // RA = dst*8, RD = src*8
+ | ins_next1 // Fetch next INS first; ins_NEXT1 writes only INS and PC, so RA/RD stay valid.
| lwzx TMP0, BASE, RD
| subfic TMP1, TMP0, LJ_TTRUE
| adde TMP0, TMP0, TMP1
| stwx TMP0, BASE, RA
- | ins_next
+ | ins_next2 // Decode+dispatch; reuses TMP0/TMP1, which are dead after the stwx above.
break;
case BC_UNM:
| // RA = dst*8, RD = src*8
| evmergelo TMP1, TMP1, TMP2
| checkfail ->vmeta_unm
| evxor TMP0, TMP0, TMP1
+ | ins_next1
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2
break;
case BC_LEN:
| // RA = dst*8, RD = src*8
| checkfail >2
| lwz CRET1, STR:CARG1->len
|1:
+ | ins_next1
| efdcfsi TMP0, CRET1
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2
|2:
| checktab CARG1
| checkfail ->vmeta_len
|
|.macro ins_arith, ins
| ins_arithpre TMP0, TMP1
+ | ins_next1
| ins TMP0, TMP0, TMP1
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2
|.endmacro
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
| efddiv CARG2, RD, SAVE0
| bl ->vm_floor // floor(b/c)
| efdmul TMP0, CRET2, SAVE0
+ | ins_next1
| efdsub TMP0, RD, TMP0 // b - floor(b/c)*c
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2
break;
case BC_MODNV: case BC_MODVV:
| ins_arithpre RD, SAVE0
case BC_KSTR:
| // RA = dst*8, RD = str_const*8 (~)
+ | ins_next1 // Fetch next INS first; ins_NEXT1 writes only INS and PC, so RA/RD stay valid.
| srwi TMP1, RD, 1
| subfic TMP1, TMP1, -4
| lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
| evmergelo TMP0, TISSTR, TMP0
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2 // Decode+dispatch; reuses TMP0/TMP1, which are dead after the store above.
break;
case BC_KSHORT:
| // RA = dst*8, RD = int16_literal*8
| srwi TMP1, RD, 3
| extsh TMP1, TMP1
+ | ins_next1 // Fetch next INS; ins_NEXT1 writes only INS and PC, so TMP1 and RA are preserved.
| efdcfsi TMP0, TMP1
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2 // Decode+dispatch; TMP1 was consumed by efdcfsi before being overwritten here.
break;
case BC_KNUM:
| // RA = dst*8, RD = num_const*8
| evlddx TMP0, KBASE, RD
+ | ins_next1 // Fetch next INS; ins_NEXT1 writes only INS and PC, so TMP0 and RA are preserved.
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2 // Decode+dispatch; TMP0 and RA are overwritten only after the store above.
break;
case BC_KPRI:
| // RA = dst*8, RD = primitive_type*8 (~)
| srwi TMP1, RD, 3
| not TMP0, TMP1
+ | ins_next1 // Fetch next INS; ins_NEXT1 writes only INS and PC, so TMP0 and RA are preserved.
| stwx TMP0, BASE, RA
- | ins_next
+ | ins_next2 // Decode+dispatch; TMP0 and RA are overwritten only after the stwx above.
break;
case BC_KNIL:
| // RA = base*8, RD = end*8
| cmpw RA, RD
| addi RA, RA, 8
| blt <1
- | ins_next
+ | ins_next_
break;
/* -- Upvalue and function ops ------------------------------------------ */
case BC_UGET:
| // RA = dst*8, RD = uvnum*8
+ | ins_next1
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RD, RD, 1
| addi RD, RD, offsetof(GCfuncL, uvptr)
| lwz TMP1, UPVAL:RB->v
| evldd TMP0, 0(TMP1)
| evstddx TMP0, BASE, RA
- | ins_next
+ | ins_next2
break;
case BC_USETV:
| // RA = uvnum*8, RD = src*8
break;
case BC_USETS:
| // RA = uvnum*8, RD = str_const*8 (~)
+ | ins_next1
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi TMP1, RD, 1
| srwi RA, RA, 1
| evstdd STR:TMP1, 0(CARG2)
| bne >2
|1:
- | ins_next
+ | ins_next2
|
|2: // Check if string is white and ensure upvalue is closed.
| andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str)
break;
case BC_USETN:
| // RA = uvnum*8, RD = num_const*8
+ | ins_next1
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
| lwzx UPVAL:RB, LFUNC:RB, RA
| lwz TMP1, UPVAL:RB->v
| evstdd TMP0, 0(TMP1)
- | ins_next
+ | ins_next2
break;
case BC_USETP:
| // RA = uvnum*8, RD = primitive_type*8 (~)
+ | ins_next1
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
| not TMP0, TMP0
| lwz TMP1, UPVAL:RB->v
| stw TMP0, 0(TMP1)
- | ins_next
+ | ins_next2
break;
case BC_UCLO:
| checknil TMP1
| checkok >5
|1:
+ | ins_next1
| evstddx TMP1, BASE, RA
- | ins_next
+ | ins_next2
|
|5: // Check for __index if table value is nil.
| lwz TAB:TMP2, TAB:RB->metatable
| bgt >6
| sub BASE, TMP2, RA
| lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+ | ins_next1
| lwz TMP1, LFUNC:TMP1->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next
+ | ins_next2
|
|6: // Fill up results with nil.
| subi TMP1, RD, 8
| bgt >6
| sub BASE, TMP2, RA
| lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+ | ins_next1
| lwz TMP1, LFUNC:TMP1->pc
| lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next
+ | ins_next2
|
|6: // Fill up results with nil.
| subi TMP1, RD, 8
vk = (op == BC_IFORL || op == BC_JFORL);
| add RA, BASE, RA
| evldd TMP1, FORL_IDX*8(RA)
- | evldd TMP2, FORL_STOP*8(RA)
| evldd TMP3, FORL_STEP*8(RA)
+ | evldd TMP2, FORL_STOP*8(RA)
if (!vk) {
| evcmpgtu cr0, TMP1, TISNUM
- | evcmpgtu cr1, TMP2, TISNUM
| evcmpgtu cr7, TMP3, TISNUM
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ | evcmpgtu cr1, TMP2, TISNUM
| cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| blt ->vmeta_for
}
if (vk) {
| efdadd TMP1, TMP1, TMP3
- }
- if (vk) {
| evstdd TMP1, FORL_IDX*8(RA)
}
| evcmpgts TMP3, TISNIL
| cmplw RA, TMP2
| slwi TMP1, TMP1, 3
| bgt ->vm_growstack_l
+ | ins_next1
|2:
| cmplw NARGS8:RC, TMP1 // Check for missing parameters.
| ble >3
if (op == BC_JFUNCF) {
| NYI
} else {
- | ins_next
+ | ins_next2
}
|
|3: // Clear missing parameters.
| lbz TMP2, -4+PC2PROTO(numparams)(PC)
| mr RA, BASE
| mr RC, TMP1
+ | ins_next1
| cmpwi TMP2, 0
| addi BASE, TMP1, 8
| beq >3
| addi TMP1, TMP1, 8
| bne <1
|3:
- | ins_next
+ | ins_next2
|
|4: // Clear missing parameters.
| evmr TMP0, TISNIL
case BC_FUNCCW:
| // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
if (op == BC_FUNCC) {
- | lwz TMP0, CFUNC:RB->f
+ | lwz TMP3, CFUNC:RB->f
} else {
- | lwz TMP0, DISPATCH_GL(wrapf)(DISPATCH)
+ | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH)
}
| add TMP1, RA, NARGS8:RC
| lwz TMP2, L->maxstack
| add RC, BASE, NARGS8:RC
| stw BASE, L->base
- | mtctr TMP0
| cmplw TMP1, TMP2
| stw RC, L->top
| li_vmstate C
+ | mtctr TMP3
if (op == BC_FUNCCW) {
| lwz CARG2, CFUNC:RB->f
}