available on any specific host. For example on x86, the available
classes are: Int32, Flt64, Vec128 only.
- IMPORTANT NOTE: Vec128 is the only >= 128-bit-sized class, and
- reg_alloc2.c handles it specially when assigning spill slots. If
- you add another 128-bit or larger regclass, you must remember to
- update reg_alloc2.c accordingly.
+ IMPORTANT NOTE: reg_alloc2.c needs to know how much space is needed
+ to spill each class of register. It has the following knowledge hardwired in:
+
+ HRcInt32 32 bits
+ HRcInt64 64 bits
+ HRcFlt64 80 bits (on x86 these are spilled by fstpt/fldt)
+ HRcVec64 64 bits
+ HRcVec128 128 bits
+
+ If you add another regclass, you must remember to update
+ reg_alloc2.c accordingly.
*/
typedef
enum {
/* --------- Stage 3: allocate spill slots. --------- */
- /* Each spill slot is 8 bytes long. For 128-bit vregs
- we have to allocate two spill slots.
+ /* Each spill slot is 8 bytes long. For vregs which take more than
+ 64 bits to spill (classes Flt64 and Vec128), we have to allocate
+ two spill slots.
Do a rank-based allocation of vregs to spill slot numbers. We
put as few values as possible in spill slows, but nevertheless
continue;
}
- /* The spill slots are 64 bits in size. That means, to spill a
- Vec128-class vreg, we'll need to find two adjacent spill
- slots to use. Note, this special-casing needs to happen for
- all 128-bit sized register classes. Currently though
- HRcVector is the only such class. */
+ /* The spill slots are 64 bits in size. As per the comment on
+ definition of HRegClass in h_generic_regs.h, that means, to
+ spill a vreg of class Flt64 or Vec128, we'll need to find two
+ adjacent spill slots to use. Note, this logic needs to be kept
+ in sync with the size info on the definition of HRegClass. */
- if (vreg_lrs[j].reg_class != HRcVec128) {
+ if (vreg_lrs[j].reg_class == HRcVec128
+ || vreg_lrs[j].reg_class == HRcFlt64) {
- /* The ordinary case -- just find a single spill slot. */
+ /* Find two adjacent free slots which between them provide
+ up to 128 bits in which to spill the vreg. */
- /* Find the lowest-numbered spill slot which is available at
- the start point of this interval, and assign the interval
- to it. */
- for (k = 0; k < N_SPILL64S; k++)
- if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
+ for (k = 0; k < N_SPILL64S-1; k++)
+ if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
+ && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
break;
- if (k == N_SPILL64S) {
+ if (k == N_SPILL64S-1) {
vpanic("LibVEX_N_SPILL_BYTES is too low. "
"Increase and recompile.");
}
- ss_busy_until_before[k] = vreg_lrs[j].dead_before;
+ ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
+ ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
} else {
- /* Find two adjacent free slots in which to spill a 128-bit
- vreg. */
+ /* The ordinary case -- just find a single spill slot. */
- for (k = 0; k < N_SPILL64S-1; k++)
- if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
- && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
+ /* Find the lowest-numbered spill slot which is available at
+ the start point of this interval, and assign the interval
+ to it. */
+ for (k = 0; k < N_SPILL64S; k++)
+ if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
break;
- if (k == N_SPILL64S-1) {
+ if (k == N_SPILL64S) {
vpanic("LibVEX_N_SPILL_BYTES is too low. "
"Increase and recompile.");
}
- ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
- ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
+ ss_busy_until_before[k] = vreg_lrs[j].dead_before;
}
i->Xin.FpLdSt.sz = sz;
i->Xin.FpLdSt.reg = reg;
i->Xin.FpLdSt.addr = addr;
- vassert(sz == 4 || sz == 8);
+ vassert(sz == 4 || sz == 8 || sz == 10);
return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
break;
case Xin_FpLdSt:
if (i->Xin.FpLdSt.isLoad) {
- vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
+ vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
+ : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
ppX86AMode(i->Xin.FpLdSt.addr);
vex_printf(", ");
ppHRegX86(i->Xin.FpLdSt.reg);
} else {
- vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
+ vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
+ : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
ppHRegX86(i->Xin.FpLdSt.reg);
vex_printf(", ");
ppX86AMode(i->Xin.FpLdSt.addr);
case HRcInt32:
return X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
case HRcFlt64:
- return X86Instr_FpLdSt ( False/*store*/, 8, rreg, am );
+ return X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
case HRcVec128:
return X86Instr_SseLdSt ( False/*store*/, rreg, am );
default:
case HRcInt32:
return X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
case HRcFlt64:
- return X86Instr_FpLdSt ( True/*load*/, 8, rreg, am );
+ return X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
case HRcVec128:
return X86Instr_SseLdSt ( True/*load*/, rreg, am );
default:
goto done;
case Xin_FpLdSt:
- vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
if (i->Xin.FpLdSt.isLoad) {
/* Load from memory into %fakeN.
- --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1)
+ --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
*/
p = do_ffree_st7(p);
- *p++ = toUChar(i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD);
- p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+ switch (i->Xin.FpLdSt.sz) {
+ case 4:
+ *p++ = 0xD9;
+ p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 8:
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 10:
+ *p++ = 0xDB;
+ p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ default:
+ vpanic("emitX86Instr(FpLdSt,load)");
+ }
p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
goto done;
} else {
*/
p = do_ffree_st7(p);
p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
- *p++ = toUChar(i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD);
- p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+ switch (i->Xin.FpLdSt.sz) {
+ case 4:
+ *p++ = 0xD9;
+ p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 8:
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 10:
+ *p++ = 0xDB;
+ p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ default:
+ vpanic("emitX86Instr(FpLdSt,store)");
+ }
goto done;
}
break;