HChar* fName = NULL;
void* fAddr = NULL;
if (haveF2orF3(pfx)) goto decode_failure;
- switch (archinfo->subarch) {
- case VexSubArch_NONE:
- fName = "amd64g_dirtyhelper_CPUID";
- fAddr = &amd64g_dirtyhelper_CPUID;
- break;
- default:
- vpanic("disInstr(amd64)(cpuid)");
+ if (archinfo->hwcaps == 0/*baseline, == SSE2*/) {
+ fName = "amd64g_dirtyhelper_CPUID";
+ fAddr = &amd64g_dirtyhelper_CPUID;
}
+ else
+ vpanic("disInstr(amd64)(cpuid)");
+
vassert(fName); vassert(fAddr);
d = unsafeIRDirty_0_N ( 0/*regparms*/,
fName, fAddr, mkIRExprVec_0() );
vassert(vex_control.guest_chase_thresh >= 0);
vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
- vassert(archinfo_guest->subarch == VexSubArchARM_v4);
+ vassert(archinfo_guest->hwcaps == 0);
/* Start a new, empty extent. */
vge->n_used = 1;
break;
case 0x3D7: // stfiwx (Store Float as Int, Indexed, PPC32 p517)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
DIP("stfiwx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
storeBE( mkexpr(EA),
break;
case 0x16: // fsqrts (Floating SqRt (Single-Precision), PPC32 p428)
+ // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX)
if (frA_addr != 0 || frC_addr != 0) {
vex_printf("dis_fp_arith(ppc)(instr,fsqrts)\n");
return False;
break;
case 0x18: // fres (Floating Reciprocal Estimate Single, PPC32 p421)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
if (frA_addr != 0 || frC_addr != 0) {
vex_printf("dis_fp_arith(ppc)(instr,fres)\n");
return False;
break;
case 0x16: // fsqrt (Floating SqRt (Double-Precision), PPC32 p427)
+ // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX)
if (frA_addr != 0 || frC_addr != 0) {
vex_printf("dis_fp_arith(ppc)(instr,fsqrt)\n");
return False;
break;
case 0x17: { // fsel (Floating Select, PPC32 p426)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
IRTemp cc = newTemp(Ity_I32);
IRTemp cc_b0 = newTemp(Ity_I32);
break;
case 0x1A: // frsqrte (Floating Recip SqRt Est., PPC32 p424)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
if (frA_addr != 0 || frC_addr != 0) {
vex_printf("dis_fp_arith(ppc)(instr,frsqrte)\n");
return False;
DisResult dres;
UInt theInstr;
IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ Bool allow_F = False;
+ Bool allow_V = False;
+ Bool allow_FX = False;
+ Bool allow_GX = False;
+ UInt hwcaps = archinfo->hwcaps;
+ Long delta;
/* What insn variants are we supporting today? */
- Bool allow_FP = archinfo->subarch == VexSubArchPPC32_FI ||
- archinfo->subarch == VexSubArchPPC32_VFI ||
- archinfo->subarch == VexSubArchPPC64_FI ||
- archinfo->subarch == VexSubArchPPC64_VFI;
-
- Bool allow_VMX = archinfo->subarch == VexSubArchPPC32_VFI ||
- archinfo->subarch == VexSubArchPPC64_VFI;
+ if (mode64) {
+ allow_F = True;
+ allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC64_V));
+ allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC64_FX));
+ allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX));
+ } else {
+ allow_F = (0 != (hwcaps & VEX_HWCAPS_PPC32_F));
+ allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC32_V));
+ allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC32_FX));
+ allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX));
+ }
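For concreteness, a worked example of the hwcaps-to-allow_* mapping above (editorial illustration, not part of the patch; the capability combination is invented):

   /* A ppc32 guest advertising FP, Altivec and the graphics
      extensions, but not the general-purpose extensions: */
   UInt example_hwcaps = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
                         | VEX_HWCAPS_PPC32_GX;
   /* yields allow_F == True, allow_V == True,
             allow_FX == False, allow_GX == True */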
/* The running delta */
- Long delta = (Long)mkSzAddr(ty, (ULong)delta64);
+ delta = (Long)mkSzAddr(ty, (ULong)delta64);
/* Set result defaults. */
dres.whatNext = Dis_Continue;
and have done. */
theInstr = getUIntBigendianly( (UChar*)(&guest_code[delta]) );
-// vex_printf("insn: 0x%x\n", theInstr);
+ if (0) vex_printf("insn: 0x%x\n", theInstr);
DIP("\t0x%llx: ", (ULong)guest_CIA_curr_instr);
/* Floating Point Load Instructions */
case 0x30: case 0x31: case 0x32: // lfs, lfsu, lfd
case 0x33: // lfdu
- if (!allow_FP) goto decode_noFP;
+ if (!allow_F) goto decode_noF;
if (dis_fp_load( theInstr )) goto decode_success;
goto decode_failure;
/* Floating Point Store Instructions */
case 0x34: case 0x35: case 0x36: // stfs, stfsu, stfd
case 0x37: // stfdu
- if (!allow_FP) goto decode_noFP;
+ if (!allow_F) goto decode_noF;
if (dis_fp_store( theInstr )) goto decode_success;
goto decode_failure;
goto decode_failure;
case 0x3B:
- if (!allow_FP) goto decode_noFP;
-
+ if (!allow_F) goto decode_noF;
opc2 = IFIELD(theInstr, 1, 5);
switch (opc2) {
/* Floating Point Arith Instructions */
case 0x12: case 0x14: case 0x15: // fdivs, fsubs, fadds
- case 0x16: case 0x18: case 0x19: // fsqrts, fres, fmuls
+ case 0x19: // fmuls
if (dis_fp_arith(theInstr)) goto decode_success;
goto decode_failure;
-
+ case 0x16: // fsqrts
+ if (!allow_FX) goto decode_noFX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+ case 0x18: // fres
+ if (!allow_GX) goto decode_noGX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+
/* Floating Point Mult-Add Instructions */
case 0x1C: case 0x1D: case 0x1E: // fmsubs, fmadds, fnmsubs
case 0x1F: // fnmadds
goto decode_failure;
case 0x3F:
- if (!allow_FP) goto decode_noFP;
+ if (!allow_F) goto decode_noF;
/* Instrs using opc[1:5] never overlap instrs using opc[1:10],
so we can simply fall through the first switch statement */
opc2 = IFIELD(theInstr, 1, 5);
switch (opc2) {
/* Floating Point Arith Instructions */
- case 0x12: case 0x14: case 0x15: // fdiv, fsub, fadd
- case 0x16: case 0x17: case 0x19: // fsqrt, fsel, fmul
- case 0x1A: // frsqrte
+ case 0x12: case 0x14: case 0x15: // fdiv, fsub, fadd
+ case 0x19: // fmul
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+ case 0x16: // fsqrt
+ if (!allow_FX) goto decode_noFX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+ case 0x17: case 0x1A: // fsel, frsqrte
+ if (!allow_GX) goto decode_noGX;
if (dis_fp_arith(theInstr)) goto decode_success;
goto decode_failure;
/* Floating Point Load Instructions */
case 0x217: case 0x237: case 0x257: // lfsx, lfsux, lfdx
case 0x277: // lfdux
- if (!allow_FP) goto decode_noFP;
+ if (!allow_F) goto decode_noF;
if (dis_fp_load( theInstr )) goto decode_success;
goto decode_failure;
/* Floating Point Store Instructions */
case 0x297: case 0x2B7: case 0x2D7: // stfsx, stfsux, stfdx
- case 0x2F7: case 0x3D7: // stfdux, stfiwx
- if (!allow_FP) goto decode_noFP;
+ case 0x2F7: // stfdux
+ if (!allow_F) goto decode_noF;
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_store( theInstr )) goto decode_success;
+ goto decode_failure;
+ case 0x3D7: // stfiwx
+ if (!allow_F) goto decode_noF;
+ if (!allow_GX) goto decode_noGX;
if (dis_fp_store( theInstr )) goto decode_success;
goto decode_failure;
-
/* AltiVec instructions */
/* AV Cache Control - Data streams */
case 0x156: case 0x176: case 0x336: // dst, dstst, dss
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_datastream( theInstr )) goto decode_success;
goto decode_failure;
case 0x006: case 0x026: // lvsl, lvsr
case 0x007: case 0x027: case 0x047: // lvebx, lvehx, lvewx
case 0x067: case 0x167: // lvx, lvxl
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_load( theInstr )) goto decode_success;
goto decode_failure;
/* AV Store */
case 0x087: case 0x0A7: case 0x0C7: // stvebx, stvehx, stvewx
case 0x0E7: case 0x1E7: // stvx, stvxl
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_store( theInstr )) goto decode_success;
goto decode_failure;
case 0x20: case 0x21: case 0x22: // vmhaddshs, vmhraddshs, vmladduhm
case 0x24: case 0x25: case 0x26: // vmsumubm, vmsummbm, vmsumuhm
case 0x27: case 0x28: case 0x29: // vmsumuhs, vmsumshm, vmsumshs
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_multarith( theInstr )) goto decode_success;
goto decode_failure;
case 0x2A: // vsel
case 0x2B: // vperm
case 0x2C: // vsldoi
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_permute( theInstr )) goto decode_success;
goto decode_failure;
/* AV Floating Point Mult-Add/Sub */
case 0x2E: case 0x2F: // vmaddfp, vnmsubfp
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_fp_arith( theInstr )) goto decode_success;
goto decode_failure;
case 0x308: case 0x348: // vmulesb, vmulesh
case 0x608: case 0x708: case 0x648: // vsum4ubs, vsum4sbs, vsum4shs
case 0x688: case 0x788: // vsum2sws, vsumsws
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_arith( theInstr )) goto decode_success;
goto decode_failure;
case 0x304: case 0x344: case 0x384: // vsrab, vsrah, vsraw
case 0x1C4: case 0x2C4: // vsl, vsr
case 0x40C: case 0x44C: // vslo, vsro
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_shift( theInstr )) goto decode_success;
goto decode_failure;
/* AV Logic */
case 0x404: case 0x444: case 0x484: // vand, vandc, vor
case 0x4C4: case 0x504: // vxor, vnor
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_logic( theInstr )) goto decode_success;
goto decode_failure;
/* AV Processor Control */
case 0x604: case 0x644: // mfvscr, mtvscr
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_procctl( theInstr )) goto decode_success;
goto decode_failure;
case 0x10A: case 0x14A: case 0x18A: // vrefp, vrsqrtefp, vexptefp
case 0x1CA: // vlogefp
case 0x40A: case 0x44A: // vmaxfp, vminfp
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_fp_arith( theInstr )) goto decode_success;
goto decode_failure;
case 0x2CA: // vrfim
case 0x30A: case 0x34A: case 0x38A: // vcfux, vcfsx, vctuxs
case 0x3CA: // vctsxs
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_fp_convert( theInstr )) goto decode_success;
goto decode_failure;
case 0x10C: case 0x14C: case 0x18C: // vmrglb, vmrglh, vmrglw
case 0x20C: case 0x24C: case 0x28C: // vspltb, vsplth, vspltw
case 0x30C: case 0x34C: case 0x38C: // vspltisb, vspltish, vspltisw
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_permute( theInstr )) goto decode_success;
goto decode_failure;
case 0x20E: case 0x24E: case 0x28E: // vupkhsb, vupkhsh, vupklsb
case 0x2CE: // vupklsh
case 0x30E: case 0x34E: case 0x3CE: // vpkpx, vupkhpx, vupklpx
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_pack( theInstr )) goto decode_success;
goto decode_failure;
case 0x006: case 0x046: case 0x086: // vcmpequb, vcmpequh, vcmpequw
case 0x206: case 0x246: case 0x286: // vcmpgtub, vcmpgtuh, vcmpgtuw
case 0x306: case 0x346: case 0x386: // vcmpgtsb, vcmpgtsh, vcmpgtsw
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_cmp( theInstr )) goto decode_success;
goto decode_failure;
/* AV Floating Point Compare */
case 0x0C6: case 0x1C6: case 0x2C6: // vcmpeqfp, vcmpgefp, vcmpgtfp
case 0x3C6: // vcmpbfp
- if (!allow_VMX) goto decode_noVMX;
+ if (!allow_V) goto decode_noV;
if (dis_av_fp_cmp( theInstr )) goto decode_success;
goto decode_failure;
break;
default:
- decode_noFP:
- vassert(!allow_FP);
- vex_printf("disInstr(ppc): Floating Point insns disabled for this arch.\n");
goto decode_failure;
- decode_noVMX:
- vassert(!allow_VMX);
- vex_printf("disInstr(ppc): AltiVec insns disabled for this arch.\n");
+ decode_noF:
+ vassert(!allow_F);
+ vex_printf("disInstr(ppc): declined to decode an FP insn.\n");
+ goto decode_failure;
+ decode_noV:
+ vassert(!allow_V);
+ vex_printf("disInstr(ppc): declined to decode an AltiVec insn.\n");
+ goto decode_failure;
+ decode_noFX:
+ vassert(!allow_FX);
+ vex_printf("disInstr(ppc): "
+ "declined to decode an GeneralPurpose-Optional insn.\n");
+ goto decode_failure;
+ decode_noGX:
+ vassert(!allow_GX);
+ vex_printf("disInstr(ppc): "
+ "declined to decode a Graphics-Optional insn.\n");
goto decode_failure;
decode_failure:
{
IRType ty;
DisResult dres;
- VexSubArch gsa = archinfo->subarch;
+ Bool is32, is64;
+ UInt mask32, mask64;
+ UInt hwcaps_guest = archinfo->hwcaps;
+
+ /* global -- ick */
+ mode64 = False;
/* Figure out whether we're being ppc32 or ppc64 today. */
- switch (gsa) {
- case VexSubArchPPC32_VFI:
- case VexSubArchPPC32_FI:
- case VexSubArchPPC32_I:
- mode64 = False;
- break;
- case VexSubArchPPC64_VFI:
- case VexSubArchPPC64_FI:
- mode64 = True;
- break;
- default:
- vpanic("disInstr_PPC(): illegal subarch");
- }
+ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
+ | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX;
+
+ is32 = (hwcaps_guest & ~mask32) == 0;
+
+ mask64 = VEX_HWCAPS_PPC64_V
+ | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX;
+
+ is64 = (hwcaps_guest & ~mask64) == 0;
+
+ if (is32 && !is64)
+ mode64 = False;
+ else if (is64 && !is32)
+ mode64 = True;
+ else
+ vpanic("distInstr_PPC: illegal subarch");
+
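Editorial note on the classification above (not part of the patch): the PPC32 and PPC64 capability bits are disjoint, so a word with only PPC32 bits set has nothing outside mask32 (is32 holds) but does have bits outside mask64 (is64 fails), and vice versa. The all-zero baseline word passes both tests, is ambiguous, and falls through to the vpanic; a sketch:

   /* hwcaps = VEX_HWCAPS_PPC32_F  ->  is32 only      -> mode64 = False */
   /* hwcaps = VEX_HWCAPS_PPC64_V  ->  is64 only      -> mode64 = True  */
   /* hwcaps = 0                   ->  is32 and is64  -> vpanic         */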
ty = mode64 ? Ity_I64 : Ity_I32;
/* Skip parts of the decoder which don't apply given the stated
guest subarchitecture. */
- if (archinfo->subarch == VexSubArchX86_sse0)
+ if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
goto after_sse_decoders;
/* Otherwise we must be doing sse1 or sse2, so we can at least try
/* Skip parts of the decoder which don't apply given the stated
guest subarchitecture. */
- if (archinfo->subarch == VexSubArchX86_sse0
- || archinfo->subarch == VexSubArchX86_sse1)
- goto after_sse_decoders;
+ if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
+ goto after_sse_decoders; /* no SSE2 capabilities */
insn = (UChar*)&guest_code[delta];
/* Skip parts of the decoder which don't apply given the stated
guest subarchitecture. */
- if (archinfo->subarch == VexSubArchX86_sse0
- || archinfo->subarch == VexSubArchX86_sse1
- /* || archinfo->subarch == VexSubArchX86_sse2 */)
- goto after_sse_decoders;
+ /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */
+ /* In fact this is highly bogus; we accept SSE3 insns even on an
+ SSE2-only guest since they turn into IR which can be re-emitted
+ successfully on an SSE2 host. */
+ if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
+ goto after_sse_decoders; /* no SSE2 capabilities */
insn = (UChar*)&guest_code[delta];
IRDirty* d = NULL;
HChar* fName = NULL;
void* fAddr = NULL;
- switch (archinfo->subarch) {
- case VexSubArchX86_sse0:
- fName = "x86g_dirtyhelper_CPUID_sse0";
- fAddr = &x86g_dirtyhelper_CPUID_sse0;
- break;
- case VexSubArchX86_sse1:
- fName = "x86g_dirtyhelper_CPUID_sse1";
- fAddr = &x86g_dirtyhelper_CPUID_sse1;
- break;
- case VexSubArchX86_sse2:
- fName = "x86g_dirtyhelper_CPUID_sse2";
- fAddr = &x86g_dirtyhelper_CPUID_sse2;
- break;
- default:
- vpanic("disInstr(x86)(cpuid)");
- }
+ if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
+ fName = "x86g_dirtyhelper_CPUID_sse2";
+ fAddr = &x86g_dirtyhelper_CPUID_sse2;
+ }
+ else
+ if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
+ fName = "x86g_dirtyhelper_CPUID_sse1";
+ fAddr = &x86g_dirtyhelper_CPUID_sse1;
+ }
+ else
+ if (archinfo->hwcaps == 0/*no SSE*/) {
+ fName = "x86g_dirtyhelper_CPUID_sse0";
+ fAddr = &x86g_dirtyhelper_CPUID_sse0;
+ } else
+ vpanic("disInstr(x86)(cpuid)");
+
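+ /* Note: hwcaps is monotonic (SSE3 implies SSE2 implies SSE1), so an
+    SSE3-capable host takes the first branch and reuses the sse2 CPUID
+    helper; no sse3-specific helper is provided. */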
vassert(fName); vassert(fAddr);
d = unsafeIRDirty_0_N ( 0/*regparms*/,
fName, fAddr, mkIRExprVec_0() );
Int vreg_ctr;
- VexSubArch subarch;
+ /* Currently (27 Jan 06) used only in the vec_fail message */
+ UInt hwcaps;
}
ISelEnv;
vec_fail:
vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
- LibVEX_ppVexSubArch(env->subarch));
+ LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
ppIRExpr(e);
vpanic("iselVecExpr_wrk");
}
HInstrArray* iselBB_AMD64 ( IRBB* bb, VexArchInfo* archinfo_host )
{
- Int i, j;
- HReg hreg, hregHI;
- ISelEnv* env;
- VexSubArch subarch_host = archinfo_host->subarch;
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
/* sanity ... */
- vassert(subarch_host == VexSubArch_NONE);
+ vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
/* and finally ... */
- env->subarch = subarch_host;
+ env->hwcaps = hwcaps_host;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
Int vreg_ctr;
- VexSubArch subarch;
+ /* 27 Jan 06: used only in error messages so far, but should be */
+ UInt hwcaps;
Bool mode64;
}
} /* if (e->tag == Iex_Binop) */
vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
- LibVEX_ppVexSubArch(env->subarch));
+ LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
+ env->hwcaps));
ppIRExpr(e);
vpanic("iselVecExpr_wrk(ppc)");
}
HInstrArray* iselBB_PPC ( IRBB* bb, VexArchInfo* archinfo_host )
{
- Int i, j;
- HReg hreg, hregHI;
- ISelEnv* env;
- VexSubArch subarch_host = archinfo_host->subarch;
- Bool mode64;
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ Bool mode64 = False;
+ Bool is32, is64;
+ UInt mask32, mask64;
/* Figure out whether we're being ppc32 or ppc64 today. */
- switch (subarch_host) {
- case VexSubArchPPC32_VFI:
- case VexSubArchPPC32_FI:
- case VexSubArchPPC32_I:
+ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
+ | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX;
+
+ is32 = (hwcaps_host & ~mask32) == 0;
+
+ mask64 = VEX_HWCAPS_PPC64_V
+ | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX;
+
+ is64 = (hwcaps_host & ~mask64) == 0;
+
+ if (is32 && !is64)
mode64 = False;
- break;
- case VexSubArchPPC64_VFI:
- case VexSubArchPPC64_FI:
+ else if (is64 && !is32)
mode64 = True;
- break;
- default:
+ else
vpanic("iselBB_PPC: illegal subarch");
- }
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
/* and finally ... */
- env->subarch = subarch_host;
+ env->hwcaps = hwcaps_host;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
i->Xin.Bsfr32.dst = dst;
return i;
}
-X86Instr* X86Instr_MFence ( VexSubArch subarch )
+X86Instr* X86Instr_MFence ( UInt hwcaps )
{
- X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
- i->tag = Xin_MFence;
- i->Xin.MFence.subarch = subarch;
- vassert(subarch == VexSubArchX86_sse0
- || subarch == VexSubArchX86_sse1
- || subarch == VexSubArchX86_sse2);
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_MFence;
+ i->Xin.MFence.hwcaps = hwcaps;
+ vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1|VEX_HWCAPS_X86_SSE2
+ |VEX_HWCAPS_X86_SSE3)));
return i;
}
return;
case Xin_MFence:
vex_printf("mfence(%s)",
- LibVEX_ppVexSubArch(i->Xin.MFence.subarch));
+ LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
return;
case Xin_FpUnary:
vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
case Xin_MFence:
/* see comment in hdefs.h re this insn */
if (0) vex_printf("EMIT FENCE\n");
- switch (i->Xin.MFence.subarch) {
- case VexSubArchX86_sse0:
- /* lock addl $0,0(%esp) */
- *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
- *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
- goto done;
- case VexSubArchX86_sse1:
- /* sfence */
- *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
- /* lock addl $0,0(%esp) */
- *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
- *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
- goto done;
- case VexSubArchX86_sse2:
- /* mfence */
- *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
- goto done;
- default:
- vpanic("emit_X86Instr:mfence:subarch");
+ if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
+ |VEX_HWCAPS_X86_SSE2)) {
+ /* mfence */
+ *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
+ goto done;
+ }
+ if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
+ /* sfence */
+ *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
+ /* lock addl $0,0(%esp) */
+ *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
+ *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
+ goto done;
}
+ if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
+ /* lock addl $0,0(%esp) */
+ *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
+ *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
+ goto done;
+ }
+ vpanic("emit_X86Instr:mfence:hwcaps");
+ /*NOTREACHED*/
break;
case Xin_Store:
much as possible before continuing. On SSE2 we emit a
real "mfence", on SSE1 "sfence ; lock addl $0,0(%esp)" and
on SSE0 "lock addl $0,0(%esp)". This insn therefore
- carries the subarch so the assembler knows what to
+ carries the host's hwcaps so the assembler knows what to
emit. */
struct {
- VexSubArch subarch;
+ UInt hwcaps;
} MFence;
/* X86 Floating point (fake 3-operand, "flat reg file" insns) */
extern X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst );
extern X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst );
extern X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst );
-extern X86Instr* X86Instr_MFence ( VexSubArch );
+extern X86Instr* X86Instr_MFence ( UInt hwcaps );
extern X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst );
extern X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst );
Int vreg_ctr;
- VexSubArch subarch;
+ UInt hwcaps;
}
ISelEnv;
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
{
-# define REQUIRE_SSE1 \
- do { if (env->subarch == VexSubArchX86_sse0) \
- goto vec_fail; \
+# define REQUIRE_SSE1 \
+ do { if (env->hwcaps == 0/*baseline, no sse*/) \
+ goto vec_fail; \
} while (0)
-# define REQUIRE_SSE2 \
- do { if (env->subarch == VexSubArchX86_sse0 \
- || env->subarch == VexSubArchX86_sse1) \
- goto vec_fail; \
+# define REQUIRE_SSE2 \
+ do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
+ goto vec_fail; \
} while (0)
-# define SSE2_OR_ABOVE \
- (env->subarch != VexSubArchX86_sse0 \
- && env->subarch != VexSubArchX86_sse1)
+# define SSE2_OR_ABOVE \
+ (env->hwcaps & VEX_HWCAPS_X86_SSE2)
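+   /* Note: hwcaps is monotonic, so testing the SSE2 bit alone suffices
+      here: any SSE3-capable host has it set as well. */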
MatchInfo mi;
Bool arg1isEReg = False;
}
vec_fail:
- vex_printf("iselVecExpr (subarch = %s): can't reduce\n",
- LibVEX_ppVexSubArch(env->subarch));
+ vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
+ LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
ppIRExpr(e);
vpanic("iselVecExpr_wrk");
/* --------- MEM FENCE --------- */
case Ist_MFence:
- addInstr(env, X86Instr_MFence(env->subarch));
+ addInstr(env, X86Instr_MFence(env->hwcaps));
return;
/* --------- INSTR MARK --------- */
HInstrArray* iselBB_X86 ( IRBB* bb, VexArchInfo* archinfo_host )
{
- Int i, j;
- HReg hreg, hregHI;
- ISelEnv* env;
- VexSubArch subarch_host = archinfo_host->subarch;
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
/* sanity ... */
- vassert(subarch_host == VexSubArchX86_sse0
- || subarch_host == VexSubArchX86_sse1
- || subarch_host == VexSubArchX86_sse2);
+ vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_X86_SSE1
+ |VEX_HWCAPS_X86_SSE2
+ |VEX_HWCAPS_X86_SSE3)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
/* and finally ... */
- env->subarch = subarch_host;
+ env->hwcaps = hwcaps_host;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
/* This file contains the top level interface to the library. */
+/* --------- fwds ... --------- */
+
+static Bool are_valid_hwcaps ( VexArch arch, UInt hwcaps );
+static HChar* show_hwcaps ( VexArch arch, UInt hwcaps );
+
+
/* --------- Initialise the library. --------- */
/* Exported to library client. */
emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
host_is_bigendian = False;
host_word_type = Ity_I32;
- vassert(vta->archinfo_host.subarch == VexSubArchX86_sse0
- || vta->archinfo_host.subarch == VexSubArchX86_sse1
- || vta->archinfo_host.subarch == VexSubArchX86_sse2);
+ vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
break;
emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_AMD64Instr;
host_is_bigendian = False;
host_word_type = Ity_I64;
- vassert(vta->archinfo_host.subarch == VexSubArch_NONE);
+ vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps));
vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
break;
emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
host_is_bigendian = True;
host_word_type = Ity_I32;
- vassert(vta->archinfo_guest.subarch == VexSubArchPPC32_I
- || vta->archinfo_guest.subarch == VexSubArchPPC32_FI
- || vta->archinfo_guest.subarch == VexSubArchPPC32_VFI);
+ vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_host.hwcaps));
vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
break;
emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
host_is_bigendian = True;
host_word_type = Ity_I64;
- vassert(vta->archinfo_guest.subarch == VexSubArchPPC64_FI
- || vta->archinfo_guest.subarch == VexSubArchPPC64_VFI);
+ vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps));
vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
break;
guest_layout = &x86guest_layout;
offB_TISTART = offsetof(VexGuestX86State,guest_TISTART);
offB_TILEN = offsetof(VexGuestX86State,guest_TILEN);
- vassert(vta->archinfo_guest.subarch == VexSubArchX86_sse0
- || vta->archinfo_guest.subarch == VexSubArchX86_sse1
- || vta->archinfo_guest.subarch == VexSubArchX86_sse2);
+ vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestX86State) % 8);
vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4);
vassert(sizeof( ((VexGuestX86State*)0)->guest_TILEN ) == 4);
guest_layout = &amd64guest_layout;
offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART);
offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN);
- vassert(vta->archinfo_guest.subarch == VexSubArch_NONE);
+ vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestAMD64State) % 8);
vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8);
vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN ) == 8);
guest_layout = &armGuest_layout;
offB_TISTART = 0; /* hack ... arm has bitrot */
offB_TILEN = 0; /* hack ... arm has bitrot */
- vassert(vta->archinfo_guest.subarch == VexSubArchARM_v4);
+ vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps));
break;
case VexArchPPC32:
guest_layout = &ppc32Guest_layout;
offB_TISTART = offsetof(VexGuestPPC32State,guest_TISTART);
offB_TILEN = offsetof(VexGuestPPC32State,guest_TILEN);
- vassert(vta->archinfo_guest.subarch == VexSubArchPPC32_I
- || vta->archinfo_guest.subarch == VexSubArchPPC32_FI
- || vta->archinfo_guest.subarch == VexSubArchPPC32_VFI);
+ vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestPPC32State) % 8);
vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4);
vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TILEN ) == 4);
guest_layout = &ppc64Guest_layout;
offB_TISTART = offsetof(VexGuestPPC64State,guest_TISTART);
offB_TILEN = offsetof(VexGuestPPC64State,guest_TILEN);
- vassert(vta->archinfo_guest.subarch == VexSubArchPPC64_FI
- || vta->archinfo_guest.subarch == VexSubArchPPC64_VFI);
+ vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestPPC64State) % 16);
vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TISTART ) == 8);
vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TILEN ) == 8);
/* doesn't necessarily have to be true, but if it isn't it means
we are simulating one flavour of an architecture a different
flavour of the same architecture, which is pretty strange. */
- vassert(vta->archinfo_guest.subarch == vta->archinfo_host.subarch);
+ vassert(vta->archinfo_guest.hwcaps == vta->archinfo_host.hwcaps);
}
vexAllocSanityCheck();
}
}
-/* --------- Arch/Subarch stuff. --------- */
+/* ------------------ Arch/HwCaps stuff. ------------------ */
const HChar* LibVEX_ppVexArch ( VexArch arch )
{
}
}
-const HChar* LibVEX_ppVexSubArch ( VexSubArch subarch )
+const HChar* LibVEX_ppVexHwCaps ( VexArch arch, UInt hwcaps )
{
- switch (subarch) {
- case VexSubArch_INVALID: return "INVALID";
- case VexSubArch_NONE: return "NONE";
- case VexSubArchX86_sse0: return "x86-sse0";
- case VexSubArchX86_sse1: return "x86-sse1";
- case VexSubArchX86_sse2: return "x86-sse2";
- case VexSubArchARM_v4: return "arm-v4";
- case VexSubArchPPC32_I: return "ppc32-int-only";
- case VexSubArchPPC32_FI: return "ppc32-int-and-fp";
- case VexSubArchPPC32_VFI: return "ppc32-int-fp-and-AV";
- case VexSubArchPPC64_FI: return "ppc64-int-and-fp";
- case VexSubArchPPC64_VFI: return "ppc64-int-fp-and-AV";
- default: return "VexSubArch???";
- }
+ HChar* str = show_hwcaps(arch,hwcaps);
+ return str ? str : "INVALID";
}
+
+
/* Write default settings info *vai. */
void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai )
{
- vai->subarch = VexSubArch_INVALID;
+ vai->hwcaps = 0;
vai->ppc_cache_line_szB = 0;
}
+/* Return a string showing the hwcaps in a nice way. The string will
+ be NULL for invalid combinations of flags, so these functions also
+ serve as a way to validate hwcaps values. */
+
+static HChar* show_hwcaps_x86 ( UInt hwcaps )
+{
+ /* Monotonic, SSE3 > SSE2 > SSE1 > baseline. */
+ if (hwcaps == 0)
+ return "x86-sse0";
+ if (hwcaps == VEX_HWCAPS_X86_SSE1)
+ return "x86-sse1";
+ if (hwcaps == (VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2))
+ return "x86-sse1-sse2";
+ if (hwcaps == (VEX_HWCAPS_X86_SSE1
+ | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3))
+ return "x86-sse1-sse2-sse3";
+
+ return NULL;
+}
+
+static HChar* show_hwcaps_amd64 ( UInt hwcaps )
+{
+ /* Monotonic, SSE3 > baseline. */
+ if (hwcaps == 0)
+ return "amd64-sse2";
+ if (hwcaps == VEX_HWCAPS_AMD64_SSE3)
+ return "amd64-sse3";
+ return NULL;
+}
+
+static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
+{
+ /* Monotonic with complications. Basically V > F > baseline,
+ but once you have F then you can have FX or GX too. */
+ const UInt F = VEX_HWCAPS_PPC32_F;
+ const UInt V = VEX_HWCAPS_PPC32_V;
+ const UInt FX = VEX_HWCAPS_PPC32_FX;
+ const UInt GX = VEX_HWCAPS_PPC32_GX;
+ UInt c = hwcaps;
+ if (c == 0) return "ppc32-int";
+ if (c == F) return "ppc32-int-flt";
+ if (c == (F|FX)) return "ppc32-int-flt-FX";
+ if (c == (F|GX)) return "ppc32-int-flt-GX";
+ if (c == (F|FX|GX)) return "ppc32-int-flt-FX-GX";
+ if (c == (F|V)) return "ppc32-int-flt-vmx";
+ if (c == (F|V|FX)) return "ppc32-int-flt-vmx-FX";
+ if (c == (F|V|GX)) return "ppc32-int-flt-vmx-GX";
+ if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX";
+ return NULL;
+}
+
+static HChar* show_hwcaps_ppc64 ( UInt hwcaps )
+{
+ /* Monotonic with complications. Basically V > baseline(==F),
+ but once you have F then you can have FX or GX too. */
+ const UInt V  = VEX_HWCAPS_PPC64_V;
+ const UInt FX = VEX_HWCAPS_PPC64_FX;
+ const UInt GX = VEX_HWCAPS_PPC64_GX;
+ UInt c = hwcaps;
+ if (c == 0) return "ppc64-int-flt";
+ if (c == FX) return "ppc64-int-flt-FX";
+ if (c == GX) return "ppc64-int-flt-GX";
+ if (c == (FX|GX)) return "ppc64-int-flt-FX-GX";
+ if (c == V) return "ppc64-int-flt-vmx";
+ if (c == (V|FX)) return "ppc64-int-flt-vmx-FX";
+ if (c == (V|GX)) return "ppc64-int-flt-vmx-GX";
+ if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX";
+ return NULL;
+}
+
+static HChar* show_hwcaps_arm ( UInt hwcaps )
+{
+ if (hwcaps == 0) return "arm-baseline";
+ return NULL;
+}
+
+/* ---- */
+static HChar* show_hwcaps ( VexArch arch, UInt hwcaps )
+{
+ switch (arch) {
+ case VexArchX86: return show_hwcaps_x86(hwcaps);
+ case VexArchAMD64: return show_hwcaps_amd64(hwcaps);
+ case VexArchPPC32: return show_hwcaps_ppc32(hwcaps);
+ case VexArchPPC64: return show_hwcaps_ppc64(hwcaps);
+ case VexArchARM: return show_hwcaps_arm(hwcaps);
+ default: return NULL;
+ }
+}
+
+static Bool are_valid_hwcaps ( VexArch arch, UInt hwcaps )
+{
+ return show_hwcaps(arch,hwcaps) != NULL;
+}
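A quick usage sketch for the new validators (editorial, not part of the patch; it assumes only the definitions above):

   UInt hw = VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2;
   vassert(are_valid_hwcaps(VexArchX86, hw));
   vex_printf("%s\n", LibVEX_ppVexHwCaps(VexArchX86, hw));
   /* prints "x86-sse1-sse2" */
   /* SSE2 without SSE1 breaks monotonicity and is rejected: */
   vassert(!are_valid_hwcaps(VexArchX86, VEX_HWCAPS_X86_SSE2));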
+
+
/*---------------------------------------------------------------*/
/*--- end main/vex_main.c ---*/
/*---------------------------------------------------------------*/
}
VexArch;
-typedef
- enum {
- VexSubArch_INVALID,
- VexSubArch_NONE, /* Arch has no variants */
- VexSubArchX86_sse0, /* no SSE state; or SSE state but no insns */
- VexSubArchX86_sse1, /* SSE1 support (Pentium III) */
- VexSubArchX86_sse2, /* SSE2 support (Pentium 4) */
- VexSubArchARM_v4, /* ARM version 4 */
- VexSubArchPPC32_I, /* 32-bit PowerPC, no FP, no Altivec */
- VexSubArchPPC32_FI, /* 32-bit PowerPC, with FP but no Altivec */
- VexSubArchPPC32_VFI, /* 32-bit PowerPC, with FP and Altivec */
- VexSubArchPPC64_FI, /* 64-bit PowerPC, with FP but no Altivec */
- VexSubArchPPC64_VFI /* 64-bit PowerPC, with FP and Altivec */
- }
- VexSubArch;
+
+/* For a given architecture, these specify extra capabilities beyond
+ the minimum supported (baseline) capabilities. They may be OR'd
+ together, although some combinations don't make sense. (eg, SSE2
+ but not SSE1). LibVEX_Translate will check for nonsensical
+ combinations. */
+
+/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE) */
+#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
+#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
+#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
+
+/* amd64: baseline capability is SSE2 */
+#define VEX_HWCAPS_AMD64_SSE3 (1<<4) /* SSE3 support */
+
+/* ppc32: baseline capability is integer only */
+#define VEX_HWCAPS_PPC32_F (1<<5) /* basic (non-optional) FP */
+#define VEX_HWCAPS_PPC32_V (1<<6) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC32_FX (1<<7) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC32_GX (1<<8) /* Graphics extns
+ (fres,frsqrte,fsel,stfiwx) */
+
+/* ppc64: baseline capability is integer and basic FP insns */
+#define VEX_HWCAPS_PPC64_V (1<<9) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC64_FX (1<<10) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC64_GX (1<<11) /* Graphics extns
+ (fres,frsqrte,fsel,stfiwx) */
+
+/* arm: baseline capability is ARMv4 */
+/* No extra capabilities */
+
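To make the "may be OR'd together" rule concrete, a couple of examples (editorial; the values follow directly from the defines above):

   /* Valid: the monotonic x86 chain, and ppc32 FP plus graphics extns. */
   UInt caps_p4    = VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2;
   UInt caps_ppc32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_GX;
   /* Invalid, rejected by LibVEX_Translate: SSE2 without SSE1. */
   UInt caps_bad   = VEX_HWCAPS_X86_SSE2;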
/* These return statically allocated strings. */
extern const HChar* LibVEX_ppVexArch ( VexArch );
-extern const HChar* LibVEX_ppVexSubArch ( VexSubArch );
+extern const HChar* LibVEX_ppVexHwCaps ( VexArch, UInt );
/* This struct is a bit of a hack, but is needed to carry misc
typedef
struct {
/* This is the only mandatory field. */
- VexSubArch subarch;
+ UInt hwcaps;
/* PPC32/PPC64 only: size of cache line */
Int ppc_cache_line_szB;
}
origbuf[i] = (UChar)u;
}
+ /* FIXME: put sensible values into the .hwcaps fields */
LibVEX_default_VexArchInfo(&vai_x86);
- vai_x86.subarch = VexSubArchX86_sse1;
+ vai_x86.hwcaps = 0;
LibVEX_default_VexArchInfo(&vai_amd64);
- vai_amd64.subarch = VexSubArch_NONE;
+ vai_amd64.hwcaps = 0;
LibVEX_default_VexArchInfo(&vai_ppc32);
- vai_ppc32.subarch = VexSubArchPPC32_VFI;
+ vai_ppc32.hwcaps = 0;
vai_ppc32.ppc_cache_line_szB = 128;
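Pending that FIXME, one plausible assignment (editorial suggestion only; any choice must satisfy are_valid_hwcaps, and note that the PPC mode-detection logic cannot classify an all-zero ppc32/ppc64 word):

   vai_x86.hwcaps   = VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2;
   vai_amd64.hwcaps = 0;  /* amd64 baseline already means SSE2 */
   vai_ppc32.hwcaps = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V;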
/* ----- Set up args for LibVEX_Translate ----- */