From: Julian Seward Date: Fri, 27 Jan 2006 21:20:15 +0000 (+0000) Subject: Change the way Vex represents architecture variants into something X-Git-Tag: svn/VALGRIND_3_2_3^2~106 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=882a94cddd86c13aa27dcb079e6e20fd7436df5a;p=thirdparty%2Fvalgrind.git Change the way Vex represents architecture variants into something more flexible. Prior to this change, the type VexSubArch effectively imposed a total ordering on subarchitecture capabilities, which was overly restrictive. This change moves to effectively using a bit-set, allowing some features (instruction groups) to be supported or not supported independently of each other. git-svn-id: svn://svn.valgrind.org/vex/trunk@1555 --- diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c index 6dcee7b2a7..ab3034c9eb 100644 --- a/VEX/priv/guest-amd64/toIR.c +++ b/VEX/priv/guest-amd64/toIR.c @@ -13231,14 +13231,13 @@ DisResult disInstr_AMD64_WRK ( HChar* fName = NULL; void* fAddr = NULL; if (haveF2orF3(pfx)) goto decode_failure; - switch (archinfo->subarch) { - case VexSubArch_NONE: - fName = "amd64g_dirtyhelper_CPUID"; - fAddr = &amd64g_dirtyhelper_CPUID; - break; - default: - vpanic("disInstr(amd64)(cpuid)"); + if (archinfo->hwcaps == 0/*baseline, == SSE2*/) { + fName = "amd64g_dirtyhelper_CPUID"; + fAddr = &amd64g_dirtyhelper_CPUID; } + else + vpanic("disInstr(amd64)(cpuid)"); + vassert(fName); vassert(fAddr); d = unsafeIRDirty_0_N ( 0/*regparms*/, fName, fAddr, mkIRExprVec_0() ); diff --git a/VEX/priv/guest-arm/toIR.c b/VEX/priv/guest-arm/toIR.c index 64706bf348..421fade3e4 100644 --- a/VEX/priv/guest-arm/toIR.c +++ b/VEX/priv/guest-arm/toIR.c @@ -194,7 +194,7 @@ IRBB* bbToIR_ARM ( UChar* armCode, vassert(vex_control.guest_chase_thresh >= 0); vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns); - vassert(archinfo_guest->subarch == VexSubArchARM_v4); + vassert(archinfo_guest->hwcaps == 0); /* Start a new, empty extent. 
*/ vge->n_used = 1; diff --git a/VEX/priv/guest-ppc/toIR.c b/VEX/priv/guest-ppc/toIR.c index 47a76228dc..9fe7189f3f 100644 --- a/VEX/priv/guest-ppc/toIR.c +++ b/VEX/priv/guest-ppc/toIR.c @@ -5610,6 +5610,7 @@ static Bool dis_fp_store ( UInt theInstr ) break; case 0x3D7: // stfiwx (Store Float as Int, Indexed, PPC32 p517) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) DIP("stfiwx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr); assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) ); storeBE( mkexpr(EA), @@ -5692,6 +5693,7 @@ static Bool dis_fp_arith ( UInt theInstr ) break; case 0x16: // fsqrts (Floating SqRt (Single-Precision), PPC32 p428) + // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX) if (frA_addr != 0 || frC_addr != 0) { vex_printf("dis_fp_arith(ppc)(instr,fsqrts)\n"); return False; @@ -5702,6 +5704,7 @@ static Bool dis_fp_arith ( UInt theInstr ) break; case 0x18: // fres (Floating Reciprocal Estimate Single, PPC32 p421) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) if (frA_addr != 0 || frC_addr != 0) { vex_printf("dis_fp_arith(ppc)(instr,fres)\n"); return False; @@ -5761,6 +5764,7 @@ static Bool dis_fp_arith ( UInt theInstr ) break; case 0x16: // fsqrt (Floating SqRt (Double-Precision), PPC32 p427) + // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX) if (frA_addr != 0 || frC_addr != 0) { vex_printf("dis_fp_arith(ppc)(instr,fsqrt)\n"); return False; @@ -5771,6 +5775,7 @@ static Bool dis_fp_arith ( UInt theInstr ) break; case 0x17: { // fsel (Floating Select, PPC32 p426) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) IRTemp cc = newTemp(Ity_I32); IRTemp cc_b0 = newTemp(Ity_I32); @@ -5805,6 +5810,7 @@ static Bool dis_fp_arith ( UInt theInstr ) break; case 0x1A: // frsqrte (Floating Recip SqRt Est., PPC32 p424) + // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX) if (frA_addr != 0 || frC_addr != 0) { vex_printf("dis_fp_arith(ppc)(instr,frsqrte)\n"); return False; @@ -8401,18 +8407,28 @@ DisResult disInstr_PPC_WRK ( 
DisResult dres; UInt theInstr; IRType ty = mode64 ? Ity_I64 : Ity_I32; + Bool allow_F = False; + Bool allow_V = False; + Bool allow_FX = False; + Bool allow_GX = False; + UInt hwcaps = archinfo->hwcaps; + Long delta; /* What insn variants are we supporting today? */ - Bool allow_FP = archinfo->subarch == VexSubArchPPC32_FI || - archinfo->subarch == VexSubArchPPC32_VFI || - archinfo->subarch == VexSubArchPPC64_FI || - archinfo->subarch == VexSubArchPPC64_VFI; - - Bool allow_VMX = archinfo->subarch == VexSubArchPPC32_VFI || - archinfo->subarch == VexSubArchPPC64_VFI; + if (mode64) { + allow_F = True; + allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC64_V)); + allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC64_FX)); + allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX)); + } else { + allow_F = (0 != (hwcaps & VEX_HWCAPS_PPC32_F)); + allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC32_V)); + allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC32_FX)); + allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX)); + } /* The running delta */ - Long delta = (Long)mkSzAddr(ty, (ULong)delta64); + delta = (Long)mkSzAddr(ty, (ULong)delta64); /* Set result defaults. */ dres.whatNext = Dis_Continue; @@ -8424,7 +8440,7 @@ DisResult disInstr_PPC_WRK ( and have done. 
*/ theInstr = getUIntBigendianly( (UChar*)(&guest_code[delta]) ); -// vex_printf("insn: 0x%x\n", theInstr); + if (0) vex_printf("insn: 0x%x\n", theInstr); DIP("\t0x%llx: ", (ULong)guest_CIA_curr_instr); @@ -8579,14 +8595,14 @@ DisResult disInstr_PPC_WRK ( /* Floating Point Load Instructions */ case 0x30: case 0x31: case 0x32: // lfs, lfsu, lfd case 0x33: // lfdu - if (!allow_FP) goto decode_noFP; + if (!allow_F) goto decode_noF; if (dis_fp_load( theInstr )) goto decode_success; goto decode_failure; /* Floating Point Store Instructions */ case 0x34: case 0x35: case 0x36: // stfsx, stfsux, stfdx case 0x37: // stfdux - if (!allow_FP) goto decode_noFP; + if (!allow_F) goto decode_noF; if (dis_fp_store( theInstr )) goto decode_success; goto decode_failure; @@ -8597,16 +8613,23 @@ DisResult disInstr_PPC_WRK ( goto decode_failure; case 0x3B: - if (!allow_FP) goto decode_noFP; - + if (!allow_F) goto decode_noF; opc2 = IFIELD(theInstr, 1, 5); switch (opc2) { /* Floating Point Arith Instructions */ case 0x12: case 0x14: case 0x15: // fdivs, fsubs, fadds - case 0x16: case 0x18: case 0x19: // fsqrts, fres, fmuls + case 0x19: // fmuls if (dis_fp_arith(theInstr)) goto decode_success; goto decode_failure; - + case 0x16: // fsqrts + if (!allow_FX) goto decode_noFX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + case 0x18: // fres + if (!allow_GX) goto decode_noGX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + /* Floating Point Mult-Add Instructions */ case 0x1C: case 0x1D: case 0x1E: // fmsubs, fmadds, fnmsubs case 0x1F: // fnmadds @@ -8625,16 +8648,23 @@ DisResult disInstr_PPC_WRK ( goto decode_failure; case 0x3F: - if (!allow_FP) goto decode_noFP; + if (!allow_F) goto decode_noF; /* Instrs using opc[1:5] never overlap instrs using opc[1:10], so we can simply fall through the first switch statement */ opc2 = IFIELD(theInstr, 1, 5); switch (opc2) { /* Floating Point Arith Instructions */ - case 0x12: case 0x14: case 0x15: 
// fdiv, fsub, fadd - case 0x16: case 0x17: case 0x19: // fsqrt, fsel, fmul - case 0x1A: // frsqrte + case 0x12: case 0x14: case 0x15: // fdiv, fsub, fadd + case 0x19: // fmul + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + case 0x16: // fsqrt + if (!allow_FX) goto decode_noFX; + if (dis_fp_arith(theInstr)) goto decode_success; + goto decode_failure; + case 0x17: case 0x1A: // fsel, frsqrte + if (!allow_GX) goto decode_noGX; if (dis_fp_arith(theInstr)) goto decode_success; goto decode_failure; @@ -8868,23 +8898,27 @@ DisResult disInstr_PPC_WRK ( /* Floating Point Load Instructions */ case 0x217: case 0x237: case 0x257: // lfsx, lfsux, lfdx case 0x277: // lfdux - if (!allow_FP) goto decode_noFP; + if (!allow_F) goto decode_noF; if (dis_fp_load( theInstr )) goto decode_success; goto decode_failure; /* Floating Point Store Instructions */ case 0x297: case 0x2B7: case 0x2D7: // stfs, stfsu, stfd - case 0x2F7: case 0x3D7: // stfdu, stfiwx - if (!allow_FP) goto decode_noFP; + case 0x2F7: // stfdu + if (!allow_F) goto decode_noF; + if (dis_fp_store( theInstr )) goto decode_success; + goto decode_failure; + case 0x3D7: // stfiwx + if (!allow_F) goto decode_noF; + if (!allow_GX) goto decode_noGX; if (dis_fp_store( theInstr )) goto decode_success; goto decode_failure; - /* AltiVec instructions */ /* AV Cache Control - Data streams */ case 0x156: case 0x176: case 0x336: // dst, dstst, dss - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_datastream( theInstr )) goto decode_success; goto decode_failure; @@ -8892,14 +8926,14 @@ DisResult disInstr_PPC_WRK ( case 0x006: case 0x026: // lvsl, lvsr case 0x007: case 0x027: case 0x047: // lvebx, lvehx, lvewx case 0x067: case 0x167: // lvx, lvxl - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_load( theInstr )) goto decode_success; goto decode_failure; /* AV Store */ case 0x087: case 0x0A7: case 0x0C7: // stvebx, stvehx, stvewx case 
0x0E7: case 0x1E7: // stvx, stvxl - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_store( theInstr )) goto decode_success; goto decode_failure; @@ -8918,7 +8952,7 @@ DisResult disInstr_PPC_WRK ( case 0x20: case 0x21: case 0x22: // vmhaddshs, vmhraddshs, vmladduhm case 0x24: case 0x25: case 0x26: // vmsumubm, vmsummbm, vmsumuhm case 0x27: case 0x28: case 0x29: // vmsumuhs, vmsumshm, vmsumshs - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_multarith( theInstr )) goto decode_success; goto decode_failure; @@ -8926,13 +8960,13 @@ DisResult disInstr_PPC_WRK ( case 0x2A: // vsel case 0x2B: // vperm case 0x2C: // vsldoi - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_permute( theInstr )) goto decode_success; goto decode_failure; /* AV Floating Point Mult-Add/Sub */ case 0x2E: case 0x2F: // vmaddfp, vnmsubfp - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_fp_arith( theInstr )) goto decode_success; goto decode_failure; @@ -8963,7 +8997,7 @@ DisResult disInstr_PPC_WRK ( case 0x308: case 0x348: // vmulesb, vmulesh case 0x608: case 0x708: case 0x648: // vsum4ubs, vsum4sbs, vsum4shs case 0x688: case 0x788: // vsum2sws, vsumsws - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_arith( theInstr )) goto decode_success; goto decode_failure; @@ -8974,20 +9008,20 @@ DisResult disInstr_PPC_WRK ( case 0x304: case 0x344: case 0x384: // vsrab, vsrah, vsraw case 0x1C4: case 0x2C4: // vsl, vsr case 0x40C: case 0x44C: // vslo, vsro - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_shift( theInstr )) goto decode_success; goto decode_failure; /* AV Logic */ case 0x404: case 0x444: case 0x484: // vand, vandc, vor case 0x4C4: case 0x504: // vxor, vnor - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_logic( theInstr )) goto decode_success; goto decode_failure; /* AV Processor 
Control */ case 0x604: case 0x644: // mfvscr, mtvscr - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_procctl( theInstr )) goto decode_success; goto decode_failure; @@ -8996,7 +9030,7 @@ DisResult disInstr_PPC_WRK ( case 0x10A: case 0x14A: case 0x18A: // vrefp, vrsqrtefp, vexptefp case 0x1CA: // vlogefp case 0x40A: case 0x44A: // vmaxfp, vminfp - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_fp_arith( theInstr )) goto decode_success; goto decode_failure; @@ -9005,7 +9039,7 @@ DisResult disInstr_PPC_WRK ( case 0x2CA: // vrfim case 0x30A: case 0x34A: case 0x38A: // vcfux, vcfsx, vctuxs case 0x3CA: // vctsxs - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_fp_convert( theInstr )) goto decode_success; goto decode_failure; @@ -9014,7 +9048,7 @@ DisResult disInstr_PPC_WRK ( case 0x10C: case 0x14C: case 0x18C: // vmrglb, vmrglh, vmrglw case 0x20C: case 0x24C: case 0x28C: // vspltb, vsplth, vspltw case 0x30C: case 0x34C: case 0x38C: // vspltisb, vspltish, vspltisw - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_permute( theInstr )) goto decode_success; goto decode_failure; @@ -9026,7 +9060,7 @@ DisResult disInstr_PPC_WRK ( case 0x20E: case 0x24E: case 0x28E: // vupkhsb, vupkhsh, vupklsb case 0x2CE: // vupklsh case 0x30E: case 0x34E: case 0x3CE: // vpkpx, vupkhpx, vupklpx - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_pack( theInstr )) goto decode_success; goto decode_failure; @@ -9041,14 +9075,14 @@ DisResult disInstr_PPC_WRK ( case 0x006: case 0x046: case 0x086: // vcmpequb, vcmpequh, vcmpequw case 0x206: case 0x246: case 0x286: // vcmpgtub, vcmpgtuh, vcmpgtuw case 0x306: case 0x346: case 0x386: // vcmpgtsb, vcmpgtsh, vcmpgtsw - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_cmp( theInstr )) goto decode_success; goto decode_failure; /* AV Floating Point Compare */ case 0x0C6: 
case 0x1C6: case 0x2C6: // vcmpeqfp, vcmpgefp, vcmpgtfp case 0x3C6: // vcmpbfp - if (!allow_VMX) goto decode_noVMX; + if (!allow_V) goto decode_noV; if (dis_av_fp_cmp( theInstr )) goto decode_success; goto decode_failure; @@ -9058,14 +9092,25 @@ DisResult disInstr_PPC_WRK ( break; default: - decode_noFP: - vassert(!allow_FP); - vex_printf("disInstr(ppc): Floating Point insns disabled for this arch.\n"); goto decode_failure; - decode_noVMX: - vassert(!allow_VMX); - vex_printf("disInstr(ppc): AltiVec insns disabled for this arch.\n"); + decode_noF: + vassert(!allow_F); + vex_printf("disInstr(ppc): declined to decode an FP insn.\n"); + goto decode_failure; + decode_noV: + vassert(!allow_V); + vex_printf("disInstr(ppc): declined to decode an AltiVec insn.\n"); + goto decode_failure; + decode_noFX: + vassert(!allow_FX); /* reached only via "if (!allow_FX) goto decode_noFX" */ + vex_printf("disInstr(ppc): " + "declined to decode an GeneralPurpose-Optional insn.\n"); + goto decode_failure; + decode_noGX: + vassert(!allow_GX); /* reached only via "if (!allow_GX) goto decode_noGX" */ + vex_printf("disInstr(ppc): " + "declined to decode a Graphics-Optional insn.\n"); goto decode_failure; decode_failure: @@ -9125,22 +9170,31 @@ DisResult disInstr_PPC ( IRBB* irbb_IN, { IRType ty; DisResult dres; - VexSubArch gsa = archinfo->subarch; + Bool is32, is64; + UInt mask32, mask64; + UInt hwcaps_guest = archinfo->hwcaps; + + /* global -- ick */ + mode64 = False; /* Figure out whether we're being ppc32 or ppc64 today. 
*/ - switch (gsa) { - case VexSubArchPPC32_VFI: - case VexSubArchPPC32_FI: - case VexSubArchPPC32_I: - mode64 = False; - break; - case VexSubArchPPC64_VFI: - case VexSubArchPPC64_FI: - mode64 = True; - break; - default: - vpanic("disInstr_PPC(): illegal subarch"); - } + mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V + | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX; + + is32 = (hwcaps_guest & mask32) > 0; /* any ppc32 capability bit set? */ + + mask64 = VEX_HWCAPS_PPC64_V + | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX; + + is64 = (hwcaps_guest & mask64) > 0; /* any ppc64 capability bit set? */ + + if (is32 && !is64) + mode64 = False; + else if (is64 && !is32) + mode64 = True; + else + vpanic("disInstr_PPC: illegal subarch"); /* mixed bits, or hwcaps == 0 (width not inferable) */ + ty = mode64 ? Ity_I64 : Ity_I32; diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c index 08cc5defc2..89bd4b1ced 100644 --- a/VEX/priv/guest-x86/toIR.c +++ b/VEX/priv/guest-x86/toIR.c @@ -7253,7 +7253,7 @@ DisResult disInstr_X86_WRK ( /* Skip parts of the decoder which don't apply given the stated guest subarchitecture. */ - if (archinfo->subarch == VexSubArchX86_sse0) + if (archinfo->hwcaps == 0/*baseline, no sse at all*/) goto after_sse_decoders; /* Otherwise we must be doing sse1 or sse2, so we can at least try @@ -8274,9 +8274,8 @@ DisResult disInstr_X86_WRK ( /* Skip parts of the decoder which don't apply given the stated guest subarchitecture. */ - if (archinfo->subarch == VexSubArchX86_sse0 - || archinfo->subarch == VexSubArchX86_sse1) - goto after_sse_decoders; + if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) + goto after_sse_decoders; /* no SSE2 capabilities */ insn = (UChar*)&guest_code[delta]; @@ -10459,10 +10458,12 @@ DisResult disInstr_X86_WRK ( /* Skip parts of the decoder which don't apply given the stated guest subarchitecture. 
*/ - if (archinfo->subarch == VexSubArchX86_sse0 - || archinfo->subarch == VexSubArchX86_sse1 - /* || archinfo->subarch == VexSubArchX86_sse2 */) - goto after_sse_decoders; + /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */ + /* In fact this is highly bogus; we accept SSE3 insns even on a + SSE2-only guest since they turn into IR which can be re-emitted + successfully on an SSE2 host. */ + if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2)) + goto after_sse_decoders; /* no SSE3 capabilities */ insn = (UChar*)&guest_code[delta]; @@ -12148,22 +12149,22 @@ DisResult disInstr_X86_WRK ( IRDirty* d = NULL; HChar* fName = NULL; void* fAddr = NULL; - switch (archinfo->subarch) { - case VexSubArchX86_sse0: - fName = "x86g_dirtyhelper_CPUID_sse0"; - fAddr = &x86g_dirtyhelper_CPUID_sse0; - break; - case VexSubArchX86_sse1: - fName = "x86g_dirtyhelper_CPUID_sse1"; - fAddr = &x86g_dirtyhelper_CPUID_sse1; - break; - case VexSubArchX86_sse2: - fName = "x86g_dirtyhelper_CPUID_sse2"; - fAddr = &x86g_dirtyhelper_CPUID_sse2; - break; - default: - vpanic("disInstr(x86)(cpuid)"); - } + if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) { + fName = "x86g_dirtyhelper_CPUID_sse2"; + fAddr = &x86g_dirtyhelper_CPUID_sse2; + } + else + if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) { + fName = "x86g_dirtyhelper_CPUID_sse1"; + fAddr = &x86g_dirtyhelper_CPUID_sse1; + } + else + if (archinfo->hwcaps == 0/*no SSE*/) { + fName = "x86g_dirtyhelper_CPUID_sse0"; + fAddr = &x86g_dirtyhelper_CPUID_sse0; + } else + vpanic("disInstr(x86)(cpuid)"); + vassert(fName); vassert(fAddr); d = unsafeIRDirty_0_N ( 0/*regparms*/, fName, fAddr, mkIRExprVec_0() ); diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c index 63ea2c1bfd..0fd869c6de 100644 --- a/VEX/priv/host-amd64/isel.c +++ b/VEX/priv/host-amd64/isel.c @@ -145,7 +145,8 @@ typedef Int vreg_ctr; - VexSubArch subarch; + /* Currently (27 Jan 06) unused */ + UInt hwcaps; } ISelEnv; @@ -3494,7 +3495,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* 
env, IRExpr* e ) vec_fail: vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n", - LibVEX_ppVexSubArch(env->subarch)); + LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps)); ppIRExpr(e); vpanic("iselVecExpr_wrk"); } @@ -3784,13 +3785,13 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) HInstrArray* iselBB_AMD64 ( IRBB* bb, VexArchInfo* archinfo_host ) { - Int i, j; - HReg hreg, hregHI; - ISelEnv* env; - VexSubArch subarch_host = archinfo_host->subarch; + Int i, j; + HReg hreg, hregHI; + ISelEnv* env; + UInt hwcaps_host = archinfo_host->hwcaps; /* sanity ... */ - vassert(subarch_host == VexSubArch_NONE); + vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3))); /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); @@ -3809,7 +3810,7 @@ HInstrArray* iselBB_AMD64 ( IRBB* bb, VexArchInfo* archinfo_host ) env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); /* and finally ... */ - env->subarch = subarch_host; + env->hwcaps = hwcaps_host; /* For each IR temporary, allocate a suitably-kinded virtual register. */ diff --git a/VEX/priv/host-ppc/isel.c b/VEX/priv/host-ppc/isel.c index 7f725a614f..4d46477e12 100644 --- a/VEX/priv/host-ppc/isel.c +++ b/VEX/priv/host-ppc/isel.c @@ -206,7 +206,8 @@ typedef Int vreg_ctr; - VexSubArch subarch; + /* 27 Jan 06: Not currently used, but should be */ + UInt hwcaps; Bool mode64; } @@ -3350,7 +3351,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) } /* if (e->tag == Iex_Binop) */ vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n", - LibVEX_ppVexSubArch(env->subarch)); + LibVEX_ppVexHwCaps(mode64 ? 
VexArchPPC64 : VexArchPPC32, + env->hwcaps)); ppIRExpr(e); vpanic("iselVecExpr_wrk(ppc)"); } @@ -3640,26 +3642,31 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) HInstrArray* iselBB_PPC ( IRBB* bb, VexArchInfo* archinfo_host ) { - Int i, j; - HReg hreg, hregHI; - ISelEnv* env; - VexSubArch subarch_host = archinfo_host->subarch; - Bool mode64; + Int i, j; + HReg hreg, hregHI; + ISelEnv* env; + UInt hwcaps_host = archinfo_host->hwcaps; + Bool mode64 = False; + Bool is32, is64; + UInt mask32, mask64; /* Figure out whether we're being ppc32 or ppc64 today. */ - switch (subarch_host) { - case VexSubArchPPC32_VFI: - case VexSubArchPPC32_FI: - case VexSubArchPPC32_I: + mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V + | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX; + + is32 = (hwcaps_host & mask32) > 0; /* any ppc32 capability bit set? */ + + mask64 = VEX_HWCAPS_PPC64_V + | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX; + + is64 = (hwcaps_host & mask64) > 0; /* any ppc64 capability bit set? */ + + if (is32 && !is64) mode64 = False; - break; - case VexSubArchPPC64_VFI: - case VexSubArchPPC64_FI: + else if (is64 && !is32) mode64 = True; - break; - default: + else vpanic("iselBB_PPC: illegal subarch"); - } /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); @@ -3681,7 +3688,7 @@ HInstrArray* iselBB_PPC ( IRBB* bb, VexArchInfo* archinfo_host ) env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); /* and finally ... */ - env->subarch = subarch_host; + env->hwcaps = hwcaps_host; /* For each IR temporary, allocate a suitably-kinded virtual register. 
*/ diff --git a/VEX/priv/host-x86/hdefs.c b/VEX/priv/host-x86/hdefs.c index a53a8c2a75..0402b69f59 100644 --- a/VEX/priv/host-x86/hdefs.c +++ b/VEX/priv/host-x86/hdefs.c @@ -703,14 +703,13 @@ X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) { i->Xin.Bsfr32.dst = dst; return i; } -X86Instr* X86Instr_MFence ( VexSubArch subarch ) +X86Instr* X86Instr_MFence ( UInt hwcaps ) { - X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); - i->tag = Xin_MFence; - i->Xin.MFence.subarch = subarch; - vassert(subarch == VexSubArchX86_sse0 - || subarch == VexSubArchX86_sse1 - || subarch == VexSubArchX86_sse2); + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_MFence; + i->Xin.MFence.hwcaps = hwcaps; + vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1|VEX_HWCAPS_X86_SSE2 + |VEX_HWCAPS_X86_SSE3))); return i; } @@ -988,7 +987,7 @@ void ppX86Instr ( X86Instr* i, Bool mode64 ) { return; case Xin_MFence: vex_printf("mfence(%s)", - LibVEX_ppVexSubArch(i->Xin.MFence.subarch)); + LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps)); return; case Xin_FpUnary: vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op)); @@ -2340,26 +2339,28 @@ Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, case Xin_MFence: /* see comment in hdefs.h re this insn */ if (0) vex_printf("EMIT FENCE\n"); - switch (i->Xin.MFence.subarch) { - case VexSubArchX86_sse0: - /* lock addl $0,0(%esp) */ - *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; - *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; - goto done; - case VexSubArchX86_sse1: - /* sfence */ - *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8; - /* lock addl $0,0(%esp) */ - *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; - *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; - goto done; - case VexSubArchX86_sse2: - /* mfence */ - *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; - goto done; - default: - vpanic("emit_X86Instr:mfence:subarch"); + if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3 + |VEX_HWCAPS_X86_SSE2)) { + /* mfence */ + *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; + goto done; + } + if 
(i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) { + /* sfence */ + *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8; + /* lock addl $0,0(%esp) */ + *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; + *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; + goto done; } + if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) { + /* lock addl $0,0(%esp) */ + *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; + *p++ = 0x24; *p++ = 0x00; *p++ = 0x00; + goto done; + } + vpanic("emit_X86Instr:mfence:hwcaps"); + /*NOTREACHED*/ break; case Xin_Store: diff --git a/VEX/priv/host-x86/hdefs.h b/VEX/priv/host-x86/hdefs.h index b6656ceb62..5bc8ec6dbf 100644 --- a/VEX/priv/host-x86/hdefs.h +++ b/VEX/priv/host-x86/hdefs.h @@ -491,10 +491,10 @@ typedef much as possible before continuing. On SSE2 we emit a real "mfence", on SSE1 "sfence ; lock addl $0,0(%esp)" and on SSE0 "lock addl $0,0(%esp)". This insn therefore - carries the subarch so the assembler knows what to + carries the host's hwcaps so the assembler knows what to emit. */ struct { - VexSubArch subarch; + UInt hwcaps; } MFence; /* X86 Floating point (fake 3-operand, "flat reg file" insns) */ @@ -629,7 +629,7 @@ extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned, extern X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ); extern X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ); extern X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ); -extern X86Instr* X86Instr_MFence ( VexSubArch ); +extern X86Instr* X86Instr_MFence ( UInt hwcaps ); extern X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ); extern X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ); diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c index 50fb29a59f..8174dfd475 100644 --- a/VEX/priv/host-x86/isel.c +++ b/VEX/priv/host-x86/isel.c @@ -158,7 +158,7 @@ typedef Int vreg_ctr; - VexSubArch subarch; + UInt hwcaps; } ISelEnv; @@ -2773,20 +2773,18 @@ static HReg iselVecExpr ( ISelEnv* env, IRExpr* e ) static HReg iselVecExpr_wrk ( 
ISelEnv* env, IRExpr* e ) { -# define REQUIRE_SSE1 \ - do { if (env->subarch == VexSubArchX86_sse0) \ - goto vec_fail; \ +# define REQUIRE_SSE1 \ + do { if (env->hwcaps == 0/*baseline, no sse*/) \ + goto vec_fail; \ } while (0) -# define REQUIRE_SSE2 \ - do { if (env->subarch == VexSubArchX86_sse0 \ - || env->subarch == VexSubArchX86_sse1) \ - goto vec_fail; \ +# define REQUIRE_SSE2 \ + do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \ + goto vec_fail; \ } while (0) -# define SSE2_OR_ABOVE \ - (env->subarch != VexSubArchX86_sse0 \ - && env->subarch != VexSubArchX86_sse1) +# define SSE2_OR_ABOVE \ + (env->hwcaps & VEX_HWCAPS_X86_SSE2) MatchInfo mi; Bool arg1isEReg = False; @@ -3271,8 +3269,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) } vec_fail: - vex_printf("iselVecExpr (subarch = %s): can't reduce\n", - LibVEX_ppVexSubArch(env->subarch)); + vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n", + LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps)); ppIRExpr(e); vpanic("iselVecExpr_wrk"); @@ -3522,7 +3520,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) /* --------- MEM FENCE --------- */ case Ist_MFence: - addInstr(env, X86Instr_MFence(env->subarch)); + addInstr(env, X86Instr_MFence(env->hwcaps)); return; /* --------- INSTR MARK --------- */ @@ -3582,15 +3580,15 @@ static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk ) HInstrArray* iselBB_X86 ( IRBB* bb, VexArchInfo* archinfo_host ) { - Int i, j; - HReg hreg, hregHI; - ISelEnv* env; - VexSubArch subarch_host = archinfo_host->subarch; + Int i, j; + HReg hreg, hregHI; + ISelEnv* env; + UInt hwcaps_host = archinfo_host->hwcaps; /* sanity ... */ - vassert(subarch_host == VexSubArchX86_sse0 - || subarch_host == VexSubArchX86_sse1 - || subarch_host == VexSubArchX86_sse2); + vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_X86_SSE1 + |VEX_HWCAPS_X86_SSE2 + |VEX_HWCAPS_X86_SSE3))); /* Make up an initial environment to use. 
*/ env = LibVEX_Alloc(sizeof(ISelEnv)); @@ -3609,7 +3607,7 @@ HInstrArray* iselBB_X86 ( IRBB* bb, VexArchInfo* archinfo_host ) env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg)); /* and finally ... */ - env->subarch = subarch_host; + env->hwcaps = hwcaps_host; /* For each IR temporary, allocate a suitably-kinded virtual register. */ diff --git a/VEX/priv/main/vex_main.c b/VEX/priv/main/vex_main.c index b232fbf63a..a93a17a749 100644 --- a/VEX/priv/main/vex_main.c +++ b/VEX/priv/main/vex_main.c @@ -70,6 +70,12 @@ /* This file contains the top level interface to the library. */ +/* --------- fwds ... --------- */ + +static Bool are_valid_hwcaps ( VexArch arch, UInt hwcaps ); +static HChar* show_hwcaps ( VexArch arch, UInt hwcaps ); + + /* --------- Initialise the library. --------- */ /* Exported to library client. */ @@ -253,9 +259,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr; host_is_bigendian = False; host_word_type = Ity_I32; - vassert(vta->archinfo_host.subarch == VexSubArchX86_sse0 - || vta->archinfo_host.subarch == VexSubArchX86_sse1 - || vta->archinfo_host.subarch == VexSubArchX86_sse2); + vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps)); vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */ break; @@ -274,7 +278,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_AMD64Instr; host_is_bigendian = False; host_word_type = Ity_I64; - vassert(vta->archinfo_host.subarch == VexSubArch_NONE); + vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps)); vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */ break; @@ -293,9 +297,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr; host_is_bigendian = True; host_word_type = Ity_I32; - vassert(vta->archinfo_guest.subarch == VexSubArchPPC32_I - || 
vta->archinfo_guest.subarch == VexSubArchPPC32_FI - || vta->archinfo_guest.subarch == VexSubArchPPC32_VFI); + vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_host.hwcaps)); vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */ break; @@ -314,8 +316,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr; host_is_bigendian = True; host_word_type = Ity_I64; - vassert(vta->archinfo_guest.subarch == VexSubArchPPC64_FI - || vta->archinfo_guest.subarch == VexSubArchPPC64_VFI); + vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps)); vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */ break; @@ -335,9 +336,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) guest_layout = &x86guest_layout; offB_TISTART = offsetof(VexGuestX86State,guest_TISTART); offB_TILEN = offsetof(VexGuestX86State,guest_TILEN); - vassert(vta->archinfo_guest.subarch == VexSubArchX86_sse0 - || vta->archinfo_guest.subarch == VexSubArchX86_sse1 - || vta->archinfo_guest.subarch == VexSubArchX86_sse2); + vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestX86State) % 8); vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4); vassert(sizeof( ((VexGuestX86State*)0)->guest_TILEN ) == 4); @@ -353,7 +352,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) guest_layout = &amd64guest_layout; offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART); offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN); - vassert(vta->archinfo_guest.subarch == VexSubArch_NONE); + vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestAMD64State) % 8); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8); vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN ) == 8); @@ -369,7 +368,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) guest_layout = &armGuest_layout; 
offB_TISTART = 0; /* hack ... arm has bitrot */ offB_TILEN = 0; /* hack ... arm has bitrot */ - vassert(vta->archinfo_guest.subarch == VexSubArchARM_v4); + vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps)); break; case VexArchPPC32: @@ -381,9 +380,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) guest_layout = &ppc32Guest_layout; offB_TISTART = offsetof(VexGuestPPC32State,guest_TISTART); offB_TILEN = offsetof(VexGuestPPC32State,guest_TILEN); - vassert(vta->archinfo_guest.subarch == VexSubArchPPC32_I - || vta->archinfo_guest.subarch == VexSubArchPPC32_FI - || vta->archinfo_guest.subarch == VexSubArchPPC32_VFI); + vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestPPC32State) % 8); vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4); vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TILEN ) == 4); @@ -399,8 +396,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) guest_layout = &ppc64Guest_layout; offB_TISTART = offsetof(VexGuestPPC64State,guest_TISTART); offB_TILEN = offsetof(VexGuestPPC64State,guest_TILEN); - vassert(vta->archinfo_guest.subarch == VexSubArchPPC64_FI - || vta->archinfo_guest.subarch == VexSubArchPPC64_VFI); + vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_guest.hwcaps)); vassert(0 == sizeof(VexGuestPPC64State) % 16); vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TISTART ) == 8); vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TILEN ) == 8); @@ -416,7 +412,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta ) /* doesn't necessarily have to be true, but if it isn't it means we are simulating one flavour of an architecture a different flavour of the same architecture, which is pretty strange. 
*/ - vassert(vta->archinfo_guest.subarch == vta->archinfo_host.subarch); + vassert(vta->archinfo_guest.hwcaps == vta->archinfo_host.hwcaps); } vexAllocSanityCheck(); @@ -675,7 +671,7 @@ HChar* LibVEX_EmWarn_string ( VexEmWarn ew ) } } -/* --------- Arch/Subarch stuff. --------- */ +/* ------------------ Arch/HwCaps stuff. ------------------ */ const HChar* LibVEX_ppVexArch ( VexArch arch ) { @@ -690,32 +686,117 @@ const HChar* LibVEX_ppVexArch ( VexArch arch ) } } -const HChar* LibVEX_ppVexSubArch ( VexSubArch subarch ) +const HChar* LibVEX_ppVexHwCaps ( VexArch arch, UInt hwcaps ) { - switch (subarch) { - case VexSubArch_INVALID: return "INVALID"; - case VexSubArch_NONE: return "NONE"; - case VexSubArchX86_sse0: return "x86-sse0"; - case VexSubArchX86_sse1: return "x86-sse1"; - case VexSubArchX86_sse2: return "x86-sse2"; - case VexSubArchARM_v4: return "arm-v4"; - case VexSubArchPPC32_I: return "ppc32-int-only"; - case VexSubArchPPC32_FI: return "ppc32-int-and-fp"; - case VexSubArchPPC32_VFI: return "ppc32-int-fp-and-AV"; - case VexSubArchPPC64_FI: return "ppc64-int-and-fp"; - case VexSubArchPPC64_VFI: return "ppc64-int-fp-and-AV"; - default: return "VexSubArch???"; - } + HChar* str = show_hwcaps(arch,hwcaps); + return str ? str : "INVALID"; } + + /* Write default settings into *vai. */ void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai ) { - vai->subarch = VexSubArch_INVALID; + vai->hwcaps = 0; vai->ppc_cache_line_szB = 0; } +/* Return a string showing the hwcaps in a nice way. The string will + be NULL for invalid combinations of flags, so these functions also + serve as a way to validate hwcaps values. */ + +static HChar* show_hwcaps_x86 ( UInt hwcaps ) +{ + /* Monotonic, SSE3 > SSE2 > SSE1 > baseline.
*/ + if (hwcaps == 0) + return "x86-sse0"; + if (hwcaps == VEX_HWCAPS_X86_SSE1) + return "x86-sse1"; + if (hwcaps == (VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2)) + return "x86-sse1-sse2"; + if (hwcaps == (VEX_HWCAPS_X86_SSE1 + | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3)) + return "x86-sse1-sse2-sse3"; + + return False; +} + +static HChar* show_hwcaps_amd64 ( UInt hwcaps ) +{ + /* Monotonic, SSE3 > baseline. */ + if (hwcaps == 0) + return "amd64-sse2"; + if (hwcaps == VEX_HWCAPS_AMD64_SSE3) + return "amd64-sse3"; + return False; +} + +static HChar* show_hwcaps_ppc32 ( UInt hwcaps ) +{ + /* Monotonic with complications. Basically V > F > baseline, + but once you have F then you can have FX or GX too. */ + const UInt F = VEX_HWCAPS_PPC32_F; + const UInt V = VEX_HWCAPS_PPC32_V; + const UInt FX = VEX_HWCAPS_PPC32_FX; + const UInt GX = VEX_HWCAPS_PPC32_GX; + UInt c = hwcaps; + if (c == 0) return "ppc32-int"; + if (c == F) return "ppc32-int-flt"; + if (c == (F|FX)) return "ppc32-int-flt-FX"; + if (c == (F|GX)) return "ppc32-int-flt-GX"; + if (c == (F|FX|GX)) return "ppc32-int-flt-FX-GX"; + if (c == (F|V)) return "ppc32-int-flt-vmx"; + if (c == (F|V|FX)) return "ppc32-int-flt-vmx-FX"; + if (c == (F|V|GX)) return "ppc32-int-flt-vmx-GX"; + if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX"; + return NULL; +} + +static HChar* show_hwcaps_ppc64 ( UInt hwcaps ) +{ + /* Monotonic with complications. Basically V > baseline(==F), + but once you have F then you can have FX or GX too. 
*/ + const UInt V = VEX_HWCAPS_PPC32_V; + const UInt FX = VEX_HWCAPS_PPC32_FX; + const UInt GX = VEX_HWCAPS_PPC32_GX; + UInt c = hwcaps; + if (c == 0) return "ppc64-int-flt"; + if (c == FX) return "ppc64-int-flt-FX"; + if (c == GX) return "ppc64-int-flt-GX"; + if (c == (FX|GX)) return "ppc64-int-flt-FX-GX"; + if (c == V) return "ppc64-int-flt-vmx"; + if (c == (V|FX)) return "ppc64-int-flt-vmx-FX"; + if (c == (V|GX)) return "ppc64-int-flt-vmx-GX"; + if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX"; + return NULL; +} + +static HChar* show_hwcaps_arm ( UInt hwcaps ) +{ + if (hwcaps == 0) return "arm-baseline"; + return NULL; +} + +/* ---- */ +static HChar* show_hwcaps ( VexArch arch, UInt hwcaps ) +{ + switch (arch) { + case VexArchX86: return show_hwcaps_x86(hwcaps); + case VexArchAMD64: return show_hwcaps_amd64(hwcaps); + case VexArchPPC32: return show_hwcaps_ppc32(hwcaps); + case VexArchPPC64: return show_hwcaps_ppc64(hwcaps); + case VexArchARM: return show_hwcaps_arm(hwcaps); + default: return NULL; + } +} + +static Bool are_valid_hwcaps ( VexArch arch, UInt hwcaps ) +{ + return show_hwcaps(arch,hwcaps) != NULL; +} + + /*---------------------------------------------------------------*/ /*--- end main/vex_main.c ---*/ /*---------------------------------------------------------------*/ diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 8a6940f7d7..3fb21ad4f7 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -71,26 +71,42 @@ typedef } VexArch; -typedef - enum { - VexSubArch_INVALID, - VexSubArch_NONE, /* Arch has no variants */ - VexSubArchX86_sse0, /* no SSE state; or SSE state but no insns */ - VexSubArchX86_sse1, /* SSE1 support (Pentium III) */ - VexSubArchX86_sse2, /* SSE2 support (Pentium 4) */ - VexSubArchARM_v4, /* ARM version 4 */ - VexSubArchPPC32_I, /* 32-bit PowerPC, no FP, no Altivec */ - VexSubArchPPC32_FI, /* 32-bit PowerPC, with FP but no Altivec */ - VexSubArchPPC32_VFI, /* 32-bit PowerPC, with FP and Altivec */ - VexSubArchPPC64_FI, /* 
64-bit PowerPC, with FP but no Altivec */ - VexSubArchPPC64_VFI /* 64-bit PowerPC, with FP and Altivec */ - } - VexSubArch; + +/* For a given architecture, these specify extra capabilities beyond + the minimum supported (baseline) capabilities. They may be OR'd + together, although some combinations don't make sense. (eg, SSE2 + but not SSE1). LibVEX_Translate will check for nonsensical + combinations. */ + +/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE) */ +#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */ +#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */ +#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */ + +/* amd64: baseline capability is SSE2 */ +#define VEX_HWCAPS_AMD64_SSE3 (1<<4) /* SSE3 support */ + +/* ppc32: baseline capability is integer only */ +#define VEX_HWCAPS_PPC32_F (1<<5) /* basic (non-optional) FP */ +#define VEX_HWCAPS_PPC32_V (1<<6) /* Altivec (VMX) */ +#define VEX_HWCAPS_PPC32_FX (1<<7) /* FP extns (fsqrt, fsqrts) */ +#define VEX_HWCAPS_PPC32_GX (1<<8) /* Graphics extns + (fres,frsqrte,fsel,stfiwx) */ + +/* ppc64: baseline capability is integer and basic FP insns */ +#define VEX_HWCAPS_PPC64_V (1<<9) /* Altivec (VMX) */ +#define VEX_HWCAPS_PPC64_FX (1<<10) /* FP extns (fsqrt, fsqrts) */ +#define VEX_HWCAPS_PPC64_GX (1<<11) /* Graphics extns + (fres,frsqrte,fsel,stfiwx) */ + +/* arm: baseline capability is ARMv4 */ +/* No extra capabilities */ + /* These return statically allocated strings. */ extern const HChar* LibVEX_ppVexArch ( VexArch ); -extern const HChar* LibVEX_ppVexSubArch ( VexSubArch ); +extern const HChar* LibVEX_ppVexHwCaps ( VexArch, UInt ); /* This struct is a bit of a hack, but is needed to carry misc @@ -100,7 +116,7 @@ extern const HChar* LibVEX_ppVexSubArch ( VexSubArch ); typedef struct { /* This is the only mandatory field. 
*/ - VexSubArch subarch; + UInt hwcaps; /* PPC32/PPC64 only: size of cache line */ Int ppc_cache_line_szB; } diff --git a/VEX/test_main.c b/VEX/test_main.c index 0ccfec64f0..66ced176bd 100644 --- a/VEX/test_main.c +++ b/VEX/test_main.c @@ -124,14 +124,15 @@ int main ( int argc, char** argv ) origbuf[i] = (UChar)u; } + /* FIXME: put sensible values into the .hwcaps fields */ LibVEX_default_VexArchInfo(&vai_x86); - vai_x86.subarch = VexSubArchX86_sse1; + vai_x86.hwcaps = 0; LibVEX_default_VexArchInfo(&vai_amd64); - vai_amd64.subarch = VexSubArch_NONE; + vai_amd64.hwcaps = 0; LibVEX_default_VexArchInfo(&vai_ppc32); - vai_ppc32.subarch = VexSubArchPPC32_VFI; + vai_ppc32.hwcaps = 0; vai_ppc32.ppc_cache_line_szB = 128; /* ----- Set up args for LibVEX_Translate ----- */