From a05da5210face777ba6d2d06699059660eba94a0 Mon Sep 17 00:00:00 2001 From: Cerion Armour-Brown Date: Tue, 28 Jun 2005 20:59:18 +0000 Subject: [PATCH] Reshuffled host-ppc32 AltiVec integer insns Added some AltiVec fp insns and CMov git-svn-id: svn://svn.valgrind.org/vex/trunk@1227 --- VEX/priv/host-ppc32/hdefs.c | 628 +++++++++++++++++++++++++----------- VEX/priv/host-ppc32/hdefs.h | 110 ++++--- 2 files changed, 497 insertions(+), 241 deletions(-) diff --git a/VEX/priv/host-ppc32/hdefs.c b/VEX/priv/host-ppc32/hdefs.c index 9b12dd4527..f0fb4e74b1 100644 --- a/VEX/priv/host-ppc32/hdefs.c +++ b/VEX/priv/host-ppc32/hdefs.c @@ -513,119 +513,77 @@ HChar* showPPC32FpOp ( PPC32FpOp op ) { HChar* showPPC32AvOp ( PPC32AvOp op ) { switch (op) { - /* mov */ - case Pav_MOV: return "vmr"; + + /* Unary */ + case Pav_MOV: return "vmr"; /* Mov */ - /* Bitwise */ - case Pav_AND: return "vand"; + case Pav_AND: return "vand"; /* Bitwise */ case Pav_OR: return "vor"; case Pav_XOR: return "vxor"; case Pav_NOT: return "vnot"; + case Pav_UNPCKH8S: return "vupkhsb"; /* Unpack */ + case Pav_UNPCKH16S: return "vupkhsh"; + case Pav_UNPCKL8S: return "vupklsb"; + case Pav_UNPCKL16S: return "vupklsh"; + case Pav_UNPCKHPIX: return "vupkhpx"; + case Pav_UNPCKLPIX: return "vupklpx"; + /* Integer binary */ - case Pav_ADD8UM: return "vaddubm"; - case Pav_ADD16UM: return "vadduhm"; - case Pav_ADD32UM: return "vadduwm"; - case Pav_ADD8US: return "vaddubs"; - case Pav_ADD16US: return "vadduhs"; - case Pav_ADD32US: return "vadduws"; - case Pav_ADD8SS: return "vaddsbs"; - case Pav_ADD16SS: return "vaddshs"; - case Pav_ADD32SS: return "vaddsws"; + case Pav_ADDUM: return "vaddu_m"; // b,h,w + case Pav_ADDUS: return "vaddu_s"; // b,h,w + case Pav_ADDSS: return "vadds_s"; // b,h,w - case Pav_SUB8UM: return "vsububm"; - case Pav_SUB16UM: return "vsubuhm"; - case Pav_SUB32UM: return "vsubuwm"; - case Pav_SUB8US: return "vsububs"; - case Pav_SUB16US: return "vsubuhs"; - case Pav_SUB32US: return "vsubuws"; - 
case Pav_SUB8SS: return "vsubsbs"; - case Pav_SUB16SS: return "vsubshs"; - case Pav_SUB32SS: return "vsubsws"; + case Pav_SUBUM: return "vsubu_m"; // b,h,w + case Pav_SUBUS: return "vsubu_s"; // b,h,w + case Pav_SUBSS: return "vsubs_s"; // b,h,w - case Pav_OMUL8U: return "vmuloub"; - case Pav_OMUL16U: return "vmulouh"; - case Pav_OMUL8S: return "vmulosb"; - case Pav_OMUL16S: return "vmulosh"; - case Pav_EMUL8U: return "vmuleub"; - case Pav_EMUL16U: return "vmuleuh"; - case Pav_EMUL8S: return "vmulesb"; - case Pav_EMUL16S: return "vmulesh"; + case Pav_OMULU: return "vmulou"; // b,h + case Pav_OMULS: return "vmulos"; // b,h + case Pav_EMULU: return "vmuleu"; // b,h + case Pav_EMULS: return "vmules"; // b,h - case Pav_AVG8U: return "vavgub"; - case Pav_AVG16U: return "vavguh"; - case Pav_AVG32U: return "vavguw"; - case Pav_AVG8S: return "vavgsb"; - case Pav_AVG16S: return "vavgsh"; - case Pav_AVG32S: return "vavgsw"; + case Pav_AVGU: return "vavgu"; // b,h,w + case Pav_AVGS: return "vavgs"; // b,h,w - case Pav_MAX8U: return "vmaxub"; - case Pav_MAX16U: return "vmaxuh"; - case Pav_MAX32U: return "vmaxuw"; - case Pav_MAX8S: return "vmaxsb"; - case Pav_MAX16S: return "vmaxsh"; - case Pav_MAX32S: return "vmaxsw"; + case Pav_MAXU: return "vmaxu"; // b,h,w + case Pav_MAXS: return "vmaxs"; // b,h,w - case Pav_MIN8U: return "vminub"; - case Pav_MIN16U: return "vminuh"; - case Pav_MIN32U: return "vminuw"; - case Pav_MIN8S: return "vminsb"; - case Pav_MIN16S: return "vminsh"; - case Pav_MIN32S: return "vminsw"; + case Pav_MINU: return "vminu"; // b,h,w + case Pav_MINS: return "vmins"; // b,h,w /* Compare (always affects CR field 6) */ - case Pav_CMPEQ8U: return "vcmpequb"; - case Pav_CMPEQ16U: return "vcmpequh"; - case Pav_CMPEQ32U: return "vcmpequw"; - case Pav_CMPGT8U: return "vcmpgtub"; - case Pav_CMPGT16U: return "vcmpgtuh"; - case Pav_CMPGT32U: return "vcmpgtuw"; - case Pav_CMPGT8S: return "vcmpgtsb"; - case Pav_CMPGT16S: return "vcmpgtsh"; - case Pav_CMPGT32S: return 
"vcmpgtsw"; + case Pav_CMPEQU: return "vcmpequ"; // b,h,w + case Pav_CMPGTU: return "vcmpgtu"; // b,h,w + case Pav_CMPGTS: return "vcmpgts"; // b,h,w /* Shift */ - case Pav_SHL8: return "vslb"; - case Pav_SHL16: return "vslh"; - case Pav_SHL32: return "vslw"; - case Pav_SHL128: return "vsl"; - case Pav_SHR8: return "vsrb"; - case Pav_SHR16: return "vsrh"; - case Pav_SHR32: return "vsrw"; - case Pav_SHR128: return "vsr"; - case Pav_SAR8: return "vsrab"; - case Pav_SAR16: return "vsrah"; - case Pav_SAR32: return "vsraw"; - case Pav_ROTL8: return "vrlb"; - case Pav_ROTL16: return "vrlh"; - case Pav_ROTL32: return "vrlw"; - + case Pav_SHL: return "vsl"; // ' ',b,h,w + case Pav_SHR: return "vsr"; // ' ',b,h,w + case Pav_SAR: return "vsra"; // b,h,w + case Pav_ROTL: return "vrl"; // b,h,w + /* Pack */ - case Pav_PACKU16UM: return "vpkuhum"; - case Pav_PACKU32UM: return "vpkuwum"; - case Pav_PACKU16US: return "vpkuhus"; - case Pav_PACKU32US: return "vpkuwus"; - case Pav_PACKS16US: return "vpkshus"; - case Pav_PACKS32US: return "vpkswus"; - case Pav_PACKS16SS: return "vpkshss"; - case Pav_PACKS32SS: return "vpkswss"; + case Pav_PACKUUM: return "vpku_um"; // h,w + case Pav_PACKUUS: return "vpku_us"; // h,w + case Pav_PACKSUS: return "vpks_us"; // h,w + case Pav_PACKSSS: return "vpks_ss"; // h,w case Pav_PACKPXL: return "vpkpx"; - /* Unpack (srcL ignored) */ - case Pav_UNPCKH8S: return "vupkhsb"; - case Pav_UNPCKH16S: return "vupkhsh"; - case Pav_UNPCKL8S: return "vupklsb"; - case Pav_UNPCKL16S: return "vupklsh"; - case Pav_UNPCKHPIX: return "vupkhpx"; - case Pav_UNPCKLPIX: return "vupklpx"; - /* Merge */ - case Pav_MRG8HI: return "vmrghb"; - case Pav_MRG16HI: return "vmrghh"; - case Pav_MRG32HI: return "vmrghw"; - case Pav_MRG8LO: return "vmrglb"; - case Pav_MRG16LO: return "vmrglh"; - case Pav_MRG32LO: return "vmrglw"; + case Pav_MRGHI: return "vmrgh"; // b,h,w + case Pav_MRGLO: return "vmrgl"; // b,h,w + + + /* Floating Point Binary */ + case Pav_ADDF: return "vaddfp"; + 
case Pav_SUBF: return "vsubfp"; + case Pav_MULF: return "vmaddfp"; + case Pav_MAXF: return "vmaxfp"; + case Pav_MINF: return "vminfp"; + case Pav_CMPEQF: return "vcmpeqfp"; + case Pav_CMPGTF: return "vcmpgtfp"; + case Pav_CMPGEF: return "vcmpgefp"; default: vpanic("showPPC32AvOp"); } @@ -873,6 +831,42 @@ PPC32Instr* PPC32Instr_AvBinary ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ) i->Pin.AvBinary.srcR = srcR; return i; } +PPC32Instr* PPC32Instr_AvBin8x16 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ) { + PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); + i->tag = Pin_AvBin8x16; + i->Pin.AvBin8x16.op = op; + i->Pin.AvBin8x16.dst = dst; + i->Pin.AvBin8x16.srcL = srcL; + i->Pin.AvBin8x16.srcR = srcR; + return i; +} +PPC32Instr* PPC32Instr_AvBin16x8 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ) { + PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); + i->tag = Pin_AvBin16x8; + i->Pin.AvBin16x8.op = op; + i->Pin.AvBin16x8.dst = dst; + i->Pin.AvBin16x8.srcL = srcL; + i->Pin.AvBin16x8.srcR = srcR; + return i; +} +PPC32Instr* PPC32Instr_AvBin32x4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ) { + PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); + i->tag = Pin_AvBin32x4; + i->Pin.AvBin32x4.op = op; + i->Pin.AvBin32x4.dst = dst; + i->Pin.AvBin32x4.srcL = srcL; + i->Pin.AvBin32x4.srcR = srcR; + return i; +} +PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ) { + PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); + i->tag = Pin_AvBin32Fx4; + i->Pin.AvBin32Fx4.op = op; + i->Pin.AvBin32Fx4.dst = dst; + i->Pin.AvBin32Fx4.srcL = srcL; + i->Pin.AvBin32Fx4.srcR = srcR; + return i; +} PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL, HReg srcR ) { PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); i->tag = Pin_AvPerm; @@ -908,6 +902,15 @@ PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32RI* src ) { i->Pin.AvSplat.src = src; return i; } +PPC32Instr* PPC32Instr_AvCMov ( PPC32CondCode cond, HReg dst, HReg src ) { + 
PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); + i->tag = Pin_AvCMov; + i->Pin.AvCMov.cond = cond; + i->Pin.AvCMov.dst = dst; + i->Pin.AvCMov.src = src; + vassert(cond.test != Pct_ALWAYS); + return i; +} PPC32Instr* PPC32Instr_AvLdVSCR ( HReg src ) { PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); i->tag = Pin_AvLdVSCR; @@ -1258,6 +1261,38 @@ void ppPPC32Instr ( PPC32Instr* i ) vex_printf(","); ppHRegPPC32(i->Pin.AvBinary.srcR); return; + case Pin_AvBin8x16: + vex_printf("%s(b) ", showPPC32AvOp(i->Pin.AvBin8x16.op)); + ppHRegPPC32(i->Pin.AvBin8x16.dst); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin8x16.srcL); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin8x16.srcR); + return; + case Pin_AvBin16x8: + vex_printf("%s(h) ", showPPC32AvOp(i->Pin.AvBin16x8.op)); + ppHRegPPC32(i->Pin.AvBin16x8.dst); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin16x8.srcL); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin16x8.srcR); + return; + case Pin_AvBin32x4: + vex_printf("%s(w) ", showPPC32AvOp(i->Pin.AvBin32x4.op)); + ppHRegPPC32(i->Pin.AvBin32x4.dst); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin32x4.srcL); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin32x4.srcR); + return; + case Pin_AvBin32Fx4: + vex_printf("%s ", showPPC32AvOp(i->Pin.AvBin32Fx4.op)); + ppHRegPPC32(i->Pin.AvBin32Fx4.dst); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin32Fx4.srcL); + vex_printf(","); + ppHRegPPC32(i->Pin.AvBin32Fx4.srcR); + return; case Pin_AvPerm: vex_printf("vperm "); ppHRegPPC32(i->Pin.AvPerm.dst); @@ -1306,6 +1341,25 @@ void ppPPC32Instr ( PPC32Instr* i ) return; } + case Pin_AvCMov: + vex_printf("avcmov (%s) ", showPPC32CondCode(i->Pin.AvCMov.cond)); + ppHRegPPC32(i->Pin.AvCMov.dst); + vex_printf(","); + ppHRegPPC32(i->Pin.AvCMov.src); + vex_printf(": "); + vex_printf("if (v_dst != v_src) { "); + if (i->Pin.AvCMov.cond.test != Pct_ALWAYS) { + vex_printf("if (%%crf0.%s) { ", showPPC32CondCode(i->Pin.AvCMov.cond)); + } + vex_printf("vmr "); + ppHRegPPC32(i->Pin.AvCMov.dst); + vex_printf(","); + 
ppHRegPPC32(i->Pin.AvCMov.src); + if (i->Pin.AvCMov.cond.test != Pct_ALWAYS) + vex_printf(" }"); + vex_printf(" }"); + return; + case Pin_AvLdVSCR: vex_printf("mtvscr "); ppHRegPPC32(i->Pin.AvLdVSCR.src); @@ -1482,6 +1536,28 @@ void getRegUsage_PPC32Instr ( HRegUsage* u, PPC32Instr* i ) addHRegUse(u, HRmRead, i->Pin.AvBinary.srcL); addHRegUse(u, HRmRead, i->Pin.AvBinary.srcR); return; + case Pin_AvBin8x16: + addHRegUse(u, HRmWrite, i->Pin.AvBin8x16.dst); + addHRegUse(u, HRmRead, i->Pin.AvBin8x16.srcL); + addHRegUse(u, HRmRead, i->Pin.AvBin8x16.srcR); + return; + case Pin_AvBin16x8: + addHRegUse(u, HRmWrite, i->Pin.AvBin16x8.dst); + addHRegUse(u, HRmRead, i->Pin.AvBin16x8.srcL); + addHRegUse(u, HRmRead, i->Pin.AvBin16x8.srcR); + return; + case Pin_AvBin32x4: + addHRegUse(u, HRmWrite, i->Pin.AvBin32x4.dst); + addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcL); + addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcR); + return; + case Pin_AvBin32Fx4: + addHRegUse(u, HRmWrite, i->Pin.AvBin32Fx4.dst); + addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcL); + addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcR); + if (i->Pin.AvBin32Fx4.op == Pav_MULF) + addHRegUse(u, HRmWrite, hregPPC32_GPR29()); // XXX(review): MULF emit uses vector reg 29 as temp -- GPR29 looks like the wrong HReg, verify + return; case Pin_AvPerm: addHRegUse(u, HRmWrite, i->Pin.AvPerm.dst); addHRegUse(u, HRmRead, i->Pin.AvPerm.ctl); @@ -1503,6 +1579,10 @@ void getRegUsage_PPC32Instr ( HRegUsage* u, PPC32Instr* i ) addHRegUse(u, HRmWrite, i->Pin.AvSplat.dst); addRegUsage_PPC32RI(u, i->Pin.AvSplat.src); return; + case Pin_AvCMov: + addHRegUse(u, HRmModify, i->Pin.AvCMov.dst); + addHRegUse(u, HRmRead, i->Pin.AvCMov.src); + return; case Pin_AvLdVSCR: addHRegUse(u, HRmRead, i->Pin.AvLdVSCR.src); return; @@ -1629,6 +1709,26 @@ void mapRegs_PPC32Instr (HRegRemap* m, PPC32Instr* i) mapReg(m, &i->Pin.AvBinary.srcL); mapReg(m, &i->Pin.AvBinary.srcR); return; + case Pin_AvBin8x16: + mapReg(m, &i->Pin.AvBin8x16.dst); + mapReg(m, &i->Pin.AvBin8x16.srcL); + mapReg(m, &i->Pin.AvBin8x16.srcR); + return; + case Pin_AvBin16x8: + mapReg(m, 
&i->Pin.AvBin16x8.dst); + mapReg(m, &i->Pin.AvBin16x8.srcL); + mapReg(m, &i->Pin.AvBin16x8.srcR); + return; + case Pin_AvBin32x4: + mapReg(m, &i->Pin.AvBin32x4.dst); + mapReg(m, &i->Pin.AvBin32x4.srcL); + mapReg(m, &i->Pin.AvBin32x4.srcR); + return; + case Pin_AvBin32Fx4: + mapReg(m, &i->Pin.AvBin32Fx4.dst); + mapReg(m, &i->Pin.AvBin32Fx4.srcL); + mapReg(m, &i->Pin.AvBin32Fx4.srcR); + return; case Pin_AvPerm: mapReg(m, &i->Pin.AvPerm.dst); mapReg(m, &i->Pin.AvPerm.srcL); @@ -1650,6 +1750,10 @@ void mapRegs_PPC32Instr (HRegRemap* m, PPC32Instr* i) mapReg(m, &i->Pin.AvSplat.dst); mapRegs_PPC32RI(m, i->Pin.AvSplat.src); return; + case Pin_AvCMov: + mapReg(m, &i->Pin.AvCMov.dst); + mapReg(m, &i->Pin.AvCMov.src); + return; case Pin_AvLdVSCR: mapReg(m, &i->Pin.AvLdVSCR.src); return; @@ -1997,7 +2101,21 @@ static UChar* mkFormVX ( UChar* p, UInt opc1, UInt r1, UInt r2, vassert(r2 < 0x20); vassert(r3 < 0x20); vassert(opc2 < 0x800); - theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | (opc2<<1)); + theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | opc2); + return emit32(p, theInstr); +} + +static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2, UInt Rc, + UInt r3, UInt opc2 ) +{ + UInt theInstr; + vassert(opc1 < 0x40); + vassert(r1 < 0x20); + vassert(r2 < 0x20); + vassert(r3 < 0x20); + vassert(Rc < 0x2); + vassert(opc2 < 0x400); + theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | (Rc<<10) | opc2); return emit32(p, theInstr); } @@ -2669,108 +2787,107 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) case Pav_OR: opc2 = 1156; break; // vor case Pav_XOR: opc2 = 1120; break; // vxor - /* Add */ - case Pav_ADD8UM: opc2 = 0; break; // vaddubm - case Pav_ADD16UM: opc2 = 64; break; // vadduhm - case Pav_ADD32UM: opc2 = 128; break; // vadduwm - case Pav_ADD8US: opc2 = 512; break; // vaddubs - case Pav_ADD16US: opc2 = 576; break; // vadduhs - case Pav_ADD32US: opc2 = 640; break; // vadduws - case Pav_ADD8SS: opc2 = 768; break; // vaddsbs - 
case Pav_ADD16SS: opc2 = 832; break; // vaddshs - case Pav_ADD32SS: opc2 = 896; break; // vaddsws - - /* Subtract */ - case Pav_SUB8UM: opc2 = 1024; break; // vsububm - case Pav_SUB16UM: opc2 = 1088; break; // vsubuhm - case Pav_SUB32UM: opc2 = 1152; break; // vsubuwm - case Pav_SUB8US: opc2 = 1536; break; // vsububs - case Pav_SUB16US: opc2 = 1600; break; // vsubuhs - case Pav_SUB32US: opc2 = 1664; break; // vsubuws - case Pav_SUB8SS: opc2 = 1792; break; // vsubsbs - case Pav_SUB16SS: opc2 = 1856; break; // vsubshs - case Pav_SUB32SS: opc2 = 1920; break; // vsubsws - - /* Multiply odd/even */ - case Pav_OMUL8U: opc2 = 8; break; // vmuloub - case Pav_OMUL16U: opc2 = 72; break; // vmulouh - case Pav_OMUL8S: opc2 = 264; break; // vmulosb - case Pav_OMUL16S: opc2 = 328; break; // vmulosh - case Pav_EMUL8U: opc2 = 520; break; // vmuleub - case Pav_EMUL16U: opc2 = 584; break; // vmuleuh - case Pav_EMUL8S: opc2 = 776; break; // vmulesb - case Pav_EMUL16S: opc2 = 840; break; // vmulesh - - /* Average */ - case Pav_AVG8U: opc2 = 1026; break; // vavgub - case Pav_AVG16U: opc2 = 1090; break; // vavguh - case Pav_AVG32U: opc2 = 1154; break; // vavguw - case Pav_AVG8S: opc2 = 1282; break; // vavgsb - case Pav_AVG16S: opc2 = 1346; break; // vavgsh - case Pav_AVG32S: opc2 = 1410; break; // vavgsw - - /* Maximum */ - case Pav_MAX8U: opc2 = 2; break; // vmaxub - case Pav_MAX16U: opc2 = 66; break; // vmaxuh - case Pav_MAX32U: opc2 = 130; break; // vmaxuw - case Pav_MAX8S: opc2 = 258; break; // vmaxsb - case Pav_MAX16S: opc2 = 322; break; // vmaxsh - case Pav_MAX32S: opc2 = 386; break; // vmaxsw - - /* Minimum */ - case Pav_MIN8U: opc2 = 514; break; // vminub - case Pav_MIN16U: opc2 = 578; break; // vminuh - case Pav_MIN32U: opc2 = 642; break; // vminuw - case Pav_MIN8S: opc2 = 770; break; // vminsb - case Pav_MIN16S: opc2 = 834; break; // vminsh - case Pav_MIN32S: opc2 = 898; break; // vminsw - - /* Compare (always affects CR field 6) */ - /* XXX: Actually VXR-Form, but Rc always 
0, so keep life easy... */ - case Pav_CMPEQ8U: opc2 = 6; break; // vcmpequb - case Pav_CMPEQ16U: opc2 = 70; break; // vcmpequh - case Pav_CMPEQ32U: opc2 = 134; break; // vcmpequw - case Pav_CMPGT8U: opc2 = 518; break; // vcmpgtub - case Pav_CMPGT16U: opc2 = 582; break; // vcmpgtuh - case Pav_CMPGT32U: opc2 = 646; break; // vcmpgtuw - case Pav_CMPGT8S: opc2 = 774; break; // vcmpgtsb - case Pav_CMPGT16S: opc2 = 838; break; // vcmpgtsh - case Pav_CMPGT32S: opc2 = 902; break; // vcmpgtsw - /* Shift */ - case Pav_SHL8: opc2 = 260; break; // vslb - case Pav_SHL16: opc2 = 324; break; // vslh - case Pav_SHL32: opc2 = 388; break; // vslw - case Pav_SHL128: opc2 = 452; break; // vsl - case Pav_SHR8: opc2 = 516; break; // vsrb - case Pav_SHR16: opc2 = 580; break; // vsrh - case Pav_SHR32: opc2 = 644; break; // vsrw - case Pav_SHR128: opc2 = 708; break; // vsr - case Pav_SAR8: opc2 = 772; break; // vsrab - case Pav_SAR16: opc2 = 836; break; // vsrah - case Pav_SAR32: opc2 = 900; break; // vsraw - case Pav_ROTL8: opc2 = 4; break; // vrlb - case Pav_ROTL16: opc2 = 68; break; // vrlh - case Pav_ROTL32: opc2 = 132; break; // vrlw - - /* Pack */ - case Pav_PACKU16UM: opc2 = 14; break; // vpkuhum - case Pav_PACKU32UM: opc2 = 78; break; // vpkuwum - case Pav_PACKU16US: opc2 = 142; break; // vpkuhus - case Pav_PACKU32US: opc2 = 206; break; // vpkuwus - case Pav_PACKS16US: opc2 = 270; break; // vpkshus - case Pav_PACKS32US: opc2 = 334; break; // vpkswus - case Pav_PACKS16SS: opc2 = 398; break; // vpkshss - case Pav_PACKS32SS: opc2 = 462; break; // vpkswss + case Pav_SHL: opc2 = 452; break; // vsl + case Pav_SHR: opc2 = 708; break; // vsr + + default: + goto bad; + } + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 ); + goto done; + } + + case Pin_AvBin8x16: { + UInt v_dst = vregNo(i->Pin.AvBin8x16.dst); + UInt v_srcL = vregNo(i->Pin.AvBin8x16.srcL); + UInt v_srcR = vregNo(i->Pin.AvBin8x16.srcR); + UInt opc2; + switch (i->Pin.AvBin8x16.op) { + + case Pav_ADDUM: opc2 = 0; break; // 
vaddubm + case Pav_ADDUS: opc2 = 512; break; // vaddubs + case Pav_ADDSS: opc2 = 768; break; // vaddsbs + + case Pav_SUBUM: opc2 = 1024; break; // vsububm + case Pav_SUBUS: opc2 = 1536; break; // vsububs + case Pav_SUBSS: opc2 = 1792; break; // vsubsbs + + case Pav_OMULU: opc2 = 8; break; // vmuloub + case Pav_OMULS: opc2 = 264; break; // vmulosb + case Pav_EMULU: opc2 = 520; break; // vmuleub + case Pav_EMULS: opc2 = 776; break; // vmulesb + + case Pav_AVGU: opc2 = 1026; break; // vavgub + case Pav_AVGS: opc2 = 1282; break; // vavgsb + case Pav_MAXU: opc2 = 2; break; // vmaxub + case Pav_MAXS: opc2 = 258; break; // vmaxsb + case Pav_MINU: opc2 = 514; break; // vminub + case Pav_MINS: opc2 = 770; break; // vminsb + + case Pav_CMPEQU: opc2 = 6; break; // vcmpequb + case Pav_CMPGTU: opc2 = 518; break; // vcmpgtub + case Pav_CMPGTS: opc2 = 774; break; // vcmpgtsb + + case Pav_SHL: opc2 = 260; break; // vslb + case Pav_SHR: opc2 = 516; break; // vsrb + case Pav_SAR: opc2 = 772; break; // vsrab + case Pav_ROTL: opc2 = 4; break; // vrlb + + case Pav_MRGHI: opc2 = 12; break; // vmrghb + case Pav_MRGLO: opc2 = 268; break; // vmrglb + + default: + goto bad; + } + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 ); + goto done; + } + + case Pin_AvBin16x8: { + UInt v_dst = vregNo(i->Pin.AvBin16x8.dst); + UInt v_srcL = vregNo(i->Pin.AvBin16x8.srcL); + UInt v_srcR = vregNo(i->Pin.AvBin16x8.srcR); + UInt opc2; + switch (i->Pin.AvBin16x8.op) { + + case Pav_ADDUM: opc2 = 64; break; // vadduhm + case Pav_ADDUS: opc2 = 576; break; // vadduhs + case Pav_ADDSS: opc2 = 832; break; // vaddshs + + case Pav_SUBUM: opc2 = 1088; break; // vsubuhm + case Pav_SUBUS: opc2 = 1600; break; // vsubuhs + case Pav_SUBSS: opc2 = 1856; break; // vsubshs + + case Pav_OMULU: opc2 = 72; break; // vmulouh + case Pav_OMULS: opc2 = 328; break; // vmulosh + case Pav_EMULU: opc2 = 584; break; // vmuleuh + case Pav_EMULS: opc2 = 840; break; // vmulesh + + case Pav_AVGU: opc2 = 1090; break; // vavguh + case 
Pav_AVGS: opc2 = 1346; break; // vavgsh + case Pav_MAXU: opc2 = 66; break; // vmaxuh + case Pav_MAXS: opc2 = 322; break; // vmaxsh + case Pav_MINS: opc2 = 834; break; // vminsh + case Pav_MINU: opc2 = 578; break; // vminuh + + case Pav_CMPEQU: opc2 = 70; break; // vcmpequh + case Pav_CMPGTU: opc2 = 582; break; // vcmpgtuh + case Pav_CMPGTS: opc2 = 838; break; // vcmpgtsh + + case Pav_SHL: opc2 = 324; break; // vslh + case Pav_SHR: opc2 = 580; break; // vsrh + case Pav_SAR: opc2 = 836; break; // vsrah + case Pav_ROTL: opc2 = 68; break; // vrlh + + case Pav_PACKUUM: opc2 = 14; break; // vpkuhum + case Pav_PACKUUS: opc2 = 142; break; // vpkuhus + case Pav_PACKSUS: opc2 = 270; break; // vpkshus + case Pav_PACKSSS: opc2 = 398; break; // vpkshss case Pav_PACKPXL: opc2 = 782; break; // vpkpx - /* Merge */ - case Pav_MRG8HI: opc2 = 12; break; // vmrghb - case Pav_MRG16HI: opc2 = 76; break; // vmrghh - case Pav_MRG32HI: opc2 = 140; break; // vmrghw - case Pav_MRG8LO: opc2 = 268; break; // vmrglb - case Pav_MRG16LO: opc2 = 332; break; // vmrglh - case Pav_MRG32LO: opc2 = 396; break; // vmrglw + case Pav_MRGHI: opc2 = 76; break; // vmrghh + case Pav_MRGLO: opc2 = 332; break; // vmrglh default: goto bad; @@ -2779,6 +2896,108 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) goto done; } + case Pin_AvBin32x4: { + UInt v_dst = vregNo(i->Pin.AvBin32x4.dst); + UInt v_srcL = vregNo(i->Pin.AvBin32x4.srcL); + UInt v_srcR = vregNo(i->Pin.AvBin32x4.srcR); + UInt opc2; + switch (i->Pin.AvBin32x4.op) { + + case Pav_ADDUM: opc2 = 128; break; // vadduwm + case Pav_ADDUS: opc2 = 640; break; // vadduws + case Pav_ADDSS: opc2 = 896; break; // vaddsws + + case Pav_SUBUM: opc2 = 1152; break; // vsubuwm + case Pav_SUBUS: opc2 = 1664; break; // vsubuws + case Pav_SUBSS: opc2 = 1920; break; // vsubsws + + case Pav_AVGU: opc2 = 1154; break; // vavguw + case Pav_AVGS: opc2 = 1410; break; // vavgsw + + case Pav_MAXU: opc2 = 130; break; // vmaxuw + case Pav_MAXS: opc2 = 386; break; // 
vmaxsw + + case Pav_MINS: opc2 = 898; break; // vminsw + case Pav_MINU: opc2 = 642; break; // vminuw + + case Pav_CMPEQU: opc2 = 134; break; // vcmpequw + case Pav_CMPGTS: opc2 = 902; break; // vcmpgtsw + case Pav_CMPGTU: opc2 = 646; break; // vcmpgtuw + + case Pav_SHL: opc2 = 388; break; // vslw + case Pav_SHR: opc2 = 644; break; // vsrw + case Pav_SAR: opc2 = 900; break; // vsraw + case Pav_ROTL: opc2 = 132; break; // vrlw + + case Pav_PACKUUM: opc2 = 78; break; // vpkuwum + case Pav_PACKUUS: opc2 = 206; break; // vpkuwus + case Pav_PACKSUS: opc2 = 334; break; // vpkswus + case Pav_PACKSSS: opc2 = 462; break; // vpkswss + + case Pav_MRGHI: opc2 = 140; break; // vmrghw + case Pav_MRGLO: opc2 = 396; break; // vmrglw + + default: + goto bad; + } + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 ); + goto done; + } + + case Pin_AvBin32Fx4: { + UInt v_dst = vregNo(i->Pin.AvBin32Fx4.dst); + UInt v_srcL = vregNo(i->Pin.AvBin32Fx4.srcL); + UInt v_srcR = vregNo(i->Pin.AvBin32Fx4.srcR); + switch (i->Pin.AvBin32Fx4.op) { + + case Pav_ADDF: + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 10 ); // vaddfp + break; + case Pav_SUBF: + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 74 ); // vsubfp + break; + case Pav_MAXF: + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1034 ); // vmaxfp + break; + case Pav_MINF: + p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1098 ); // vminfp + break; + + case Pav_MULF: { + /* Make a vmulfp from a vmaddfp: + load -0.0 (0x8000_0000) to each 32-bit word of vB + this makes the add a noop. + */ + UInt vB = 29; // XXX: Using r29 for temp + UInt zero_simm = 0x1F; // 5-bit SIMM field (mkFormVX asserts r2 < 0x20); vslw below turns it into 0x80000000 per word + + // Better way to load zero_imm? 
+ // vspltisw vB,0x1F (0x1F => each word of vB) + p = mkFormVX( p, 4, vB, zero_simm, 0, 908 ); + + // vslw vB,vB,vB (each word of vB = (0x1F << 0x1F) = 0x80000000 + p = mkFormVX( p, 4, vB, vB, vB, 388 ); + + // Finally, do the multiply: + p = mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46 ); + break; + } + case Pav_CMPEQF: + p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198 ); // vcmpeqfp + break; + case Pav_CMPGTF: + p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 710 ); // vcmpgtfp + break; + case Pav_CMPGEF: + p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 454 ); // vcmpgefp + break; + + default: + goto bad; + } + goto done; + } + case Pin_AvPerm: { // vperm UInt v_ctl = vregNo(i->Pin.AvPerm.ctl); UInt v_dst = vregNo(i->Pin.AvPerm.dst); @@ -2814,7 +3033,7 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) vassert(sz == 8 || sz == 16 || sz == 32); if (i->Pin.AvSplat.src->tag == Pri_Imm) { - opc2 = (sz == 8) ? 780 : (sz == 16) ? 844 : 908; // 8,16,32 + opc2 = (sz == 8) ? 780 : (sz == 16) ? 
844 : 908; // 8,16,32 simm_src = i->Pin.AvSplat.src->Pri.Imm.imm32; p = mkFormVX( p, 4, v_dst, simm_src, 0, opc2 ); } else { // Pri_Reg @@ -2825,6 +3044,25 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) goto done; } + case Pin_AvCMov: { + UInt v_dst = vregNo(i->Pin.AvCMov.dst); + UInt v_src = vregNo(i->Pin.AvCMov.src); + PPC32CondCode cc = i->Pin.AvCMov.cond; + + if (v_dst == v_src) goto done; + + vassert(cc.test != Pct_ALWAYS); + + /* jmp fwds 2 insns if !condition */ + if (cc.test != Pct_ALWAYS) { + /* bc !ct,cf,n_bytes>>2 */ + p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0); + } + /* vmr */ + p = mkFormVX( p, 4, v_dst, v_src, v_src, 1156 ); + goto done; + } + case Pin_AvLdVSCR: { // mtvscr UInt v_src = vregNo(i->Pin.AvLdVSCR.src); p = mkFormVX( p, 4, 0, 0, v_src, 1604 ); diff --git a/VEX/priv/host-ppc32/hdefs.h b/VEX/priv/host-ppc32/hdefs.h index 239e162abf..b69c841391 100644 --- a/VEX/priv/host-ppc32/hdefs.h +++ b/VEX/priv/host-ppc32/hdefs.h @@ -340,52 +340,34 @@ typedef /* Integer Binary */ Pav_AND, Pav_OR, Pav_XOR, /* Bitwise */ - Pav_ADD8UM, Pav_ADD16UM, Pav_ADD32UM, - Pav_ADD8US, Pav_ADD16US, Pav_ADD32US, - Pav_ADD8SS, Pav_ADD16SS, Pav_ADD32SS, - - Pav_SUB8UM, Pav_SUB16UM, Pav_SUB32UM, - Pav_SUB8US, Pav_SUB16US, Pav_SUB32US, - Pav_SUB8SS, Pav_SUB16SS, Pav_SUB32SS, - - Pav_OMUL8U, Pav_OMUL16U, - Pav_OMUL8S, Pav_OMUL16S, - Pav_EMUL8U, Pav_EMUL16U, - Pav_EMUL8S, Pav_EMUL16S, - - Pav_AVG8U, Pav_AVG16U, Pav_AVG32U, - Pav_AVG8S, Pav_AVG16S, Pav_AVG32S, - Pav_MAX8U, Pav_MAX16U, Pav_MAX32U, - Pav_MAX8S, Pav_MAX16S, Pav_MAX32S, - Pav_MIN8U, Pav_MIN16U, Pav_MIN32U, - Pav_MIN8S, Pav_MIN16S, Pav_MIN32S, + Pav_ADDUM, Pav_ADDUS,Pav_ADDSS, + + Pav_SUBUM, Pav_SUBUS, Pav_SUBSS, + + Pav_OMULU, Pav_OMULS, Pav_EMULU, Pav_EMULS, + + Pav_AVGU, Pav_AVGS, + Pav_MAXU, Pav_MAXS, + Pav_MINU, Pav_MINS, /* Compare (always affects CR field 6) */ - Pav_CMPEQ8U, Pav_CMPEQ16U, Pav_CMPEQ32U, - Pav_CMPGT8U, Pav_CMPGT16U, Pav_CMPGT32U, - Pav_CMPGT8S, Pav_CMPGT16S, 
Pav_CMPGT32S, + Pav_CMPEQU, Pav_CMPGTU, Pav_CMPGTS, /* Shift */ - Pav_SHL8, Pav_SHL16, Pav_SHL32, Pav_SHL128, - Pav_SHR8, Pav_SHR16, Pav_SHR32, Pav_SHR128, - Pav_SAR8, Pav_SAR16, Pav_SAR32, - Pav_ROTL8, Pav_ROTL16, Pav_ROTL32, + Pav_SHL, Pav_SHR, Pav_SAR, Pav_ROTL, /* Pack */ - Pav_PACKU16UM, Pav_PACKU32UM, - Pav_PACKU16US, Pav_PACKU32US, - Pav_PACKS16US, Pav_PACKS32US, - Pav_PACKS16SS, Pav_PACKS32SS, Pav_PACKPXL, + Pav_PACKUUM, Pav_PACKUUS, Pav_PACKSUS, Pav_PACKSSS, + Pav_PACKPXL, /* Merge */ - Pav_MRG8HI, Pav_MRG16HI, Pav_MRG32HI, - Pav_MRG8LO, Pav_MRG16LO, Pav_MRG32LO, - -//.. /* Floating point binary */ -//.. Xsse_ADDF, Xsse_SUBF, Xsse_MULF, Xsse_DIVF, -//.. Xsse_MAXF, Xsse_MINF, -//.. Xsse_CMPEQF, Xsse_CMPLTF, Xsse_CMPLEF, Xsse_CMPUNF, -//.. + Pav_MRGHI, Pav_MRGLO, + + /* Floating point binary */ + Pav_ADDF, Pav_SUBF, Pav_MULF, + Pav_MAXF, Pav_MINF, + Pav_CMPEQF, Pav_CMPGTF, Pav_CMPGEF, + //.. /* Floating point unary */ //.. Xsse_RCPF, Xsse_RSQRTF, Xsse_SQRTF, } @@ -425,20 +407,20 @@ typedef // Pin_AvConst, /* Generate restricted AV literal */ Pin_AvLdSt, /* AV load/store (kludging for AMode_IR) */ Pin_AvUnary, /* AV unary general reg=>reg */ + Pin_AvBinary, /* AV binary general reg,reg=>reg */ + Pin_AvBin8x16, /* AV binary, 8x4 */ + Pin_AvBin16x8, /* AV binary, 16x4 */ + Pin_AvBin32x4, /* AV binary, 32x4 */ + + Pin_AvBin32Fx4, /* AV FP binary, 32Fx4 */ Pin_AvPerm, /* AV permute (shuffle) */ Pin_AvSel, /* AV select */ Pin_AvShlDbl, /* AV shift-left double by imm */ Pin_AvSplat, /* One elem repeated throughout dst */ - Pin_AvLdVSCR /* mtvscr */ - -//.. Xin_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */ -//.. Xin_Sse32Fx4, /* SSE binary, 32Fx4 */ -//.. Xin_Sse32FLo, /* SSE binary, 32F in lowest lane only */ -//.. Xin_Sse64Fx2, /* SSE binary, 64Fx2 */ -//.. Xin_Sse64FLo, /* SSE binary, 64F in lowest lane only */ -//.. 
Xin_SseCMov, /* SSE conditional move */ + Pin_AvLdVSCR, /* mtvscr */ + Pin_AvCMov /* AV conditional move */ } PPC32InstrTag; @@ -612,6 +594,30 @@ typedef HReg srcL; HReg srcR; } AvBinary; + struct { + PPC32AvOp op; + HReg dst; + HReg srcL; + HReg srcR; + } AvBin8x16; + struct { + PPC32AvOp op; + HReg dst; + HReg srcL; + HReg srcR; + } AvBin16x8; + struct { + PPC32AvOp op; + HReg dst; + HReg srcL; + HReg srcR; + } AvBin32x4; + struct { + PPC32AvOp op; + HReg dst; + HReg srcL; + HReg srcR; + } AvBin32Fx4; /* Perm,Sel,SlDbl,Splat are all weird AV permutations */ struct { HReg ctl; @@ -636,6 +642,13 @@ typedef HReg dst; PPC32RI* src; } AvSplat; + /* Mov src to dst on the given condition, which may not + be the bogus Xcc_ALWAYS. */ + struct { + PPC32CondCode cond; + HReg dst; + HReg src; + } AvCMov; /* Load AlitVec Status & Control Register */ struct { HReg src; @@ -675,10 +688,15 @@ extern PPC32Instr* PPC32Instr_RdWrLR ( Bool wrLR, HReg gpr ); extern PPC32Instr* PPC32Instr_AvLdSt ( Bool isLoad, UChar sz, HReg, PPC32AMode* ); extern PPC32Instr* PPC32Instr_AvUnary ( PPC32FpOp op, HReg dst, HReg src ); extern PPC32Instr* PPC32Instr_AvBinary ( PPC32FpOp op, HReg dst, HReg srcL, HReg srcR ); +extern PPC32Instr* PPC32Instr_AvBin8x16 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ); +extern PPC32Instr* PPC32Instr_AvBin16x8 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ); +extern PPC32Instr* PPC32Instr_AvBin32x4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ); +extern PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32RI* src ); +extern PPC32Instr* PPC32Instr_AvCMov ( PPC32CondCode, HReg dst, HReg src ); extern PPC32Instr* 
PPC32Instr_AvLdVSCR ( HReg src ); extern void ppPPC32Instr ( PPC32Instr* ); -- 2.47.3