From: Julian Seward
Date: Thu, 26 Jun 2014 07:41:14 +0000 (+0000)
Subject: arm64: implement pmull{2}.
X-Git-Tag: svn/VALGRIND_3_10_1^2~83
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d68e4e0ebc36a360a725e66c40e9ed5701582467;p=thirdparty%2Fvalgrind.git

arm64: implement pmull{2}.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2888
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 6b73b1aba0..84fac0e49e 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -5609,6 +5609,24 @@ static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
 }
 
 
+/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
+   an op which takes two I64s and produces a V128.  That is, a widening
+   operator.  Generate IR which applies |opI64x2toV128| to either the
+   lower (if |is2| is False) or upper (if |is2| is True) halves of
+   |argL| and |argR|, and return the value in a new IRTemp.
+*/
+static
+IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
+                                   IRExpr* argL, IRExpr* argR )
+{
+   IRTemp res   = newTemp(Ity_V128);
+   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
+   assign(res, binop(opI64x2toV128, unop(slice, argL),
+                                    unop(slice, argR)));
+   return res;
+}
+
+
 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
    and the upper can contain any value -- it is ignored.  If |is2| is False,
    generate IR to put |new64| in the lower half of vector reg |dd| and zero
@@ -6938,6 +6956,22 @@ Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
+      /* -------- 0,1110   PMULL{2} -------- */
+      /* Widens, and size refers to the narrowed lanes. */
+      if (size != X00) return False;
+      IRTemp res
+         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
+                                     getQReg128(nn), getQReg128(mm));
+      putQReg128(dd, mkexpr(res));
+      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
+      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
+      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
"2" : "", + nameQReg128(dd), arrNarrow, + nameQReg128(nn), arrWide, nameQReg128(mm), arrWide); + return True; + } + return False; # undef INSN } diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 9efaed1d5f..dbb485d6d5 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -922,6 +922,7 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm, case ARM64vecb_ZIP216x8: *nm = "zip2"; *ar = "8h"; return; case ARM64vecb_ZIP28x16: *nm = "zip2"; *ar = "16b"; return; case ARM64vecb_PMUL8x16: *nm = "pmul"; *ar = "16b"; return; + case ARM64vecb_PMULL8x8: *nm = "pmull"; *ar = "8hb"; return; default: vpanic("showARM64VecBinOp"); } } @@ -5126,6 +5127,8 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, 010 01110 10 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b + + 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b */ UInt vD = qregNo(i->ARM64in.VBinV.dst); UInt vN = qregNo(i->ARM64in.VBinV.argL); @@ -5353,6 +5356,10 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD); break; + case ARM64vecb_PMULL8x8: + *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD); + break; + default: goto bad; } diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index c00bb3d72e..12b098095e 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -345,6 +345,7 @@ typedef ARM64vecb_ZIP18x16, ARM64vecb_ZIP232x4, ARM64vecb_ZIP216x8, ARM64vecb_ZIP28x16, ARM64vecb_PMUL8x16, + ARM64vecb_PMULL8x8, ARM64vecb_INVALID } ARM64VecBinOp; diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 7916ce26ca..d640a0d788 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -5671,6 +5671,19 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) break; } + case Iop_PolynomialMull8x8: { + HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg vSrcL = newVRegV(env); + HReg vSrcR = newVRegV(env); + HReg dst = newVRegV(env); + addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL)); + addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR)); + addInstr(env, ARM64Instr_VBinV(ARM64vecb_PMULL8x8, + dst, vSrcL, vSrcR)); + return dst; + } + //ZZ case Iop_CmpGT8Ux16: //ZZ case Iop_CmpGT16Ux8: //ZZ case Iop_CmpGT32Ux4: {