From a946774e00f209c12d504497b67f81ea0f114130 Mon Sep 17 00:00:00 2001
From: Julian Seward
Date: Wed, 25 Jun 2014 13:05:23 +0000
Subject: [PATCH] arm64: implement: LD3/ST3 (multi 3-elem structs, 3 regs, post index) (2d variants only) pmul 16b_16b_16b, 8b_8b_8b

git-svn-id: svn://svn.valgrind.org/vex/trunk@2887
---
 VEX/priv/guest_arm64_toIR.c | 54 ++++++++++++++++++++++++++++++++++++-
 VEX/priv/host_arm64_defs.c  |  7 +++++
 VEX/priv/host_arm64_defs.h  |  1 +
 VEX/priv/host_arm64_isel.c  |  2 ++
 4 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 7f2f51b62f..6b73b1aba0 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -4373,7 +4373,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
       return False;
    }
 
-   /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
+   /* -------- LD2/ST2 (multi 2-elem structs, 2 regs, post index) -------- */
    /* Only a very few cases. */
    /* 31        23   11 9 4
       0100 1100 1101 1111 1000 11 n t  LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
@@ -4513,6 +4513,58 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   /* -------- LD3/ST3 (multi 3-elem structs, 3 regs, post index) -------- */
+   /* Only .2d so far.  Reg k, lane j sits at byte offset 8*(3*j+k). */
+   /* 31        23   11 9 4
+      0100 1100 1101 1111 0100 11 n t  LD3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
+      0100 1100 1001 1111 0100 11 n t  ST3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
+   */
+   if (   (insn & 0xFFFFFC00) == 0x4CDF4C00 // LD3 .2d
+       || (insn & 0xFFFFFC00) == 0x4C9F4C00 // ST3 .2d
+      ) {
+      Bool   isLD = INSN(22,22) == 1;
+      UInt   rN   = INSN(9,5);
+      UInt   vT   = INSN(4,0);
+      IRTemp tEA  = newTemp(Ity_I64);
+      UInt   sz   = INSN(11,10);
+      const HChar* name = "??";
+      assign(tEA, getIReg64orSP(rN));
+      if (rN == 31) { /* FIXME generate stack alignment check */ }
+      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
+      IRExpr* tEA_8  = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
+      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
+      IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
+      IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
+      IRExpr* tEA_40 = binop(Iop_Add64, mkexpr(tEA), mkU64(40));
+      if (sz == BITS2(1,1)) {
+         name = "2d";
+         if (isLD) {
+            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
+            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_24));
+            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
+            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_32));
+            putQRegLane((vT+2) % 32, 0, loadLE(Ity_I64, tEA_16));
+            putQRegLane((vT+2) % 32, 1, loadLE(Ity_I64, tEA_40));
+         } else {
+            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I64));
+            storeLE(tEA_24, getQRegLane((vT+0) % 32, 1, Ity_I64));
+            storeLE(tEA_8,  getQRegLane((vT+1) % 32, 0, Ity_I64));
+            storeLE(tEA_32, getQRegLane((vT+1) % 32, 1, Ity_I64));
+            storeLE(tEA_16, getQRegLane((vT+2) % 32, 0, Ity_I64));
+            storeLE(tEA_40, getQRegLane((vT+2) % 32, 1, Ity_I64));
+         }
+      }
+      else {
+         vassert(0); // Can't happen: the mask above pins bits 11:10 to 11.
+      }
+      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(48)));
+      DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #48\n",
+          isLD ? "ld3" : "st3",
+          (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
+          nameIReg64orSP(rN));
+      return True;
+   }
+
    /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
    /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
    /* 31 29     23  20      14    9 4
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 06c73ab796..9efaed1d5f 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -921,6 +921,7 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm,
       case ARM64vecb_ZIP232x4: *nm = "zip2"; *ar = "4s"; return;
       case ARM64vecb_ZIP216x8: *nm = "zip2"; *ar = "8h"; return;
       case ARM64vecb_ZIP28x16: *nm = "zip2"; *ar = "16b"; return;
+      case ARM64vecb_PMUL8x16: *nm = "pmul"; *ar = "16b"; return;
       default: vpanic("showARM64VecBinOp");
    }
 }
@@ -5123,6 +5124,8 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             010 01110 10 0 m 011110 n d   ZIP2 Vd.4s, Vn.4s, Vm.4s
             010 01110 01 0 m 011110 n d   ZIP2 Vd.8h, Vn.8h, Vm.8h
             010 01110 10 0 m 011110 n d   ZIP2 Vd.16b, Vn.16b, Vm.16b
+
+            011 01110 00 1 m 100111 n d   PMUL Vd.16b, Vn.16b, Vm.16b
          */
          UInt vD = qregNo(i->ARM64in.VBinV.dst);
          UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -5346,6 +5349,10 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
                break;
 
+            case ARM64vecb_PMUL8x16:
+               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
+               break;
+
             default:
                goto bad;
          }
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 5963354f6d..c00bb3d72e 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -344,6 +344,7 @@ typedef
       ARM64vecb_ZIP132x4, ARM64vecb_ZIP116x8,
       ARM64vecb_ZIP18x16, ARM64vecb_ZIP232x4,
       ARM64vecb_ZIP216x8, ARM64vecb_ZIP28x16,
+      ARM64vecb_PMUL8x16,
       ARM64vecb_INVALID
    }
    ARM64VecBinOp;
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 7e85082af0..7916ce26ca 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -4986,6 +4986,7 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          case Iop_InterleaveLO32x4:
          case Iop_InterleaveLO16x8:
          case Iop_InterleaveLO8x16:
+         case Iop_PolynomialMul8x16:
          {
             HReg res = newVRegV(env);
             HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
@@ -5066,6 +5067,7 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
                   break;
                case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
                   break;
+               case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
                default: vassert(0);
             }
             if (sw) {
-- 
2.47.2