extern void arm64g_dirtyhelper_AESIMC ( /*OUT*/V128* res,
ULong argHi, ULong argLo );
+extern
+void arm64g_dirtyhelper_SHA1C ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo );
+extern
+void arm64g_dirtyhelper_SHA1H ( /*OUT*/V128* res,
+ ULong nHi, ULong nLo );
+extern
+void arm64g_dirtyhelper_SHA1M ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo );
+extern
+void arm64g_dirtyhelper_SHA1P ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo );
+extern
+void arm64g_dirtyhelper_SHA1SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo );
+extern
+void arm64g_dirtyhelper_SHA1SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo );
+extern
+void arm64g_dirtyhelper_SHA256H2 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo );
+extern
+void arm64g_dirtyhelper_SHA256H ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo );
+extern
+void arm64g_dirtyhelper_SHA256SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo );
+extern
+void arm64g_dirtyhelper_SHA256SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo,
+ ULong mHi, ULong mLo );
+
/*---------------------------------------------------------*/
/*--- Condition code stuff ---*/
/*---------------------------------------------------------*/
+/*---------------------------------------------------------------*/
+/*--- Crypto instruction helpers ---*/
+/*---------------------------------------------------------------*/
+
/* DIRTY HELPERS for doing AES support:
* AESE (SubBytes, then ShiftRows)
* AESD (InvShiftRows, then InvSubBytes)
*/
+/* DIRTY HELPERS for SHA instruction support. As with the AES helpers
+ above, these are actually pure functions and are only dirty because
+ clean helpers can't return a V128. */
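+
+/* Each 128-bit vector operand reaches a helper as two ULongs holding its
+ upper and lower 64-bit halves, since dirty-helper arguments must be
+ scalar; the result is written through the V128* pointer. */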
+
+static inline UInt ROL32 ( UInt x, UInt sh ) {
+ vassert(sh > 0 && sh < 32);
+ return (x << sh) | (x >> (32 - sh));
+}
+
+static inline UInt ROR32 ( UInt x, UInt sh ) {
+ vassert(sh > 0 && sh < 32);
+ return (x >> sh) | (x << (32 - sh));
+}
+
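+/* The per-word SHA element functions from FIPS 180-4: Ch (choose),
+ Maj (majority), the SHA-1 Parity function, and the two SHA-256
+ "big sigma" functions. */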
+static inline UInt SHAchoose ( UInt x, UInt y, UInt z ) {
+ return ((y ^ z) & x) ^ z;
+}
+
+static inline UInt SHAmajority ( UInt x, UInt y, UInt z ) {
+ return (x & y) | ((x | y) & z);
+}
+
+static inline UInt SHAparity ( UInt x, UInt y, UInt z ) {
+ return x ^ y ^ z;
+}
+
+static inline UInt SHAhashSIGMA0 ( UInt x ) {
+ return ROR32(x, 2) ^ ROR32(x, 13) ^ ROR32(x, 22);
+}
+
+static inline UInt SHAhashSIGMA1 ( UInt x ) {
+ return ROR32(x, 6) ^ ROR32(x, 11) ^ ROR32(x, 25);
+}
+
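+/* Four rounds of the SHA-256 compression step, mirroring the ARM ARM
+ SHA256hash pseudocode: X and Y hold the two 128-bit halves of the hash
+ state and W supplies the four schedule words for these rounds. */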
+static void SHA256hash ( /*MOD*/V128* X, /*MOD*/V128* Y, const V128* W )
+{
+ UInt e;
+ for (e = 0; e <= 3; e++) {
+ UInt chs = SHAchoose(Y->w32[0], Y->w32[1], Y->w32[2]);
+ UInt maj = SHAmajority(X->w32[0], X->w32[1], X->w32[2]);
+ UInt t = Y->w32[3] + SHAhashSIGMA1(Y->w32[0]) + chs + W->w32[e];
+ X->w32[3] = t + X->w32[3];
+ Y->w32[3] = t + SHAhashSIGMA0(X->w32[0]) + maj;
+ UInt ts = Y->w32[3];
+ Y->w32[3] = Y->w32[2];
+ Y->w32[2] = Y->w32[1];
+ Y->w32[1] = Y->w32[0];
+ Y->w32[0] = X->w32[3];
+ X->w32[3] = X->w32[2];
+ X->w32[2] = X->w32[1];
+ X->w32[1] = X->w32[0];
+ X->w32[0] = ts;
+ }
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA1C ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo )
+{
+ vassert(nHi == 0);
+ vassert((nLo >> 32) == 0);
+ V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
+ UInt Y; Y = (UInt)nLo;
+ V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
+ UInt e;
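+ /* Four SHA-1 rounds using the Ch (choose) function; X holds the
+ 128-bit ABCD state and Y holds E. */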
+ for (e = 0; e <= 3; e++) {
+ UInt t = SHAchoose(X.w32[1], X.w32[2], X.w32[3]);
+ Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
+ X.w32[1] = ROL32(X.w32[1], 30);
+ UInt oldY = Y;
+ Y = X.w32[3];
+ X.w32[3] = X.w32[2];
+ X.w32[2] = X.w32[1];
+ X.w32[1] = X.w32[0];
+ X.w32[0] = oldY;
+ }
+ res->w64[1] = X.w64[1];
+ res->w64[0] = X.w64[0];
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA1H ( /*OUT*/V128* res, ULong nHi, ULong nLo )
+{
+ vassert(nHi == 0);
+ vassert((nLo >> 32) == 0);
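+ /* Sd = ROL(Sn, 30); the upper three lanes of the result are zero. */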
+ res->w32[3] = res->w32[2] = res->w32[1] = 0;
+ res->w32[0] = ROL32((UInt)nLo, 30);
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA1M ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo )
+{
+ vassert(nHi == 0);
+ vassert((nLo >> 32) == 0);
+ V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
+ UInt Y; Y = (UInt)nLo;
+ V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
+ UInt e;
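+ /* As SHA1C above, but using the Maj (majority) function. */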
+ for (e = 0; e <= 3; e++) {
+ UInt t = SHAmajority(X.w32[1], X.w32[2], X.w32[3]);
+ Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
+ X.w32[1] = ROL32(X.w32[1], 30);
+ UInt oldY = Y;
+ Y = X.w32[3];
+ X.w32[3] = X.w32[2];
+ X.w32[2] = X.w32[1];
+ X.w32[1] = X.w32[0];
+ X.w32[0] = oldY;
+ }
+ res->w64[1] = X.w64[1];
+ res->w64[0] = X.w64[0];
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA1P ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo )
+{
+ vassert(nHi == 0);
+ vassert((nLo >> 32) == 0);
+ V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
+ UInt Y; Y = (UInt)nLo;
+ V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
+ UInt e;
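+ /* As SHA1C above, but using the Parity function. */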
+ for (e = 0; e <= 3; e++) {
+ UInt t = SHAparity(X.w32[1], X.w32[2], X.w32[3]);
+ Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
+ X.w32[1] = ROL32(X.w32[1], 30);
+ UInt oldY = Y;
+ Y = X.w32[3];
+ X.w32[3] = X.w32[2];
+ X.w32[2] = X.w32[1];
+ X.w32[1] = X.w32[0];
+ X.w32[0] = oldY;
+ }
+ res->w64[1] = X.w64[1];
+ res->w64[0] = X.w64[0];
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA1SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo )
+{
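+ /* res = (Vn<63:0> : Vd<127:64>) ^ Vd ^ Vm */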
+ res->w64[1] = nLo;
+ res->w64[0] = dHi;
+ res->w64[1] ^= dHi ^ mHi;
+ res->w64[0] ^= dLo ^ mLo;
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA1SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo )
+{
+ /* This computes "T = Vd ^ (Vn >>u 32)" */
+ V128 T; T.w64[1] = nHi; T.w64[0] = nLo;
+ T.w32[0] = T.w32[1];
+ T.w32[1] = T.w32[2];
+ T.w32[2] = T.w32[3];
+ T.w32[3] = 0;
+ T.w64[1] ^= dHi;
+ T.w64[0] ^= dLo;
+ /* res = ROL(T, 1) in each 32-bit lane; the top lane additionally
+ XORs in ROL(T<31:0>, 2). */
+ res->w32[0] = ROL32(T.w32[0], 1);
+ res->w32[1] = ROL32(T.w32[1], 1);
+ res->w32[2] = ROL32(T.w32[2], 1);
+ res->w32[3] = ROL32(T.w32[3], 1) ^ ROL32(T.w32[0], 2);
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA256H2 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo )
+{
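+ /* The "part2" case of the ARM SHA256hash pseudocode: X = Vn, Y = Vd,
+ and the updated Y half is the result. */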
+ V128 X; X.w64[1] = nHi; X.w64[0] = nLo;
+ V128 Y; Y.w64[1] = dHi; Y.w64[0] = dLo;
+ V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
+ SHA256hash(&X, &Y, &W);
+ res->w64[1] = Y.w64[1];
+ res->w64[0] = Y.w64[0];
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA256H ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo, ULong mHi, ULong mLo )
+{
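+ /* The "part1" case of the ARM SHA256hash pseudocode: X = Vd, Y = Vn,
+ and the updated X half is the result. */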
+ V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
+ V128 Y; Y.w64[1] = nHi; Y.w64[0] = nLo;
+ V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
+ SHA256hash(&X, &Y, &W);
+ res->w64[1] = X.w64[1];
+ res->w64[0] = X.w64[0];
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA256SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo )
+{
+ res->w64[1] = res->w64[0] = 0;
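+ /* T = (Vn<31:0> : Vd<127:32>); each result lane is
+ sigma0(T lane) + Vd lane, where sigma0(x) = ROR(x,7) ^ ROR(x,18)
+ ^ (x >> 3). */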
+ V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
+ V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
+ V128 T;
+ T.w32[3] = op2.w32[0];
+ T.w32[2] = op1.w32[3];
+ T.w32[1] = op1.w32[2];
+ T.w32[0] = op1.w32[1];
+ UInt e;
+ for (e = 0; e <= 3; e++) {
+ UInt elt = T.w32[e];
+ elt = ROR32(elt, 7) ^ ROR32(elt, 18) ^ (elt >> 3);
+ res->w32[e] = elt + op1.w32[e];
+ }
+}
+
+/* CALLED FROM GENERATED CODE */
+void arm64g_dirtyhelper_SHA256SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
+ ULong nHi, ULong nLo,
+ ULong mHi, ULong mLo )
+{
+ res->w64[0] = res->w64[1] = 0;
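+ /* T0 = (Vm<31:0> : Vn<127:32>).  Each result lane is
+ sigma1(w) + Vd lane + T0 lane, where sigma1(x) = ROR(x,17) ^ ROR(x,19)
+ ^ (x >> 10); w comes from Vm's top half for lanes 0..1 and from the
+ just-computed result lanes 0..1 for lanes 2..3. */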
+ V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
+ V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
+ V128 op3; op3.w64[1] = mHi; op3.w64[0] = mLo;
+ V128 T0;
+ T0.w32[3] = op3.w32[0];
+ T0.w32[2] = op2.w32[3];
+ T0.w32[1] = op2.w32[2];
+ T0.w32[0] = op2.w32[1];
+ UInt T1[2];
+ UInt e;
+ T1[1] = op3.w32[3];
+ T1[0] = op3.w32[2];
+ for (e = 0; e <= 1; e++) {
+ UInt elt = T1[e];
+ elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
+ elt = elt + op1.w32[e] + T0.w32[e];
+ res->w32[e] = elt;
+ }
+ T1[1] = res->w32[1];
+ T1[0] = res->w32[0];
+ for (e = 2; e <= 3; e++) {
+ UInt elt = T1[e-2];
+ elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
+ elt = elt + op1.w32[e] + T0.w32[e];
+ res->w32[e] = elt;
+ }
+}
+
+
/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers. ---*/
/*--- These help iropt specialise calls the above run-time ---*/
static
Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31 28 23 21 20 15 14 11 9 4
+ 0101 1110 sz 0 m 0 opc 00 n d
+ Decode fields are: sz,opc
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
+ || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
+ return False;
+ }
+ UInt sz = INSN(23,22);
+ UInt mm = INSN(20,16);
+ UInt opc = INSN(14,12);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
+ /* -------- 00,000 SHA1C Qd, Sn, Vm.4S -------- */
+ /* -------- 00,001 SHA1P Qd, Sn, Vm.4S -------- */
+ /* -------- 00,010 SHA1M Qd, Sn, Vm.4S -------- */
+ /* -------- 00,011 SHA1SU0 Vd.4S, Vn.4S, Vm.4S -------- */
+ /* -------- 00,100 SHA256H Qd, Qn, Vm.4S -------- */
+ /* -------- 00,101 SHA256H2 Qd, Qn, Vm.4S -------- */
+ /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
+ vassert(opc < 7);
+ const HChar* inames[7]
+ = { "sha1c", "sha1p", "sha1m", "sha1su0",
+ "sha256h", "sha256h2", "sha256su1" };
+ void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
+ = { &arm64g_dirtyhelper_SHA1C, &arm64g_dirtyhelper_SHA1P,
+ &arm64g_dirtyhelper_SHA1M, &arm64g_dirtyhelper_SHA1SU0,
+ &arm64g_dirtyhelper_SHA256H, &arm64g_dirtyhelper_SHA256H2,
+ &arm64g_dirtyhelper_SHA256SU1 };
+ const HChar* hnames[7]
+ = { "arm64g_dirtyhelper_SHA1C", "arm64g_dirtyhelper_SHA1P",
+ "arm64g_dirtyhelper_SHA1M", "arm64g_dirtyhelper_SHA1SU0",
+ "arm64g_dirtyhelper_SHA256H", "arm64g_dirtyhelper_SHA256H2",
+ "arm64g_dirtyhelper_SHA256SU1" };
+ IRTemp vD = newTemp(Ity_V128);
+ IRTemp vN = newTemp(Ity_V128);
+ IRTemp vM = newTemp(Ity_V128);
+ IRTemp vDhi = newTemp(Ity_I64);
+ IRTemp vDlo = newTemp(Ity_I64);
+ IRTemp vNhiPre = newTemp(Ity_I64);
+ IRTemp vNloPre = newTemp(Ity_I64);
+ IRTemp vNhi = newTemp(Ity_I64);
+ IRTemp vNlo = newTemp(Ity_I64);
+ IRTemp vMhi = newTemp(Ity_I64);
+ IRTemp vMlo = newTemp(Ity_I64);
+ assign(vD, getQReg128(dd));
+ assign(vN, getQReg128(nn));
+ assign(vM, getQReg128(mm));
+ assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
+ assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
+ assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
+ assign(vNloPre, unop(Iop_V128to64, mkexpr(vN)));
+ assign(vMhi, unop(Iop_V128HIto64, mkexpr(vM)));
+ assign(vMlo, unop(Iop_V128to64, mkexpr(vM)));
+ /* Mask off any bits of the N register operand that aren't actually
+ needed, so that Memcheck doesn't complain unnecessarily: SHA1C, SHA1P
+ and SHA1M read only the low 32 bits of N (the Sn scalar), whereas the
+ other instructions use all of Vn. */
+ switch (opc) {
+ case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
+ assign(vNhi, mkU64(0));
+ assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
+ break;
+ case BITS3(0,1,1): case BITS3(1,0,0):
+ case BITS3(1,0,1): case BITS3(1,1,0):
+ assign(vNhi, mkexpr(vNhiPre));
+ assign(vNlo, mkexpr(vNloPre));
+ break;
+ default:
+ vassert(0);
+ }
+ IRTemp res = newTemp(Ity_V128);
+ IRDirty* di
+ = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
+ mkIRExprVec_7(
+ IRExpr_VECRET(),
+ mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
+ mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
+ stmt(IRStmt_Dirty(di));
+ putQReg128(dd, mkexpr(res));
+ switch (opc) {
+ case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
+ DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
+ break;
+ case BITS3(0,1,1): case BITS3(1,1,0):
+ DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
+ break;
+ case BITS3(1,0,0): case BITS3(1,0,1):
+ DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
+ break;
+ default:
+ vassert(0);
+ }
+ return True;
+ }
+
return False;
# undef INSN
}
static
Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31 28 23 21 16 11 9 4
+ 0101 1110 sz 10100 opc 10 n d
+ Decode fields are: sz,opc
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
+ || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
+ return False;
+ }
+ UInt sz = INSN(23,22);
+ UInt opc = INSN(16,12);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
+ /* -------- 00,00000 SHA1H Sd, Sn -------- */
+ /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
+ /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
+ vassert(opc < 3);
+ const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
+ IRTemp vD = newTemp(Ity_V128);
+ IRTemp vN = newTemp(Ity_V128);
+ IRTemp vDhi = newTemp(Ity_I64);
+ IRTemp vDlo = newTemp(Ity_I64);
+ IRTemp vNhi = newTemp(Ity_I64);
+ IRTemp vNlo = newTemp(Ity_I64);
+ assign(vD, getQReg128(dd));
+ assign(vN, getQReg128(nn));
+ assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
+ assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
+ assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
+ assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
+ /* Mask off any bits of the N register operand that aren't actually
+ needed, so that Memcheck doesn't complain unnecessarily. Also
+ construct the calls, given that the helper functions don't take
+ the same number of arguments. */
+ IRDirty* di = NULL;
+ IRTemp res = newTemp(Ity_V128);
+ switch (opc) {
+ case BITS5(0,0,0,0,0): {
+ IRExpr* vNloMasked = unop(Iop_32Uto64,
+ unop(Iop_64to32, mkexpr(vNlo)));
+ di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ "arm64g_dirtyhelper_SHA1H",
+ &arm64g_dirtyhelper_SHA1H,
+ mkIRExprVec_3(
+ IRExpr_VECRET(),
+ mkU64(0), vNloMasked) );
+ break;
+ }
+ case BITS5(0,0,0,0,1):
+ di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ "arm64g_dirtyhelper_SHA1SU1",
+ &arm64g_dirtyhelper_SHA1SU1,
+ mkIRExprVec_5(
+ IRExpr_VECRET(),
+ mkexpr(vDhi), mkexpr(vDlo),
+ mkexpr(vNhi), mkexpr(vNlo)) );
+ break;
+ case BITS5(0,0,0,1,0):
+ di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ "arm64g_dirtyhelper_SHA256SU0",
+ &arm64g_dirtyhelper_SHA256SU0,
+ mkIRExprVec_5(
+ IRExpr_VECRET(),
+ mkexpr(vDhi), mkexpr(vDlo),
+ mkexpr(vNhi), mkexpr(vNlo)) );
+ break;
+ default:
+ vassert(0);
+ }
+ stmt(IRStmt_Dirty(di));
+ putQReg128(dd, mkexpr(res));
+ switch (opc) {
+ case BITS5(0,0,0,0,0):
+ DIP("%s s%u, s%u\n", inames[opc], dd, nn);
+ break;
+ case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
+ DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
+ break;
+ default:
+ vassert(0);
+ }
+ return True;
+ }
+
return False;
# undef INSN
}