assign(*res, mkexpr(*sat1q));
}
+/* Build the IR shared by SQRDMLAH and SQRDMLSH.  math_SQDMULH (called with
+   R set) supplies the rounding doubling multiply-high of vN and vM; that
+   product is then added to vD (isAdd) or subtracted from it (!isAdd).
+   Two fresh V128 temps are handed back:
+     *res       -- vD combined with the first output of math_SQDMULH
+                   using the signed saturating add/sub
+     *res_nosat -- vD combined with the third output of math_SQDMULH
+                   using the plain add/sub
+   Only the h (X01) and s (X10) lane sizes are accepted. */
+static
+void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd,
+                     UInt size, IRTemp vD, IRTemp vN, IRTemp vM )
+{
+   /* s or h only */
+   vassert(size == X01 || size == X10);
+
+   /* SQRDMLAH = SQADD(A, SQRDMULH(B, C)) */
+   IRTemp prod       = IRTemp_INVALID;
+   IRTemp prod_nosat = IRTemp_INVALID;
+   IRTemp unused     = IRTemp_INVALID;
+   math_SQDMULH(&prod, &unused, &prod_nosat, True/*R*/, size, vN, vM);
+
+   /* Pick the plain and the saturating accumulate op for this lane size. */
+   IROp plainOp, satOp;
+   if (isAdd) {
+      plainOp = mkVecADD(size);
+      satOp   = mkVecQADDS(size);
+   } else {
+      plainOp = mkVecSUB(size);
+      satOp   = mkVecQSUBS(size);
+   }
+
+   newTempsV128_2(res, res_nosat);
+   assign(*res,       binop(satOp,   mkexpr(vD), mkexpr(prod)));
+   assign(*res_nosat, binop(plainOp, mkexpr(vD), mkexpr(prod_nosat)));
+}
+
/* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
a new temp in *res, and the Q difference pair in new temps in
# undef INSN
}
+/* Decode and translate the "AdvSIMD scalar three same extra" group.  Only
+   the scalar forms of SQRDMLAH and SQRDMLSH (h and s lanes) are handled.
+   Returns True if |insn| was recognised and translated; returns False,
+   before any IR has been generated, if it was not. */
+static
+Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
+{
+   /* 31 29 28    23   21 20 15     10 9 4
+      01 U  11110 size 0  m  opcode 1  n d
+      Decode fields: u,size,opcode
+   */
+#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,30) != BITS2(0,1)
+       || INSN(28,24) != BITS5(1,1,1,1,0)
+       || INSN(21,21) != 0
+       || INSN(10,10) != 1) {
+      return False;
+   }
+   UInt bitU   = INSN(29,29);
+   UInt size   = INSN(23,22);
+   UInt mm     = INSN(20,16);
+   UInt opcode = INSN(15,11);
+   UInt nn     = INSN(9,5);
+   UInt dd     = INSN(4,0);
+   vassert(size < 4);
+   vassert(mm < 32 && nn < 32 && dd < 32);
+
+   if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
+      /* -------- 1,xx,10000 SQRDMLAH s and h variants only -------- */
+      /* -------- 1,xx,10001 SQRDMLSH s and h variants only -------- */
+      if (size == X00 || size == X11) return False;
+      Bool isAdd = opcode == BITS5(1,0,0,0,0);
+
+      IRTemp res, res_nosat, vD, vN, vM;
+      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+      newTempsV128_3(&vD, &vN, &vM);
+      assign(vD, getQReg128(dd));
+      assign(vN, getQReg128(nn));
+      assign(vM, getQReg128(mm));
+
+      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+      /* Scalar form: only the lowest lane of the result is kept, and only
+         that lane takes part in the saturation (QC) comparison. */
+      putQReg128(dd,
+                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
+      updateQCFLAGwithDifference(
+         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)),
+         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat)));
+
+      /* |size| is the raw 2-bit field (X01 or X10 here), so the lane-name
+         table must cover all four encodings: X01 -> 'h', X10 -> 's'. */
+      const HChar  arr = "bhsd"[size];
+      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
+      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
+      return True;
+   }
+
+   return False;
+#  undef INSN
+}
+
static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
*/
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
if (INSN(31,30) != BITS2(0,1)
- || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
+ || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
return False;
}
UInt bitU = INSN(29,29);
return True;
}
- if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
+ if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
/* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
/* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
UInt mm = 32; // invalid
return True;
}
+   if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
+      /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
+      /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
+      UInt mm = 32; // invalid
+      UInt ix = 16; // invalid
+      /* h lanes: 4-bit register number (mmLO4), index is H:L:M.
+         s lanes: 5-bit register number (M:mmLO4), index is H:L. */
+      switch (size) {
+         case X00:
+            return False; // b case is not allowed
+         case X01:
+            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
+         case X10:
+            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
+         case X11:
+            return False; // d case is not allowed
+         default:
+            vassert(0);
+      }
+      vassert(size < 4);
+      vassert(mm < 32 && ix < 16);
+      Bool isAdd = opcode == BITS4(1,1,0,1);
+
+      IRTemp res, res_nosat, vD, vN, vM;
+      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+      newTempsV128_2(&vD, &vN);
+      assign(vD, getQReg128(dd));
+      assign(vN, getQReg128(nn));
+      /* Replicate the selected element of Vm across a vector. */
+      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
+
+      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+      IROp opZHI = mkVecZEROHIxxOFV128(size);
+      putQReg128(dd, unop(opZHI, mkexpr(res)));
+      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
+
+      const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
+      HChar ch = size == X01 ? 'h' : 's';
+      /* Third operand is the indexed source register Vm, printed as
+         v<mm>.<lane>[<ix>]; arguments follow the format order exactly. */
+      DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
+      return True;
+   }
+
return False;
# undef INSN
}
}
+/* Decode and translate the vector "AdvSIMD three same extra" group.  Only
+   the vector forms of SQRDMLAH and SQRDMLSH (h and s lanes) are handled.
+   Returns True if |insn| was recognised and translated, False otherwise;
+   every False return happens before any IR is generated. */
+static
+Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
+{
+ /* 31 30 29 28 23 21 20 15 14 10 9 4
+ 0 Q U 01110 size 0 m 1 opcode 1 n d
+ Decode fields: u,size,opcode
+ */
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,31) != 0
+ || INSN(28,24) != BITS5(0,1,1,1,0)
+ || INSN(21,21) != 0
+ || INSN(15,15) != 1
+ || INSN(10,10) != 1) {
+ return False;
+ }
+ UInt bitQ = INSN(30,30);
+ UInt bitU = INSN(29,29);
+ UInt size = INSN(23,22);
+ UInt mm = INSN(20,16);
+ UInt opcode = INSN(14,11);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ vassert(size < 4);
+ vassert(mm < 32 && nn < 32 && dd < 32);
+
+ if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
+ /* -------- 1,xx,0000 SQRDMLAH s and h variants only -------- */
+ /* -------- 1,xx,0001 SQRDMLSH s and h variants only -------- */
+ if (size == X00 || size == X11) return False;
+ Bool isAdd = opcode == BITS4(0,0,0,0);
+
+ IRTemp res, res_nosat, vD, vN, vM;
+ res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+ newTempsV128_3(&vD, &vN, &vM);
+ assign(vD, getQReg128(dd));
+ assign(vN, getQReg128(nn));
+ assign(vM, getQReg128(mm));
+
+ math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+ /* When Q == 0 only the low 64 bits are compared for the QC update;
+ Iop_INVALID is passed when Q == 1. */
+ IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
+ updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
+ DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+ return True;
+ }
+
+ return False;
+# undef INSN
+}
+
+
static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
return True;
}
- if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
+ if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
/* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
/* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
UInt mm = 32; // invalid
return True;
}
+ if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
+ /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
+ /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
+ UInt mm = 32; // invalid
+ UInt ix = 16; // invalid
+ /* h lanes: 4-bit register number (mmLO4), index is H:L:M.
+ s lanes: 5-bit register number (M:mmLO4), index is H:L. */
+ switch (size) {
+ case X00:
+ return False; // b case is not allowed
+ case X01: // h
+ mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
+ case X10: // s
+ mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
+ case X11:
+ return False; // d case is not allowed
+ default:
+ vassert(0);
+ }
+ vassert(mm < 32 && ix < 16);
+
+ IRTemp res, res_nosat, vD, vN, vM;
+ res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+ newTempsV128_2(&vD, &vN);
+ assign(vD, getQReg128(dd));
+ assign(vN, getQReg128(nn));
+
+ /* Replicate the selected element of Vm across a vector. */
+ vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
+ Bool isAdd = opcode == BITS4(1,1,0,1);
+ math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+ /* When Q == 0 only the low 64 bits are compared for the QC update;
+ Iop_INVALID is passed when Q == 1. */
+ IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
+ updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
+ putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ const HChar* nm = isAdd ? "sqrdmlah" : "sqrdmlsh";
+ HChar ch = size == X01 ? 'h' : 's';
+ DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
+ nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
+ return True;
+ }
+
return False;
# undef INSN
}
if (UNLIKELY(ok)) return True;
ok = dis_AdvSIMD_scalar_three_same(dres, insn);
if (UNLIKELY(ok)) return True;
+ ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn);
+ if (UNLIKELY(ok)) return True;
ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
if (UNLIKELY(ok)) return True;
ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
if (UNLIKELY(ok)) return True;
ok = dis_AdvSIMD_three_same(dres, insn);
if (UNLIKELY(ok)) return True;
+ ok = dis_AdvSIMD_three_same_extra(dres, insn);
+ if (UNLIKELY(ok)) return True;
ok = dis_AdvSIMD_two_reg_misc(dres, insn);
if (UNLIKELY(ok)) return True;
ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
--- /dev/null
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+
+/* Short aliases for the basic integer and FP types used by this test. */
+typedef unsigned char UChar;
+typedef unsigned short int UShort;
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned long long int ULong;
+typedef signed long long int Long;
+typedef double Double;
+typedef float Float;
+
+/* Minimal boolean type. */
+typedef unsigned char Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+/* Number of iterations run by each random-data test function. */
+#define ITERS 1
+
+/* Lane type tags passed to the helpers and test functions below. */
+typedef
+ enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
+ LaneTy;
+
+/* A 128-bit vector value, viewable as lanes of any of the listed types. */
+union _V128 {
+ UChar u8[16];
+ UShort u16[8];
+ UInt u32[4];
+ ULong u64[2];
+ Float f32[4];
+ Double f64[2];
+};
+typedef union _V128 V128;
+
+/* Fill |v| with a repeating lane pattern.  |v| is first cleared to zero,
+ * then the lowest |elements| lanes of type |ty| are each set to |val|.
+ * Supported shapes are 4H/8H (ty == TyH) and 2S/4S (ty == TyS); for any
+ * other lane type a message is printed and |v| is left all-zero.
+ * |val| must fit in the lane width (checked with assert).
+ */
+static void setV128( V128* v, int elements, LaneTy ty, ULong val )
+{
+   assert( (elements % 2) == 0 && elements >= 2 && elements <= 16 );
+   memset(v, 0, sizeof(*v));
+   switch (ty) {
+      /* 4H or 8H */
+      case TyH:
+         assert( val < (1ULL << 16) );
+         assert( elements == 4 || elements == 8 );
+         for ( int i = 0; i < elements; i++ )
+            v->u16[i] = val;
+         break;
+      /* 2S or 4S */
+      case TyS:
+         /* 1ULL, not 1: shifting a 32-bit int by 32 is undefined. */
+         assert( val < (1ULL << 32) );
+         assert( elements == 2 || elements == 4 );
+         for ( int i = 0; i < elements; i++ )
+            v->u32[i] = val;
+         break;
+      default:
+         printf("8B, 2D and 16B not implemented\n");
+   }
+}
+
+/* Set a single lane of |v|.  |v| is first cleared to zero, then lane
+ * |idx| of type |ty| is set to |val|; all other lanes stay zero.
+ * Supported shapes are 4H/8H (ty == TyH) and 2S/4S (ty == TyS), with
+ * 0 <= idx < elements; for any other lane type a message is printed and
+ * |v| is left all-zero.  |val| must fit in the lane width (checked with
+ * assert).
+ */
+static void setV128_idx( V128* v, int elements, LaneTy ty, ULong val, int idx )
+{
+   assert( (elements % 2) == 0 && elements >= 2 && elements <= 16 );
+   assert( idx >= 0 );
+   memset(v, 0, sizeof(*v));
+   switch (ty) {
+      /* 4H or 8H */
+      case TyH:
+         assert( val < (1ULL << 16) );
+         assert( elements == 4 || elements == 8 );
+         assert( idx < elements );
+         v->u16[idx] = val;
+         break;
+      /* 2S or 4S */
+      case TyS:
+         /* 1ULL, not 1: shifting a 32-bit int by 32 is undefined. */
+         assert( val < (1ULL << 32) );
+         assert( elements == 2 || elements == 4 );
+         assert( idx < elements );
+         v->u32[idx] = val;
+         break;
+      default:
+         printf("8B, 2D and 16B not implemented\n");
+   }
+}
+
+/* Return the next byte of a deterministic pseudo-random sequence.  The
+ * generator state is a function-local static with a fixed initial value,
+ * so every run of the program produces the same sequence.
+ */
+static inline UChar randUChar ( void )
+{
+   static UInt state = 80021;
+   const UInt next = 1103515245 * state + 12345;
+   state = next;
+   return (next >> 17) & 0xFF;
+}
+
+/* Fill *v with pseudo-random bytes from randUChar(), retrying until every
+ * F32 lane and every F64 lane of the result is a normalised FP number (as
+ * judged by isnormal), so that the value is reasonable to use in FP test
+ * cases.  Every 256th call prints the running call and iteration counts.
+ */
+static void randV128 ( /*OUT*/V128* v )
+{
+   static UInt nCalls = 0, nIters = 0;
+   Int allNormal;
+   nCalls++;
+   do {
+      Int byteNo;
+      nIters++;
+      for (byteNo = 0; byteNo < 16; byteNo++) {
+         v->u8[byteNo] = randUChar();
+      }
+      allNormal = isnormal(v->f32[0]) && isnormal(v->f32[1])
+                  && isnormal(v->f32[2]) && isnormal(v->f32[3])
+                  && isnormal(v->f64[0]) && isnormal(v->f64[1]);
+   } while (!allNormal);
+   if ((nCalls & 0xFF) == 0)
+      printf("randV128: %u calls, %u iters\n", nCalls, nIters);
+}
+
+/* Print the 16 bytes of *v as 32 hex digits, highest-numbered byte first,
+ * with no separator and no trailing newline.
+ */
+static void showV128 ( V128* v )
+{
+   Int byteNo = 16;
+   while (byteNo-- > 0) {
+      printf("%02x", (Int)v->u8[byteNo]);
+   }
+}
+
+/* Generate a test function, test_<TESTNAME>(LaneTy ty), that runs INSN on
+ * three vector registers filled with random data.  No assumption is made
+ * about which of the three registers are inputs and which is the output:
+ * all three are loaded beforehand and all three are stored afterwards.
+ *
+ * For each of ITERS iterations the function:
+ *   - fills block[0..5] with randV128() values (block[6] keeps the 0x55
+ *     fill until the asm stores FPSR into it);
+ *   - in the asm: writes zero to FPSR, loads the three registers from
+ *     block[0..2], executes INSN, stores the three registers to
+ *     block[3..5] and stores FPSR to block[6], using x30 as scratch;
+ *   - prints INSN, the three values before (block[0..2]), the three
+ *     values after (block[3..5]) and FPSR masked with 0xFFFFFF60.
+ *
+ * VECREGnNO are the register numbers used by INSN.  The |ty| argument is
+ * not used by the generated body.
+ */
+#define GEN_THREEVEC_TEST_RND(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
+ __attribute__((noinline)) \
+ static void test_##TESTNAME ( LaneTy ty ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[6+1]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0]); \
+ randV128(&block[1]); \
+ randV128(&block[2]); \
+ randV128(&block[3]); \
+ randV128(&block[4]); \
+ randV128(&block[5]); \
+ __asm__ __volatile__( \
+ "mov x30, #0 ; msr fpsr, x30 ; " \
+ "ldr q"#VECREG1NO", [%0, #0] ; " \
+ "ldr q"#VECREG2NO", [%0, #16] ; " \
+ "ldr q"#VECREG3NO", [%0, #32] ; " \
+ INSN " ; " \
+ "str q"#VECREG1NO", [%0, #48] ; " \
+ "str q"#VECREG2NO", [%0, #64] ; " \
+ "str q"#VECREG3NO", [%0, #80] ; " \
+ "mrs x30, fpsr ; str x30, [%0, #96] " \
+ : : "r"(&block[0]) \
+ : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, "x30" \
+ ); \
+ printf("%-34s", INSN); \
+ UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
+ printf("vout:"); showV128(&block[0]); printf(" "); \
+ printf("vin0:"); showV128(&block[1]); printf(" "); \
+ printf("vin1:"); showV128(&block[2]); \
+ printf("\n "); \
+ printf("vout:"); showV128(&block[3]); printf(" "); \
+ printf("vin0:"); showV128(&block[4]); printf(" "); \
+ printf("vin1:"); showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
+ } \
+ }
+
+/* Generate a test function,
+ * test_<TESTNAME>(V128* vout, V128* vin0, V128* vin1, LaneTy ty), that
+ * runs INSN once on three vector registers loaded with caller-supplied
+ * values rather than with values from randV128().  No assumption is made
+ * about which of the three registers are inputs and which is the output:
+ * all three are loaded beforehand and all three are stored afterwards.
+ *
+ * The function zeroes a local block[], copies *vout, *vin0 and *vin1 into
+ * block[0..2], and then in the asm: writes zero to FPSR, loads the three
+ * registers from block[0..2], executes INSN, stores the three registers
+ * to block[3..5] and stores FPSR to block[6], using x30 as scratch.  It
+ * prints INSN, the values before, the values after and FPSR masked with
+ * 0xFFFFFF60.  The |ty| argument is not used by the generated body.
+ */
+#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREGOUT,VECREGIN0,VECREGIN1) \
+ __attribute__((noinline)) \
+ static void test_##TESTNAME ( V128* vout, V128* vin0, V128* vin1, LaneTy ty ) { \
+ V128 block[6+1]; \
+ memset(block, 0, sizeof(block)); \
+ block[0] = *vout; \
+ block[1] = *vin0; \
+ block[2] = *vin1; \
+ __asm__ __volatile__( \
+ "mov x30, #0 ; msr fpsr, x30 ; " \
+ "ldr q"#VECREGOUT", [%0, #0] ; " \
+ "ldr q"#VECREGIN0", [%0, #16] ; " \
+ "ldr q"#VECREGIN1", [%0, #32] ; " \
+ INSN " ; " \
+ "str q"#VECREGOUT", [%0, #48] ; " \
+ "str q"#VECREGIN0", [%0, #64] ; " \
+ "str q"#VECREGIN1", [%0, #80] ; " \
+ "mrs x30, fpsr ; str x30, [%0, #96] " \
+ : : "r"(&block[0]) \
+ : "memory", "v"#VECREGOUT, "v"#VECREGIN0, "v"#VECREGIN1, "x30" \
+ ); \
+ printf("%-34s", INSN); \
+ UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
+ printf("vout:"); showV128(&block[0]); printf(" "); \
+ printf("vin0:"); showV128(&block[1]); printf(" "); \
+ printf("vin1:"); showV128(&block[2]); \
+ printf("\n "); \
+ printf("vout:"); showV128(&block[3]); printf(" "); \
+ printf("vin0:"); showV128(&block[4]); printf(" "); \
+ printf("vin1:"); showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
+ }
+
+/* Generate a call to a test function produced by GEN_THREEVEC_TEST, which
+ * requires user defined data.  All three operands are filled with PATTERN
+ * in every lane (via setV128) and then passed to
+ * test_<INSN>_<EARRANGE>_<EARRANGE>_<EARRANGE>_<PATTERN>.
+ *
+ * Variables |vout| (V128) and |vin| (V128[2]) must be in scope where the
+ * macro is used.  PATTERN is used both as a value and, unexpanded, as
+ * part of the function name.  The expansion is a single statement, so
+ * the macro is safe in an unbraced if/else; terminate it with ';'.
+ */
+#define GEN_THREEVEC_TEST_CALLS(INSN,ECOUNT,ETYPE,EARRANGE,PATTERN) \
+   do { \
+      setV128(&vout, ECOUNT, ETYPE, PATTERN); \
+      setV128(&vin[0], ECOUNT, ETYPE, PATTERN); \
+      setV128(&vin[1], ECOUNT, ETYPE, PATTERN); \
+      test_##INSN##_##EARRANGE##_##EARRANGE##_##EARRANGE##_##PATTERN( \
+         &vout, &vin[0], &vin[1], ETYPE); \
+   } while (0)
+
+/* Indexed vector element variant of GEN_THREEVEC_TEST_CALLS.  Each of the
+ * three operands is set (via setV128_idx) to zero except for lane IDX,
+ * which holds PATTERN; they are then passed to
+ * test_<INSN>_<EARRANGE>_<EARRANGE>_<EARRANGE>_<PATTERN>.
+ *
+ * Variables |vout| (V128) and |vin| (V128[2]) must be in scope where the
+ * macro is used.  The expansion is a single statement, so the macro is
+ * safe in an unbraced if/else; terminate it with ';'.
+ */
+#define GEN_THREEVEC_TEST_CALLSI(INSN,ECOUNT,ETYPE,EARRANGE,PATTERN,IDX) \
+   do { \
+      setV128_idx(&vout, ECOUNT, ETYPE, PATTERN, IDX); \
+      setV128_idx(&vin[0], ECOUNT, ETYPE, PATTERN, IDX); \
+      setV128_idx(&vin[1], ECOUNT, ETYPE, PATTERN, IDX); \
+      test_##INSN##_##EARRANGE##_##EARRANGE##_##EARRANGE##_##PATTERN( \
+         &vout, &vin[0], &vin[1], ETYPE); \
+   } while (0)
+
+/* Test patterns.  The _32 suffix marks 32-bit lane values and the _16
+ * suffix 16-bit lane values.  Besides supplying the value, each name is
+ * also pasted (unexpanded) into the test function name by
+ * GEN_THREEVEC_TEST_CALLS / GEN_THREEVEC_TEST_CALLSI, so a pattern can
+ * only be used where a matching test_..._<PATTERN> function exists.
+ */
+#define ALL5s_32 0x55555555ULL
+#define ALLas_32 0xAAAAAAAAULL
+#define ALLfs_32 0xFFFFFFFFULL
+#define UP_32 0x01234567ULL
+#define DOWN_32 0xFEDCBA98ULL
+#define PI_32 0x31415926ULL
+#define E_32 0x27182818ULL
+
+#define ALL5s_16 0x5555ULL
+#define ALLas_16 0xAAAAULL
+#define ALLfs_16 0xFFFFULL
+#define UP_16 0x0123ULL
+#define DOWN_16 0xFEDCULL
+#define PI_16 0x3141ULL
+#define E_16 0x2718ULL
+
+
+/* ---------------------------------------------------------
+ * -- Tests, in the same order that they appear in main() --
+ * ---------------------------------------------------------
+ *
+ * There are 4 types of test sets for each instruction:
+ * - vector
+ * - indexed vector
+ * - scalar
+ * - indexed scalar
+ *
+ * GEN_THREEVEC_TEST_RND instances run on random register contents.
+ * GEN_THREEVEC_TEST instances take their register contents from the
+ * caller; their names end in the pattern name (or 0) that main() passes
+ * through GEN_THREEVEC_TEST_CALLS / GEN_THREEVEC_TEST_CALLSI.
+ * The trailing three numbers are the vector register numbers used by
+ * the instruction string.
+ */
+
+/* sqrdmlah (vector version, no index) */
+GEN_THREEVEC_TEST_RND(sqrdmlah_4h_4h_4h, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_8h_8h_8h, "sqrdmlah v0.8h, v1.8h, v2.8h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_2s_2s_2s, "sqrdmlah v0.2s, v1.2s, v2.2s", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_4s_4s_4s, "sqrdmlah v0.4s, v1.4s, v2.4s", 0, 1, 2)
+
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_0, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALL5s_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALLas_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALLfs_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_UP_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_DOWN_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_PI_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_E_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+
+/* sqrdmlah (vector version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_4h_4h_4h, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_8h_8h_8h, "sqrdmlah v0.8h, v1.8h, v2.8h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_2s_2s_2s, "sqrdmlah v0.2s, v1.2s, v2.2s[2]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_4s_4s_4s, "sqrdmlah v0.4s, v1.4s, v2.4s[3]", 0, 1, 2)
+
+/* The element index in each instruction below matches the IDX that main()
+ * passes to GEN_THREEVEC_TEST_CALLSI for the same pattern. */
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_0, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALL5s_16, "sqrdmlah v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALLas_16, "sqrdmlah v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALLfs_16, "sqrdmlah v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_UP_16, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_DOWN_16, "sqrdmlah v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_PI_16, "sqrdmlah v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_E_16, "sqrdmlah v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+
+/* sqrdmlah (scalar version).  The h/s pairs step through register
+ * numbers 0..31 three at a time, wrapping round to 0 in the last pair. */
+GEN_THREEVEC_TEST_RND(sqrdmlah_h0_h1_h2, "sqrdmlah h0, h1, h2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s0_s1_s2, "sqrdmlah s0, s1, s2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h3_h4_h5, "sqrdmlah h3, h4, h5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s3_s4_s5, "sqrdmlah s3, s4, s5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h6_h7_h8, "sqrdmlah h6, h7, h8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s6_s7_s8, "sqrdmlah s6, s7, s8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h9_h10_h11, "sqrdmlah h9, h10, h11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s9_s10_s11, "sqrdmlah s9, s10, s11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h12_h13_h14, "sqrdmlah h12, h13, h14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s12_s13_s14, "sqrdmlah s12, s13, s14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h15_h16_h17, "sqrdmlah h15, h16, h17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s15_s16_s17, "sqrdmlah s15, s16, s17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h18_h19_h20, "sqrdmlah h18, h19, h20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s18_s19_s20, "sqrdmlah s18, s19, s20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h21_h22_h23, "sqrdmlah h21, h22, h23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s21_s22_s23, "sqrdmlah s21, s22, s23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h24_h25_h26, "sqrdmlah h24, h25, h26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s24_s25_s26, "sqrdmlah s24, s25, s26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h27_h28_h29, "sqrdmlah h27, h28, h29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s27_s28_s29, "sqrdmlah s27, s28, s29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h30_h31_h0, "sqrdmlah h30, h31, h0", 30, 31, 0)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s30_s31_s0, "sqrdmlah s30, s31, s0", 30, 31, 0)
+
+/* sqrdmlah (scalar version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_h0_h1_v2, "sqrdmlah h0, h1, v2.h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_s0_s1_v2, "sqrdmlah s0, s1, v2.s[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_h3_h4_v5, "sqrdmlah h3, h4, v5.h[1]", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_s3_s4_v5, "sqrdmlah s3, s4, v5.s[1]", 3, 4, 5)
+
+/* sqrdmlsh (vector version, no index).  GEN_THREEVEC_TEST_RND instances
+ * run on random register contents; GEN_THREEVEC_TEST instances take their
+ * register contents from the caller. */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_4h_4h_4h, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_8h_8h_8h, "sqrdmlsh v0.8h, v1.8h, v2.8h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_2s_2s_2s, "sqrdmlsh v0.2s, v1.2s, v2.2s", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_4s_4s_4s, "sqrdmlsh v0.4s, v1.4s, v2.4s", 0, 1, 2)
+
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_0, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALL5s_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALLas_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALLfs_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_UP_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_DOWN_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_PI_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_E_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+
+/* sqrdmlsh (vector version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_4h_4h_4h, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_8h_8h_8h, "sqrdmlsh v0.8h, v1.8h, v2.8h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_2s_2s_2s, "sqrdmlsh v0.2s, v1.2s, v2.2s[2]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_4s_4s_4s, "sqrdmlsh v0.4s, v1.4s, v2.4s[3]", 0, 1, 2)
+
+/* The element index in each instruction below matches the IDX that main()
+ * passes to GEN_THREEVEC_TEST_CALLSI for the same pattern. */
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_0, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALL5s_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALLas_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALLfs_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_UP_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_DOWN_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_PI_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_E_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+
+/* sqrdmlsh (scalar version).  The h/s pairs step through register
+ * numbers 0..31 three at a time, wrapping round to 0 in the last pair. */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h0_h1_h2, "sqrdmlsh h0, h1, h2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s0_s1_s2, "sqrdmlsh s0, s1, s2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h3_h4_h5, "sqrdmlsh h3, h4, h5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s3_s4_s5, "sqrdmlsh s3, s4, s5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h6_h7_h8, "sqrdmlsh h6, h7, h8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s6_s7_s8, "sqrdmlsh s6, s7, s8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h9_h10_h11, "sqrdmlsh h9, h10, h11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s9_s10_s11, "sqrdmlsh s9, s10, s11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h12_h13_h14, "sqrdmlsh h12, h13, h14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s12_s13_s14, "sqrdmlsh s12, s13, s14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h15_h16_h17, "sqrdmlsh h15, h16, h17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s15_s16_s17, "sqrdmlsh s15, s16, s17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h18_h19_h20, "sqrdmlsh h18, h19, h20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s18_s19_s20, "sqrdmlsh s18, s19, s20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h21_h22_h23, "sqrdmlsh h21, h22, h23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s21_s22_s23, "sqrdmlsh s21, s22, s23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h24_h25_h26, "sqrdmlsh h24, h25, h26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s24_s25_s26, "sqrdmlsh s24, s25, s26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h27_h28_h29, "sqrdmlsh h27, h28, h29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s27_s28_s29, "sqrdmlsh s27, s28, s29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h30_h31_h0, "sqrdmlsh h30, h31, h0", 30, 31, 0)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s30_s31_s0, "sqrdmlsh s30, s31, s0", 30, 31, 0)
+
+/* sqrdmlsh (scalar version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_h0_h1_v2, "sqrdmlsh h0, h1, v2.h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_s0_s1_v2, "sqrdmlsh s0, s1, v2.s[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_h3_h4_v5, "sqrdmlsh h3, h4, v5.h[1]", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_s3_s4_v5, "sqrdmlsh s3, s4, v5.s[1]", 3, 4, 5)
+
+/* Run every generated test once, in a fixed order, printing the results
+ * to stdout.  Each call is guarded by "if (1)" so that it can be switched
+ * off individually by editing the constant.
+ *
+ * The order matters: the random-data tests all draw from the single
+ * static generator state in randUChar(), so adding, removing or
+ * reordering calls changes the inputs of every random-data test that
+ * runs afterwards.
+ */
+int main ( void )
+{
+ assert(sizeof(V128) == 16);
+
+ /* sqrdmlah (vector version, no index) */
+ if (1) test_sqrdmlah_4h_4h_4h(TyH);
+ if (1) test_sqrdmlah_8h_8h_8h(TyH);
+ if (1) test_sqrdmlah_2s_2s_2s(TyS);
+ if (1) test_sqrdmlah_4s_4s_4s(TyS);
+
+ /* Operand storage used by the GEN_THREEVEC_TEST_CALLS and
+ GEN_THREEVEC_TEST_CALLSI expansions below, which refer to these
+ variables by name. */
+ V128 vout;
+ V128 vin[2];
+
+ if (1) {
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, 0);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALL5s_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALLas_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALLfs_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, UP_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, DOWN_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, PI_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, E_16);
+ }
+
+ /* sqrdmlah (vector version, with index) */
+ if (1) test_sqrdmlah_i_4h_4h_4h(TyH);
+ if (1) test_sqrdmlah_i_8h_8h_8h(TyH);
+ if (1) test_sqrdmlah_i_2s_2s_2s(TyS);
+ if (1) test_sqrdmlah_i_4s_4s_4s(TyS);
+
+ if (1) {
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, 0, 0);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALL5s_16, 1);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALLas_16, 2);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALLfs_16, 3);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, UP_16, 0);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, DOWN_16, 1);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, PI_16, 2);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, E_16, 3);
+ }
+
+ /* sqrdmlah (scalar version) */
+ if (1) test_sqrdmlah_h0_h1_h2(TyH);
+ if (1) test_sqrdmlah_s0_s1_s2(TyS);
+ if (1) test_sqrdmlah_h3_h4_h5(TyH);
+ if (1) test_sqrdmlah_s3_s4_s5(TyS);
+ if (1) test_sqrdmlah_h6_h7_h8(TyH);
+ if (1) test_sqrdmlah_s6_s7_s8(TyS);
+ if (1) test_sqrdmlah_h9_h10_h11(TyH);
+ if (1) test_sqrdmlah_s9_s10_s11(TyS);
+ if (1) test_sqrdmlah_h12_h13_h14(TyH);
+ if (1) test_sqrdmlah_s12_s13_s14(TyS);
+ if (1) test_sqrdmlah_h15_h16_h17(TyH);
+ if (1) test_sqrdmlah_s15_s16_s17(TyS);
+ if (1) test_sqrdmlah_h18_h19_h20(TyH);
+ if (1) test_sqrdmlah_s18_s19_s20(TyS);
+ if (1) test_sqrdmlah_h21_h22_h23(TyH);
+ if (1) test_sqrdmlah_s21_s22_s23(TyS);
+ if (1) test_sqrdmlah_h24_h25_h26(TyH);
+ if (1) test_sqrdmlah_s24_s25_s26(TyS);
+ if (1) test_sqrdmlah_h27_h28_h29(TyH);
+ if (1) test_sqrdmlah_s27_s28_s29(TyS);
+ if (1) test_sqrdmlah_h30_h31_h0(TyH);
+ if (1) test_sqrdmlah_s30_s31_s0(TyS);
+
+ /* sqrdmlah (scalar version, with index) */
+ if (1) test_sqrdmlah_i_h0_h1_v2(TyH);
+ if (1) test_sqrdmlah_i_s0_s1_v2(TyS);
+ if (1) test_sqrdmlah_i_h3_h4_v5(TyH);
+ if (1) test_sqrdmlah_i_s3_s4_v5(TyS);
+
+ /* sqrdmlsh (vector version) */
+ if (1) test_sqrdmlsh_4h_4h_4h(TyH);
+ if (1) test_sqrdmlsh_8h_8h_8h(TyH);
+ if (1) test_sqrdmlsh_2s_2s_2s(TyS);
+ if (1) test_sqrdmlsh_4s_4s_4s(TyS);
+
+ if (1) {
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, 0);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALL5s_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALLas_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALLfs_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, UP_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, DOWN_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, PI_16);
+ GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, E_16);
+ }
+
+ /* sqrdmlsh (vector version, with index) */
+ if (1) test_sqrdmlsh_i_4h_4h_4h(TyH);
+ if (1) test_sqrdmlsh_i_8h_8h_8h(TyH);
+ if (1) test_sqrdmlsh_i_2s_2s_2s(TyS);
+ if (1) test_sqrdmlsh_i_4s_4s_4s(TyS);
+
+ if (1) {
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, 0, 0);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALL5s_16, 1);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALLas_16, 2);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALLfs_16, 3);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, UP_16, 0);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, DOWN_16, 1);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, PI_16, 2);
+ GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, E_16, 3);
+ }
+
+ /* sqrdmlsh (scalar version) */
+ if (1) test_sqrdmlsh_h0_h1_h2(TyH);
+ if (1) test_sqrdmlsh_s0_s1_s2(TyS);
+ if (1) test_sqrdmlsh_h3_h4_h5(TyH);
+ if (1) test_sqrdmlsh_s3_s4_s5(TyS);
+ if (1) test_sqrdmlsh_h6_h7_h8(TyH);
+ if (1) test_sqrdmlsh_s6_s7_s8(TyS);
+ if (1) test_sqrdmlsh_h9_h10_h11(TyH);
+ if (1) test_sqrdmlsh_s9_s10_s11(TyS);
+ if (1) test_sqrdmlsh_h12_h13_h14(TyH);
+ if (1) test_sqrdmlsh_s12_s13_s14(TyS);
+ if (1) test_sqrdmlsh_h15_h16_h17(TyH);
+ if (1) test_sqrdmlsh_s15_s16_s17(TyS);
+ if (1) test_sqrdmlsh_h18_h19_h20(TyH);
+ if (1) test_sqrdmlsh_s18_s19_s20(TyS);
+ if (1) test_sqrdmlsh_h21_h22_h23(TyH);
+ if (1) test_sqrdmlsh_s21_s22_s23(TyS);
+ if (1) test_sqrdmlsh_h24_h25_h26(TyH);
+ if (1) test_sqrdmlsh_s24_s25_s26(TyS);
+ if (1) test_sqrdmlsh_h27_h28_h29(TyH);
+ if (1) test_sqrdmlsh_s27_s28_s29(TyS);
+ if (1) test_sqrdmlsh_h30_h31_h0(TyH);
+ if (1) test_sqrdmlsh_s30_s31_s0(TyS);
+
+ /* sqrdmlsh (scalar version, with index) */
+ if (1) test_sqrdmlsh_i_h0_h1_v2(TyH);
+ if (1) test_sqrdmlsh_i_s0_s1_v2(TyS);
+ if (1) test_sqrdmlsh_i_h3_h4_v5(TyH);
+ if (1) test_sqrdmlsh_i_s3_s4_v5(TyS);
+
+ return 0;
+}
--- /dev/null
+sqrdmlah v0.4h, v1.4h, v2.4h vout:5175e39d19c9ca1e98f24a4984175700 vin0:7d6528c5fa956a0d69c3e9a6af27d13b vin1:60b160857d45c48447b8d8c0eeef1e50
+ vout:0000000000000000d43451248edf4bed vin0:7d6528c5fa956a0d69c3e9a6af27d13b vin1:60b160857d45c48447b8d8c0eeef1e50 fpsr=00000000
+sqrdmlah v0.8h, v1.8h, v2.8h vout:d89998df5035ed364a4bc43968bc40e5 vin0:cb509970b8136c85d740b80eb7839b97 vin1:f9dd4a29f8c093db56b01a12b0ca1583
+ vout:db208000544891862eb2b5927fff3005 vin0:cb509970b8136c85d740b80eb7839b97 vin1:f9dd4a29f8c093db56b01a12b0ca1583 fpsr=08000000
+sqrdmlah v0.2s, v1.2s, v2.2s vout:d182c916cebc2e17cfaff39be272ef40 vin0:6897b536bbe4da8a369dab4f9465b86e vin1:407b8d9035449b06f4e06e2205236eb7
+ vout:0000000000000000caf0ee75de211f32 vin0:6897b536bbe4da8a369dab4f9465b86e vin1:407b8d9035449b06f4e06e2205236eb7 fpsr=00000000
+sqrdmlah v0.4s, v1.4s, v2.4s vout:f0350ca70523e0e45ba1ec54e87d39b3 vin0:0a3e0f7c75cb0842b95ed64d3b13ff64 vin1:e98ebd1ca893312a54cae7d5e13dfe91
+ vout:ee6951dab4afbfa02cd832b4da4afedf vin0:0a3e0f7c75cb0842b95ed64d3b13ff64 vin1:e98ebd1ca893312a54cae7d5e13dfe91 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000
+ vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000005555555555555555 vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555
+ vout:00000000000000007fff7fff7fff7fff vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555 fpsr=08000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:0000000000000000aaaaaaaaaaaaaaaa vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa
+ vout:0000000000000000e38ee38ee38ee38e vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff
+ vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000000123012301230123 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123
+ vout:00000000000000000126012601260126 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:0000000000000000fedcfedcfedcfedc vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc
+ vout:0000000000000000fedffedffedffedf vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000003141314131413141 vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141
+ vout:00000000000000004435443544354435 vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h vout:00000000000000002718271827182718 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718
+ vout:00000000000000003309330933093309 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[0] vout:e9b5f3f66b2e58c121a6c3476d21f1e5 vin0:63483da65c8c49d096084deb9ed0411e vin1:a81b6e33c572a86aacf29b0f395c98b4
+ vout:0000000000000000772a84667fffbd58 vin0:63483da65c8c49d096084deb9ed0411e vin1:a81b6e33c572a86aacf29b0f395c98b4 fpsr=08000000
+sqrdmlah v0.8h, v1.8h, v2.8h[1] vout:6f07136773a2ead356428c5a66a2ec77 vin0:28bad218e4ebf159ff1f240eb3e1553f vin1:8404eb7f0cf4ca6fee8536da9dbf68bc
+ vout:4fc436a47ffff61256ef80007fffab07 vin0:28bad218e4ebf159ff1f240eb3e1553f vin1:8404eb7f0cf4ca6fee8536da9dbf68bc fpsr=08000000
+sqrdmlah v0.2s, v1.2s, v2.2s[2] vout:36b2a38dcef18acf0e0f01a829ba3c66 vin0:f078b65e01737fd22bfa8f668c8b14f4 vin1:57436a097df30b8daa927a03090dfc6d
+ vout:000000000000000039553356b81ed47b vin0:f078b65e01737fd22bfa8f668c8b14f4 vin1:57436a097df30b8daa927a03090dfc6d fpsr=00000000
+sqrdmlah v0.4s, v1.4s, v2.4s[3] vout:6d08ed19fa045f841810cd8c109ed568 vin0:1c4a678450562685769ab818a5b7985e vin1:b984aed62671e865e6f21d40fc7bc013
+ vout:5d74fb0dcdc7dcb9d6c1ecfd425568e8 vin0:1c4a678450562685769ab818a5b7985e vin1:b984aed62671e865e6f21d40fc7bc013 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000
+ vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[1] vout:00000000000000000000000055550000 vin0:00000000000000000000000055550000 vin1:00000000000000000000000055550000
+ vout:0000000000000000000000007fff0000 vin0:00000000000000000000000055550000 vin1:00000000000000000000000055550000 fpsr=08000000
+sqrdmlah v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000aaaa00000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000
+ vout:00000000000000000000e38e00000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[3] vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000
+ vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000123 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123
+ vout:00000000000000000000000000000126 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[1] vout:000000000000000000000000fedc0000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000
+ vout:000000000000000000000000fedf0000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000314100000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000
+ vout:00000000000000000000443500000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[3] vout:00000000000000002718000000000000 vin0:00000000000000002718000000000000 vin1:00000000000000002718000000000000
+ vout:00000000000000003309000000000000 vin0:00000000000000002718000000000000 vin1:00000000000000002718000000000000 fpsr=00000000
+sqrdmlah h0, h1, h2 vout:acb722146c6cbfa9ea4a022e1d3d7dbb vin0:048612e51a468e36c51cdd8f87e12ab4 vin1:0c05cb6ebd128663d7568e3e8a3ac80e
+ vout:00000000000000000000000000006b11 vin0:048612e51a468e36c51cdd8f87e12ab4 vin1:0c05cb6ebd128663d7568e3e8a3ac80e fpsr=00000000
+sqrdmlah s0, s1, s2 vout:6489eab2c96df363d52c4330a7aae391 vin0:c1fbfd8f4d8698c2cb9dfb4ea5d18713 vin1:14575775bc3a12029d8e66ea90352a18
+ vout:000000000000000000000000f66e187a vin0:c1fbfd8f4d8698c2cb9dfb4ea5d18713 vin1:14575775bc3a12029d8e66ea90352a18 fpsr=00000000
+sqrdmlah h3, h4, h5 vout:4784d95987cd4ed80c3ca578a32bd88e vin0:08aebee85fda964fbba02737f3c98220 vin1:837be65197abe2686b1fba2604afb8d5
+ vout:00000000000000000000000000001e8b vin0:08aebee85fda964fbba02737f3c98220 vin1:837be65197abe2686b1fba2604afb8d5 fpsr=00000000
+sqrdmlah s3, s4, s5 vout:0aaa836b194e242cc5fc3ae904033357 vin0:8ca3e752c306df00caab752f630ff07e vin1:0e780c65c22b4ab8778d9ed6d9eb46ea
+ vout:000000000000000000000000e68a6e36 vin0:8ca3e752c306df00caab752f630ff07e vin1:0e780c65c22b4ab8778d9ed6d9eb46ea fpsr=00000000
+sqrdmlah h6, h7, h8 vout:61ff7d4df3b6ca8131f01866bd76c58f vin0:02dd0e32eecfc5fa2c3ffa1aebe6a4d2 vin1:69505d14b27d9d16f25b26e0042fa9fa
+ vout:000000000000000000000000000002d6 vin0:02dd0e32eecfc5fa2c3ffa1aebe6a4d2 vin1:69505d14b27d9d16f25b26e0042fa9fa fpsr=00000000
+sqrdmlah s6, s7, s8 vout:0088596389c893fd879d51d4c5c764db vin0:1e61c5ec52f79c6015e3c8dc7e9273bf vin1:47086cc3da642fa7130d662777beb4a9
+ vout:0000000000000000000000003c30207f vin0:1e61c5ec52f79c6015e3c8dc7e9273bf vin1:47086cc3da642fa7130d662777beb4a9 fpsr=00000000
+sqrdmlah h9, h10, h11 vout:9a49ac115048d4c4f987fa170d3ce4dd vin0:9432a2e46543b956b819f459105730e9 vin1:5da3cfd6aea6558e0c28728e28dc3c9c
+ vout:0000000000000000000000000000fc06 vin0:9432a2e46543b956b819f459105730e9 vin1:5da3cfd6aea6558e0c28728e28dc3c9c fpsr=00000000
+sqrdmlah s9, s10, s11 vout:e4450ababbfae0f9bc3127138b19183c vin0:1755377e9a786f014a6592749579b0f4 vin1:5f2619b1a20662f012305efa0acd1475
+ vout:000000000000000000000000821bedee vin0:1755377e9a786f014a6592749579b0f4 vin1:5f2619b1a20662f012305efa0acd1475 fpsr=00000000
+sqrdmlah h12, h13, h14 vout:918107c43ea20cc00420edac31a0d599 vin0:5cce191e65591384ff4cb613013cc685 vin1:0194ddb82b49abf059a93d4f11d611db
+ vout:0000000000000000000000000000cd94 vin0:5cce191e65591384ff4cb613013cc685 vin1:0194ddb82b49abf059a93d4f11d611db fpsr=00000000
+sqrdmlah s12, s13, s14 vout:570037914d04ab3d05d75ec6f616ee9a vin0:17a0dc273ba9f8030a52741849e54740 vin1:f6f2b14fbb3184b2141625713239066f
+ vout:00000000000000000000000013156a40 vin0:17a0dc273ba9f8030a52741849e54740 vin1:f6f2b14fbb3184b2141625713239066f fpsr=00000000
+sqrdmlah h15, h16, h17 vout:e8c72e865de41295f2db8f44cbbf37e2 vin0:fcd015ff8f2e73a3a0fae06860b606c7 vin1:f34428d9c8833f5b78fb29445f3bc8d7
+ vout:000000000000000000000000000034f6 vin0:fcd015ff8f2e73a3a0fae06860b606c7 vin1:f34428d9c8833f5b78fb29445f3bc8d7 fpsr=00000000
+sqrdmlah s15, s16, s17 vout:f9da7f07e00794eb00b0940ba5e08516 vin0:be625608d5abd787f5c90ee73af5d7c0 vin1:ac8dd5bbc503330eb9dd5dab8e212ab7
+ vout:00000000000000000000000080000000 vin0:be625608d5abd787f5c90ee73af5d7c0 vin1:ac8dd5bbc503330eb9dd5dab8e212ab7 fpsr=08000000
+sqrdmlah h18, h19, h20 vout:3d3cc0784c2f856363d9810079bbabd9 vin0:125934a781e479d33d431279cce48fce vin1:d4d14e592776b1ef0b40d58cb22d00b1
+ vout:0000000000000000000000000000ab3e vin0:125934a781e479d33d431279cce48fce vin1:d4d14e592776b1ef0b40d58cb22d00b1 fpsr=00000000
+sqrdmlah s18, s19, s20 vout:69f2843d15223a224edb6a053a967ecf vin0:acb9433f079dacacabeb000208c90296 vin1:20162517609f0f22a1a7a4c9c0a51f6b
+ vout:000000000000000000000000363d52c9 vin0:acb9433f079dacacabeb000208c90296 vin1:20162517609f0f22a1a7a4c9c0a51f6b fpsr=00000000
+sqrdmlah h21, h22, h23 vout:31005fb9ada2074bf63a63fedcb4d29c vin0:3f871736dc9ac5357446eb65e4e703bb vin1:445ef059e641a1ccb097e047aacc5b89
+ vout:0000000000000000000000000000d547 vin0:3f871736dc9ac5357446eb65e4e703bb vin1:445ef059e641a1ccb097e047aacc5b89 fpsr=00000000
+sqrdmlah s21, s22, s23 vout:4969e55289753f038f7980d1535979e5 vin0:80c745ef729f1792ccd7e987538166e1 vin1:f4ad41832c22ba116c949cea66e687ae
+ vout:0000000000000000000000007fffffff vin0:80c745ef729f1792ccd7e987538166e1 vin1:f4ad41832c22ba116c949cea66e687ae fpsr=08000000
+sqrdmlah h24, h25, h26 vout:e309aef8a605af130821eb96e737777e vin0:b5a9377eb31749ef710cf757885d2728 vin1:1f1030333fb8fa4b2feb05cb92ed4f4d
+ vout:00000000000000000000000000007fff vin0:b5a9377eb31749ef710cf757885d2728 vin1:1f1030333fb8fa4b2feb05cb92ed4f4d fpsr=08000000
+sqrdmlah s24, s25, s26 vout:928efefdf9f5ec8d5313bd01b82612e0 vin0:bc36ca100a4a3a7d5127ba1c529aa0bf vin1:9f043af6a1aed58f1ee978efa4b054d2
+ vout:00000000000000000000000080000000 vin0:bc36ca100a4a3a7d5127ba1c529aa0bf vin1:9f043af6a1aed58f1ee978efa4b054d2 fpsr=08000000
+sqrdmlah h27, h28, h29 vout:2ad7482a960fb2b27014160ebbdb47e4 vin0:a7837c83faf3cb1d360794fec60222d6 vin1:61cd123e19cf1e2bb001f1161e946f5c
+ vout:00000000000000000000000000006633 vin0:a7837c83faf3cb1d360794fec60222d6 vin1:61cd123e19cf1e2bb001f1161e946f5c fpsr=00000000
+sqrdmlah s27, s28, s29 vout:7c4e1775412d1d47a8872cb61d8aca05 vin0:2993e139f7d64ff4532f9ae1d7da8010 vin1:19714a711ce1284318b88425f2de758f
+ vout:00000000000000000000000021a91e1a vin0:2993e139f7d64ff4532f9ae1d7da8010 vin1:19714a711ce1284318b88425f2de758f fpsr=00000000
+sqrdmlah h30, h31, h0 vout:3cf6fe426e1281712ef114ddd37570e8 vin0:f76b8d9773b81b24de24e0a879648e11 vin1:7af177f11da748fc8b9145fe16d0390f
+ vout:00000000000000000000000000003e1e vin0:f76b8d9773b81b24de24e0a879648e11 vin1:7af177f11da748fc8b9145fe16d0390f fpsr=00000000
+sqrdmlah s30, s31, s0 vout:1dd493f59184345437d5e366d0e20c30 vin0:c50f1401e45b82d3086a7a39a1e6217d vin1:3a542e238fe5d1793d1148867eb08f81
+ vout:00000000000000000000000080000000 vin0:c50f1401e45b82d3086a7a39a1e6217d vin1:3a542e238fe5d1793d1148867eb08f81 fpsr=08000000
+sqrdmlah h0, h1, v2.h[0] vout:d4ec68f21f468712f7b8ab3708137382 vin0:478209dbbd84d92508847c7642a20df9 vin1:0b9c016be95f18de62bba1a11cc04c89
+ vout:00000000000000000000000000007bdd vin0:478209dbbd84d92508847c7642a20df9 vin1:0b9c016be95f18de62bba1a11cc04c89 fpsr=00000000
+sqrdmlah s0, s1, v2.s[0] vout:1541139c8b1cd0d1a11d81326f4e7880 vin0:30c9028972f8733d11f7fa4450de2529 vin1:a1cd852d9cd970502d146432e64644c9
+ vout:0000000000000000000000005f0dbde5 vin0:30c9028972f8733d11f7fa4450de2529 vin1:a1cd852d9cd970502d146432e64644c9 fpsr=00000000
+sqrdmlah h3, h4, v5.h[1] vout:94d7265949ca62b46a8a793cf9d5f0d1 vin0:35e7926e777aa43f56470887bfdd3daf vin1:b2ed4ecc1e172df2d3a0a41fce854ae7
+ vout:0000000000000000000000000000d8f9 vin0:35e7926e777aa43f56470887bfdd3daf vin1:b2ed4ecc1e172df2d3a0a41fce854ae7 fpsr=00000000
+sqrdmlah s3, s4, v5.s[1] vout:09e14df041cdc14f0bf7ba2283e22a31 vin0:f0fdf0aee1dda4e888e2774acbc13287 vin1:f30110c432a534d0478d5d7e053a4e0c
+ vout:00000000000000000000000080000000 vin0:f0fdf0aee1dda4e888e2774acbc13287 vin1:f30110c432a534d0478d5d7e053a4e0c fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:62bbc77143b71e92668b24fb9133bf52 vin0:9fedb2229a090d2c018b42f3d3ec8415 vin1:6c11edd5a106e2d655f9b97953917f46
+ vout:0000000000000000658249dfadfa3a89 vin0:9fedb2229a090d2c018b42f3d3ec8415 vin1:6c11edd5a106e2d655f9b97953917f46 fpsr=00000000
+sqrdmlsh v0.8h, v1.8h, v2.8h vout:bf6982b029b396ea4f1e4ed5da99d2ee vin0:7b813bf15120fbc8683cbc58f8b23fca vin1:74876ac63afb7562c67d2c86fa7c09a3
+ vout:8000800004519ac87df3665eda48ce21 vin0:7b813bf15120fbc8683cbc58f8b23fca vin1:74876ac63afb7562c67d2c86fa7c09a3 fpsr=08000000
+sqrdmlsh v0.2s, v1.2s, v2.2s vout:077815d35567232e66c997070e860c39 vin0:109cfa471afbe686e2ede96f8809f947 vin1:9ce5d1a297a56adb474e1bb03bc55073
+ vout:000000000000000076fb5cdb468a5f5e vin0:109cfa471afbe686e2ede96f8809f947 vin1:9ce5d1a297a56adb474e1bb03bc55073 fpsr=00000000
+sqrdmlsh v0.4s, v1.4s, v2.4s vout:2a1f00ed91e9071d79112f6f64f5079c vin0:df63bd3c7359f634f791559ff8d88161 vin1:fba1981add7938e3067d74917c37833e
+ vout:2902119eb1066221797ea32c6be66494 vin0:df63bd3c7359f634f791559ff8d88161 vin1:fba1981add7938e3067d74917c37833e fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000
+ vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000005555555555555555 vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555
+ vout:00000000000000001c721c721c721c72 vin0:00000000000000005555555555555555 vin1:00000000000000005555555555555555 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:0000000000000000aaaaaaaaaaaaaaaa vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa
+ vout:00000000000000008000800080008000 vin0:0000000000000000aaaaaaaaaaaaaaaa vin1:0000000000000000aaaaaaaaaaaaaaaa fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff
+ vout:0000000000000000ffffffffffffffff vin0:0000000000000000ffffffffffffffff vin1:0000000000000000ffffffffffffffff fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000000123012301230123 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123
+ vout:00000000000000000120012001200120 vin0:00000000000000000123012301230123 vin1:00000000000000000123012301230123 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:0000000000000000fedcfedcfedcfedc vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc
+ vout:0000000000000000fed9fed9fed9fed9 vin0:0000000000000000fedcfedcfedcfedc vin1:0000000000000000fedcfedcfedcfedc fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000003141314131413141 vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141
+ vout:00000000000000001e4d1e4d1e4d1e4d vin0:00000000000000003141314131413141 vin1:00000000000000003141314131413141 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h vout:00000000000000002718271827182718 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718
+ vout:00000000000000001b271b271b271b27 vin0:00000000000000002718271827182718 vin1:00000000000000002718271827182718 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[0] vout:9cdd1a32cd007ff7daac12cf3a64acbd vin0:e76fcc086aeb0414a9cd126c0869c6a0 vin1:d973ba438b80fdb556878af3ad4a4cb8
+ vout:00000000000000000e5607c4355acf20 vin0:e76fcc086aeb0414a9cd126c0869c6a0 vin1:d973ba438b80fdb556878af3ad4a4cb8 fpsr=00000000
+sqrdmlsh v0.8h, v1.8h, v2.8h[1] vout:fa0ba48e9db3d6f2c0c135e244f24dfe vin0:71a4885bc70f501cf18441c67d4b9e45 vin1:95a6e59e2a7fabcb65b86284a1cb27a3
+ vout:4dae8000800011e8b618664b7fff0610 vin0:71a4885bc70f501cf18441c67d4b9e45 vin1:95a6e59e2a7fabcb65b86284a1cb27a3 fpsr=08000000
+sqrdmlsh v0.2s, v1.2s, v2.2s[2] vout:aef4eeb358364f4add55d3bb09c439c9 vin0:3028339e0d3a0c468e8f584ceae94e7a vin1:e33fad8f313a964967940f284cfce9a3
+ vout:000000000000000008f6e02b11e090c9 vin0:3028339e0d3a0c468e8f584ceae94e7a vin1:e33fad8f313a964967940f284cfce9a3 fpsr=00000000
+sqrdmlsh v0.4s, v1.4s, v2.4s[3] vout:6c9a8e07714d3d2264ecfe407d2043c1 vin0:d6006035af2e8bb7b3736be34585abe2 vin1:7742a77a117513548f9ea7c3a323665c
+ vout:7fffffff7fffffff7fffffff3c59ca12 vin0:d6006035af2e8bb7b3736be34585abe2 vin1:7742a77a117513548f9ea7c3a323665c fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000
+ vout:00000000000000000000000000000000 vin0:00000000000000000000000000000000 vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[1] vout:00000000000000000000000055550000 vin0:00000000000000000000000055550000 vin1:00000000000000000000000055550000
+ vout:0000000000000000000000001c720000 vin0:00000000000000000000000055550000 vin1:00000000000000000000000055550000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000aaaa00000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000
+ vout:00000000000000000000800000000000 vin0:00000000000000000000aaaa00000000 vin1:00000000000000000000aaaa00000000 fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[3] vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000
+ vout:0000000000000000ffff000000000000 vin0:0000000000000000ffff000000000000 vin1:0000000000000000ffff000000000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[0] vout:00000000000000000000000000000123 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123
+ vout:00000000000000000000000000000120 vin0:00000000000000000000000000000123 vin1:00000000000000000000000000000123 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[1] vout:000000000000000000000000fedc0000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000
+ vout:000000000000000000000000fed90000 vin0:000000000000000000000000fedc0000 vin1:000000000000000000000000fedc0000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[2] vout:00000000000000000000314100000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000
+ vout:000000000000000000001e4d00000000 vin0:00000000000000000000314100000000 vin1:00000000000000000000314100000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[3] vout:00000000000000002718000000000000 vin0:00000000000000002718000000000000 vin1:00000000000000002718000000000000
+ vout:00000000000000001b27000000000000 vin0:00000000000000002718000000000000 vin1:00000000000000002718000000000000 fpsr=00000000
+randV128: 256 calls, 266 iters
+sqrdmlsh h0, h1, h2 vout:e70216ec5cbcf49e8a09cb539549408a vin0:182fa58322b1219295b48e6f81658922 vin1:05b265c33ff4760f125b3d3899837173
+ vout:00000000000000000000000000007fff vin0:182fa58322b1219295b48e6f81658922 vin1:05b265c33ff4760f125b3d3899837173 fpsr=08000000
+sqrdmlsh s0, s1, s2 vout:aaba95edd88623fc68d5d5d393ccbadd vin0:40947ccd307b129e244ee56d2260de8c vin1:d2b5bf6419898df003e6fe7283eff6cb
+ vout:000000000000000000000000b51ee109 vin0:40947ccd307b129e244ee56d2260de8c vin1:d2b5bf6419898df003e6fe7283eff6cb fpsr=00000000
+sqrdmlsh h3, h4, h5 vout:3fa5c4d84771e518605a54f56dfe15b7 vin0:ddeb80fe57ce3c26f9fcb34432fe8249 vin1:3b3296ac6d6e4ba4d95578b09e02700d
+ vout:00000000000000000000000000007fff vin0:ddeb80fe57ce3c26f9fcb34432fe8249 vin1:3b3296ac6d6e4ba4d95578b09e02700d fpsr=08000000
+sqrdmlsh s3, s4, s5 vout:8fbc05b829b247cac4e8bba2bda13050 vin0:98bf1ba36919393bc4d999db7390839e vin1:44d5584589abea635dc49b10189f4c14
+ vout:000000000000000000000000a766456f vin0:98bf1ba36919393bc4d999db7390839e vin1:44d5584589abea635dc49b10189f4c14 fpsr=00000000
+sqrdmlsh h6, h7, h8 vout:0b0b9f6018e987aeba97106bb88dbd45 vin0:9d5fe4af824eabd8f8f577d6f4dd0223 vin1:d6c08bc57f47f9ba34279d2f35968b0a
+ vout:0000000000000000000000000000bf39 vin0:9d5fe4af824eabd8f8f577d6f4dd0223 vin1:d6c08bc57f47f9ba34279d2f35968b0a fpsr=00000000
+sqrdmlsh s6, s7, s8 vout:05dbe25a9a3951f70e8dc8821606fcca vin0:fe1783322bd1f4a0a92e2587172ec23f vin1:22d9446284e6ae8126fc5ee9b286181e
+ vout:000000000000000000000000240f31d7 vin0:fe1783322bd1f4a0a92e2587172ec23f vin1:22d9446284e6ae8126fc5ee9b286181e fpsr=00000000
+sqrdmlsh h9, h10, h11 vout:3131620a2265f8c8f64df6cdcb51c286 vin0:6eeb8d90d86668b60a08b6d0cfc59797 vin1:dc2316810c4e5ddd66c8f02281b3c8f2
+ vout:0000000000000000000000000000959d vin0:6eeb8d90d86668b60a08b6d0cfc59797 vin1:dc2316810c4e5ddd66c8f02281b3c8f2 fpsr=00000000
+sqrdmlsh s9, s10, s11 vout:4210b3d32431d146a45cad2eccb0e21a vin0:a2de962ffdd15c3e50063f9610e753cd vin1:b7a39486894259f1290e68be98626e2d
+ vout:000000000000000000000000da5fd688 vin0:a2de962ffdd15c3e50063f9610e753cd vin1:b7a39486894259f1290e68be98626e2d fpsr=00000000
+sqrdmlsh h12, h13, h14 vout:ee7d691b146130944d3d038a0b69312c vin0:4df433720fd7245dafacd5bdced9cd88 vin1:685c54d57186f6e2a353dba0ead5df70
+ vout:00000000000000000000000000002455 vin0:4df433720fd7245dafacd5bdced9cd88 vin1:685c54d57186f6e2a353dba0ead5df70 fpsr=00000000
+sqrdmlsh s12, s13, s14 vout:e77b184466b967d624750ac67ebe825f vin0:2533f6bc813a13365b808a28feded669 vin1:a353e8d137de89d3071b5bad6b52ee61
+ vout:0000000000000000000000007fb0f67c vin0:2533f6bc813a13365b808a28feded669 vin1:a353e8d137de89d3071b5bad6b52ee61 fpsr=00000000
+sqrdmlsh h15, h16, h17 vout:e11053b38ffdcd305e88d8c318f5aa57 vin0:dc9d7472c7c07dee870474bd92394516 vin1:1b8ce6e04f0e66e88ae9fdca101c70a3
+ vout:00000000000000000000000000008000 vin0:dc9d7472c7c07dee870474bd92394516 vin1:1b8ce6e04f0e66e88ae9fdca101c70a3 fpsr=08000000
+sqrdmlsh s15, s16, s17 vout:913db0cc02f1b3c72ff97f68cd517cb9 vin0:850ae0642ddae0466041d5d9cb7738db vin1:2af3bd4b509e6608a513cfe482162be8
+ vout:00000000000000000000000099a3f238 vin0:850ae0642ddae0466041d5d9cb7738db vin1:2af3bd4b509e6608a513cfe482162be8 fpsr=00000000
+sqrdmlsh h18, h19, h20 vout:b903f1b29f411487312d32f1bb069e61 vin0:95d26cc246074b10bda9f7bf92a71bac vin1:fcefa19f2c8a8cfd3989634f2a294a7c
+ vout:00000000000000000000000000008e47 vin0:95d26cc246074b10bda9f7bf92a71bac vin1:fcefa19f2c8a8cfd3989634f2a294a7c fpsr=00000000
+sqrdmlsh s18, s19, s20 vout:470818041ac5e9b218db305838ff3248 vin0:06ced856b4d04648a668c3da0fcbe652 vin1:39d4db0931b25e927a9632b68f624628
+ vout:00000000000000000000000046e512d8 vin0:06ced856b4d04648a668c3da0fcbe652 vin1:39d4db0931b25e927a9632b68f624628 fpsr=00000000
+sqrdmlsh h21, h22, h23 vout:764f859cf68f4679dab3699f129680a9 vin0:fc95f5d55c34e70e2034036b2540d210 vin1:32746a5ace2a448f4d76dd08966fd815
+ vout:00000000000000000000000000008000 vin0:fc95f5d55c34e70e2034036b2540d210 vin1:32746a5ace2a448f4d76dd08966fd815 fpsr=08000000
+sqrdmlsh s21, s22, s23 vout:b00b3cdf75747e60035ee161b2ddaa1e vin0:92478e7f987ac472db7137e460cce35a vin1:2915227d7d3b3371fe1c6a2981899c14
+ vout:0000000000000000000000001280e25d vin0:92478e7f987ac472db7137e460cce35a vin1:2915227d7d3b3371fe1c6a2981899c14 fpsr=00000000
+sqrdmlsh h24, h25, h26 vout:7be936badd6630980aa27329b5b3ecd2 vin0:d2bc96d6b1a87f5bc30eedfc43f567c8 vin1:ded3251e3f2e1bf337f62011aebf77d2
+ vout:00000000000000000000000000008bac vin0:d2bc96d6b1a87f5bc30eedfc43f567c8 vin1:ded3251e3f2e1bf337f62011aebf77d2 fpsr=00000000
+sqrdmlsh s24, s25, s26 vout:6c7f80e89ebd80a5e34bca20163ac21e vin0:e06c5cc8e1357d72cece7967d1f50cd5 vin1:4fd7e326d29b74541ae5bf20bcc2f9c2
+ vout:000000000000000000000000fe0b135f vin0:e06c5cc8e1357d72cece7967d1f50cd5 vin1:4fd7e326d29b74541ae5bf20bcc2f9c2 fpsr=00000000
+sqrdmlsh h27, h28, h29 vout:190c026f4f4108bb97f152ac79a338e2 vin0:082a07b97ea580d954e0244c1dcf60e0 vin1:b87fb552d02120cc96fce910c815b7b5
+ vout:00000000000000000000000000006f99 vin0:082a07b97ea580d954e0244c1dcf60e0 vin1:b87fb552d02120cc96fce910c815b7b5 fpsr=00000000
+sqrdmlsh s27, s28, s29 vout:35954eb164b81a015d181eb0d13422c0 vin0:fefa2b0bfdbeddb488c900901dc5368c vin1:cccf2d05af86747edec1b4c5c4fa8650
+ vout:000000000000000000000000deee4fe6 vin0:fefa2b0bfdbeddb488c900901dc5368c vin1:cccf2d05af86747edec1b4c5c4fa8650 fpsr=00000000
+sqrdmlsh h30, h31, h0 vout:751dfa1352e40c98674442111330555e vin0:76df5c23d344e7279f0d2317c41d637d vin1:40c9e0a4e28cc38e27b63222a6b73935
+ vout:000000000000000000000000000028e7 vin0:76df5c23d344e7279f0d2317c41d637d vin1:40c9e0a4e28cc38e27b63222a6b73935 fpsr=00000000
+sqrdmlsh s30, s31, s0 vout:23de2e6573f9f357cd2f9fc5071aba58 vin0:c8746293ddf96221a55f780d618fa50b vin1:16458560adcdd7091db23c3834cb4d4d
+ vout:000000000000000000000000dedd6a91 vin0:c8746293ddf96221a55f780d618fa50b vin1:16458560adcdd7091db23c3834cb4d4d fpsr=00000000
+sqrdmlsh h0, h1, v2.h[0] vout:17d247361590a45a8c419b68e9c69d73 vin0:23de85e7f3ba676cd7ca3327879cb597 vin1:9a985ec5f0031343f3185309c7b360a0
+ vout:0000000000000000000000000000d59f vin0:23de85e7f3ba676cd7ca3327879cb597 vin1:9a985ec5f0031343f3185309c7b360a0 fpsr=00000000
+sqrdmlsh s0, s1, v2.s[0] vout:e2e823f1fc15de5d0fe0ad1832a0f513 vin0:0a452b2c674cbddfcbf508515b068b9e vin1:6109ca6565cab2e77d69475df9b640b0
+ vout:0000000000000000000000003719b567 vin0:0a452b2c674cbddfcbf508515b068b9e vin1:6109ca6565cab2e77d69475df9b640b0 fpsr=00000000
+sqrdmlsh h3, h4, v5.h[1] vout:ddb98a28084c634f63bfc3013161828e vin0:7e7d09937d452c872eb7cf99a14da407 vin1:94e09c4d7a2fb98594259c37dc0df227
+ vout:00000000000000000000000000008000 vin0:7e7d09937d452c872eb7cf99a14da407 vin1:94e09c4d7a2fb98594259c37dc0df227 fpsr=08000000
+sqrdmlsh s3, s4, v5.s[1] vout:bc4a103eacf98853bc63f107d94d1889 vin0:348ab47fa96b098734939ce54eb5d374 vin1:e6246ae1a4f77a426cd3657964fa47a9
+ vout:0000000000000000000000009661afff vin0:348ab47fa96b098734939ce54eb5d374 vin1:e6246ae1a4f77a426cd3657964fa47a9 fpsr=00000000