git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Bug 413634 - ARMv8.1 arithmetic instructions are not supported
author Julian Seward <jseward@acm.org>
Fri, 27 Dec 2019 14:30:21 +0000 (15:30 +0100)
committer Julian Seward <jseward@acm.org>
Fri, 27 Dec 2019 14:30:21 +0000 (15:30 +0100)
Patch from Assad Hashmi <assad.hashmi@linaro.org>.

This patch adds support for AArch64 ARMv8.1 SIMD instructions:
SQRDMLAH <V><d>, <V><n>, <V><m>
SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<T>
SQRDMLAH <V><d>, <V><n>, <Vm>.<Ts>[<index>]
SQRDMLAH <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]
SQRDMLSH <V><d>, <V><n>, <V><m>
SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<T>
SQRDMLSH <V><d>, <V><n>, <Vm>.<Ts>[<index>]
SQRDMLSH <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]

VEX/priv/guest_arm64_toIR.c
none/tests/arm64/Makefile.am
none/tests/arm64/simd_v81.c [new file with mode: 0644]
none/tests/arm64/simd_v81.stderr.exp [new file with mode: 0644]
none/tests/arm64/simd_v81.stdout.exp [new file with mode: 0644]
none/tests/arm64/simd_v81.vgtest [new file with mode: 0644]

index 513ceba81a134b24a6b26d2dbe7a5766101afcdf..2589ddfb5599718de8d83aecdd282e16caa5f53a 100644 (file)
@@ -8437,6 +8437,27 @@ void math_SQDMULH ( /*OUT*/IRTemp* res,
    assign(*res, mkexpr(*sat1q));
 }
 
+/* Generate IR for SQRDMLAH (isAdd) / SQRDMLSH (!isAdd): signedly wideningly
+   multiply, double, add a rounding constant, take the high half, accumulate. */
+static
+void math_SQRDMLAH ( /*OUT*/IRTemp* res, /*OUT*/IRTemp* res_nosat, Bool isAdd,
+                     UInt size, IRTemp vD, IRTemp vN, IRTemp vM )
+{
+   vassert(size == X01 || size == X10); /* h or s lanes only */
+
+   /* SQRDMLAH = SQADD(vD, SQRDMULH(vN, vM)); SQRDMLSH uses SQSUB instead. */
+
+   IRTemp mul, mul_nosat, dummy;
+   mul = mul_nosat = dummy = IRTemp_INVALID;
+   math_SQDMULH(&mul, &dummy, &mul_nosat, True/*R*/, size, vN, vM);
+
+   IROp  op = isAdd ? mkVecADD(size)   : mkVecSUB(size);
+   IROp qop = isAdd ? mkVecQADDS(size) : mkVecQSUBS(size);
+   newTempsV128_2(res, res_nosat); /* both outputs are freshly allocated here */
+   assign(*res, binop(qop, mkexpr(vD), mkexpr(mul)));   /* via the Q op */
+   assign(*res_nosat, binop(op, mkexpr(vD), mkexpr(mul_nosat))); /* plain op */
+}
+
 
 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
    a new temp in *res, and the Q difference pair in new temps in
@@ -10328,6 +10349,59 @@ Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
 #  undef INSN
 }
 
+static
+Bool dis_AdvSIMD_scalar_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
+{
+   /* Scalar SQRDMLAH/SQRDMLSH, register form.  Returns True iff handled.
+      31 29 28    23   21 20 15     10 9 4
+      01 U  11110 size 0  m  opcode 1  n d
+      Decode fields: u,size,opcode */
+#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,30) != BITS2(0,1)
+       || INSN(28,24) != BITS5(1,1,1,1,0)
+       || INSN(21,21) != 0
+       || INSN(10,10) != 1) {
+      return False;
+   }
+   UInt bitU   = INSN(29,29);
+   UInt size   = INSN(23,22);
+   UInt mm     = INSN(20,16);
+   UInt opcode = INSN(15,11);
+   UInt nn     = INSN(9,5);
+   UInt dd     = INSN(4,0);
+   vassert(size < 4);
+   vassert(mm < 32 && nn < 32 && dd < 32);
+
+   if (bitU == 1 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
+      /* -------- 1,xx,10000 SQRDMLAH s and h variants only -------- */
+      /* -------- 1,xx,10001 SQRDMLSH s and h variants only -------- */
+      if (size == X00 || size == X11) return False;
+      Bool isAdd = opcode == BITS5(1,0,0,0,0);
+
+      IRTemp res, res_nosat, vD, vN, vM;
+      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+      newTempsV128_3(&vD, &vN, &vM);
+      assign(vD, getQReg128(dd));
+      assign(vN, getQReg128(nn));
+      assign(vM, getQReg128(mm));
+
+      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+      putQReg128(dd,
+                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
+      updateQCFLAGwithDifference(
+         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res)),
+         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res_nosat)));
+      /* size is X01 (h) or X10 (s) here, so index the full b/h/s/d table. */
+      const HChar  arr = "bhsd"[size];
+      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
+      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
+      return True;
+   }
+
+   return False;
+#  undef INSN
+}
+
 
 static
 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
@@ -10655,7 +10729,7 @@ Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
    */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
    if (INSN(31,30) != BITS2(0,1)
-       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
+       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
       return False;
    }
    UInt bitU   = INSN(29,29);
@@ -10789,7 +10863,7 @@ Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
-   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
+   if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
       UInt mm  = 32; // invalid
@@ -10823,6 +10897,45 @@ Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
+      /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
+      /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
+      UInt mm  = 32; // invalid
+      UInt ix  = 16; // invalid
+      switch (size) {
+         case X00:
+            return False; // b case is not allowed
+         case X01:        // h: 4-bit register number, 3-bit index H:L:M
+            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
+         case X10:        // s: 5-bit register number M:mmLO4, 2-bit index H:L
+            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
+         case X11:
+            return False; // d case is not allowed
+         default:
+            vassert(0);
+      }
+      vassert(size < 4);
+      vassert(mm < 32 && ix < 16);
+      Bool isAdd = opcode == BITS4(1,1,0,1);
+
+      IRTemp res, res_nosat, vD, vN, vM;
+      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+      newTempsV128_2(&vD, &vN);
+      assign(vD, getQReg128(dd));
+      assign(vN, getQReg128(nn));
+      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
+
+      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+      IROp opZHI = mkVecZEROHIxxOFV128(size);
+      putQReg128(dd, unop(opZHI, mkexpr(res)));
+      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
+      /* Trace: the indexed operand is register mm; args follow format order. */
+      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
+      HChar ch         = size == X01 ? 'h' : 's';
+      DIP("%s %c%u, %c%u, v%d.%c[%u]\n", nm, ch, dd, ch, nn, (Int)mm, ch, ix);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
@@ -12327,6 +12440,61 @@ Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
 }
 
 
+static
+Bool dis_AdvSIMD_three_same_extra(/*MB_OUT*/DisResult* dres, UInt insn)
+{
+   /* Vector SQRDMLAH/SQRDMLSH, register form.  Returns True iff handled.
+      31 30 29 28    23   21 20 15 14     10 9 4
+      0  Q  U  01110 size 0  m  1  opcode 1  n d
+      Decode fields: u,size,opcode */
+#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,31) != 0
+       || INSN(28,24) != BITS5(0,1,1,1,0)
+       || INSN(21,21) != 0
+       || INSN(15,15) != 1
+       || INSN(10,10) != 1) {
+      return False;
+   }
+   UInt bitQ   = INSN(30,30);
+   UInt bitU   = INSN(29,29);
+   UInt size   = INSN(23,22);
+   UInt mm     = INSN(20,16);
+   UInt opcode = INSN(14,11);
+   UInt nn     = INSN(9,5);
+   UInt dd     = INSN(4,0);
+   vassert(size < 4);
+   vassert(mm < 32 && nn < 32 && dd < 32);
+
+   if (bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,0,1))) {
+      /* -------- 1,xx,0000 SQRDMLAH s and h variants only -------- */
+      /* -------- 1,xx,0001 SQRDMLSH s and h variants only -------- */
+      if (size == X00 || size == X11) return False;
+      Bool isAdd = opcode == BITS4(0,0,0,0);
+
+      IRTemp res, res_nosat, vD, vN, vM;
+      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+      newTempsV128_3(&vD, &vN, &vM);
+      assign(vD, getQReg128(dd));
+      assign(vN, getQReg128(nn));
+      assign(vM, getQReg128(mm));
+
+      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
+      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
+   return False;
+#  undef INSN
+}
+
+
 static
 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
 {
@@ -13249,7 +13417,7 @@ Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
-   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
+   if (bitU == 0 && (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1))) {
       /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
       /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
       UInt mm  = 32; // invalid
@@ -13285,6 +13453,46 @@ Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (bitU == 1 && (opcode == BITS4(1,1,0,1) || opcode == BITS4(1,1,1,1))) {
+      /* -------- 1,xx,1101 SQRDMLAH s and h variants only -------- */
+      /* -------- 1,xx,1111 SQRDMLSH s and h variants only -------- */
+      UInt mm  = 32; // invalid
+      UInt ix  = 16; // invalid
+      switch (size) {
+         case X00:
+            return False; // b case is not allowed
+         case X01:        // h
+            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
+         case X10:        // s
+            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
+         case X11:
+            return False; // d case is not allowed
+         default:
+            vassert(0);
+      }
+      vassert(mm < 32 && ix < 16);
+
+      IRTemp res, res_nosat, vD, vN, vM;
+      res = res_nosat = vD = vN = vM = IRTemp_INVALID;
+      newTempsV128_2(&vD, &vN);
+      assign(vD, getQReg128(dd));
+      assign(vN, getQReg128(nn));
+      /* vM comes from lane ix of register mm via math_DUP_VEC_ELEM. */
+      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
+      Bool isAdd = opcode == BITS4(1,1,0,1);
+      math_SQRDMLAH(&res, &res_nosat, isAdd, size, vD, vN, vM);
+      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
+      updateQCFLAGwithDifferenceZHI(res, res_nosat, opZHI);
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      const HChar* nm  = isAdd ? "sqrdmlah" : "sqrdmlsh";
+      HChar ch         = size == X01 ? 'h' : 's';
+      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
@@ -14529,6 +14737,8 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
    if (UNLIKELY(ok)) return True;
    ok = dis_AdvSIMD_scalar_three_same(dres, insn);
    if (UNLIKELY(ok)) return True;
+   ok = dis_AdvSIMD_scalar_three_same_extra(dres, insn);
+   if (UNLIKELY(ok)) return True;
    ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
    if (UNLIKELY(ok)) return True;
    ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
@@ -14539,6 +14749,8 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
    if (UNLIKELY(ok)) return True;
    ok = dis_AdvSIMD_three_same(dres, insn);
    if (UNLIKELY(ok)) return True;
+   ok = dis_AdvSIMD_three_same_extra(dres, insn);
+   if (UNLIKELY(ok)) return True;
    ok = dis_AdvSIMD_two_reg_misc(dres, insn);
    if (UNLIKELY(ok)) return True;
    ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
index 78a5742b2aed902985e5955f4064394c6f5f2e25..7b3ebbdca9a25c9ca7a710ac771699b1b97304f7 100644 (file)
@@ -9,7 +9,8 @@ EXTRA_DIST = \
        fp_and_simd.stdout.exp fp_and_simd.stderr.exp fp_and_simd.vgtest \
        integer.stdout.exp integer.stderr.exp integer.vgtest \
        memory.stdout.exp memory.stderr.exp memory.vgtest \
-       atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest
+       atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \
+       simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest
 
 check_PROGRAMS = \
        allexec \
@@ -23,7 +24,7 @@ if BUILD_ARMV8_CRC_TESTS
 endif
 
 if BUILD_ARMV81_TESTS
-  check_PROGRAMS += atomics_v81
+  check_PROGRAMS += atomics_v81 simd_v81
 endif
 
 AM_CFLAGS    += @FLAG_M64@
@@ -34,7 +35,9 @@ allexec_CFLAGS     = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@
 
 crc32_CFLAGS       = $(AM_CFLAGS) -march=armv8-a+crc
 atomics_v81_CFLAGS = $(AM_CFLAGS) -march=armv8.1-a
+simd_v81_CFLAGS    = $(AM_CFLAGS) -march=armv8.1-a+crypto
 fp_and_simd_CFLAGS = $(AM_CFLAGS) -march=armv8-a+crypto
 integer_CFLAGS     = $(AM_CFLAGS) -g -O0 -DTEST_BFM=0
 
 fp_and_simd_LDADD  = -lm
+simd_v81_LDADD     = -lm
diff --git a/none/tests/arm64/simd_v81.c b/none/tests/arm64/simd_v81.c
new file mode 100644 (file)
index 0000000..60c4c26
--- /dev/null
@@ -0,0 +1,498 @@
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+
+typedef  unsigned char           UChar;
+typedef  unsigned short int      UShort;
+typedef  unsigned int            UInt;
+typedef  signed int              Int;
+typedef  unsigned long long int  ULong;
+typedef  signed long long int    Long;
+typedef  double                  Double;
+typedef  float                   Float;
+
+typedef  unsigned char           Bool;
+#define False ((Bool)0)
+#define True  ((Bool)1)
+
+#define ITERS 1
+
+typedef
+  enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
+  LaneTy;
+
+union _V128 {        /* one vector image; main() asserts it is 16 bytes */
+   UChar  u8[16];    /* byte view (used by randV128 and showV128) */
+   UShort u16[8];    /* H lanes */
+   UInt   u32[4];    /* S lanes */
+   ULong  u64[2];
+   Float  f32[4];    /* FP views, checked by randV128 */
+   Double f64[2];
+};
+typedef  union _V128   V128;
+
+static void setV128( V128* v, int elements, LaneTy ty, ULong val )
+{  /* Zero *v, then write val into lanes 0 .. elements-1 of lane type ty. */
+   assert( (elements % 2) == 0 && elements >= 2 && elements <= 16 );
+   memset(v, 0, sizeof(*v));
+   switch (ty) {
+      /* 4H or 8H */
+      case TyH:
+         assert( val < (1 << 16) );
+         assert( elements == 4 || elements == 8 );
+         for ( int i = 0; i < elements; i++ )
+            v->u16[i] = val;
+         break;
+      /* 2S or 4S */
+      case TyS:
+         assert( val < (1ULL << 32) ); /* int shift by 32 would be undefined */
+         assert( elements == 2 || elements == 4 );
+         for ( int i = 0; i < elements; i++ )
+            v->u32[i] = val;
+         break;
+      default:       /* other lane types: *v stays all-zero, only a message */
+         printf("8B, 2D and 16B not implemented\n");
+   }
+}
+
+static void setV128_idx( V128* v, int elements, LaneTy ty, ULong val, int idx )
+{  /* Zero *v, then write val into lane idx only (idx must be < elements). */
+   assert( (elements % 2) == 0 && elements >= 2 && elements <= 16 );
+   assert( idx >= 0 );
+   memset(v, 0, sizeof(*v));
+   switch (ty) {
+      /* 4H or 8H */
+      case TyH:
+         assert( val < (1 << 16) );
+         assert( elements == 4 || elements == 8 );
+         assert( idx < elements );
+         v->u16[idx] = val;
+         break;
+      /* 2S or 4S */
+      case TyS:
+         assert( val < (1ULL << 32) ); /* int shift by 32 would be undefined */
+         assert( elements == 2 || elements == 4 );
+         assert( idx < elements );
+         v->u32[idx] = val;
+         break;
+      default:       /* other lane types: *v stays all-zero, only a message */
+         printf("8B, 2D and 16B not implemented\n");
+   }
+}
+
+static inline UChar randUChar ( void )
+{  /* Pseudo-random byte from a fixed-seed generator kept in a static. */
+   static UInt seed = 80021;
+   seed = 1103515245 * seed + 12345;  /* linear congruential step */
+   return (seed >> 17) & 0xFF;        /* bits 17..24 of the new state */
+}
+
+/* Generates a random V128. Ensures that it contains normalised FP numbers
+ * when viewed as either F32x4 or F64x2, so that it is reasonable to use in FP
+ * test cases.
+ */
+static void randV128 ( /*OUT*/V128* v )
+{
+   static UInt nCalls = 0, nIters = 0;  /* running totals across all calls */
+   Int i;
+   nCalls++;
+   while (1) {                          /* retry until every FP view is normal */
+      nIters++;
+      for (i = 0; i < 16; i++) {
+         v->u8[i] = randUChar();
+      }
+      if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
+          && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
+        break;
+   }
+   if (0 == (nCalls & 0xFF))            /* report once every 256 calls */
+      printf("randV128: %u calls, %u iters\n", nCalls, nIters);
+}
+
+static void showV128 ( V128* v )
+{  /* Print the 16 bytes of *v as hex digits, u8[15] first, no newline. */
+   Int i;
+   for (i = 15; i >= 0; i--)
+      printf("%02x", (Int)v->u8[i]);
+}
+
+/* Generate test_TESTNAME(): load three random V128s into the given vector
+ * regs (no bias as to which is input or output), run INSN with FPSR zeroed,
+ * then print the three regs before and after, plus the masked FPSR. */
+#define GEN_THREEVEC_TEST_RND(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO)  \
+  __attribute__((noinline)) \
+  static void test_##TESTNAME ( LaneTy ty ) { \
+     Int i; \
+     for (i = 0; i < ITERS; i++) { \
+        V128 block[6+1]; \
+        memset(block, 0x55, sizeof(block)); \
+        randV128(&block[0]); \
+        randV128(&block[1]); \
+        randV128(&block[2]); \
+        randV128(&block[3]); \
+        randV128(&block[4]); \
+        randV128(&block[5]); \
+        __asm__ __volatile__( \
+           "mov   x30, #0 ; msr fpsr, x30 ; " \
+           "ldr   q"#VECREG1NO", [%0, #0]  ; " \
+           "ldr   q"#VECREG2NO", [%0, #16] ; " \
+           "ldr   q"#VECREG3NO", [%0, #32] ; " \
+           INSN " ; " \
+           "str   q"#VECREG1NO", [%0, #48] ; " \
+           "str   q"#VECREG2NO", [%0, #64] ; " \
+           "str   q"#VECREG3NO", [%0, #80] ; " \
+           "mrs   x30, fpsr ; str x30, [%0, #96] " \
+           : : "r"(&block[0]) \
+           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, "x30" \
+        ); \
+        printf("%-34s", INSN); \
+        UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
+        printf("vout:"); showV128(&block[0]); printf("  "); \
+        printf("vin0:"); showV128(&block[1]); printf("  "); \
+        printf("vin1:"); showV128(&block[2]); \
+        printf("\n                                  "); \
+        printf("vout:"); showV128(&block[3]); printf("  "); \
+        printf("vin0:"); showV128(&block[4]); printf("  "); \
+        printf("vin1:"); showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
+     } \
+  }
+
+/* Generate a test function that involves three vector regs with no bias as
+ * to which is input or output. The three vector regs are loaded from the
+ * caller-supplied *vout, *vin0 and *vin1 rather than from randV128(); the
+ * regs are printed before and after INSN, followed by the masked FPSR. */
+#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREGOUT,VECREGIN0,VECREGIN1) \
+  __attribute__((noinline)) \
+  static void test_##TESTNAME ( V128* vout, V128* vin0, V128* vin1, LaneTy ty ) { \
+     V128 block[6+1]; \
+     memset(block, 0, sizeof(block)); \
+     block[0] = *vout; \
+     block[1] = *vin0; \
+     block[2] = *vin1; \
+     __asm__ __volatile__( \
+        "mov   x30, #0 ; msr fpsr, x30 ; " \
+        "ldr   q"#VECREGOUT", [%0, #0]  ; " \
+        "ldr   q"#VECREGIN0", [%0, #16] ; " \
+        "ldr   q"#VECREGIN1", [%0, #32] ; " \
+        INSN " ; " \
+        "str   q"#VECREGOUT", [%0, #48] ; " \
+        "str   q"#VECREGIN0", [%0, #64] ; " \
+        "str   q"#VECREGIN1", [%0, #80] ; " \
+        "mrs   x30, fpsr ; str x30, [%0, #96] " \
+        : : "r"(&block[0]) \
+        : "memory", "v"#VECREGOUT, "v"#VECREGIN0, "v"#VECREGIN1, "x30" \
+     ); \
+     printf("%-34s", INSN); \
+     UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
+     printf("vout:"); showV128(&block[0]); printf("  "); \
+     printf("vin0:"); showV128(&block[1]); printf("  "); \
+     printf("vin1:"); showV128(&block[2]); \
+     printf("\n                                  "); \
+     printf("vout:"); showV128(&block[3]); printf("  "); \
+     printf("vin0:"); showV128(&block[4]); printf("  "); \
+     printf("vin1:"); showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
+  }
+
+/* Fill vout, vin[0] and vin[1] (which must be in scope) with PATTERN in each
+ * of the ECOUNT lanes, then call the matching GEN_THREEVEC_TEST function.
+ * Expands to several statements, so use it inside a braced block. */
+#define GEN_THREEVEC_TEST_CALLS(INSN,ECOUNT,ETYPE,EARRANGE,PATTERN) \
+   setV128(&vout, ECOUNT, ETYPE, PATTERN); \
+   setV128(&vin[0], ECOUNT, ETYPE, PATTERN); \
+   setV128(&vin[1], ECOUNT, ETYPE, PATTERN); \
+   test_##INSN##_##EARRANGE##_##EARRANGE##_##EARRANGE##_##PATTERN(&vout, &vin[0], &vin[1], ETYPE);
+
+/* As GEN_THREEVEC_TEST_CALLS, but only lane IDX of each vector is set. */
+#define GEN_THREEVEC_TEST_CALLSI(INSN,ECOUNT,ETYPE,EARRANGE,PATTERN,IDX) \
+   setV128_idx(&vout, ECOUNT, ETYPE, PATTERN, IDX); \
+   setV128_idx(&vin[0], ECOUNT, ETYPE, PATTERN, IDX); \
+   setV128_idx(&vin[1], ECOUNT, ETYPE, PATTERN, IDX); \
+   test_##INSN##_##EARRANGE##_##EARRANGE##_##EARRANGE##_##PATTERN(&vout, &vin[0], &vin[1], ETYPE);
+
+/* Test patterns. */
+#define ALL5s_32 0x55555555ULL
+#define ALLas_32 0xAAAAAAAAULL
+#define ALLfs_32 0xFFFFFFFFULL
+#define UP_32    0x01234567ULL
+#define DOWN_32  0xFEDCBA98ULL
+#define PI_32    0x31415926ULL
+#define E_32     0x27182818ULL
+
+#define ALL5s_16 0x5555ULL
+#define ALLas_16 0xAAAAULL
+#define ALLfs_16 0xFFFFULL
+#define UP_16    0x0123ULL
+#define DOWN_16  0xFEDCULL
+#define PI_16    0x3141ULL
+#define E_16     0x2718ULL
+
+
+/* ---------------------------------------------------------
+ * -- Tests, in the same order that they appear in main() --
+ * ---------------------------------------------------------
+ *
+ * There are 4 types of test sets for each instruction:
+ * - vector
+ * - indexed vector
+ * - scalar
+ * - indexed scalar
+ */
+
+/* sqrdmlah (vector version, no index) */
+GEN_THREEVEC_TEST_RND(sqrdmlah_4h_4h_4h, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_8h_8h_8h, "sqrdmlah v0.8h, v1.8h, v2.8h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_2s_2s_2s, "sqrdmlah v0.2s, v1.2s, v2.2s", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_4s_4s_4s, "sqrdmlah v0.4s, v1.4s, v2.4s", 0, 1, 2)
+
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_0, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALL5s_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALLas_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_ALLfs_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_UP_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_DOWN_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_PI_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_4h_4h_4h_E_16, "sqrdmlah v0.4h, v1.4h, v2.4h", 0, 1, 2)
+
+/* sqrdmlah (vector version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_4h_4h_4h, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_8h_8h_8h, "sqrdmlah v0.8h, v1.8h, v2.8h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_2s_2s_2s, "sqrdmlah v0.2s, v1.2s, v2.2s[2]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_4s_4s_4s, "sqrdmlah v0.4s, v1.4s, v2.4s[3]", 0, 1, 2)
+
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_0, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALL5s_16, "sqrdmlah v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALLas_16, "sqrdmlah v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_ALLfs_16, "sqrdmlah v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_UP_16, "sqrdmlah v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_DOWN_16, "sqrdmlah v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_PI_16, "sqrdmlah v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlah_i_4h_4h_4h_E_16, "sqrdmlah v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+
+/* sqrdmlah (scalar version) */
+GEN_THREEVEC_TEST_RND(sqrdmlah_h0_h1_h2, "sqrdmlah h0, h1, h2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s0_s1_s2, "sqrdmlah s0, s1, s2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h3_h4_h5, "sqrdmlah h3, h4, h5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s3_s4_s5, "sqrdmlah s3, s4, s5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h6_h7_h8, "sqrdmlah h6, h7, h8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s6_s7_s8, "sqrdmlah s6, s7, s8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h9_h10_h11, "sqrdmlah h9, h10, h11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s9_s10_s11, "sqrdmlah s9, s10, s11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h12_h13_h14, "sqrdmlah h12, h13, h14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s12_s13_s14, "sqrdmlah s12, s13, s14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h15_h16_h17, "sqrdmlah h15, h16, h17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s15_s16_s17, "sqrdmlah s15, s16, s17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h18_h19_h20, "sqrdmlah h18, h19, h20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s18_s19_s20, "sqrdmlah s18, s19, s20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h21_h22_h23, "sqrdmlah h21, h22, h23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s21_s22_s23, "sqrdmlah s21, s22, s23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h24_h25_h26, "sqrdmlah h24, h25, h26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s24_s25_s26, "sqrdmlah s24, s25, s26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h27_h28_h29, "sqrdmlah h27, h28, h29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s27_s28_s29, "sqrdmlah s27, s28, s29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlah_h30_h31_h0, "sqrdmlah h30, h31, h0", 30, 31, 0)
+GEN_THREEVEC_TEST_RND(sqrdmlah_s30_s31_s0, "sqrdmlah s30, s31, s0", 30, 31, 0)
+
+/* sqrdmlah (scalar version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_h0_h1_v2, "sqrdmlah h0, h1, v2.h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_s0_s1_v2, "sqrdmlah s0, s1, v2.s[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_h3_h4_v5, "sqrdmlah h3, h4, v5.h[1]", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlah_i_s3_s4_v5, "sqrdmlah s3, s4, v5.s[1]", 3, 4, 5)
+
+/* sqrdmlsh (vector version) */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_4h_4h_4h, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_8h_8h_8h, "sqrdmlsh v0.8h, v1.8h, v2.8h", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_2s_2s_2s, "sqrdmlsh v0.2s, v1.2s, v2.2s", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_4s_4s_4s, "sqrdmlsh v0.4s, v1.4s, v2.4s", 0, 1, 2)
+
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_0, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALL5s_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALLas_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_ALLfs_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_UP_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_DOWN_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_PI_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_4h_4h_4h_E_16, "sqrdmlsh v0.4h, v1.4h, v2.4h", 0, 1, 2)
+
+/* sqrdmlsh (vector version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_4h_4h_4h, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_8h_8h_8h, "sqrdmlsh v0.8h, v1.8h, v2.8h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_2s_2s_2s, "sqrdmlsh v0.2s, v1.2s, v2.2s[2]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_4s_4s_4s, "sqrdmlsh v0.4s, v1.4s, v2.4s[3]", 0, 1, 2)
+
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_0, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALL5s_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALLas_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_ALLfs_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_UP_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_DOWN_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[1]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_PI_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[2]", 0, 1, 2)
+GEN_THREEVEC_TEST(sqrdmlsh_i_4h_4h_4h_E_16, "sqrdmlsh v0.4h, v1.4h, v2.4h[3]", 0, 1, 2)
+
+/* sqrdmlsh (scalar version) */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h0_h1_h2, "sqrdmlsh h0, h1, h2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s0_s1_s2, "sqrdmlsh s0, s1, s2", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h3_h4_h5, "sqrdmlsh h3, h4, h5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s3_s4_s5, "sqrdmlsh s3, s4, s5", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h6_h7_h8, "sqrdmlsh h6, h7, h8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s6_s7_s8, "sqrdmlsh s6, s7, s8", 6, 7, 8)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h9_h10_h11, "sqrdmlsh h9, h10, h11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s9_s10_s11, "sqrdmlsh s9, s10, s11", 9, 10, 11)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h12_h13_h14, "sqrdmlsh h12, h13, h14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s12_s13_s14, "sqrdmlsh s12, s13, s14", 12, 13, 14)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h15_h16_h17, "sqrdmlsh h15, h16, h17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s15_s16_s17, "sqrdmlsh s15, s16, s17", 15, 16, 17)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h18_h19_h20, "sqrdmlsh h18, h19, h20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s18_s19_s20, "sqrdmlsh s18, s19, s20", 18, 19, 20)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h21_h22_h23, "sqrdmlsh h21, h22, h23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s21_s22_s23, "sqrdmlsh s21, s22, s23", 21, 22, 23)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h24_h25_h26, "sqrdmlsh h24, h25, h26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s24_s25_s26, "sqrdmlsh s24, s25, s26", 24, 25, 26)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h27_h28_h29, "sqrdmlsh h27, h28, h29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s27_s28_s29, "sqrdmlsh s27, s28, s29", 27, 28, 29)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_h30_h31_h0, "sqrdmlsh h30, h31, h0", 30, 31, 0)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_s30_s31_s0, "sqrdmlsh s30, s31, s0", 30, 31, 0)
+
+/* sqrdmlsh (scalar version, with index) */
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_h0_h1_v2, "sqrdmlsh h0, h1, v2.h[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_s0_s1_v2, "sqrdmlsh s0, s1, v2.s[0]", 0, 1, 2)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_h3_h4_v5, "sqrdmlsh h3, h4, v5.h[1]", 3, 4, 5)
+GEN_THREEVEC_TEST_RND(sqrdmlsh_i_s3_s4_v5, "sqrdmlsh s3, s4, v5.s[1]", 3, 4, 5)
+
+int main ( void )
+{
+   assert(sizeof(V128) == 16); /* the test asm moves whole 16-byte q regs */
+
+   /* sqrdmlah (vector version, no index) */
+   if (1) test_sqrdmlah_4h_4h_4h(TyH);
+   if (1) test_sqrdmlah_8h_8h_8h(TyH);
+   if (1) test_sqrdmlah_2s_2s_2s(TyS);
+   if (1) test_sqrdmlah_4s_4s_4s(TyS);
+
+   V128 vout;    /* operand images used by the GEN_THREEVEC_TEST_CALLS* macros */
+   V128 vin[2];
+
+   if (1) {
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, 0);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALL5s_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALLas_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, ALLfs_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, UP_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, DOWN_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, PI_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlah, 4, TyH, 4h, E_16);
+   }
+
+   /* sqrdmlah (vector version, with index) */
+   if (1) test_sqrdmlah_i_4h_4h_4h(TyH);
+   if (1) test_sqrdmlah_i_8h_8h_8h(TyH);
+   if (1) test_sqrdmlah_i_2s_2s_2s(TyS);
+   if (1) test_sqrdmlah_i_4s_4s_4s(TyS);
+
+   if (1) {
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, 0, 0);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALL5s_16, 1);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALLas_16, 2);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, ALLfs_16, 3);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, UP_16, 0);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, DOWN_16, 1);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, PI_16, 2);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlah_i, 4, TyH, 4h, E_16, 3);
+   }
+
+   /* sqrdmlah (scalar version) */
+   if (1) test_sqrdmlah_h0_h1_h2(TyH);
+   if (1) test_sqrdmlah_s0_s1_s2(TyS);
+   if (1) test_sqrdmlah_h3_h4_h5(TyH);
+   if (1) test_sqrdmlah_s3_s4_s5(TyS);
+   if (1) test_sqrdmlah_h6_h7_h8(TyH);
+   if (1) test_sqrdmlah_s6_s7_s8(TyS);
+   if (1) test_sqrdmlah_h9_h10_h11(TyH);
+   if (1) test_sqrdmlah_s9_s10_s11(TyS);
+   if (1) test_sqrdmlah_h12_h13_h14(TyH);
+   if (1) test_sqrdmlah_s12_s13_s14(TyS);
+   if (1) test_sqrdmlah_h15_h16_h17(TyH);
+   if (1) test_sqrdmlah_s15_s16_s17(TyS);
+   if (1) test_sqrdmlah_h18_h19_h20(TyH);
+   if (1) test_sqrdmlah_s18_s19_s20(TyS);
+   if (1) test_sqrdmlah_h21_h22_h23(TyH);
+   if (1) test_sqrdmlah_s21_s22_s23(TyS);
+   if (1) test_sqrdmlah_h24_h25_h26(TyH);
+   if (1) test_sqrdmlah_s24_s25_s26(TyS);
+   if (1) test_sqrdmlah_h27_h28_h29(TyH);
+   if (1) test_sqrdmlah_s27_s28_s29(TyS);
+   if (1) test_sqrdmlah_h30_h31_h0(TyH);
+   if (1) test_sqrdmlah_s30_s31_s0(TyS);
+
+   /* sqrdmlah (scalar version, with index) */
+   if (1) test_sqrdmlah_i_h0_h1_v2(TyH);
+   if (1) test_sqrdmlah_i_s0_s1_v2(TyS);
+   if (1) test_sqrdmlah_i_h3_h4_v5(TyH);
+   if (1) test_sqrdmlah_i_s3_s4_v5(TyS);
+
+   /* sqrdmlsh (vector version, no index) */
+   if (1) test_sqrdmlsh_4h_4h_4h(TyH);
+   if (1) test_sqrdmlsh_8h_8h_8h(TyH);
+   if (1) test_sqrdmlsh_2s_2s_2s(TyS);
+   if (1) test_sqrdmlsh_4s_4s_4s(TyS);
+
+   if (1) {
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, 0);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALL5s_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALLas_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, ALLfs_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, UP_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, DOWN_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, PI_16);
+      GEN_THREEVEC_TEST_CALLS(sqrdmlsh, 4, TyH, 4h, E_16);
+   }
+
+   /* sqrdmlsh (vector version, with index) */
+   if (1) test_sqrdmlsh_i_4h_4h_4h(TyH);
+   if (1) test_sqrdmlsh_i_8h_8h_8h(TyH);
+   if (1) test_sqrdmlsh_i_2s_2s_2s(TyS);
+   if (1) test_sqrdmlsh_i_4s_4s_4s(TyS);
+
+   if (1) {
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, 0, 0);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALL5s_16, 1);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALLas_16, 2);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, ALLfs_16, 3);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, UP_16, 0);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, DOWN_16, 1);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, PI_16, 2);
+      GEN_THREEVEC_TEST_CALLSI(sqrdmlsh_i, 4, TyH, 4h, E_16, 3);
+   }
+
+   /* sqrdmlsh (scalar version) */
+   if (1) test_sqrdmlsh_h0_h1_h2(TyH);
+   if (1) test_sqrdmlsh_s0_s1_s2(TyS);
+   if (1) test_sqrdmlsh_h3_h4_h5(TyH);
+   if (1) test_sqrdmlsh_s3_s4_s5(TyS);
+   if (1) test_sqrdmlsh_h6_h7_h8(TyH);
+   if (1) test_sqrdmlsh_s6_s7_s8(TyS);
+   if (1) test_sqrdmlsh_h9_h10_h11(TyH);
+   if (1) test_sqrdmlsh_s9_s10_s11(TyS);
+   if (1) test_sqrdmlsh_h12_h13_h14(TyH);
+   if (1) test_sqrdmlsh_s12_s13_s14(TyS);
+   if (1) test_sqrdmlsh_h15_h16_h17(TyH);
+   if (1) test_sqrdmlsh_s15_s16_s17(TyS);
+   if (1) test_sqrdmlsh_h18_h19_h20(TyH);
+   if (1) test_sqrdmlsh_s18_s19_s20(TyS);
+   if (1) test_sqrdmlsh_h21_h22_h23(TyH);
+   if (1) test_sqrdmlsh_s21_s22_s23(TyS);
+   if (1) test_sqrdmlsh_h24_h25_h26(TyH);
+   if (1) test_sqrdmlsh_s24_s25_s26(TyS);
+   if (1) test_sqrdmlsh_h27_h28_h29(TyH);
+   if (1) test_sqrdmlsh_s27_s28_s29(TyS);
+   if (1) test_sqrdmlsh_h30_h31_h0(TyH);
+   if (1) test_sqrdmlsh_s30_s31_s0(TyS);
+
+   /* sqrdmlsh (scalar version, with index) */
+   if (1) test_sqrdmlsh_i_h0_h1_v2(TyH);
+   if (1) test_sqrdmlsh_i_s0_s1_v2(TyS);
+   if (1) test_sqrdmlsh_i_h3_h4_v5(TyH);
+   if (1) test_sqrdmlsh_i_s3_s4_v5(TyS);
+
+   return 0;
+}
diff --git a/none/tests/arm64/simd_v81.stderr.exp b/none/tests/arm64/simd_v81.stderr.exp
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/none/tests/arm64/simd_v81.stdout.exp b/none/tests/arm64/simd_v81.stdout.exp
new file mode 100644 (file)
index 0000000..4cd08f4
--- /dev/null
@@ -0,0 +1,201 @@
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:5175e39d19c9ca1e98f24a4984175700  vin0:7d6528c5fa956a0d69c3e9a6af27d13b  vin1:60b160857d45c48447b8d8c0eeef1e50
+                                  vout:0000000000000000d43451248edf4bed  vin0:7d6528c5fa956a0d69c3e9a6af27d13b  vin1:60b160857d45c48447b8d8c0eeef1e50 fpsr=00000000
+sqrdmlah v0.8h, v1.8h, v2.8h      vout:d89998df5035ed364a4bc43968bc40e5  vin0:cb509970b8136c85d740b80eb7839b97  vin1:f9dd4a29f8c093db56b01a12b0ca1583
+                                  vout:db208000544891862eb2b5927fff3005  vin0:cb509970b8136c85d740b80eb7839b97  vin1:f9dd4a29f8c093db56b01a12b0ca1583 fpsr=08000000
+sqrdmlah v0.2s, v1.2s, v2.2s      vout:d182c916cebc2e17cfaff39be272ef40  vin0:6897b536bbe4da8a369dab4f9465b86e  vin1:407b8d9035449b06f4e06e2205236eb7
+                                  vout:0000000000000000caf0ee75de211f32  vin0:6897b536bbe4da8a369dab4f9465b86e  vin1:407b8d9035449b06f4e06e2205236eb7 fpsr=00000000
+sqrdmlah v0.4s, v1.4s, v2.4s      vout:f0350ca70523e0e45ba1ec54e87d39b3  vin0:0a3e0f7c75cb0842b95ed64d3b13ff64  vin1:e98ebd1ca893312a54cae7d5e13dfe91
+                                  vout:ee6951dab4afbfa02cd832b4da4afedf  vin0:0a3e0f7c75cb0842b95ed64d3b13ff64  vin1:e98ebd1ca893312a54cae7d5e13dfe91 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000
+                                  vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:00000000000000005555555555555555  vin0:00000000000000005555555555555555  vin1:00000000000000005555555555555555
+                                  vout:00000000000000007fff7fff7fff7fff  vin0:00000000000000005555555555555555  vin1:00000000000000005555555555555555 fpsr=08000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:0000000000000000aaaaaaaaaaaaaaaa  vin0:0000000000000000aaaaaaaaaaaaaaaa  vin1:0000000000000000aaaaaaaaaaaaaaaa
+                                  vout:0000000000000000e38ee38ee38ee38e  vin0:0000000000000000aaaaaaaaaaaaaaaa  vin1:0000000000000000aaaaaaaaaaaaaaaa fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:0000000000000000ffffffffffffffff  vin0:0000000000000000ffffffffffffffff  vin1:0000000000000000ffffffffffffffff
+                                  vout:0000000000000000ffffffffffffffff  vin0:0000000000000000ffffffffffffffff  vin1:0000000000000000ffffffffffffffff fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:00000000000000000123012301230123  vin0:00000000000000000123012301230123  vin1:00000000000000000123012301230123
+                                  vout:00000000000000000126012601260126  vin0:00000000000000000123012301230123  vin1:00000000000000000123012301230123 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:0000000000000000fedcfedcfedcfedc  vin0:0000000000000000fedcfedcfedcfedc  vin1:0000000000000000fedcfedcfedcfedc
+                                  vout:0000000000000000fedffedffedffedf  vin0:0000000000000000fedcfedcfedcfedc  vin1:0000000000000000fedcfedcfedcfedc fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:00000000000000003141314131413141  vin0:00000000000000003141314131413141  vin1:00000000000000003141314131413141
+                                  vout:00000000000000004435443544354435  vin0:00000000000000003141314131413141  vin1:00000000000000003141314131413141 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h      vout:00000000000000002718271827182718  vin0:00000000000000002718271827182718  vin1:00000000000000002718271827182718
+                                  vout:00000000000000003309330933093309  vin0:00000000000000002718271827182718  vin1:00000000000000002718271827182718 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[0]   vout:e9b5f3f66b2e58c121a6c3476d21f1e5  vin0:63483da65c8c49d096084deb9ed0411e  vin1:a81b6e33c572a86aacf29b0f395c98b4
+                                  vout:0000000000000000772a84667fffbd58  vin0:63483da65c8c49d096084deb9ed0411e  vin1:a81b6e33c572a86aacf29b0f395c98b4 fpsr=08000000
+sqrdmlah v0.8h, v1.8h, v2.8h[1]   vout:6f07136773a2ead356428c5a66a2ec77  vin0:28bad218e4ebf159ff1f240eb3e1553f  vin1:8404eb7f0cf4ca6fee8536da9dbf68bc
+                                  vout:4fc436a47ffff61256ef80007fffab07  vin0:28bad218e4ebf159ff1f240eb3e1553f  vin1:8404eb7f0cf4ca6fee8536da9dbf68bc fpsr=08000000
+sqrdmlah v0.2s, v1.2s, v2.2s[2]   vout:36b2a38dcef18acf0e0f01a829ba3c66  vin0:f078b65e01737fd22bfa8f668c8b14f4  vin1:57436a097df30b8daa927a03090dfc6d
+                                  vout:000000000000000039553356b81ed47b  vin0:f078b65e01737fd22bfa8f668c8b14f4  vin1:57436a097df30b8daa927a03090dfc6d fpsr=00000000
+sqrdmlah v0.4s, v1.4s, v2.4s[3]   vout:6d08ed19fa045f841810cd8c109ed568  vin0:1c4a678450562685769ab818a5b7985e  vin1:b984aed62671e865e6f21d40fc7bc013
+                                  vout:5d74fb0dcdc7dcb9d6c1ecfd425568e8  vin0:1c4a678450562685769ab818a5b7985e  vin1:b984aed62671e865e6f21d40fc7bc013 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[0]   vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000
+                                  vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[1]   vout:00000000000000000000000055550000  vin0:00000000000000000000000055550000  vin1:00000000000000000000000055550000
+                                  vout:0000000000000000000000007fff0000  vin0:00000000000000000000000055550000  vin1:00000000000000000000000055550000 fpsr=08000000
+sqrdmlah v0.4h, v1.4h, v2.4h[2]   vout:00000000000000000000aaaa00000000  vin0:00000000000000000000aaaa00000000  vin1:00000000000000000000aaaa00000000
+                                  vout:00000000000000000000e38e00000000  vin0:00000000000000000000aaaa00000000  vin1:00000000000000000000aaaa00000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[3]   vout:0000000000000000ffff000000000000  vin0:0000000000000000ffff000000000000  vin1:0000000000000000ffff000000000000
+                                  vout:0000000000000000ffff000000000000  vin0:0000000000000000ffff000000000000  vin1:0000000000000000ffff000000000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[0]   vout:00000000000000000000000000000123  vin0:00000000000000000000000000000123  vin1:00000000000000000000000000000123
+                                  vout:00000000000000000000000000000126  vin0:00000000000000000000000000000123  vin1:00000000000000000000000000000123 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[1]   vout:000000000000000000000000fedc0000  vin0:000000000000000000000000fedc0000  vin1:000000000000000000000000fedc0000
+                                  vout:000000000000000000000000fedf0000  vin0:000000000000000000000000fedc0000  vin1:000000000000000000000000fedc0000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[2]   vout:00000000000000000000314100000000  vin0:00000000000000000000314100000000  vin1:00000000000000000000314100000000
+                                  vout:00000000000000000000443500000000  vin0:00000000000000000000314100000000  vin1:00000000000000000000314100000000 fpsr=00000000
+sqrdmlah v0.4h, v1.4h, v2.4h[3]   vout:00000000000000002718000000000000  vin0:00000000000000002718000000000000  vin1:00000000000000002718000000000000
+                                  vout:00000000000000003309000000000000  vin0:00000000000000002718000000000000  vin1:00000000000000002718000000000000 fpsr=00000000
+sqrdmlah h0, h1, h2               vout:acb722146c6cbfa9ea4a022e1d3d7dbb  vin0:048612e51a468e36c51cdd8f87e12ab4  vin1:0c05cb6ebd128663d7568e3e8a3ac80e
+                                  vout:00000000000000000000000000006b11  vin0:048612e51a468e36c51cdd8f87e12ab4  vin1:0c05cb6ebd128663d7568e3e8a3ac80e fpsr=00000000
+sqrdmlah s0, s1, s2               vout:6489eab2c96df363d52c4330a7aae391  vin0:c1fbfd8f4d8698c2cb9dfb4ea5d18713  vin1:14575775bc3a12029d8e66ea90352a18
+                                  vout:000000000000000000000000f66e187a  vin0:c1fbfd8f4d8698c2cb9dfb4ea5d18713  vin1:14575775bc3a12029d8e66ea90352a18 fpsr=00000000
+sqrdmlah h3, h4, h5               vout:4784d95987cd4ed80c3ca578a32bd88e  vin0:08aebee85fda964fbba02737f3c98220  vin1:837be65197abe2686b1fba2604afb8d5
+                                  vout:00000000000000000000000000001e8b  vin0:08aebee85fda964fbba02737f3c98220  vin1:837be65197abe2686b1fba2604afb8d5 fpsr=00000000
+sqrdmlah s3, s4, s5               vout:0aaa836b194e242cc5fc3ae904033357  vin0:8ca3e752c306df00caab752f630ff07e  vin1:0e780c65c22b4ab8778d9ed6d9eb46ea
+                                  vout:000000000000000000000000e68a6e36  vin0:8ca3e752c306df00caab752f630ff07e  vin1:0e780c65c22b4ab8778d9ed6d9eb46ea fpsr=00000000
+sqrdmlah h6, h7, h8               vout:61ff7d4df3b6ca8131f01866bd76c58f  vin0:02dd0e32eecfc5fa2c3ffa1aebe6a4d2  vin1:69505d14b27d9d16f25b26e0042fa9fa
+                                  vout:000000000000000000000000000002d6  vin0:02dd0e32eecfc5fa2c3ffa1aebe6a4d2  vin1:69505d14b27d9d16f25b26e0042fa9fa fpsr=00000000
+sqrdmlah s6, s7, s8               vout:0088596389c893fd879d51d4c5c764db  vin0:1e61c5ec52f79c6015e3c8dc7e9273bf  vin1:47086cc3da642fa7130d662777beb4a9
+                                  vout:0000000000000000000000003c30207f  vin0:1e61c5ec52f79c6015e3c8dc7e9273bf  vin1:47086cc3da642fa7130d662777beb4a9 fpsr=00000000
+sqrdmlah h9, h10, h11             vout:9a49ac115048d4c4f987fa170d3ce4dd  vin0:9432a2e46543b956b819f459105730e9  vin1:5da3cfd6aea6558e0c28728e28dc3c9c
+                                  vout:0000000000000000000000000000fc06  vin0:9432a2e46543b956b819f459105730e9  vin1:5da3cfd6aea6558e0c28728e28dc3c9c fpsr=00000000
+sqrdmlah s9, s10, s11             vout:e4450ababbfae0f9bc3127138b19183c  vin0:1755377e9a786f014a6592749579b0f4  vin1:5f2619b1a20662f012305efa0acd1475
+                                  vout:000000000000000000000000821bedee  vin0:1755377e9a786f014a6592749579b0f4  vin1:5f2619b1a20662f012305efa0acd1475 fpsr=00000000
+sqrdmlah h12, h13, h14            vout:918107c43ea20cc00420edac31a0d599  vin0:5cce191e65591384ff4cb613013cc685  vin1:0194ddb82b49abf059a93d4f11d611db
+                                  vout:0000000000000000000000000000cd94  vin0:5cce191e65591384ff4cb613013cc685  vin1:0194ddb82b49abf059a93d4f11d611db fpsr=00000000
+sqrdmlah s12, s13, s14            vout:570037914d04ab3d05d75ec6f616ee9a  vin0:17a0dc273ba9f8030a52741849e54740  vin1:f6f2b14fbb3184b2141625713239066f
+                                  vout:00000000000000000000000013156a40  vin0:17a0dc273ba9f8030a52741849e54740  vin1:f6f2b14fbb3184b2141625713239066f fpsr=00000000
+sqrdmlah h15, h16, h17            vout:e8c72e865de41295f2db8f44cbbf37e2  vin0:fcd015ff8f2e73a3a0fae06860b606c7  vin1:f34428d9c8833f5b78fb29445f3bc8d7
+                                  vout:000000000000000000000000000034f6  vin0:fcd015ff8f2e73a3a0fae06860b606c7  vin1:f34428d9c8833f5b78fb29445f3bc8d7 fpsr=00000000
+sqrdmlah s15, s16, s17            vout:f9da7f07e00794eb00b0940ba5e08516  vin0:be625608d5abd787f5c90ee73af5d7c0  vin1:ac8dd5bbc503330eb9dd5dab8e212ab7
+                                  vout:00000000000000000000000080000000  vin0:be625608d5abd787f5c90ee73af5d7c0  vin1:ac8dd5bbc503330eb9dd5dab8e212ab7 fpsr=08000000
+sqrdmlah h18, h19, h20            vout:3d3cc0784c2f856363d9810079bbabd9  vin0:125934a781e479d33d431279cce48fce  vin1:d4d14e592776b1ef0b40d58cb22d00b1
+                                  vout:0000000000000000000000000000ab3e  vin0:125934a781e479d33d431279cce48fce  vin1:d4d14e592776b1ef0b40d58cb22d00b1 fpsr=00000000
+sqrdmlah s18, s19, s20            vout:69f2843d15223a224edb6a053a967ecf  vin0:acb9433f079dacacabeb000208c90296  vin1:20162517609f0f22a1a7a4c9c0a51f6b
+                                  vout:000000000000000000000000363d52c9  vin0:acb9433f079dacacabeb000208c90296  vin1:20162517609f0f22a1a7a4c9c0a51f6b fpsr=00000000
+sqrdmlah h21, h22, h23            vout:31005fb9ada2074bf63a63fedcb4d29c  vin0:3f871736dc9ac5357446eb65e4e703bb  vin1:445ef059e641a1ccb097e047aacc5b89
+                                  vout:0000000000000000000000000000d547  vin0:3f871736dc9ac5357446eb65e4e703bb  vin1:445ef059e641a1ccb097e047aacc5b89 fpsr=00000000
+sqrdmlah s21, s22, s23            vout:4969e55289753f038f7980d1535979e5  vin0:80c745ef729f1792ccd7e987538166e1  vin1:f4ad41832c22ba116c949cea66e687ae
+                                  vout:0000000000000000000000007fffffff  vin0:80c745ef729f1792ccd7e987538166e1  vin1:f4ad41832c22ba116c949cea66e687ae fpsr=08000000
+sqrdmlah h24, h25, h26            vout:e309aef8a605af130821eb96e737777e  vin0:b5a9377eb31749ef710cf757885d2728  vin1:1f1030333fb8fa4b2feb05cb92ed4f4d
+                                  vout:00000000000000000000000000007fff  vin0:b5a9377eb31749ef710cf757885d2728  vin1:1f1030333fb8fa4b2feb05cb92ed4f4d fpsr=08000000
+sqrdmlah s24, s25, s26            vout:928efefdf9f5ec8d5313bd01b82612e0  vin0:bc36ca100a4a3a7d5127ba1c529aa0bf  vin1:9f043af6a1aed58f1ee978efa4b054d2
+                                  vout:00000000000000000000000080000000  vin0:bc36ca100a4a3a7d5127ba1c529aa0bf  vin1:9f043af6a1aed58f1ee978efa4b054d2 fpsr=08000000
+sqrdmlah h27, h28, h29            vout:2ad7482a960fb2b27014160ebbdb47e4  vin0:a7837c83faf3cb1d360794fec60222d6  vin1:61cd123e19cf1e2bb001f1161e946f5c
+                                  vout:00000000000000000000000000006633  vin0:a7837c83faf3cb1d360794fec60222d6  vin1:61cd123e19cf1e2bb001f1161e946f5c fpsr=00000000
+sqrdmlah s27, s28, s29            vout:7c4e1775412d1d47a8872cb61d8aca05  vin0:2993e139f7d64ff4532f9ae1d7da8010  vin1:19714a711ce1284318b88425f2de758f
+                                  vout:00000000000000000000000021a91e1a  vin0:2993e139f7d64ff4532f9ae1d7da8010  vin1:19714a711ce1284318b88425f2de758f fpsr=00000000
+sqrdmlah h30, h31, h0             vout:3cf6fe426e1281712ef114ddd37570e8  vin0:f76b8d9773b81b24de24e0a879648e11  vin1:7af177f11da748fc8b9145fe16d0390f
+                                  vout:00000000000000000000000000003e1e  vin0:f76b8d9773b81b24de24e0a879648e11  vin1:7af177f11da748fc8b9145fe16d0390f fpsr=00000000
+sqrdmlah s30, s31, s0             vout:1dd493f59184345437d5e366d0e20c30  vin0:c50f1401e45b82d3086a7a39a1e6217d  vin1:3a542e238fe5d1793d1148867eb08f81
+                                  vout:00000000000000000000000080000000  vin0:c50f1401e45b82d3086a7a39a1e6217d  vin1:3a542e238fe5d1793d1148867eb08f81 fpsr=08000000
+sqrdmlah h0, h1, v2.h[0]          vout:d4ec68f21f468712f7b8ab3708137382  vin0:478209dbbd84d92508847c7642a20df9  vin1:0b9c016be95f18de62bba1a11cc04c89
+                                  vout:00000000000000000000000000007bdd  vin0:478209dbbd84d92508847c7642a20df9  vin1:0b9c016be95f18de62bba1a11cc04c89 fpsr=00000000
+sqrdmlah s0, s1, v2.s[0]          vout:1541139c8b1cd0d1a11d81326f4e7880  vin0:30c9028972f8733d11f7fa4450de2529  vin1:a1cd852d9cd970502d146432e64644c9
+                                  vout:0000000000000000000000005f0dbde5  vin0:30c9028972f8733d11f7fa4450de2529  vin1:a1cd852d9cd970502d146432e64644c9 fpsr=00000000
+sqrdmlah h3, h4, v5.h[1]          vout:94d7265949ca62b46a8a793cf9d5f0d1  vin0:35e7926e777aa43f56470887bfdd3daf  vin1:b2ed4ecc1e172df2d3a0a41fce854ae7
+                                  vout:0000000000000000000000000000d8f9  vin0:35e7926e777aa43f56470887bfdd3daf  vin1:b2ed4ecc1e172df2d3a0a41fce854ae7 fpsr=00000000
+sqrdmlah s3, s4, v5.s[1]          vout:09e14df041cdc14f0bf7ba2283e22a31  vin0:f0fdf0aee1dda4e888e2774acbc13287  vin1:f30110c432a534d0478d5d7e053a4e0c
+                                  vout:00000000000000000000000080000000  vin0:f0fdf0aee1dda4e888e2774acbc13287  vin1:f30110c432a534d0478d5d7e053a4e0c fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:62bbc77143b71e92668b24fb9133bf52  vin0:9fedb2229a090d2c018b42f3d3ec8415  vin1:6c11edd5a106e2d655f9b97953917f46
+                                  vout:0000000000000000658249dfadfa3a89  vin0:9fedb2229a090d2c018b42f3d3ec8415  vin1:6c11edd5a106e2d655f9b97953917f46 fpsr=00000000
+sqrdmlsh v0.8h, v1.8h, v2.8h      vout:bf6982b029b396ea4f1e4ed5da99d2ee  vin0:7b813bf15120fbc8683cbc58f8b23fca  vin1:74876ac63afb7562c67d2c86fa7c09a3
+                                  vout:8000800004519ac87df3665eda48ce21  vin0:7b813bf15120fbc8683cbc58f8b23fca  vin1:74876ac63afb7562c67d2c86fa7c09a3 fpsr=08000000
+sqrdmlsh v0.2s, v1.2s, v2.2s      vout:077815d35567232e66c997070e860c39  vin0:109cfa471afbe686e2ede96f8809f947  vin1:9ce5d1a297a56adb474e1bb03bc55073
+                                  vout:000000000000000076fb5cdb468a5f5e  vin0:109cfa471afbe686e2ede96f8809f947  vin1:9ce5d1a297a56adb474e1bb03bc55073 fpsr=00000000
+sqrdmlsh v0.4s, v1.4s, v2.4s      vout:2a1f00ed91e9071d79112f6f64f5079c  vin0:df63bd3c7359f634f791559ff8d88161  vin1:fba1981add7938e3067d74917c37833e
+                                  vout:2902119eb1066221797ea32c6be66494  vin0:df63bd3c7359f634f791559ff8d88161  vin1:fba1981add7938e3067d74917c37833e fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000
+                                  vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:00000000000000005555555555555555  vin0:00000000000000005555555555555555  vin1:00000000000000005555555555555555
+                                  vout:00000000000000001c721c721c721c72  vin0:00000000000000005555555555555555  vin1:00000000000000005555555555555555 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:0000000000000000aaaaaaaaaaaaaaaa  vin0:0000000000000000aaaaaaaaaaaaaaaa  vin1:0000000000000000aaaaaaaaaaaaaaaa
+                                  vout:00000000000000008000800080008000  vin0:0000000000000000aaaaaaaaaaaaaaaa  vin1:0000000000000000aaaaaaaaaaaaaaaa fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:0000000000000000ffffffffffffffff  vin0:0000000000000000ffffffffffffffff  vin1:0000000000000000ffffffffffffffff
+                                  vout:0000000000000000ffffffffffffffff  vin0:0000000000000000ffffffffffffffff  vin1:0000000000000000ffffffffffffffff fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:00000000000000000123012301230123  vin0:00000000000000000123012301230123  vin1:00000000000000000123012301230123
+                                  vout:00000000000000000120012001200120  vin0:00000000000000000123012301230123  vin1:00000000000000000123012301230123 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:0000000000000000fedcfedcfedcfedc  vin0:0000000000000000fedcfedcfedcfedc  vin1:0000000000000000fedcfedcfedcfedc
+                                  vout:0000000000000000fed9fed9fed9fed9  vin0:0000000000000000fedcfedcfedcfedc  vin1:0000000000000000fedcfedcfedcfedc fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:00000000000000003141314131413141  vin0:00000000000000003141314131413141  vin1:00000000000000003141314131413141
+                                  vout:00000000000000001e4d1e4d1e4d1e4d  vin0:00000000000000003141314131413141  vin1:00000000000000003141314131413141 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h      vout:00000000000000002718271827182718  vin0:00000000000000002718271827182718  vin1:00000000000000002718271827182718
+                                  vout:00000000000000001b271b271b271b27  vin0:00000000000000002718271827182718  vin1:00000000000000002718271827182718 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[0]   vout:9cdd1a32cd007ff7daac12cf3a64acbd  vin0:e76fcc086aeb0414a9cd126c0869c6a0  vin1:d973ba438b80fdb556878af3ad4a4cb8
+                                  vout:00000000000000000e5607c4355acf20  vin0:e76fcc086aeb0414a9cd126c0869c6a0  vin1:d973ba438b80fdb556878af3ad4a4cb8 fpsr=00000000
+sqrdmlsh v0.8h, v1.8h, v2.8h[1]   vout:fa0ba48e9db3d6f2c0c135e244f24dfe  vin0:71a4885bc70f501cf18441c67d4b9e45  vin1:95a6e59e2a7fabcb65b86284a1cb27a3
+                                  vout:4dae8000800011e8b618664b7fff0610  vin0:71a4885bc70f501cf18441c67d4b9e45  vin1:95a6e59e2a7fabcb65b86284a1cb27a3 fpsr=08000000
+sqrdmlsh v0.2s, v1.2s, v2.2s[2]   vout:aef4eeb358364f4add55d3bb09c439c9  vin0:3028339e0d3a0c468e8f584ceae94e7a  vin1:e33fad8f313a964967940f284cfce9a3
+                                  vout:000000000000000008f6e02b11e090c9  vin0:3028339e0d3a0c468e8f584ceae94e7a  vin1:e33fad8f313a964967940f284cfce9a3 fpsr=00000000
+sqrdmlsh v0.4s, v1.4s, v2.4s[3]   vout:6c9a8e07714d3d2264ecfe407d2043c1  vin0:d6006035af2e8bb7b3736be34585abe2  vin1:7742a77a117513548f9ea7c3a323665c
+                                  vout:7fffffff7fffffff7fffffff3c59ca12  vin0:d6006035af2e8bb7b3736be34585abe2  vin1:7742a77a117513548f9ea7c3a323665c fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[0]   vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000
+                                  vout:00000000000000000000000000000000  vin0:00000000000000000000000000000000  vin1:00000000000000000000000000000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[1]   vout:00000000000000000000000055550000  vin0:00000000000000000000000055550000  vin1:00000000000000000000000055550000
+                                  vout:0000000000000000000000001c720000  vin0:00000000000000000000000055550000  vin1:00000000000000000000000055550000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[2]   vout:00000000000000000000aaaa00000000  vin0:00000000000000000000aaaa00000000  vin1:00000000000000000000aaaa00000000
+                                  vout:00000000000000000000800000000000  vin0:00000000000000000000aaaa00000000  vin1:00000000000000000000aaaa00000000 fpsr=08000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[3]   vout:0000000000000000ffff000000000000  vin0:0000000000000000ffff000000000000  vin1:0000000000000000ffff000000000000
+                                  vout:0000000000000000ffff000000000000  vin0:0000000000000000ffff000000000000  vin1:0000000000000000ffff000000000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[0]   vout:00000000000000000000000000000123  vin0:00000000000000000000000000000123  vin1:00000000000000000000000000000123
+                                  vout:00000000000000000000000000000120  vin0:00000000000000000000000000000123  vin1:00000000000000000000000000000123 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[1]   vout:000000000000000000000000fedc0000  vin0:000000000000000000000000fedc0000  vin1:000000000000000000000000fedc0000
+                                  vout:000000000000000000000000fed90000  vin0:000000000000000000000000fedc0000  vin1:000000000000000000000000fedc0000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[2]   vout:00000000000000000000314100000000  vin0:00000000000000000000314100000000  vin1:00000000000000000000314100000000
+                                  vout:000000000000000000001e4d00000000  vin0:00000000000000000000314100000000  vin1:00000000000000000000314100000000 fpsr=00000000
+sqrdmlsh v0.4h, v1.4h, v2.4h[3]   vout:00000000000000002718000000000000  vin0:00000000000000002718000000000000  vin1:00000000000000002718000000000000
+                                  vout:00000000000000001b27000000000000  vin0:00000000000000002718000000000000  vin1:00000000000000002718000000000000 fpsr=00000000
+randV128: 256 calls, 266 iters
+sqrdmlsh h0, h1, h2               vout:e70216ec5cbcf49e8a09cb539549408a  vin0:182fa58322b1219295b48e6f81658922  vin1:05b265c33ff4760f125b3d3899837173
+                                  vout:00000000000000000000000000007fff  vin0:182fa58322b1219295b48e6f81658922  vin1:05b265c33ff4760f125b3d3899837173 fpsr=08000000
+sqrdmlsh s0, s1, s2               vout:aaba95edd88623fc68d5d5d393ccbadd  vin0:40947ccd307b129e244ee56d2260de8c  vin1:d2b5bf6419898df003e6fe7283eff6cb
+                                  vout:000000000000000000000000b51ee109  vin0:40947ccd307b129e244ee56d2260de8c  vin1:d2b5bf6419898df003e6fe7283eff6cb fpsr=00000000
+sqrdmlsh h3, h4, h5               vout:3fa5c4d84771e518605a54f56dfe15b7  vin0:ddeb80fe57ce3c26f9fcb34432fe8249  vin1:3b3296ac6d6e4ba4d95578b09e02700d
+                                  vout:00000000000000000000000000007fff  vin0:ddeb80fe57ce3c26f9fcb34432fe8249  vin1:3b3296ac6d6e4ba4d95578b09e02700d fpsr=08000000
+sqrdmlsh s3, s4, s5               vout:8fbc05b829b247cac4e8bba2bda13050  vin0:98bf1ba36919393bc4d999db7390839e  vin1:44d5584589abea635dc49b10189f4c14
+                                  vout:000000000000000000000000a766456f  vin0:98bf1ba36919393bc4d999db7390839e  vin1:44d5584589abea635dc49b10189f4c14 fpsr=00000000
+sqrdmlsh h6, h7, h8               vout:0b0b9f6018e987aeba97106bb88dbd45  vin0:9d5fe4af824eabd8f8f577d6f4dd0223  vin1:d6c08bc57f47f9ba34279d2f35968b0a
+                                  vout:0000000000000000000000000000bf39  vin0:9d5fe4af824eabd8f8f577d6f4dd0223  vin1:d6c08bc57f47f9ba34279d2f35968b0a fpsr=00000000
+sqrdmlsh s6, s7, s8               vout:05dbe25a9a3951f70e8dc8821606fcca  vin0:fe1783322bd1f4a0a92e2587172ec23f  vin1:22d9446284e6ae8126fc5ee9b286181e
+                                  vout:000000000000000000000000240f31d7  vin0:fe1783322bd1f4a0a92e2587172ec23f  vin1:22d9446284e6ae8126fc5ee9b286181e fpsr=00000000
+sqrdmlsh h9, h10, h11             vout:3131620a2265f8c8f64df6cdcb51c286  vin0:6eeb8d90d86668b60a08b6d0cfc59797  vin1:dc2316810c4e5ddd66c8f02281b3c8f2
+                                  vout:0000000000000000000000000000959d  vin0:6eeb8d90d86668b60a08b6d0cfc59797  vin1:dc2316810c4e5ddd66c8f02281b3c8f2 fpsr=00000000
+sqrdmlsh s9, s10, s11             vout:4210b3d32431d146a45cad2eccb0e21a  vin0:a2de962ffdd15c3e50063f9610e753cd  vin1:b7a39486894259f1290e68be98626e2d
+                                  vout:000000000000000000000000da5fd688  vin0:a2de962ffdd15c3e50063f9610e753cd  vin1:b7a39486894259f1290e68be98626e2d fpsr=00000000
+sqrdmlsh h12, h13, h14            vout:ee7d691b146130944d3d038a0b69312c  vin0:4df433720fd7245dafacd5bdced9cd88  vin1:685c54d57186f6e2a353dba0ead5df70
+                                  vout:00000000000000000000000000002455  vin0:4df433720fd7245dafacd5bdced9cd88  vin1:685c54d57186f6e2a353dba0ead5df70 fpsr=00000000
+sqrdmlsh s12, s13, s14            vout:e77b184466b967d624750ac67ebe825f  vin0:2533f6bc813a13365b808a28feded669  vin1:a353e8d137de89d3071b5bad6b52ee61
+                                  vout:0000000000000000000000007fb0f67c  vin0:2533f6bc813a13365b808a28feded669  vin1:a353e8d137de89d3071b5bad6b52ee61 fpsr=00000000
+sqrdmlsh h15, h16, h17            vout:e11053b38ffdcd305e88d8c318f5aa57  vin0:dc9d7472c7c07dee870474bd92394516  vin1:1b8ce6e04f0e66e88ae9fdca101c70a3
+                                  vout:00000000000000000000000000008000  vin0:dc9d7472c7c07dee870474bd92394516  vin1:1b8ce6e04f0e66e88ae9fdca101c70a3 fpsr=08000000
+sqrdmlsh s15, s16, s17            vout:913db0cc02f1b3c72ff97f68cd517cb9  vin0:850ae0642ddae0466041d5d9cb7738db  vin1:2af3bd4b509e6608a513cfe482162be8
+                                  vout:00000000000000000000000099a3f238  vin0:850ae0642ddae0466041d5d9cb7738db  vin1:2af3bd4b509e6608a513cfe482162be8 fpsr=00000000
+sqrdmlsh h18, h19, h20            vout:b903f1b29f411487312d32f1bb069e61  vin0:95d26cc246074b10bda9f7bf92a71bac  vin1:fcefa19f2c8a8cfd3989634f2a294a7c
+                                  vout:00000000000000000000000000008e47  vin0:95d26cc246074b10bda9f7bf92a71bac  vin1:fcefa19f2c8a8cfd3989634f2a294a7c fpsr=00000000
+sqrdmlsh s18, s19, s20            vout:470818041ac5e9b218db305838ff3248  vin0:06ced856b4d04648a668c3da0fcbe652  vin1:39d4db0931b25e927a9632b68f624628
+                                  vout:00000000000000000000000046e512d8  vin0:06ced856b4d04648a668c3da0fcbe652  vin1:39d4db0931b25e927a9632b68f624628 fpsr=00000000
+sqrdmlsh h21, h22, h23            vout:764f859cf68f4679dab3699f129680a9  vin0:fc95f5d55c34e70e2034036b2540d210  vin1:32746a5ace2a448f4d76dd08966fd815
+                                  vout:00000000000000000000000000008000  vin0:fc95f5d55c34e70e2034036b2540d210  vin1:32746a5ace2a448f4d76dd08966fd815 fpsr=08000000
+sqrdmlsh s21, s22, s23            vout:b00b3cdf75747e60035ee161b2ddaa1e  vin0:92478e7f987ac472db7137e460cce35a  vin1:2915227d7d3b3371fe1c6a2981899c14
+                                  vout:0000000000000000000000001280e25d  vin0:92478e7f987ac472db7137e460cce35a  vin1:2915227d7d3b3371fe1c6a2981899c14 fpsr=00000000
+sqrdmlsh h24, h25, h26            vout:7be936badd6630980aa27329b5b3ecd2  vin0:d2bc96d6b1a87f5bc30eedfc43f567c8  vin1:ded3251e3f2e1bf337f62011aebf77d2
+                                  vout:00000000000000000000000000008bac  vin0:d2bc96d6b1a87f5bc30eedfc43f567c8  vin1:ded3251e3f2e1bf337f62011aebf77d2 fpsr=00000000
+sqrdmlsh s24, s25, s26            vout:6c7f80e89ebd80a5e34bca20163ac21e  vin0:e06c5cc8e1357d72cece7967d1f50cd5  vin1:4fd7e326d29b74541ae5bf20bcc2f9c2
+                                  vout:000000000000000000000000fe0b135f  vin0:e06c5cc8e1357d72cece7967d1f50cd5  vin1:4fd7e326d29b74541ae5bf20bcc2f9c2 fpsr=00000000
+sqrdmlsh h27, h28, h29            vout:190c026f4f4108bb97f152ac79a338e2  vin0:082a07b97ea580d954e0244c1dcf60e0  vin1:b87fb552d02120cc96fce910c815b7b5
+                                  vout:00000000000000000000000000006f99  vin0:082a07b97ea580d954e0244c1dcf60e0  vin1:b87fb552d02120cc96fce910c815b7b5 fpsr=00000000
+sqrdmlsh s27, s28, s29            vout:35954eb164b81a015d181eb0d13422c0  vin0:fefa2b0bfdbeddb488c900901dc5368c  vin1:cccf2d05af86747edec1b4c5c4fa8650
+                                  vout:000000000000000000000000deee4fe6  vin0:fefa2b0bfdbeddb488c900901dc5368c  vin1:cccf2d05af86747edec1b4c5c4fa8650 fpsr=00000000
+sqrdmlsh h30, h31, h0             vout:751dfa1352e40c98674442111330555e  vin0:76df5c23d344e7279f0d2317c41d637d  vin1:40c9e0a4e28cc38e27b63222a6b73935
+                                  vout:000000000000000000000000000028e7  vin0:76df5c23d344e7279f0d2317c41d637d  vin1:40c9e0a4e28cc38e27b63222a6b73935 fpsr=00000000
+sqrdmlsh s30, s31, s0             vout:23de2e6573f9f357cd2f9fc5071aba58  vin0:c8746293ddf96221a55f780d618fa50b  vin1:16458560adcdd7091db23c3834cb4d4d
+                                  vout:000000000000000000000000dedd6a91  vin0:c8746293ddf96221a55f780d618fa50b  vin1:16458560adcdd7091db23c3834cb4d4d fpsr=00000000
+sqrdmlsh h0, h1, v2.h[0]          vout:17d247361590a45a8c419b68e9c69d73  vin0:23de85e7f3ba676cd7ca3327879cb597  vin1:9a985ec5f0031343f3185309c7b360a0
+                                  vout:0000000000000000000000000000d59f  vin0:23de85e7f3ba676cd7ca3327879cb597  vin1:9a985ec5f0031343f3185309c7b360a0 fpsr=00000000
+sqrdmlsh s0, s1, v2.s[0]          vout:e2e823f1fc15de5d0fe0ad1832a0f513  vin0:0a452b2c674cbddfcbf508515b068b9e  vin1:6109ca6565cab2e77d69475df9b640b0
+                                  vout:0000000000000000000000003719b567  vin0:0a452b2c674cbddfcbf508515b068b9e  vin1:6109ca6565cab2e77d69475df9b640b0 fpsr=00000000
+sqrdmlsh h3, h4, v5.h[1]          vout:ddb98a28084c634f63bfc3013161828e  vin0:7e7d09937d452c872eb7cf99a14da407  vin1:94e09c4d7a2fb98594259c37dc0df227
+                                  vout:00000000000000000000000000008000  vin0:7e7d09937d452c872eb7cf99a14da407  vin1:94e09c4d7a2fb98594259c37dc0df227 fpsr=08000000
+sqrdmlsh s3, s4, v5.s[1]          vout:bc4a103eacf98853bc63f107d94d1889  vin0:348ab47fa96b098734939ce54eb5d374  vin1:e6246ae1a4f77a426cd3657964fa47a9
+                                  vout:0000000000000000000000009661afff  vin0:348ab47fa96b098734939ce54eb5d374  vin1:e6246ae1a4f77a426cd3657964fa47a9 fpsr=00000000
diff --git a/none/tests/arm64/simd_v81.vgtest b/none/tests/arm64/simd_v81.vgtest
new file mode 100644 (file)
index 0000000..38549f2
--- /dev/null
@@ -0,0 +1,3 @@
+prog: simd_v81
+prereq: test -x simd_v81
+vgopts: -q