From e05f55c77c4a330283778cfea042b6b18312e23d Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 30 Nov 2022 19:15:53 +0100
Subject: [PATCH] s390: Fix VFMA, VFMS, VFNMA, and VFNMS for 128-bit FP

When trying to execute any instruction of the vector float
multiply-and-add family (VFMA, VFMS, VFNMA, VFNMS) with 128-bit operands,
Valgrind panics with the message "Iex_Qop with F128 data".

So far, all F128 operations were implemented with instructions that
operate on FP register pairs.  However, no such instructions exist for
Iop_MAddF128 and Iop_MSubF128, so these two operations must be
implemented with vector instructions instead.  This was missed when
emitting these operations in "guest_s390_toIR.c".

Add the missing support.  This also involves adding a few new features to
host_s390_defs:

* a new vector operation S390_VEC_INIT_FROM_FPRS

* the capability to move the left half of a VR to an FPR

* S390_VEC_FLOAT_MADD/_MSUB for 128-bit sizes
---
 VEX/priv/host_s390_defs.c | 15 ++++++++++---
 VEX/priv/host_s390_defs.h |  1 +
 VEX/priv/host_s390_isel.c | 46 +++++++++++++++++++++++++++++++++++++--
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
index 239d9d2997..ac5ca6cbe8 100644
--- a/VEX/priv/host_s390_defs.c
+++ b/VEX/priv/host_s390_defs.c
@@ -8356,6 +8356,7 @@ s390_insn_as_string(const s390_insn *insn)
       case S390_VEC_PWSUM_DW:         op = "v-vpwsumdw"; break;
       case S390_VEC_PWSUM_QW:         op = "v-vpwsumqw"; break;
       case S390_VEC_INIT_FROM_GPRS:   op = "v-vinitfromgprs"; break;
+      case S390_VEC_INIT_FROM_FPRS:   op = "v-vinitfromfprs"; break;
       case S390_VEC_FLOAT_ADD:        op = "v-vfloatadd"; break;
       case S390_VEC_FLOAT_SUB:        op = "v-vfloatsub"; break;
       case S390_VEC_FLOAT_MUL:        op = "v-vfloatmul"; break;
@@ -8772,6 +8773,9 @@ s390_insn_move_emit(UChar *buf, const s390_insn *insn)
             return s390_emit_LGDRw(buf, dst, src);
          }
       }
+      if (dst_class == HRcFlt64 && src_class == HRcVec128) {
+         return s390_emit_VLR(buf, dst, src);
+      }
       /* A move between floating point registers and general purpose
          registers of different size should never occur and indicates
          an error elsewhere. */
@@ -11692,6 +11696,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn)
          return s390_emit_VSUMQ(buf, v1, v2, v3, s390_getM_from_size(size));
       case S390_VEC_INIT_FROM_GPRS:
          return s390_emit_VLVGP(buf, v1, v2, v3);
+      case S390_VEC_INIT_FROM_FPRS:
+         return s390_emit_VMRH(buf, v1, v2, v3, 3);
       case S390_VEC_FLOAT_ADD:
          return s390_emit_VFA(buf, v1, v2, v3, s390_getM_from_size(size), 0);
       case S390_VEC_FLOAT_SUB:
@@ -11722,6 +11728,7 @@ static UChar *
 s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
 {
    s390_vec_triop_t tag = insn->variant.vec_triop.tag;
+   UChar size = insn->size;
    UChar v1 = hregNumber(insn->variant.vec_triop.dst);
    UChar v2 = hregNumber(insn->variant.vec_triop.op1);
    UChar v3 = hregNumber(insn->variant.vec_triop.op2);
@@ -11729,13 +11736,15 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
 
    switch (tag) {
       case S390_VEC_PERM: {
-         vassert(insn->size == 16);
+         vassert(size == 16);
          return s390_emit_VPERM(buf, v1, v2, v3, v4);
       }
       case S390_VEC_FLOAT_MADD:
-         return s390_emit_VFMA(buf, v1, v2, v3, v4, 0, 3);
+         return s390_emit_VFMA(buf, v1, v2, v3, v4, 0,
+                               s390_getM_from_size(size));
       case S390_VEC_FLOAT_MSUB:
-         return s390_emit_VFMS(buf, v1, v2, v3, v4, 0, 3);
+         return s390_emit_VFMS(buf, v1, v2, v3, v4, 0,
+                               s390_getM_from_size(size));
       default:
          goto fail;
    }
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
index dc116106ef..375cc8402a 100644
--- a/VEX/priv/host_s390_defs.h
+++ b/VEX/priv/host_s390_defs.h
@@ -402,6 +402,7 @@ typedef enum {
    S390_VEC_PWSUM_QW,
 
    S390_VEC_INIT_FROM_GPRS,
+   S390_VEC_INIT_FROM_FPRS,
    S390_VEC_FLOAT_ADD,
    S390_VEC_FLOAT_SUB,
    S390_VEC_FLOAT_MUL,
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
index 32c209edb5..3ae7c07c07 100644
--- a/VEX/priv/host_s390_isel.c
+++ b/VEX/priv/host_s390_isel.c
@@ -2153,8 +2153,47 @@ s390_isel_float128_expr_wrk(HReg *dst_hi, HReg *dst_lo, ISelEnv *env,
       vpanic("Iex_Get with F128 data");
 
       /* --------- 4-ary OP --------- */
-   case Iex_Qop:
-      vpanic("Iex_Qop with F128 data");
+   case Iex_Qop: {
+      IRQop *qop = expr->Iex.Qop.details;
+      s390_vec_triop_t vecop;
+      HReg op1_hi, op1_lo, op2_hi, op2_lo, op3_hi, op3_lo;
+      HReg dst, dstv_lo, op1, op2, op3;
+
+      s390_isel_float128_expr(&op1_hi, &op1_lo, env, qop->arg2);
+      s390_isel_float128_expr(&op2_hi, &op2_lo, env, qop->arg3);
+      s390_isel_float128_expr(&op3_hi, &op3_lo, env, qop->arg4);
+
+      /* Cannot carry out with FPRs; move operands to VRs instead. */
+      op1 = newVRegV(env);
+      op2 = newVRegV(env);
+      op3 = newVRegV(env);
+      dst = newVRegV(env);
+      dstv_lo = newVRegV(env);
+      addInstr(env, s390_insn_vec_binop(8, S390_VEC_INIT_FROM_FPRS,
+                                        op1, op1_hi, op1_lo));
+      addInstr(env, s390_insn_vec_binop(8, S390_VEC_INIT_FROM_FPRS,
+                                        op2, op2_hi, op2_lo));
+      addInstr(env, s390_insn_vec_binop(8, S390_VEC_INIT_FROM_FPRS,
+                                        op3, op3_hi, op3_lo));
+
+      switch (qop->op) {
+      case Iop_MAddF128: vecop = S390_VEC_FLOAT_MADD; break;
+      case Iop_MSubF128: vecop = S390_VEC_FLOAT_MSUB; break;
+      default:
+         goto irreducible;
+      }
+
+      set_bfp_rounding_mode_in_fpc(env, qop->arg1);
+      addInstr(env, s390_insn_vec_triop(16, vecop, dst, op1, op2, op3));
+      addInstr(env, s390_insn_vec_binop(8, S390_VEC_MERGEL, dstv_lo, dst, dst));
+
+      /* Move result to destination FPRs. */
+      *dst_hi = newVRegF(env);
+      *dst_lo = newVRegF(env);
+      addInstr(env, s390_insn_move(8, *dst_hi, dst));
+      addInstr(env, s390_insn_move(8, *dst_lo, dstv_lo));
+      return;
+   }
 
       /* --------- TERNARY OP --------- */
    case Iex_Triop: {
@@ -4797,6 +4836,9 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
       }
    }
 
+   case Iex_Qop: {
+   }
+
    /* --------- MULTIPLEX --------- */
    case Iex_ITE: {
       IRExpr *cond_expr = expr->Iex.ITE.cond;
-- 
2.47.2