]> git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
s390x: Add support for NNPA facility vector instructions
author Andreas Arnez <arnez@linux.ibm.com>
Wed, 27 Mar 2024 11:44:37 +0000 (12:44 +0100)
committer Andreas Arnez <arnez@linux.ibm.com>
Thu, 28 Mar 2024 18:11:43 +0000 (19:11 +0100)
Add support for the vector instructions introduced with the NNPA facility,
i.e., VCNF, VCLFNH, VCFN, VCLFNL, and VCRNF.  These instructions convert
floating-point data between the usual binary FP format (IEEE) and the
NNPA-specific format.  Implement them with dirty helpers.

The NNPA instruction itself will be addressed with a separate patch.

VEX/priv/guest_s390_defs.h
VEX/priv/guest_s390_helpers.c
VEX/priv/guest_s390_toIR.c
VEX/priv/host_s390_defs.h
VEX/pub/libvex.h
VEX/pub/libvex_s390x_common.h
coregrind/m_machine.c
include/vki/vki-s390x-linux.h

index 24f3798c1ac03ff9d0e3556bf476ebfb5d71b4d6..1436ce3e8e23d39ce75cf08d1639e11c7746a839 100644 (file)
@@ -278,6 +278,11 @@ typedef enum {
    S390_VEC_OP_VFMAX,
    S390_VEC_OP_VBPERM,
    S390_VEC_OP_VMSL,
+   S390_VEC_OP_VCNF,
+   S390_VEC_OP_VCLFNH,
+   S390_VEC_OP_VCFN,
+   S390_VEC_OP_VCLFNL,
+   S390_VEC_OP_VCRNF,
    S390_VEC_OP_LAST             // supposed to be the last element in enum
 } s390x_vec_op_t;
 
@@ -295,12 +300,13 @@ typedef union {
       unsigned int v4 : 5;        // argument two of operation or
                                   // zero for unary and binary operations
 
+      unsigned int m3 : 4;        // field m3 of insn or zero if it's missing
       unsigned int m4 : 4;        // field m4 of insn or zero if it's missing
       unsigned int m5 : 4;        // field m5 of insn or zero if it's missing
       unsigned int m6 : 4;        // field m6 of insn or zero if it's missing
       unsigned int i3 : 12;       // field i3 of insn or zero if it's missing
       unsigned int read_only: 1;  // don't write result to Guest State
-      unsigned int reserved : 11; // reserved for future
+      unsigned int reserved : 7; // reserved for future
    };
    ULong serialized;
 } s390x_vec_op_details_t;
index 008f8c6c8c2189154dbecf2730a893a9426138a0..d60e4ce3c0c8b05732cf8ecd33d37481acd81846 100644 (file)
@@ -2593,6 +2593,11 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
       [S390_VEC_OP_VFMAX] = {0xe7, 0xef},
       [S390_VEC_OP_VBPERM]= {0xe7, 0x85},
       [S390_VEC_OP_VMSL]  = {0xe7, 0xb8},
+      [S390_VEC_OP_VCNF]  = {0xe6, 0x55},
+      [S390_VEC_OP_VCLFNH]= {0xe6, 0x56},
+      [S390_VEC_OP_VCFN]  = {0xe6, 0x5d},
+      [S390_VEC_OP_VCLFNL]= {0xe6, 0x5e},
+      [S390_VEC_OP_VCRNF] = {0xe6, 0x75},
    };
 
    union {
@@ -2632,6 +2637,16 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
          UInt rxb : 4;
          UInt op2 : 8;
       } VRRc;
+      struct {
+         UInt op1 : 8;
+         UInt v1  : 4;
+         UInt v2  : 4;
+         UInt     : 12;
+         UInt m4  : 4;
+         UInt m3  : 4;
+         UInt rxb : 4;
+         UInt op2 : 8;
+      } VRRa;
       struct {
          UInt op1 : 8;
          UInt v1  : 4;
@@ -2687,6 +2702,7 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
    case S390_VEC_OP_VFMIN:
    case S390_VEC_OP_VFMAX:
    case S390_VEC_OP_VBPERM:
+   case S390_VEC_OP_VCRNF:
       the_insn.VRRc.v1 = 1;
       the_insn.VRRc.v2 = 2;
       the_insn.VRRc.v3 = 3;
@@ -2696,6 +2712,17 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
       the_insn.VRRc.m6 = d->m6;
       break;
 
+   case S390_VEC_OP_VCNF:
+   case S390_VEC_OP_VCLFNH:
+   case S390_VEC_OP_VCFN:
+   case S390_VEC_OP_VCLFNL:
+      the_insn.VRRa.v1 = 1;
+      the_insn.VRRa.v2 = 2;
+      the_insn.VRRa.rxb = 0b1100;
+      the_insn.VRRa.m3 = d->m3;
+      the_insn.VRRa.m4 = d->m4;
+      break;
+
    case S390_VEC_OP_VFTCI:
       the_insn.VRIe.v1 = 1;
       the_insn.VRIe.v2 = 2;
index 81cce9fb48e98c82c3f38e72dcb947745a66f0dd..ceb4084ace314adbb3ad069dcda258238c3f3d32 100644 (file)
@@ -19723,6 +19723,113 @@ s390_irgen_VSTEBRG(UChar v1, IRTemp op2addr, UChar m3)
    return "vstebrg";
 }
 
+static const HChar *
+s390_irgen_VCxx(const HChar *mnem, s390x_vec_op_details_t details,
+                UShort v2_offs, UShort v2_size)
+{  /* Shared IR generator behind s390_irgen_VCNF, _VCLFNH, _VCFN and _VCLFNL.
+      Emits a dirty call to s390x_dirtyhelper_vec_op, passing the guest state
+      pointer and the serialized DETAILS, and annotates the call with the
+      guest state it touches:
+        - a read of V2_SIZE bytes, starting V2_OFFS bytes past
+          offset(guest_v0) + details.v2 * sizeof(V128);
+        - a write of sizeof(V128) bytes at
+          offset(guest_v0) + details.v1 * sizeof(V128).
+      NOTE(review): these are presumably the guest-state slots of vector
+      registers v2 and v1 -- confirm against the guest state layout.
+      Returns MNEM unchanged. */
+   s390_insn_assert(mnem, s390_host_has_nnpa); /* needs host NNPA facility */
+
+   IRDirty* d = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_vec_op",
+                                  &s390x_dirtyhelper_vec_op,
+                                  mkIRExprVec_2(IRExpr_GSPTR(),
+                                                mkU64(details.serialized)));
+   d->nFxState = 2;   /* one read (source) plus one write (destination) */
+   vex_bzero(&d->fxState, sizeof(d->fxState));   /* clear before filling */
+   d->fxState[0].fx = Ifx_Read;
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0)
+      + details.v2 * sizeof(V128) + v2_offs;   /* source, shifted by v2_offs */
+   d->fxState[0].size = v2_size;
+   d->fxState[1].fx = Ifx_Write;
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0)
+      + details.v1 * sizeof(V128);             /* destination */
+   d->fxState[1].size = sizeof(V128);
+
+   stmt(IRStmt_Dirty(d));
+   return mnem;
+}
+
+static const HChar *
+s390_irgen_VCNF(UChar v1, UChar v2, UChar m3, UChar m4)
+{  /* IR generator for VCNF: packs the operands into a vec-op descriptor and
+      defers to s390_irgen_VCxx, declaring all sizeof(V128) bytes of the
+      source (v2) as read.  Returns the mnemonic "vcnf". */
+   s390x_vec_op_details_t details = { .serialized = 0ULL }; /* all fields 0 */
+   details.op = S390_VEC_OP_VCNF;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.m3 = m3;
+   details.m4 = m4;
+   return s390_irgen_VCxx("vcnf", details, 0, sizeof(V128));
+}
+
+static const HChar *
+s390_irgen_VCLFNH(UChar v1, UChar v2, UChar m3, UChar m4)
+{  /* IR generator for VCLFNH: packs the operands into a vec-op descriptor
+      and defers to s390_irgen_VCxx, declaring only the first
+      sizeof(V128) / 2 bytes of the source (v2) as read.
+      NOTE(review): presumably this is the "high" half the mnemonic refers
+      to -- confirm.  Returns the mnemonic "vclfnh". */
+   s390x_vec_op_details_t details = { .serialized = 0ULL }; /* all fields 0 */
+   details.op = S390_VEC_OP_VCLFNH;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.m3 = m3;
+   details.m4 = m4;
+   return s390_irgen_VCxx("vclfnh", details, 0, sizeof(V128) / 2);
+}
+
+static const HChar *
+s390_irgen_VCFN(UChar v1, UChar v2, UChar m3, UChar m4)
+{  /* IR generator for VCFN: packs the operands into a vec-op descriptor and
+      defers to s390_irgen_VCxx, declaring all sizeof(V128) bytes of the
+      source (v2) as read.  Returns the mnemonic "vcfn". */
+   s390x_vec_op_details_t details = { .serialized = 0ULL }; /* all fields 0 */
+   details.op = S390_VEC_OP_VCFN;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.m3 = m3;
+   details.m4 = m4;
+   return s390_irgen_VCxx("vcfn", details, 0, sizeof(V128));
+}
+
+static const HChar *
+s390_irgen_VCLFNL(UChar v1, UChar v2, UChar m3, UChar m4)
+{  /* IR generator for VCLFNL: packs the operands into a vec-op descriptor
+      and defers to s390_irgen_VCxx, declaring only the second
+      sizeof(V128) / 2 bytes of the source (v2) as read, i.e. the read
+      starts sizeof(V128) / 2 bytes in.
+      NOTE(review): presumably this is the "low" half the mnemonic refers
+      to -- confirm.  Returns the mnemonic "vclfnl". */
+   s390x_vec_op_details_t details = { .serialized = 0ULL }; /* all fields 0 */
+   details.op = S390_VEC_OP_VCLFNL;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.m3 = m3;
+   details.m4 = m4;
+   return s390_irgen_VCxx("vclfnl", details, sizeof(V128) / 2,
+                          sizeof(V128) / 2);
+}
+
+static const HChar *
+s390_irgen_VCRNF(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+{  /* IR generator for VCRNF, which takes two source vector registers
+      (v2, v3) and one destination (v1).  Emits a dirty call to
+      s390x_dirtyhelper_vec_op with the guest state pointer and the
+      serialized operation details, and annotates it with sizeof(V128)-byte
+      reads for v2 and v3 and a sizeof(V128)-byte write for v1, each located
+      at offset(guest_v0) + <register number> * sizeof(V128).
+      Returns "vcrnf". */
+   s390_insn_assert("vcrnf", s390_host_has_nnpa); /* needs host NNPA facility */
+
+   s390x_vec_op_details_t details = { .serialized = 0ULL }; /* all fields 0 */
+   details.op = S390_VEC_OP_VCRNF;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.v3 = v3;
+   details.m4 = m4;
+   details.m5 = m5;
+   details.m6 = 0;   /* explicit: helper's VRRc case appears to copy m6 too */
+   IRDirty* d = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_vec_op",
+                                  &s390x_dirtyhelper_vec_op,
+                                  mkIRExprVec_2(IRExpr_GSPTR(),
+                                                mkU64(details.serialized)));
+   d->nFxState = 3;   /* two reads (v2, v3) plus one write (v1) */
+   vex_bzero(&d->fxState, sizeof(d->fxState));   /* clear before filling */
+   d->fxState[0].fx = Ifx_Read;    /* first source: v2 */
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+   d->fxState[0].size = sizeof(V128);
+   d->fxState[1].fx = Ifx_Read;    /* second source: v3 */
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+   d->fxState[1].size = sizeof(V128);
+   d->fxState[2].fx = Ifx_Write;   /* destination: v1 */
+   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[2].size = sizeof(V128);
+
+   stmt(IRStmt_Dirty(d));
+   return "vcrnf";
+}
+
 /* New insns are added here.
    If an insn is contingent on a facility being installed also
    check whether the list of supported facilities in function
@@ -21415,6 +21522,22 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
    case 0xe60000000049ULL: /* VLIP */ goto unimplemented;
    case 0xe60000000050ULL: /* VCVB */ goto unimplemented;
    case 0xe60000000052ULL: /* VCVBG */ goto unimplemented;
+   case 0xe60000000055ULL: s390_format_VRRa_VVMM(s390_irgen_VCNF,
+                                                 VRRa_v1(ovl), VRRa_v2(ovl),
+                                                 VRRa_m3(ovl), VRRa_m4(ovl),
+                                                 VRRa_rxb(ovl));  goto ok;
+   case 0xe60000000056ULL: s390_format_VRRa_VVMM(s390_irgen_VCLFNH,
+                                                 VRRa_v1(ovl), VRRa_v2(ovl),
+                                                 VRRa_m3(ovl), VRRa_m4(ovl),
+                                                 VRRa_rxb(ovl));  goto ok;
+   case 0xe6000000005dULL: s390_format_VRRa_VVMM(s390_irgen_VCFN,
+                                                 VRRa_v1(ovl), VRRa_v2(ovl),
+                                                 VRRa_m3(ovl), VRRa_m4(ovl),
+                                                 VRRa_rxb(ovl));  goto ok;
+   case 0xe6000000005eULL: s390_format_VRRa_VVMM(s390_irgen_VCLFNL,
+                                                 VRRa_v1(ovl), VRRa_v2(ovl),
+                                                 VRRa_m3(ovl), VRRa_m4(ovl),
+                                                 VRRa_rxb(ovl));  goto ok;
    case 0xe60000000058ULL: /* VCVD */ goto unimplemented;
    case 0xe60000000059ULL: /* VSRP */ goto unimplemented;
    case 0xe6000000005aULL: /* VCVDG */ goto unimplemented;
@@ -21422,6 +21545,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
    case 0xe6000000005fULL: /* VTP */ goto unimplemented;
    case 0xe60000000071ULL: /* VAP */ goto unimplemented;
    case 0xe60000000073ULL: /* VSP */ goto unimplemented;
+   case 0xe60000000075ULL: s390_format_VRRa_VVVMM(s390_irgen_VCRNF,
+                                                  VRRa_v1(ovl), VRRa_v2(ovl),
+                                                  VRRa_v3(ovl),
+                                                  VRRa_m3(ovl), VRRa_m4(ovl),
+                                                  VRRa_rxb(ovl)); goto ok;
    case 0xe60000000077ULL: /* VCP */ goto unimplemented;
    case 0xe60000000078ULL: /* VMP */ goto unimplemented;
    case 0xe60000000079ULL: /* VMSP */ goto unimplemented;
index 375cc8402af4ae0bca6e3c0eba5f085ef7f73323..912db61041424b9094594a38b0ff1a80249c16d8 100644 (file)
@@ -943,6 +943,8 @@ extern UInt s390_host_hwcaps;
                       (s390_host_hwcaps & (VEX_HWCAPS_S390X_LSC2))
 #define s390_host_has_vxe \
                       (s390_host_hwcaps & (VEX_HWCAPS_S390X_VXE))
+#define s390_host_has_nnpa \
+                      (s390_host_hwcaps & (VEX_HWCAPS_S390X_NNPA))
 #endif /* ndef __VEX_HOST_S390_DEFS_H */
 
 /*---------------------------------------------------------------*/
index 27bb6e0f4624fe467f8cd17b7fdb38e7b33e1b40..42c013c1ece6da9700b7aa5e0dc8a13e865f5ba5 100644 (file)
@@ -175,6 +175,7 @@ typedef
 #define VEX_HWCAPS_S390X_MI2   (1<<20)  /* miscellaneous-instruction-extensions facility 2 */
 #define VEX_HWCAPS_S390X_LSC2  (1<<21)  /* Conditional load/store facility2 */
 #define VEX_HWCAPS_S390X_VXE   (1<<22)  /* Vector-enhancements facility */
+#define VEX_HWCAPS_S390X_NNPA  (1<<23)  /* NNPA facility */
 
 /* Special value representing all available s390x hwcaps */
 #define VEX_HWCAPS_S390X_ALL   (VEX_HWCAPS_S390X_LDISP | \
@@ -193,7 +194,8 @@ typedef
                                 VEX_HWCAPS_S390X_MSA5  | \
                                 VEX_HWCAPS_S390X_MI2   | \
                                 VEX_HWCAPS_S390X_LSC2  | \
-                                VEX_HWCAPS_S390X_VXE)
+                                VEX_HWCAPS_S390X_VXE   | \
+                                VEX_HWCAPS_S390X_NNPA)
 
 #define VEX_HWCAPS_S390X(x)  ((x) & ~VEX_S390X_MODEL_MASK)
 #define VEX_S390X_MODEL(x)   ((x) &  VEX_S390X_MODEL_MASK)
index 0fbe4145a9e2e181200432359870f61c0d591193..256541b568737c9c90cbe42a63d9aab8a714fd73 100644 (file)
 #define S390_FAC_VXE     135 // vector enhancements facility 1
 #define S390_FAC_VXE2    148 // vector enhancements facility 2
 #define S390_FAC_DFLT    151 // deflate-conversion facility
+#define S390_FAC_NNPA    165 // NNPA facility
 
 
 /*--------------------------------------------------------------*/
index a4c2218bfb575b4126344caa132b870af0884589..079383651226e12cd4e2bf3a78f79a35c7ba1a14 100644 (file)
@@ -1591,6 +1591,7 @@ Bool VG_(machine_get_hwcaps)( void )
         { False, S390_FAC_MI2,   VEX_HWCAPS_S390X_MI2,   "MI2"   },
         { False, S390_FAC_LSC2,  VEX_HWCAPS_S390X_LSC2,  "LSC2"  },
         { False, S390_FAC_VXE,   VEX_HWCAPS_S390X_VXE,   "VXE"   },
+        { False, S390_FAC_NNPA,  VEX_HWCAPS_S390X_NNPA,  "NNPA"  },
      };
 
      /* Set hwcaps according to the detected facilities */
index 9d1cf1dde73a1bc4d905242c9bb7b05a16b4dd11..829382f7b2b6e8b7b99b520610d613abbe10f922 100644 (file)
@@ -809,6 +809,7 @@ typedef vki_s390_regs vki_elf_gregset_t;
 #define VKI_HWCAP_S390_VXRS         2048
 #define VKI_HWCAP_S390_VXRS_EXT     8192
 #define VKI_HWCAP_S390_VXRS_EXT2   32768
+#define VKI_HWCAP_S390_NNPA       (1<<20)
 
 
 //----------------------------------------------------------------------