From: Andreas Arnez Date: Wed, 27 Mar 2024 11:44:37 +0000 (+0100) Subject: s390x: Add support for NNPA facility vector instructions X-Git-Tag: VALGRIND_3_23_0~86 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c502a01b4bcba4b30db0dd166700b9be7bae832d;p=thirdparty%2Fvalgrind.git s390x: Add support for NNPA facility vector instructions Add support for the vector instructions introduced with the NNPA facility, i.e., VCNF, VCLFNH, VCFN, VCLFNL, and VCRNF. These instructions convert floating-point data between the usual binary FP format (IEEE) and the NNPA-specific format. Implement them with dirty helpers. The NNPA instruction itself will be addressed with a separate patch. --- diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h index 24f3798c1..1436ce3e8 100644 --- a/VEX/priv/guest_s390_defs.h +++ b/VEX/priv/guest_s390_defs.h @@ -278,6 +278,11 @@ typedef enum { S390_VEC_OP_VFMAX, S390_VEC_OP_VBPERM, S390_VEC_OP_VMSL, + S390_VEC_OP_VCNF, + S390_VEC_OP_VCLFNH, + S390_VEC_OP_VCFN, + S390_VEC_OP_VCLFNL, + S390_VEC_OP_VCRNF, S390_VEC_OP_LAST // supposed to be the last element in enum } s390x_vec_op_t; @@ -295,12 +300,13 @@ typedef union { unsigned int v4 : 5; // argument two of operation or // zero for unary and binary operations + unsigned int m3 : 4; // field m3 of insn or zero if it's missing unsigned int m4 : 4; // field m4 of insn or zero if it's missing unsigned int m5 : 4; // field m5 of insn or zero if it's missing unsigned int m6 : 4; // field m6 of insn or zero if it's missing unsigned int i3 : 12; // field i3 of insn or zero if it's missing unsigned int read_only: 1; // don't write result to Guest State - unsigned int reserved : 11; // reserved for future + unsigned int reserved : 7; // reserved for future }; ULong serialized; } s390x_vec_op_details_t; diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c index 008f8c6c8..d60e4ce3c 100644 --- a/VEX/priv/guest_s390_helpers.c +++ 
b/VEX/priv/guest_s390_helpers.c @@ -2593,6 +2593,11 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, [S390_VEC_OP_VFMAX] = {0xe7, 0xef}, [S390_VEC_OP_VBPERM]= {0xe7, 0x85}, [S390_VEC_OP_VMSL] = {0xe7, 0xb8}, + [S390_VEC_OP_VCNF] = {0xe6, 0x55}, + [S390_VEC_OP_VCLFNH]= {0xe6, 0x56}, + [S390_VEC_OP_VCFN] = {0xe6, 0x5d}, + [S390_VEC_OP_VCLFNL]= {0xe6, 0x5e}, + [S390_VEC_OP_VCRNF] = {0xe6, 0x75}, }; union { @@ -2632,6 +2637,16 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, UInt rxb : 4; UInt op2 : 8; } VRRc; + struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt : 12; + UInt m4 : 4; + UInt m3 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRRa; struct { UInt op1 : 8; UInt v1 : 4; @@ -2687,6 +2702,7 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, case S390_VEC_OP_VFMIN: case S390_VEC_OP_VFMAX: case S390_VEC_OP_VBPERM: + case S390_VEC_OP_VCRNF: the_insn.VRRc.v1 = 1; the_insn.VRRc.v2 = 2; the_insn.VRRc.v3 = 3; @@ -2696,6 +2712,17 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, the_insn.VRRc.m6 = d->m6; break; + case S390_VEC_OP_VCNF: + case S390_VEC_OP_VCLFNH: + case S390_VEC_OP_VCFN: + case S390_VEC_OP_VCLFNL: + the_insn.VRRa.v1 = 1; + the_insn.VRRa.v2 = 2; + the_insn.VRRa.rxb = 0b1100; + the_insn.VRRa.m3 = d->m3; + the_insn.VRRa.m4 = d->m4; + break; + case S390_VEC_OP_VFTCI: the_insn.VRIe.v1 = 1; the_insn.VRIe.v2 = 2; diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 81cce9fb4..ceb4084ac 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -19723,6 +19723,113 @@ s390_irgen_VSTEBRG(UChar v1, IRTemp op2addr, UChar m3) return "vstebrg"; } +static const HChar * +s390_irgen_VCxx(const HChar *mnem, s390x_vec_op_details_t details, + UShort v2_offs, UShort v2_size) +{ + s390_insn_assert(mnem, s390_host_has_nnpa); + + IRDirty* d = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + 
d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + + details.v2 * sizeof(V128) + v2_offs; + d->fxState[0].size = v2_size; + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + + details.v1 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + return mnem; +} + +static const HChar * +s390_irgen_VCNF(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCNF; + details.v1 = v1; + details.v2 = v2; + details.m3 = m3; + details.m4 = m4; + return s390_irgen_VCxx("vcnf", details, 0, sizeof(V128)); +} + +static const HChar * +s390_irgen_VCLFNH(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCLFNH; + details.v1 = v1; + details.v2 = v2; + details.m3 = m3; + details.m4 = m4; + return s390_irgen_VCxx("vclfnh", details, 0, sizeof(V128) / 2); +} + +static const HChar * +s390_irgen_VCFN(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCFN; + details.v1 = v1; + details.v2 = v2; + details.m3 = m3; + details.m4 = m4; + return s390_irgen_VCxx("vcfn", details, 0, sizeof(V128)); +} + +static const HChar * +s390_irgen_VCLFNL(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCLFNL; + details.v1 = v1; + details.v2 = v2; + details.m3 = m3; + details.m4 = m4; + return s390_irgen_VCxx("vclfnl", details, sizeof(V128) / 2, + sizeof(V128) / 2); +} + +static const HChar * +s390_irgen_VCRNF(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vcrnf", s390_host_has_nnpa); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCRNF; + details.v1 = v1; + details.v2 = v2; + 
details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = 0; + IRDirty* d = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + return "vcrnf"; +} + /* New insns are added here. If an insn is contingent on a facility being installed also check whether the list of supported facilities in function @@ -21415,6 +21522,22 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe60000000049ULL: /* VLIP */ goto unimplemented; case 0xe60000000050ULL: /* VCVB */ goto unimplemented; case 0xe60000000052ULL: /* VCVBG */ goto unimplemented; + case 0xe60000000055ULL: s390_format_VRRa_VVMM(s390_irgen_VCNF, + VRRa_v1(ovl), VRRa_v2(ovl), + VRRa_m3(ovl), VRRa_m4(ovl), + VRRa_rxb(ovl)); goto ok; + case 0xe60000000056ULL: s390_format_VRRa_VVMM(s390_irgen_VCLFNH, + VRRa_v1(ovl), VRRa_v2(ovl), + VRRa_m3(ovl), VRRa_m4(ovl), + VRRa_rxb(ovl)); goto ok; + case 0xe6000000005dULL: s390_format_VRRa_VVMM(s390_irgen_VCFN, + VRRa_v1(ovl), VRRa_v2(ovl), + VRRa_m3(ovl), VRRa_m4(ovl), + VRRa_rxb(ovl)); goto ok; + case 0xe6000000005eULL: s390_format_VRRa_VVMM(s390_irgen_VCLFNL, + VRRa_v1(ovl), VRRa_v2(ovl), + VRRa_m3(ovl), VRRa_m4(ovl), + VRRa_rxb(ovl)); goto ok; case 0xe60000000058ULL: /* VCVD */ goto unimplemented; case 0xe60000000059ULL: /* VSRP */ goto unimplemented; case 0xe6000000005aULL: /* VCVDG */ goto unimplemented; @@ -21422,6 +21545,11 @@ s390_decode_6byte_and_irgen(const 
UChar *bytes) case 0xe6000000005fULL: /* VTP */ goto unimplemented; case 0xe60000000071ULL: /* VAP */ goto unimplemented; case 0xe60000000073ULL: /* VSP */ goto unimplemented; + case 0xe60000000075ULL: s390_format_VRRa_VVVMM(s390_irgen_VCRNF, + VRRa_v1(ovl), VRRa_v2(ovl), + VRRa_v3(ovl), + VRRa_m3(ovl), VRRa_m4(ovl), + VRRa_rxb(ovl)); goto ok; case 0xe60000000077ULL: /* VCP */ goto unimplemented; case 0xe60000000078ULL: /* VMP */ goto unimplemented; case 0xe60000000079ULL: /* VMSP */ goto unimplemented; diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 375cc8402..912db6104 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -943,6 +943,8 @@ extern UInt s390_host_hwcaps; (s390_host_hwcaps & (VEX_HWCAPS_S390X_LSC2)) #define s390_host_has_vxe \ (s390_host_hwcaps & (VEX_HWCAPS_S390X_VXE)) +#define s390_host_has_nnpa \ + (s390_host_hwcaps & (VEX_HWCAPS_S390X_NNPA)) #endif /* ndef __VEX_HOST_S390_DEFS_H */ /*---------------------------------------------------------------*/ diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 27bb6e0f4..42c013c1e 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -175,6 +175,7 @@ typedef #define VEX_HWCAPS_S390X_MI2 (1<<20) /* miscellaneous-instruction-extensions facility 2 */ #define VEX_HWCAPS_S390X_LSC2 (1<<21) /* Conditional load/store facility2 */ #define VEX_HWCAPS_S390X_VXE (1<<22) /* Vector-enhancements facility */ +#define VEX_HWCAPS_S390X_NNPA (1<<23) /* NNPA facility */ /* Special value representing all available s390x hwcaps */ #define VEX_HWCAPS_S390X_ALL (VEX_HWCAPS_S390X_LDISP | \ @@ -193,7 +194,8 @@ typedef VEX_HWCAPS_S390X_MSA5 | \ VEX_HWCAPS_S390X_MI2 | \ VEX_HWCAPS_S390X_LSC2 | \ - VEX_HWCAPS_S390X_VXE) + VEX_HWCAPS_S390X_VXE | \ + VEX_HWCAPS_S390X_NNPA) #define VEX_HWCAPS_S390X(x) ((x) & ~VEX_S390X_MODEL_MASK) #define VEX_S390X_MODEL(x) ((x) & VEX_S390X_MODEL_MASK) diff --git a/VEX/pub/libvex_s390x_common.h b/VEX/pub/libvex_s390x_common.h index 0fbe4145a..256541b56 
100644 --- a/VEX/pub/libvex_s390x_common.h +++ b/VEX/pub/libvex_s390x_common.h @@ -106,6 +106,7 @@ #define S390_FAC_VXE 135 // vector enhancements facility 1 #define S390_FAC_VXE2 148 // vector enhancements facility 2 #define S390_FAC_DFLT 151 // deflate-conversion facility +#define S390_FAC_NNPA 165 // NNPA facility /*--------------------------------------------------------------*/ diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c index a4c2218bf..079383651 100644 --- a/coregrind/m_machine.c +++ b/coregrind/m_machine.c @@ -1591,6 +1591,7 @@ Bool VG_(machine_get_hwcaps)( void ) { False, S390_FAC_MI2, VEX_HWCAPS_S390X_MI2, "MI2" }, { False, S390_FAC_LSC2, VEX_HWCAPS_S390X_LSC2, "LSC2" }, { False, S390_FAC_VXE, VEX_HWCAPS_S390X_VXE, "VXE" }, + { False, S390_FAC_NNPA, VEX_HWCAPS_S390X_NNPA, "NNPA" }, }; /* Set hwcaps according to the detected facilities */ diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h index 9d1cf1dde..829382f7b 100644 --- a/include/vki/vki-s390x-linux.h +++ b/include/vki/vki-s390x-linux.h @@ -809,6 +809,7 @@ typedef vki_s390_regs vki_elf_gregset_t; #define VKI_HWCAP_S390_VXRS 2048 #define VKI_HWCAP_S390_VXRS_EXT 8192 #define VKI_HWCAP_S390_VXRS_EXT2 32768 +#define VKI_HWCAP_S390_NNPA (1<<20) //----------------------------------------------------------------------