From: Andreas Arnez
Date: Mon, 24 Sep 2018 16:56:07 +0000 (+0200)
Subject: s390x: Vector integer and string instruction support
X-Git-Tag: VALGRIND_3_14_0~9
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1cc1d564f4e9b33daa381c598dfc464c83080c15;p=thirdparty%2Fvalgrind.git

s390x: Vector integer and string instruction support

This adds z/Architecture vector integer and string instruction support.
The main author of this patch is Vadim Barkov.  Some fixes were provided
by Andreas Arnez.
---

diff --git a/NEWS b/NEWS
index 384cc9d357..def0b4d29f 100644
--- a/NEWS
+++ b/NEWS
@@ -121,6 +121,8 @@ where XXXXXX is the bug number as listed below.
 == 387045  Valgrind crashing on High Sierra when testing any newly [..]
 385334  PPC64, fix vpermr, xxperm, xxpermr mask value.
 385408  s390x: z13 vector "support" instructions not implemented
+385409  s390x: z13 vector integer instructions not implemented
+385410  s390x: z13 vector string instructions not implemented
 385412  s390x: new non-vector z13 instructions not implemented
 385868  glibc ld.so _dl_runtime_resolve_avx_slow conditional jump warning.
 385912  none/tests/rlimit_nofile fails on newer glibc/kernel.

diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
index 4f9e962d3a..3bfecbe316 100644
--- a/VEX/priv/guest_s390_defs.h
+++ b/VEX/priv/guest_s390_defs.h
@@ -80,8 +80,8 @@ ULong s390x_dirtyhelper_STCKF(ULong *addr);
 ULong s390x_dirtyhelper_STCKE(ULong *addr);
 ULong s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr);
 void  s390x_dirtyhelper_CUxy(UChar *addr, ULong data, ULong num_bytes);
-ULong s390x_dirtyhelper_vec_binop(VexGuestS390XState *guest_state, ULong opcode,
-                                  ULong v1, ULong v2);
+ULong s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+                               ULong details);
 ULong s390_do_cu12_cu14_helper1(UInt byte1, UInt etf3_and_m3_is_1);
 ULong s390_do_cu12_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
                            ULong stuff);
@@ -261,25 +261,52 @@ extern ULong last_execute_target;
 /*--- Vector helpers.                                      ---*/
 /*------------------------------------------------------------*/
 
-/* Vector operatons which can change condition code */
+/* Vector operations passed to the s390x_dirtyhelper_vec_op(...) helper.
+   Please don't change the ordering of elements; append new items
+   before S390_VEC_OP_LAST. */
 enum {
-   S390_CC_VEC_INVALID = 0,
-   S390_CC_VEC_VPKS = 1,
-   S390_CC_VEC_VPKLS = 2,
-   S390_CC_VEC_LAST = 3 // supposed to be the last element in enum
-} s390x_cc_vec_binop;
-
-/* Create an "object" which contain information about vector operation
-   and it's element size. Used for passing data to dirtyhelper with one argument.
-*/
-#define s390x_cc_vec_opcode(op, elem_size) ( ((op) << 3) | ((elem_size) & 0x07))
-
-/* Extract operation from opcode created with "s390x_cc_vec_opcode" macro */
-#define s390x_cc_vec_get_op(opcode) ((opcode) >> 3)
-
-/* Extract operation from opcode created with "s390x_cc_vec_opcode" macro */
-#define s390x_cc_vec_get_elem_size(opcode) ((opcode) & 0x07)
-
+   S390_VEC_OP_INVALID = 0,
+   S390_VEC_OP_VPKS = 1,
+   S390_VEC_OP_VPKLS = 2,
+   S390_VEC_OP_VFAE = 3,
+   S390_VEC_OP_VFEE = 4,
+   S390_VEC_OP_VFENE = 5,
+   S390_VEC_OP_VISTR = 6,
+   S390_VEC_OP_VSTRC = 7,
+   S390_VEC_OP_VCEQ = 8,
+   S390_VEC_OP_VTM = 9,
+   S390_VEC_OP_VGFM = 10,
+   S390_VEC_OP_VGFMA = 11,
+   S390_VEC_OP_VMAH = 12,
+   S390_VEC_OP_VMALH = 13,
+   S390_VEC_OP_VCH = 14,
+   S390_VEC_OP_VCHL = 15,
+   S390_VEC_OP_LAST = 16 // must remain the last element in the enum
+} s390x_vec_op_t;
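[A minimal sketch, not part of the patch: s390x_dirtyhelper_vec_op() in
guest_s390_helpers.c indexes a per-operation opcode table by these enum
values, which is why the ordering must stay stable.  With the STATIC_ASSERT
macro already used in this header, such a table could guard itself at compile
time; 'opcodes' here is a hypothetical stand-in for the helper's real table:

   static const UChar opcodes[S390_VEC_OP_LAST][2] = {
      {0x00, 0x00},   /* S390_VEC_OP_INVALID */
      {0xe7, 0x97},   /* S390_VEC_OP_VPKS    */
      /* ... one {op1, op2} pair per operation, in enum order ... */
   };
   STATIC_ASSERT(sizeof(opcodes) / sizeof(opcodes[0]) == S390_VEC_OP_LAST);
]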
+
+/* Arguments of s390x_dirtyhelper_vec_op(...) which are packed into one
+   ULong variable.
+ */
+typedef union {
+   struct {
+      unsigned int op : 8;        // should be an element of s390x_vec_op_t
+      unsigned int v1 : 5;        // result of operation
+      unsigned int v2 : 5;        // argument one of operation
+      unsigned int v3 : 5;        // argument two of operation or
+                                  // zero for unary operations
+      unsigned int v4 : 5;        // argument three of operation or
+                                  // zero for unary and binary operations
+      unsigned int m4 : 4;        // field m4 of insn or zero if it's missing
+      unsigned int m5 : 4;        // field m5 of insn or zero if it's missing
+      unsigned int read_only: 1;  // don't write result to guest state
+      unsigned int reserved : 27; // reserved for future use
+   };
+   ULong serialized;
+} s390x_vec_op_details_t;
+
+STATIC_ASSERT(sizeof(s390x_vec_op_details_t) == sizeof(ULong));
 
 /* Macro definitions for opcodes that are not generally available.
@@ -293,6 +320,7 @@ enum {
    ".short 0x" #op1 #v1 #v2 "\n\t .int 0x" #v3 "0" #m5 "0" #m4 #rxb #op2 "\n\t"
 
 #define VL(v1, x2, b2, d2, rxb) VRX_VXBD(e7, v1, x2, b2, d2, rxb, 06)
+#define VST(v1, x2, b2, d2, rxb) VRX_VXBD(e7, v1, x2, b2, d2, rxb, 0e)
 #define VPKS(v1, v2, v3, m4, m5, rxb) VRR_VVVMM(e7, v1, v2, v3, m5, m4, rxb, 97)
 #define VPKLS(v1, v2, v3, m4, m5, rxb) VRR_VVVMM(e7, v1, v2, v3, m5, m4, rxb, 95)

diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
index aeda677044..3aec1f8949 100644
--- a/VEX/priv/guest_s390_helpers.c
+++ b/VEX/priv/guest_s390_helpers.c
@@ -1210,23 +1210,6 @@ decode_bfp_rounding_mode(UInt irrm)
       psw >> 28;   /* cc */ \
    })
 
-/* This macro believes that arguments' addresses are in GPR1 and GPR2.
-   We use %%v16, %%v17 and %%v18 to avoid side effects in FPRs.
-*/
-#define S390_CC_FOR_V128_BINOP(insn) \
-({ \
-   /* VL(v1, x2, b2, d2, rxb) */ \
-   __asm__ volatile ( \
-      VL(1, 0, 1, 000, 8) \
-      VL(2, 0, 2, 000, 8) \
-      insn \
-      "ipm %[psw]\n\t" \
-      : [psw] "=d"(psw) \
-      : "d"(arg1), "d"(arg2) \
-      : "cc", "v16", "v17", "v18"); \
-   psw >> 28;   /* cc */ \
-})
-
 /* Convert an IRRoundingMode value to s390_dfp_round_t */
 #if defined(VGA_s390x)
 static s390_dfp_round_t
@@ -2488,48 +2471,156 @@ missed:
 
 #if defined(VGA_s390x)
 ULong
-s390x_dirtyhelper_vec_binop(VexGuestS390XState *guest_state, ULong opcode,
-                            ULong v1, ULong v2)
+s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+                         const ULong serialized)
 {
    UInt psw;
-   UInt elem_size = s390x_cc_vec_get_elem_size(opcode);
-   UInt op = s390x_cc_vec_get_op(opcode);
-   /* S390_CC_FOR_V128_BINOP relies on exatly this GPRs numbers and names.
*/ - register ULong arg1 asm("1") = (ULong) &((&guest_state->guest_v0)[v1]); - register ULong arg2 asm("2") = (ULong) &((&guest_state->guest_v0)[v2]); - - switch(op) { - case S390_CC_VEC_VPKS: - /* VPKS(v1, v2, v3, m4, m5, rxb) */ - switch(elem_size) { - case 1: return S390_CC_FOR_V128_BINOP(VPKS(3, 1, 2, 1, 1, e)); - case 2: return S390_CC_FOR_V128_BINOP(VPKS(3, 1, 2, 2, 1, e)); - case 3: return S390_CC_FOR_V128_BINOP(VPKS(3, 1, 2, 3, 1, e)); - default: vassert(0); - } + s390x_vec_op_details_t details; + const s390x_vec_op_details_t* d = (const s390x_vec_op_details_t*) &details; + + details.serialized = serialized; + + vassert(d->op > S390_VEC_OP_INVALID && d->op < S390_VEC_OP_LAST); + static const UChar opcodes[][2] = { + {0x00, 0x00}, /* invalid */ + {0xe7, 0x97}, /* VPKS */ + {0xe7, 0x95}, /* VPKLS */ + {0xe7, 0x82}, /* VFAE */ + {0xe7, 0x80}, /* VFEE */ + {0xe7, 0x81}, /* VFENE */ + {0xe7, 0x5c}, /* VISTR */ + {0xe7, 0x8a}, /* VSTRC */ + {0xe7, 0xf8}, /* VCEQ */ + {0xe7, 0xd8}, /* VTM */ + {0xe7, 0xb4}, /* VGFM */ + {0xe7, 0xbc}, /* VGFMA */ + {0xe7, 0xab}, /* VMAH */ + {0xe7, 0xa9}, /* VMALH */ + {0xe7, 0xfb}, /* VCH */ + {0xe7, 0xf9}, /* VCHL */ + }; + + union { + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int : 4; + unsigned int m5 : 4; + unsigned int : 4; + unsigned int m4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRR; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int m5 : 4; + unsigned int m6 : 4; + unsigned int : 4; + unsigned int v4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRRd; + UChar bytes[6]; + } the_insn; + + the_insn.VRR.op1 = opcodes[d->op][0]; + the_insn.bytes[1] = the_insn.bytes[2] + = the_insn.bytes[3] = the_insn.bytes[4] = 0; + the_insn.VRR.op2 = opcodes[d->op][1]; + + switch(d->op) { + case S390_VEC_OP_VISTR: + the_insn.VRR.v1 = 1; + the_insn.VRR.v2 = 2; + the_insn.VRR.rxb = 0b1100; + the_insn.VRR.m4 = d->m4; + the_insn.VRR.m5 = d->m5; + break; - case S390_CC_VEC_VPKLS: - /* VPKLS(v1, v2, v3, m4, m5, rxb) */ - switch(elem_size) { - case 1: return S390_CC_FOR_V128_BINOP(VPKLS(3, 1, 2, 1, 1, e)); - case 2: return S390_CC_FOR_V128_BINOP(VPKLS(3, 1, 2, 2, 1, e)); - case 3: return S390_CC_FOR_V128_BINOP(VPKLS(3, 1, 2, 3, 1, e)); - default: vassert(0); - } + case S390_VEC_OP_VTM: + the_insn.VRR.v1 = 2; + the_insn.VRR.v2 = 3; + the_insn.VRR.rxb = 0b1100; + break; + + case S390_VEC_OP_VPKS: + case S390_VEC_OP_VPKLS: + case S390_VEC_OP_VFAE: + case S390_VEC_OP_VFEE: + case S390_VEC_OP_VFENE: + case S390_VEC_OP_VCEQ: + case S390_VEC_OP_VGFM: + case S390_VEC_OP_VCH: + case S390_VEC_OP_VCHL: + the_insn.VRR.v1 = 1; + the_insn.VRR.v2 = 2; + the_insn.VRR.v3 = 3; + the_insn.VRR.rxb = 0b1110; + the_insn.VRR.m4 = d->m4; + the_insn.VRR.m5 = d->m5; + break; + + case S390_VEC_OP_VSTRC: + case S390_VEC_OP_VGFMA: + case S390_VEC_OP_VMAH: + case S390_VEC_OP_VMALH: + the_insn.VRRd.v1 = 1; + the_insn.VRRd.v2 = 2; + the_insn.VRRd.v3 = 3; + the_insn.VRRd.v4 = 4; + the_insn.VRRd.rxb = 0b1111; + the_insn.VRRd.m5 = d->m4; + the_insn.VRRd.m6 = d->m5; + break; default: - vex_printf("operation = %d\n", op); - vpanic("s390x_dirtyhelper_vec_binop: unknown operation"); + vex_printf("operation = %d\n", d->op); + vpanic("s390x_dirtyhelper_vec_op: unknown operation"); } - return 0; + const V128* guest_v = &(guest_state->guest_v0); + __asm__ volatile ( + "lgr %%r10, %[arg1]\n" + VL(2, 0, a, 000, 8) + "lgr %%r10, %[arg2]\n" + VL(3, 0, a, 
000, 8)
+      "lgr %%r10, %[arg3]\n"
+      VL(4, 0, a, 000, 8)
+      "ex %[zero], %[insn]\n"
+
+      "cijne %[read_only], 0, return_cc\n"
+      "lgr %%r10, %[res]\n"
+      VST(1, 0, a, 000, 8)
+
+      "return_cc: "
+      "ipm %[psw]\n\t"
+      : [psw] "=d" (psw)
+
+      : [res]  "r" (&guest_v[d->v1]),
+        [arg1] "r" (&guest_v[d->v2]),
+        [arg2] "r" (&guest_v[d->v3]),
+        [arg3] "r" (&guest_v[d->v4]),
+
+        [zero] "r" (0ULL),
+        [insn] "m" (the_insn),
+        [read_only] "r" (d->read_only)
+
+      : "cc", "r10", "v16", "v17", "v18", "v19"
+   );
+
+   return psw >> 28;   /* cc */
 }
 
 #else
 
 ULong
-s390x_dirtyhelper_vec_binop(VexGuestS390XState *guest_state, ULong opcode,
-                            ULong v1, ULong v2)
+s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+                         const ULong serialized)
 { return 0; }
 
 #endif

diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
index 8f3fb6d3d4..c594ad51bf 100644
--- a/VEX/priv/guest_s390_toIR.c
+++ b/VEX/priv/guest_s390_toIR.c
@@ -748,12 +748,18 @@ s390_cc_thunk_put1d128Z(UInt opc, IRTemp d1, IRTemp nd)
    s390_cc_thunk_fill(op, hi, lox, ndep);
 }
 
+static void
+s390_cc_set(IRTemp cc)
+{
+   vassert(typeOfIRTemp(irsb->tyenv, cc) == Ity_I64);
+
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), mkexpr(cc), mkU64(0), mkU64(0));
+}
 
 static void
-s390_cc_set(UInt val)
+s390_cc_set_val(UInt val)
 {
-   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET),
-                      mkU64(val), mkU64(0), mkU64(0));
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), mkU64(val), mkU64(0), mkU64(0));
 }
 
 /* Build IR to calculate the condition code from flags thunk.
@@ -1536,7 +1542,7 @@ get_fpc_w0(void)
 /* Return the guest state offset of a vr register.
  */
 static UInt
-vr_offset(UInt archreg)
+vr_offset(const UInt archreg)
 {
    static const UInt offset[32] = {
       S390X_GUEST_OFFSET(guest_v0),
@@ -1580,14 +1586,14 @@ vr_offset(UInt archreg)
 /* Return the guest state offset of quadword of a vr register.
  */
 static UInt
-vr_qw_offset(UInt archreg)
+vr_qw_offset(const UInt archreg)
 {
    return vr_offset(archreg) + 0;
 }
 
 /* Write quadword of a vr to the guest state.
  */
 static void
-put_vr_qw(UInt archreg, IRExpr *expr)
+put_vr_qw(const UInt archreg, IRExpr *expr)
 {
    vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_V128);
 
@@ -1596,7 +1602,7 @@ put_vr_qw(UInt archreg, IRExpr *expr)
 /* Read quadword of a vr register.
  */
 static IRExpr *
-get_vr_qw(UInt archreg)
+get_vr_qw(const UInt archreg)
 {
    return IRExpr_Get(vr_qw_offset(archreg), Ity_V128);
 }
@@ -1661,6 +1667,13 @@ vr_w3_offset(UInt archreg)
    return vr_offset(archreg) + 12;
 }
 
+/* Read word #0 of a vr register. */
+static IRExpr *
+get_vr_w0(UInt archreg)
+{
+   return IRExpr_Get(vr_dw0_offset(archreg), Ity_I32);
+}
+
 /* Read word #1 of a vr register. */
 static IRExpr *
 get_vr_w1(UInt archreg)
@@ -1668,6 +1681,13 @@ get_vr_w1(UInt archreg)
    return IRExpr_Get(vr_w1_offset(archreg), Ity_I32);
 }
 
+/* Read word #2 of a vr register. */
+static IRExpr *
+get_vr_w2(UInt archreg)
+{
+   return IRExpr_Get(vr_dw1_offset(archreg), Ity_I32);
+}
+
 /* Read word #3 of a vr register. */
 static IRExpr *
 get_vr_w3(UInt archreg)
@@ -1744,6 +1764,223 @@ s390_vr_get_type(const UChar m)
    return results[m];
 }
 
+/* Determine if Condition Code Set (CS) flag is set in m field */
+#define s390_vr_is_cs_set(m) (((m) & 0x1) != 0)
+
+/* Determine if Zero Search (ZS) flag is set in m field */
+#define s390_vr_is_zs_set(m) (((m) & 0b0010) != 0)
+
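[A plain-C cross-check, not part of the patch, of the technique used by
s390_V128_compareLT128x1() below: a 128-bit unsigned compare composed from
two 64-bit halves.  The u128 struct here is hypothetical, not a VEX type:

   #include <stdbool.h>
   #include <stdint.h>

   typedef struct { uint64_t hi, lo; } u128;

   /* a < b (or a <= b if allow_equal), treating both as unsigned 128-bit. */
   static bool u128_lt(u128 a, u128 b, bool allow_equal)
   {
      if (a.hi == b.hi)      /* high halves equal: the low halves decide */
         return allow_equal ? a.lo <= b.lo : a.lo < b.lo;
      return a.hi < b.hi;    /* otherwise the high halves decide */
   }
]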
+*/ +static IRExpr* +s390_V128_compareLT128x1(IRExpr* arg1, IRExpr* arg2, Bool allow_equal) +{ + /* If high halves are equal + then we compare lower ones + otherwise we compare high halves. + */ + IRExpr* result; + result = mkite(binop(Iop_CmpEQ64, + unop(Iop_V128HIto64, arg1), + unop(Iop_V128HIto64, arg2) + ), + unop(Iop_1Uto64, + binop(allow_equal ? Iop_CmpLE64U : Iop_CmpLT64U, + unop(Iop_V128to64, arg1), + unop(Iop_V128to64, arg2) + ) + ), + unop(Iop_1Uto64, + binop(Iop_CmpLT64U, + unop(Iop_V128HIto64, arg1), + unop(Iop_V128HIto64, arg2) + ) + ) + ); + + return result; +} + +/* Generates arg1 == 0 expression. + Argument must have V128 type and is treated as unsigned 128-bit number. +*/ +static IRExpr* +s390_V128_isZero(IRExpr* arg) +{ + IRExpr* high_or_low = binop(Iop_Or64, + unop(Iop_V128to64, arg), + unop(Iop_V128HIto64, arg) + ); + + return unop(Iop_1Uto64, binop(Iop_CmpEQ64, high_or_low, mkU64(0ULL))); +} + +/* Generate the two's complement for arg. + Arg should be V128. +*/ +static IRExpr* +s390_V128_get_complement(IRExpr* arg, IRType type) +{ + IRExpr* notArg = unop(Iop_NotV128, arg); + IRExpr* ones; + IRExpr* result; + switch(type) { + case Ity_I8: + ones = unop(Iop_Dup8x16, mkU8(0x01)); + result = binop(Iop_Add8x16, notArg, ones); + break; + case Ity_I16: + ones = unop(Iop_Dup16x8, mkU16(0x0001)); + result = binop(Iop_Add16x8, notArg, ones); + break; + case Ity_I32: + ones = unop(Iop_Dup32x4, mkU32(0x00000001)); + result = binop(Iop_Add32x4, notArg, ones); + break; + case Ity_I64: + ones = binop(Iop_64HLtoV128, mkU64(0x1ULL), mkU64(0x1ULL)); + result = binop(Iop_Add64x2, notArg, ones); + break; + case Ity_V128: + ones = binop(Iop_64HLtoV128, mkU64(0x0ULL), mkU64(0x1ULL)); + result = binop(Iop_Add128x1, notArg, ones); + break; + default: + vpanic("s390_V128_get_complement: unknown type"); + } + + return result; +} + +/* # Elements are treated as 128-bit unsigned integers + For i = 0; i < elemCount; i++ do: + sum = arg1[i] + arg2[i] + result[i] = carry_out_bit(sum) + end + return result + */ +static IRExpr* +s390_V128_calculate_carry_out(IRExpr* arg1, IRExpr* arg2, IRType type, + Bool allow_equal) +{ + IRTemp sum = newTemp(Ity_V128); + IRExpr* mask; + IRExpr* comparison; + IRExpr* result; + switch(type){ + case Ity_I8: + assign(sum, binop(Iop_Add8x16, arg1, arg2)); + mask = unop(Iop_Dup8x16, mkU8(0x1)); + comparison = binop(Iop_CmpGT8Ux16, arg1, mkexpr(sum)); + if(allow_equal) { + comparison = binop(Iop_OrV128, binop(Iop_CmpEQ8x16, arg1, mkexpr(sum)), + comparison); + } + result = binop(Iop_AndV128, comparison, mask); + break; + case Ity_I16: + assign(sum, binop(Iop_Add16x8, arg1, arg2)); + mask = unop(Iop_Dup16x8, mkU16(0x1)); + comparison = binop(Iop_CmpGT16Ux8, arg1, mkexpr(sum)); + if(allow_equal) { + comparison = binop(Iop_OrV128, binop(Iop_CmpEQ16x8, arg1, mkexpr(sum)), + comparison); + } + result = binop(Iop_AndV128, comparison, mask); + break; + case Ity_I32: + assign(sum, binop(Iop_Add32x4, arg1, arg2)); + mask = unop(Iop_Dup32x4, mkU32(0x1)); + comparison = binop(Iop_CmpGT32Ux4, arg1, mkexpr(sum)); + if(allow_equal) { + comparison = binop(Iop_OrV128, binop(Iop_CmpEQ32x4, arg1, mkexpr(sum)), + comparison); + } + result = binop(Iop_AndV128, comparison, mask); + break; + case Ity_I64: + assign(sum, binop(Iop_Add64x2, arg1, arg2)); + mask = binop(Iop_64HLtoV128, mkU64(0x1), mkU64(0x1)); + comparison = binop(Iop_CmpGT64Ux2, arg1, mkexpr(sum)); + if(allow_equal) { + comparison = binop(Iop_OrV128, binop(Iop_CmpEQ64x2, arg1, mkexpr(sum)), + comparison); + } + result = 
+
+/* elemCount = 1 for now (elements are 128-bit unsigned integers)
+   For i = 0; i < elemCount; i++ do:
+      sum = arg1[i] + arg2[i] + (arg3[i] & 0x1)
+      result[i] = carry_out_bit(sum)
+   end
+   return result
+ */
+static IRExpr*
+s390_V128_calculate_carry_out_with_carry(IRExpr* arg1, IRExpr* arg2, IRExpr* arg3)
+{
+   IRTemp sum = newTemp(Ity_V128);
+   assign(sum, binop(Iop_Add128x1, arg1, arg2));
+
+   IRTemp overflow_before = newTemp(Ity_I64);
+   assign(overflow_before, s390_V128_compareLT128x1(mkexpr(sum), arg1, False));
+
+   IRExpr* mask = binop(Iop_64HLtoV128, mkU64(0), mkU64(1));
+   IRTemp carry_in = newTemp(Ity_V128);
+   assign(carry_in, binop(Iop_AndV128, arg3, mask));
+
+   IRExpr* carry_is_not_zero = unop(Iop_1Uto64,
+                                    binop(Iop_CmpNE64,
+                                          unop(Iop_V128to64, mkexpr(carry_in)),
+                                          mkU64(0ULL)
+                                          )
+                                    );
+
+   IRTemp sum_plus_carry = newTemp(Ity_V128);
+   assign(sum_plus_carry, binop(Iop_Add128x1, mkexpr(sum), mkexpr(carry_in)));
+
+   IRExpr* overflow_after = binop(Iop_And64,
+                                  carry_is_not_zero,
+                                  s390_V128_isZero(mkexpr(sum_plus_carry))
+                                  );
+
+   IRExpr* result = binop(Iop_Or64, mkexpr(overflow_before), overflow_after);
+   result = binop(Iop_64HLtoV128, mkU64(0ULL), result);
+   return result;
+}
+
+/* Performs "arg1 + arg2 + carry_out_bit(arg1 + arg2)".
+   Arguments and result are Ity_I32.
+*/
+static IRTemp
+s390_checksum_add(IRExpr* arg1, IRExpr* arg2)
+{
+   IRTemp sum = newTemp(Ity_I32);
+   IRTemp res = newTemp(Ity_I32);
+
+   assign(sum, binop(Iop_Add32, arg1, arg2));
+   assign(res,
+          mkite(binop(Iop_CmpLT32U, mkexpr(sum), arg1),
+                binop(Iop_Add32, mkexpr(sum), mkU32(1)),
+                mkexpr(sum))
+          );
+
+   return res;
+}
+
 /* Return the guest state offset of element with type's size and given index
    of a vr register.
 */
@@ -1816,7 +2053,7 @@ s390_vr_getVRindex(UChar v,UChar argNumber, UChar rxb)
 {
    vassert(argNumber > 0 && argNumber <= 4);
    vassert(rxb < 16);
-   return v | (((rxb) << (++argNumber)) & 0b00010000);
+   return v | (((rxb) << argNumber) & 0b00010000);
 }
 
 static void
@@ -1834,8 +2071,7 @@ s390_vr_fill(UChar v1, IRExpr *o2)
       put_vr_qw(v1, unop(Iop_Dup32x4, o2));
       break;
    case Ity_I64:
-      put_vr_dw0(v1, o2);
-      put_vr_dw1(v1, o2);
+      put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2));
       break;
    default:
       ppIRType(o2type);
@@ -1881,43 +2117,65 @@ s390_getCountToBlockBoundary(IRTemp op2addr, UChar m)
    return mkexpr(output);
 }
 
-/* Helper macro for s390_vr_loadWithLength */
-#define s390_vr_loadWithLength_process(elem) \
-      put_vr_qw(v1, triop(Iop_SetElem8x16,\
-                get_vr_qw(v1), mkU8(elem),\
-                mkite(binop(Iop_CmpLE32U, mkU32(elem), mkexpr(maxIndexToLoad)),\
-                      load(Ity_I8, binop(Iop_Add64, mkexpr(addr), mkU64(elem))),\
-                      mkU8(0x00)\
-                     )\
-                 )\
-             )
-
 /* Load bytes into v1.
    maxIndex specifies max index to load and must be Ity_I32.
-   If maxIndex > 16, all 16 bytes are loaded.
+   If maxIndex >= 15, all 16 bytes are loaded.
    All bytes after maxIndex are zeroed.
*/ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) { - IRTemp maxIndexToLoad = newTemp(Ity_I32); - - assign(maxIndexToLoad, maxIndex); - - s390_vr_loadWithLength_process(0); - s390_vr_loadWithLength_process(1); - s390_vr_loadWithLength_process(2); - s390_vr_loadWithLength_process(3); - s390_vr_loadWithLength_process(4); - s390_vr_loadWithLength_process(5); - s390_vr_loadWithLength_process(6); - s390_vr_loadWithLength_process(7); - s390_vr_loadWithLength_process(8); - s390_vr_loadWithLength_process(9); - s390_vr_loadWithLength_process(10); - s390_vr_loadWithLength_process(11); - s390_vr_loadWithLength_process(12); - s390_vr_loadWithLength_process(13); - s390_vr_loadWithLength_process(14); - s390_vr_loadWithLength_process(15); + IRTemp maxIdx = newTemp(Ity_I32); + IRTemp cappedMax = newTemp(Ity_I64); + IRTemp offset = newTemp(Ity_I64); + IRTemp zeroed = newTemp(Ity_I64); + IRTemp back = newTemp(Ity_I64); + + /* Implement the insn with a single 16-byte load, to allow memcheck's + "partial-loads-OK" heuristic to apply. Ensure that a page boundary is + crossed if and only if the real insn would have crossed it as well. + Thus, if the bytes to load are fully contained in an aligned 16-byte + chunk, load the whole 16-byte aligned chunk, and otherwise load 16 bytes + from the unaligned address. Then shift the loaded data left-aligned + into the target vector register. */ + + assign(maxIdx, maxIndex); + assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)), + unop(Iop_32Uto64, mkexpr(maxIdx)), mkU64(15))); + /* 'offset': addr's offset from last 16-byte aligned address + 'zeroed': number of bytes to be zeroed in the target vector + 'back': how much to subtract from addr before loading 16 bytes */ + assign(offset, binop(Iop_And64, mkexpr(addr), mkU64(15))); + assign(zeroed, binop(Iop_Sub64, mkU64(15), mkexpr(cappedMax))); + assign(back, mkite(binop(Iop_CmpLE64U, mkexpr(offset), mkexpr(zeroed)), + mkexpr(offset), mkU64(0))); + + /* How much to shift the loaded 16-byte vector to the right, and then to + the left. Since both 'zeroed' and 'back' range from 0 to 15, the shift + amounts range from 0 to 120. */ + IRExpr *shrAmount = binop(Iop_Shl64, + binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)), + mkU8(3)); + IRExpr *shlAmount = binop(Iop_Shl64, mkexpr(zeroed), mkU8(3)); + + put_vr_qw(v1, binop(Iop_ShlV128, + binop(Iop_ShrV128, + load(Ity_V128, + binop(Iop_Sub64, mkexpr(addr), mkexpr(back))), + unop(Iop_64to8, shrAmount)), + unop(Iop_64to8, shlAmount))); +} + +/* Bitwise vCond ? v1 : v2 + All args are V128. + */ +static IRExpr* +s390_V128_bitwiseITE(IRExpr* vCond, IRExpr* v1, IRExpr* v2) +{ + IRTemp vc = newTemp(Ity_V128); + assign(vc, vCond); + /* result = (v1 & vCond) | (v2 & ~vCond) */ + return binop(Iop_OrV128, + binop(Iop_AndV128, v1, mkexpr(vc)), + binop(Iop_AndV128, v2, unop(Iop_NotV128, mkexpr(vc)))); } /*------------------------------------------------------------*/ @@ -3291,6 +3549,31 @@ s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3, } +static void +s390_format_VRS_VRDVM(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar v3, + UChar m4), UChar v1, UChar b2, UShort d2, UChar v3, + UChar m4, UChar rxb) +{ + const HChar *mnm; + IRTemp op2addr = newTemp(Ity_I64); + + if (! s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? 
get_gpr_dw0(b2) : + mkU64(0))); + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v3 = s390_vr_getVRindex(v3, 2, rxb); + mnm = irgen(v1, op2addr, v3, m4); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC5(MNM, VR, UDXB, VR, UINT), mnm, v1, d2, 0, b2, v3, m4); +} + + static void s390_format_VRS_VRDV(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar v3), UChar v1, UChar b2, UShort d2, UChar v3, UChar rxb) @@ -3396,6 +3679,121 @@ s390_format_VRV_VVRDMT(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar m3) } +static void +s390_format_VRRd_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar v4, UChar m5, UChar m6), + UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, + UChar m6, UChar rxb) +{ + const HChar *mnm; + + if (! s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + v4 = s390_vr_getVRindex(v4, 4, rxb); + mnm = irgen(v1, v2, v3, v4, m5, m6); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), + mnm, v1, v2, v3, v4, m5, m6); +} + + +static void +s390_format_VRR_VVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3, + UChar m5), + UChar v1, UChar v2, UChar m3, UChar m5, UChar rxb) +{ + const HChar *mnm; + + if (! s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, m3, m5); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v2, m3, m5); +} + + +static void +s390_format_VRId_VVVIM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar i4, UChar m5), + UChar v1, UChar v2, UChar v3, UChar i4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (! s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, i4, m5); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), mnm, v1, v2, v3, i4, m5); +} + + +static void +s390_format_VRId_VVVI(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar i4), + UChar v1, UChar v2, UChar v3, UChar i4, UChar rxb) +{ + const HChar *mnm; + + if (! s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, i4); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), mnm, v1, v2, v3, i4); +} + + +static void +s390_format_VRRd_VVVVM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar v4, UChar m5), + UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (! 
s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + v4 = s390_vr_getVRindex(v4, 4, rxb); + mnm = irgen(v1, v2, v3, v4, m5); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC6(MNM, VR, VR, VR, VR, UINT), mnm, v1, v2, v3, v4, m5); +} + + /*------------------------------------------------------------*/ /*--- Build IR for opcodes ---*/ /*------------------------------------------------------------*/ @@ -11817,7 +12215,7 @@ s390_irgen_CLCL(UChar r1, UChar r2) assign(pad, get_gpr_b4(r2 + 1)); /* len1 == 0 and len2 == 0? Exit */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpEQ32, binop(Iop_Or32, mkexpr(len1), mkexpr(len2)), mkU32(0))); @@ -11893,7 +12291,7 @@ s390_irgen_CLCLE(UChar r1, UChar r3, IRTemp pad2) assign(len3, get_gpr_dw0(r3 + 1)); /* len1 == 0 and len3 == 0? Exit */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpEQ64,binop(Iop_Or64, mkexpr(len1), mkexpr(len3)), mkU64(0))); @@ -12255,7 +12653,7 @@ s390_irgen_SRST(UChar r1, UChar r2) put_counter_dw0(mkU64(0)); // start = next? CC=2 and out r1 and r2 unchanged - s390_cc_set(2); + s390_cc_set_val(2); put_gpr_dw0(r2, binop(Iop_Sub64, mkexpr(address), mkexpr(counter))); next_insn_if(binop(Iop_CmpEQ64, mkexpr(address), mkexpr(next))); @@ -12263,7 +12661,7 @@ s390_irgen_SRST(UChar r1, UChar r2) assign(delim, get_gpr_b7(0)); // byte = delim? CC=1, R1=address - s390_cc_set(1); + s390_cc_set_val(1); put_gpr_dw0(r1, mkexpr(address)); next_insn_if(binop(Iop_CmpEQ8, mkexpr(delim), mkexpr(byte))); @@ -12296,7 +12694,7 @@ s390_irgen_CLST(UChar r1, UChar r2) assign(byte2, load(Ity_I8, mkexpr(address2))); // end in both? 
all equal, reset r1 and r2 to start values - s390_cc_set(0); + s390_cc_set_val(0); put_gpr_dw0(r1, binop(Iop_Sub64, mkexpr(address1), mkexpr(counter))); put_gpr_dw0(r2, binop(Iop_Sub64, mkexpr(address2), mkexpr(counter))); next_insn_if(binop(Iop_CmpEQ8, mkU8(0), @@ -12308,20 +12706,20 @@ s390_irgen_CLST(UChar r1, UChar r2) put_gpr_dw0(r2, mkexpr(address2)); // End found in string1 - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpEQ8, mkexpr(end), mkexpr(byte1))); // End found in string2 - s390_cc_set(2); + s390_cc_set_val(2); next_insn_if(binop(Iop_CmpEQ8, mkexpr(end), mkexpr(byte2))); // string1 < string2 - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpLT32U, unop(Iop_8Uto32, mkexpr(byte1)), unop(Iop_8Uto32, mkexpr(byte2)))); // string2 < string1 - s390_cc_set(2); + s390_cc_set_val(2); next_insn_if(binop(Iop_CmpLT32U, unop(Iop_8Uto32, mkexpr(byte2)), unop(Iop_8Uto32, mkexpr(byte1)))); @@ -12647,7 +13045,7 @@ s390_irgen_MVCL(UChar r1, UChar r2) /* Check for destructive overlap: addr1 > addr2 && addr2 + len1 > addr1 && (addr2 + len2) > addr1 */ - s390_cc_set(3); + s390_cc_set_val(3); IRTemp cond1 = newTemp(Ity_I32); assign(cond1, unop(Iop_1Uto32, binop(Iop_CmpLT64U, mkexpr(addr2), mkexpr(addr1)))); @@ -12777,7 +13175,7 @@ s390_irgen_MVST(UChar r1, UChar r2) iterate_if(binop(Iop_CmpNE8, mkexpr(end), mkexpr(byte))); // and always set cc=1 at the end + update r1 - s390_cc_set(1); + s390_cc_set_val(1); put_gpr_dw0(r1, binop(Iop_Add64, mkexpr(addr1), mkexpr(counter))); put_counter_dw0(mkU64(0)); @@ -14132,8 +14530,7 @@ s390_irgen_STCK(IRTemp op2addr) d->mAddr = mkexpr(op2addr); d->mSize = 8; stmt(IRStmt_Dirty(d)); - s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), - mkexpr(cc), mkU64(0), mkU64(0)); + s390_cc_set(cc); return "stck"; } @@ -14152,8 +14549,7 @@ s390_irgen_STCKF(IRTemp op2addr) d->mAddr = mkexpr(op2addr); d->mSize = 8; stmt(IRStmt_Dirty(d)); - s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), - mkexpr(cc), mkU64(0), mkU64(0)); + s390_cc_set(cc); } return "stckf"; } @@ -14171,8 +14567,7 @@ s390_irgen_STCKE(IRTemp op2addr) d->mAddr = mkexpr(op2addr); d->mSize = 16; stmt(IRStmt_Dirty(d)); - s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), - mkexpr(cc), mkU64(0), mkU64(0)); + s390_cc_set(cc); return "stcke"; } @@ -14206,7 +14601,7 @@ s390_irgen_STFLE(IRTemp op2addr) stmt(IRStmt_Dirty(d)); - s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), mkexpr(cc), mkU64(0), mkU64(0)); + s390_cc_set(cc); return "stfle"; } @@ -14229,7 +14624,7 @@ s390_irgen_CKSM(UChar r1,UChar r2) assign(len, get_gpr_dw0(r2+1)); /* Condition code is always zero. */ - s390_cc_set(0); + s390_cc_set_val(0); /* If length is zero, there is no need to calculate the checksum */ next_insn_if(binop(Iop_CmpEQ64, mkexpr(len), mkU64(0))); @@ -14296,7 +14691,7 @@ s390_irgen_TROO(UChar m3, UChar r1, UChar r2) IRTemp result = newTemp(Ity_I64); /* End of source string? We're done; proceed to next insn */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpEQ64, mkexpr(src_len), mkU64(0))); /* Load character from source string, index translation table and @@ -14308,7 +14703,7 @@ s390_irgen_TROO(UChar m3, UChar r1, UChar r2) assign(op1, load(Ity_I8, mkexpr(result))); if (! s390_host_has_etf2 || (m3 & 0x1) == 0) { - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpEQ8, mkexpr(op1), mkexpr(test_byte))); } store(get_gpr_dw0(r1), mkexpr(op1)); @@ -14343,7 +14738,7 @@ s390_irgen_TRTO(UChar m3, UChar r1, UChar r2) IRTemp result = newTemp(Ity_I64); /* End of source string? 
We're done; proceed to next insn */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpEQ64, mkexpr(src_len), mkU64(0))); /* Load character from source string, index translation table and @@ -14356,7 +14751,7 @@ s390_irgen_TRTO(UChar m3, UChar r1, UChar r2) assign(op1, load(Ity_I8, mkexpr(result))); if (! s390_host_has_etf2 || (m3 & 0x1) == 0) { - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpEQ8, mkexpr(op1), mkexpr(test_byte))); } store(get_gpr_dw0(r1), mkexpr(op1)); @@ -14391,7 +14786,7 @@ s390_irgen_TROT(UChar m3, UChar r1, UChar r2) IRTemp result = newTemp(Ity_I64); /* End of source string? We're done; proceed to next insn */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpEQ64, mkexpr(src_len), mkU64(0))); /* Load character from source string, index translation table and @@ -14403,7 +14798,7 @@ s390_irgen_TROT(UChar m3, UChar r1, UChar r2) assign(op1, load(Ity_I16, mkexpr(result))); if (! s390_host_has_etf2 || (m3 & 0x1) == 0) { - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpEQ16, mkexpr(op1), mkexpr(test_byte))); } store(get_gpr_dw0(r1), mkexpr(op1)); @@ -14438,7 +14833,7 @@ s390_irgen_TRTT(UChar m3, UChar r1, UChar r2) IRTemp result = newTemp(Ity_I64); /* End of source string? We're done; proceed to next insn */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpEQ64, mkexpr(src_len), mkU64(0))); /* Load character from source string, index translation table and @@ -14450,7 +14845,7 @@ s390_irgen_TRTT(UChar m3, UChar r1, UChar r2) assign(op1, load(Ity_I16, mkexpr(result))); if (! s390_host_has_etf2 || (m3 & 0x1) == 0) { - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpEQ16, mkexpr(op1), mkexpr(test_byte))); } @@ -14495,13 +14890,13 @@ s390_irgen_TRE(UChar r1,UChar r2) IRTemp result = newTemp(Ity_I64); /* End of source string? We're done; proceed to next insn */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpEQ64, mkexpr(src_len), mkU64(0))); /* Load character from source string and compare with test byte */ assign(op, load(Ity_I8, mkexpr(src_addr))); - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpEQ8, mkexpr(op), mkexpr(test_byte))); assign(result, binop(Iop_Add64, unop(Iop_8Uto64, mkexpr(op)), @@ -14548,7 +14943,7 @@ s390_irgen_CU21(UChar m3, UChar r1, UChar r2) /* We're processing the 2nd operand 2 bytes at a time. Therefore, if there are less than 2 bytes left, then the 2nd operand is exhausted and we're done here. cc = 0 */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len2), mkU64(2))); /* There are at least two bytes there. Read them. */ @@ -14594,7 +14989,7 @@ s390_irgen_CU21(UChar m3, UChar r1, UChar r2) IRExpr *invalid_low_surrogate = binop(Iop_And64, mkexpr(retval), mkU64(0xff)); - s390_cc_set(2); + s390_cc_set_val(2); next_insn_if(binop(Iop_CmpEQ64, invalid_low_surrogate, mkU64(1))); } @@ -14603,7 +14998,7 @@ s390_irgen_CU21(UChar m3, UChar r1, UChar r2) assign(num_bytes, binop(Iop_And64, binop(Iop_Shr64, mkexpr(retval), mkU8(8)), mkU64(0xff))); - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len1), mkexpr(num_bytes))); /* Extract the bytes to be stored at addr1 */ @@ -14675,7 +15070,7 @@ s390_irgen_CU24(UChar m3, UChar r1, UChar r2) /* We're processing the 2nd operand 2 bytes at a time. Therefore, if there are less than 2 bytes left, then the 2nd operand is exhausted and we're done here. 
cc = 0 */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len2), mkU64(2))); /* There are at least two bytes there. Read them. */ @@ -14722,12 +15117,12 @@ s390_irgen_CU24(UChar m3, UChar r1, UChar r2) IRExpr *invalid_low_surrogate = binop(Iop_And64, mkexpr(retval), mkU64(0xff)); - s390_cc_set(2); + s390_cc_set_val(2); next_insn_if(binop(Iop_CmpEQ64, invalid_low_surrogate, mkU64(1))); } /* Now test whether the 1st operand is exhausted */ - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len1), mkU64(4))); /* Extract the bytes to be stored at addr1 */ @@ -14782,7 +15177,7 @@ s390_irgen_CU42(UChar r1, UChar r2) /* We're processing the 2nd operand 4 bytes at a time. Therefore, if there are less than 4 bytes left, then the 2nd operand is exhausted and we're done here. cc = 0 */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len2), mkU64(4))); /* Read the 2nd operand. */ @@ -14797,7 +15192,7 @@ s390_irgen_CU42(UChar r1, UChar r2) cc=2 outranks cc=1 (1st operand exhausted) */ IRExpr *invalid_character = binop(Iop_And64, mkexpr(retval), mkU64(0xff)); - s390_cc_set(2); + s390_cc_set_val(2); next_insn_if(binop(Iop_CmpEQ64, invalid_character, mkU64(1))); /* Now test whether the 1st operand is exhausted */ @@ -14805,7 +15200,7 @@ s390_irgen_CU42(UChar r1, UChar r2) assign(num_bytes, binop(Iop_And64, binop(Iop_Shr64, mkexpr(retval), mkU8(8)), mkU64(0xff))); - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len1), mkexpr(num_bytes))); /* Extract the bytes to be stored at addr1 */ @@ -14876,7 +15271,7 @@ s390_irgen_CU41(UChar r1, UChar r2) /* We're processing the 2nd operand 4 bytes at a time. Therefore, if there are less than 4 bytes left, then the 2nd operand is exhausted and we're done here. cc = 0 */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len2), mkU64(4))); /* Read the 2nd operand. */ @@ -14891,7 +15286,7 @@ s390_irgen_CU41(UChar r1, UChar r2) cc=2 outranks cc=1 (1st operand exhausted) */ IRExpr *invalid_character = binop(Iop_And64, mkexpr(retval), mkU64(0xff)); - s390_cc_set(2); + s390_cc_set_val(2); next_insn_if(binop(Iop_CmpEQ64, invalid_character, mkU64(1))); /* Now test whether the 1st operand is exhausted */ @@ -14899,7 +15294,7 @@ s390_irgen_CU41(UChar r1, UChar r2) assign(num_bytes, binop(Iop_And64, binop(Iop_Shr64, mkexpr(retval), mkU8(8)), mkU64(0xff))); - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len1), mkexpr(num_bytes))); /* Extract the bytes to be stored at addr1 */ @@ -14999,7 +15394,7 @@ s390_irgen_cu12_cu14(UChar m3, UChar r1, UChar r2, Bool is_cu12) /* We're processing the 2nd operand 1 byte at a time. Therefore, if there is less than 1 byte left, then the 2nd operand is exhausted and we're done here. cc = 0 */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len2), mkU64(1))); /* There is at least one byte there. Read it. */ @@ -15013,7 +15408,7 @@ s390_irgen_cu12_cu14(UChar m3, UChar r1, UChar r2, Bool is_cu12) /* Check for invalid 1st byte */ IRExpr *is_invalid = unop(Iop_64to1, mkexpr(retval1)); - s390_cc_set(2); + s390_cc_set_val(2); next_insn_if(is_invalid); /* How many bytes do we have to read? 
*/ @@ -15021,7 +15416,7 @@ s390_irgen_cu12_cu14(UChar m3, UChar r1, UChar r2, Bool is_cu12) assign(num_src_bytes, binop(Iop_Shr64, mkexpr(retval1), mkU8(8))); /* Now test whether the 2nd operand is exhausted */ - s390_cc_set(0); + s390_cc_set_val(0); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len2), mkexpr(num_src_bytes))); /* Read the remaining bytes */ @@ -15054,7 +15449,7 @@ s390_irgen_cu12_cu14(UChar m3, UChar r1, UChar r2, Bool is_cu12) } /* Check for invalid character */ - s390_cc_set(2); + s390_cc_set_val(2); is_invalid = unop(Iop_64to1, mkexpr(retval2)); next_insn_if(is_invalid); @@ -15063,7 +15458,7 @@ s390_irgen_cu12_cu14(UChar m3, UChar r1, UChar r2, Bool is_cu12) assign(num_bytes, binop(Iop_And64, binop(Iop_Shr64, mkexpr(retval2), mkU8(8)), mkU64(0xff))); - s390_cc_set(1); + s390_cc_set_val(1); next_insn_if(binop(Iop_CmpLT64U, mkexpr(len1), mkexpr(num_bytes))); /* Extract the bytes to be stored at addr1 */ @@ -15409,8 +15804,7 @@ s390_irgen_VLM(UChar v1, IRTemp op2addr, UChar v3) static const HChar * s390_irgen_VLVGP(UChar v1, UChar r2, UChar r3) { - put_vr_dw0(v1, get_gpr_dw0(r2)); - put_vr_dw1(v1, get_gpr_dw0(r3)); + put_vr_qw(v1, binop(Iop_64HLtoV128, get_gpr_dw0(r2), get_gpr_dw0(r3))); return "vlvgp"; } @@ -15449,28 +15843,10 @@ s390_irgen_VLVG(UChar v1, IRTemp op2addr, UChar r3, UChar m4) static const HChar * s390_irgen_VMRH(UChar v1, UChar v2, UChar v3, UChar m4) { - IRType type = s390_vr_get_type(m4); - switch (type) { - case Ity_I8: - put_vr_qw(v1, binop(Iop_InterleaveLO8x16, get_vr_qw(v2), get_vr_qw(v3))); - break; - - case Ity_I16: - put_vr_qw(v1, binop(Iop_InterleaveLO16x8, get_vr_qw(v2), get_vr_qw(v3))); - break; - - case Ity_I32: - put_vr_qw(v1, binop(Iop_InterleaveLO32x4, get_vr_qw(v2), get_vr_qw(v3))); - break; - - case Ity_I64: - put_vr_qw(v1, binop(Iop_InterleaveLO64x2, get_vr_qw(v2), get_vr_qw(v3))); - break; - - default: - ppIRType(type); - vpanic("s390_irgen_VMRH: unknown type"); - } + const IROp ops[] = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, + Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); return "vmrh"; } @@ -15478,28 +15854,10 @@ s390_irgen_VMRH(UChar v1, UChar v2, UChar v3, UChar m4) static const HChar * s390_irgen_VMRL(UChar v1, UChar v2, UChar v3, UChar m4) { - IRType type = s390_vr_get_type(m4); - switch (type) { - case Ity_I8: - put_vr_qw(v1, binop(Iop_InterleaveHI8x16, get_vr_qw(v2), get_vr_qw(v3))); - break; - - case Ity_I16: - put_vr_qw(v1, binop(Iop_InterleaveHI16x8, get_vr_qw(v2), get_vr_qw(v3))); - break; - - case Ity_I32: - put_vr_qw(v1, binop(Iop_InterleaveHI32x4, get_vr_qw(v2), get_vr_qw(v3))); - break; - - case Ity_I64: - put_vr_qw(v1, binop(Iop_InterleaveHI64x2, get_vr_qw(v2), get_vr_qw(v3))); - break; - - default: - ppIRType(type); - vpanic("s390_irgen_VMRL: unknown type"); - } + const IROp ops[] = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, + Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); return "vmrl"; } @@ -15507,25 +15865,11 @@ s390_irgen_VMRL(UChar v1, UChar v2, UChar v3, UChar m4) static const HChar * s390_irgen_VPK(UChar v1, UChar v2, UChar v3, UChar m4) { - IRType type = s390_vr_get_type(m4); - IRExpr* result = NULL; - switch(type) { - case Ity_I16: - result = binop(Iop_NarrowBin16to8x16, get_vr_qw(v2), get_vr_qw(v3)); - break; - case Ity_I32: - result = binop(Iop_NarrowBin32to16x8, get_vr_qw(v2), get_vr_qw(v3)); 
- break; - case Ity_I64: - result = binop(Iop_NarrowBin64to32x4, get_vr_qw(v2), get_vr_qw(v3)); - break; - default: - ppIRType(type); - vpanic("s390_irgen_VPK: unknown type"); - } - - put_vr_qw(v1, result); - + const IROp ops[] = { Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8, + Iop_NarrowBin64to32x4 }; + Char index = m4 - 1; + vassert((index >= 0) && (index < sizeof(ops) / sizeof(ops[0]))); + put_vr_qw(v1, binop(ops[index], get_vr_qw(v2), get_vr_qw(v3))); return "vpk"; } @@ -15648,21 +15992,9 @@ s390_irgen_VSTM(UChar v1, IRTemp op2addr, UChar v3) static const HChar * s390_irgen_VUPH(UChar v1, UChar v2, UChar m3) { - IRType type = s390_vr_get_type(m3); - switch (type) { - case Ity_I8: - put_vr_qw(v1, unop(Iop_Widen8Sto16x8, get_vr_dw0(v2))); - break; - case Ity_I16: - put_vr_qw(v1, unop(Iop_Widen16Sto32x4, get_vr_dw0(v2))); - break; - case Ity_I32: - put_vr_qw(v1, unop(Iop_Widen32Sto64x2, get_vr_dw0(v2))); - break; - default: - ppIRType(type); - vpanic("s390_irgen_VUPH: unknown type"); - } + const IROp ops[] = { Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2 }; + vassert(m3 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, unop(ops[m3], get_vr_dw0(v2))); return "vuph"; } @@ -15670,43 +16002,18 @@ s390_irgen_VUPH(UChar v1, UChar v2, UChar m3) static const HChar * s390_irgen_VUPLH(UChar v1, UChar v2, UChar m3) { - IRType type = s390_vr_get_type(m3); - switch (type) { - case Ity_I8: - put_vr_qw(v1, unop(Iop_Widen8Uto16x8, get_vr_dw0(v2))); - break; - case Ity_I16: - put_vr_qw(v1, unop(Iop_Widen16Uto32x4, get_vr_dw0(v2))); - break; - case Ity_I32: - put_vr_qw(v1, unop(Iop_Widen32Uto64x2, get_vr_dw0(v2))); - break; - default: - ppIRType(type); - vpanic("s390_irgen_VUPLH: unknown type"); - } - + const IROp ops[] = { Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2 }; + vassert(m3 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, unop(ops[m3], get_vr_dw0(v2))); return "vuplh"; } static const HChar * s390_irgen_VUPL(UChar v1, UChar v2, UChar m3) { - IRType type = s390_vr_get_type(m3); - switch (type) { - case Ity_I8: - put_vr_qw(v1, unop(Iop_Widen8Sto16x8, get_vr_dw1(v2))); - break; - case Ity_I16: - put_vr_qw(v1, unop(Iop_Widen16Sto32x4, get_vr_dw1(v2))); - break; - case Ity_I32: - put_vr_qw(v1, unop(Iop_Widen32Sto64x2, get_vr_dw1(v2))); - break; - default: - ppIRType(type); - vpanic("s390_irgen_VUPL: unknown type"); - } + const IROp ops[] = { Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2 }; + vassert(m3 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, unop(ops[m3], get_vr_dw1(v2))); return "vupl"; } @@ -15714,21 +16021,9 @@ s390_irgen_VUPL(UChar v1, UChar v2, UChar m3) static const HChar * s390_irgen_VUPLL(UChar v1, UChar v2, UChar m3) { - IRType type = s390_vr_get_type(m3); - switch (type) { - case Ity_I8: - put_vr_qw(v1, unop(Iop_Widen8Uto16x8, get_vr_dw1(v2))); - break; - case Ity_I16: - put_vr_qw(v1, unop(Iop_Widen16Uto32x4, get_vr_dw1(v2))); - break; - case Ity_I32: - put_vr_qw(v1, unop(Iop_Widen32Uto64x2, get_vr_dw1(v2))); - break; - default: - ppIRType(type); - vpanic("s390_irgen_VUPLL: unknown type"); - } + const IROp ops[] = { Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2 }; + vassert(m3 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, unop(ops[m3], get_vr_dw1(v2))); return "vupll"; } @@ -15773,33 +16068,31 @@ s390_irgen_VREPI(UChar v1, UShort i2, UChar m3) static const HChar * s390_irgen_VPKS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) { - IRType type = s390_vr_get_type(m4); - IRExpr* result = NULL; - - switch(type) { - case Ity_I16: - 
result = binop(Iop_QNarrowBin16Sto8Sx16, get_vr_qw(v2), get_vr_qw(v3)); - break; - case Ity_I32: - result = binop(Iop_QNarrowBin32Sto16Sx8, get_vr_qw(v2), get_vr_qw(v3)); - break; - case Ity_I64: - result = binop(Iop_QNarrowBin64Sto32Sx4, get_vr_qw(v2), get_vr_qw(v3)); - break; - default: - ppIRType(type); - vpanic("s390_irgen_VPKS: unknown type"); - } + if (!s390_vr_is_cs_set(m5)) { + const IROp ops[] = { Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8, + Iop_QNarrowBin64Sto32Sx4 }; + Char index = m4 - 1; + vassert((index >= 0) && (index < sizeof(ops) / sizeof(ops[0]))); + put_vr_qw(v1, binop(ops[index], get_vr_qw(v2), get_vr_qw(v3))); - if((m5 & 0x1) != 0) { + } else { IRDirty* d; IRTemp cc = newTemp(Ity_I64); - ULong opcode = s390x_cc_vec_opcode(S390_CC_VEC_VPKS, m4); - d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_binop", - &s390x_dirtyhelper_vec_binop, - mkIRExprVec_4(IRExpr_GSPTR(), mkU64(opcode), - mkU64(v2), mkU64(v3))); - d->nFxState = 2; + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VPKS; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 3; vex_bzero(&d->fxState, sizeof(d->fxState)); d->fxState[0].fx = Ifx_Read; d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); @@ -15807,45 +16100,45 @@ s390_irgen_VPKS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) d->fxState[1].fx = Ifx_Read; d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); stmt(IRStmt_Dirty(d)); - s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), - mkexpr(cc), mkU64(0), mkU64(0)); + s390_cc_set(cc); } - put_vr_qw(v1, result); return "vpks"; } static const HChar * s390_irgen_VPKLS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) { - IRType type = s390_vr_get_type(m4); - IRExpr* result = NULL; - switch(type) { - case Ity_I16: - result = binop(Iop_QNarrowBin16Uto8Ux16, get_vr_qw(v2), get_vr_qw(v3)); - break; - case Ity_I32: - result = binop(Iop_QNarrowBin32Uto16Ux8, get_vr_qw(v2), get_vr_qw(v3)); - break; - case Ity_I64: - result = binop(Iop_QNarrowBin64Uto32Ux4, get_vr_qw(v2), get_vr_qw(v3)); - break; - default: - ppIRType(type); - vpanic("s390_irgen_VPKLS: unknown type"); - } + if (!s390_vr_is_cs_set(m5)) { + const IROp ops[] = { Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8, + Iop_QNarrowBin64Uto32Ux4 }; + Char index = m4 - 1; + vassert((index >= 0) && (index < sizeof(ops) / sizeof(ops[0]))); + put_vr_qw(v1, binop(ops[index], get_vr_qw(v2), get_vr_qw(v3))); - if((m5 & 0x1) != 0) { + } else { IRDirty* d; IRTemp cc = newTemp(Ity_I64); - ULong opcode = s390x_cc_vec_opcode(S390_CC_VEC_VPKLS, m4); - d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_binop", - &s390x_dirtyhelper_vec_binop, - mkIRExprVec_4(IRExpr_GSPTR(), mkU64(opcode), - mkU64(v2), mkU64(v3))); - d->nFxState = 2; + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VPKLS; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 3; 
vex_bzero(&d->fxState, sizeof(d->fxState));
    d->fxState[0].fx     = Ifx_Read;
    d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
    d->fxState[0].size   = sizeof(V128);
    d->fxState[1].fx     = Ifx_Read;
    d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
    d->fxState[1].size   = sizeof(V128);
+   d->fxState[2].fx     = Ifx_Write;
+   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[2].size   = sizeof(V128);
 
    stmt(IRStmt_Dirty(d));
 
-      s390_cc_thunk_fill(mkU64(S390_CC_OP_SET),
-                         mkexpr(cc), mkU64(0), mkU64(0));
+      s390_cc_set(cc);
    }
 
-   put_vr_qw(v1, result);
    return "vpkls";
 }
 
 static const HChar *
 s390_irgen_VSEL(UChar v1, UChar v2, UChar v3, UChar v4)
 {
-   IRExpr* vA = get_vr_qw(v3);
-   IRExpr* vB = get_vr_qw(v2);
-   IRExpr* vC = get_vr_qw(v4);
-
-   /* result = (vA & ~vC) | (vB & vC) */
-   put_vr_qw(v1,
-             binop(Iop_OrV128,
-                   binop(Iop_AndV128, vA, unop(Iop_NotV128, vC)),
-                   binop(Iop_AndV128, vB, vC)
-                   )
-             );
+   IRExpr* vIfTrue  = get_vr_qw(v2);
+   IRExpr* vIfFalse = get_vr_qw(v3);
+   IRExpr* vCond    = get_vr_qw(v4);
+
+   put_vr_qw(v1, s390_V128_bitwiseITE(vCond, vIfTrue, vIfFalse));
    return "vsel";
 }
@@ -16021,229 +16309,1595 @@ s390_irgen_LOCHHI(UChar r1, UChar m3, UShort i2, UChar unused)
    next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
    put_gpr_w0(r1, mkU32(i2));
 
-   return "lochhi";
+   return "lochhi";
+}
+
+static const HChar *
+s390_irgen_LOCHI(UChar r1, UChar m3, UShort i2, UChar unused)
+{
+   next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
+   put_gpr_w1(r1, mkU32(i2));
+
+   return "lochi";
+}
+
+static const HChar *
+s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused)
+{
+   next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
+   put_gpr_dw0(r1, mkU64(i2));
+
+   return "locghi";
+}
+
+static const HChar *
+s390_irgen_STOCFH(UChar r1, IRTemp op2addr)
+{
+   /* condition is checked in format handler */
+   store(mkexpr(op2addr), get_gpr_w1(r1));
+
+   return "stocfh";
+}
+
+static const HChar *
+s390_irgen_LCBB(UChar r1, IRTemp op2addr, UChar m3)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   assign(op2, s390_getCountToBlockBoundary(op2addr, m3));
+   put_gpr_w1(r1, mkexpr(op2));
+
+   IRExpr* cc = mkite(binop(Iop_CmpEQ32, mkexpr(op2), mkU32(16)),
+                      mkU64(0), mkU64(3));
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), cc, mkU64(0), mkU64(0));
+
+   return "lcbb";
+}
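[A scalar model, not part of the patch, of what LCBB computes above: the
number of bytes from the given address up to the next block boundary, capped
at 16.  The mapping of m3 to a block size of 64 << m3 bytes is an assumption
based on the z/Architecture description; in the patch it is hidden inside
s390_getCountToBlockBoundary():

   #include <stdint.h>

   static unsigned lcbb_count(uint64_t addr, unsigned m3)
   {
      uint64_t block = 64u << m3;                   /* assumed block size */
      uint64_t to_boundary = block - (addr & (block - 1));
      return to_boundary < 16 ? (unsigned) to_boundary : 16;
   }

The cc logic above matches this model: cc = 0 when a full 16 bytes fit before
the boundary, cc = 3 otherwise.]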
+
+/* Regarding the use of
+
+   // Dummy helper which is used to signal VEX library that memory was loaded
+   sha512_loadparam
+     = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_sha512_load_param_block",
+                         &s390x_dirtyhelper_PPNO_sha512_load_param_block,
+                         mkIRExprVec_0());
+
+   in the following function (s390_irgen_PPNO).  This is a workaround to get
+   around the fact that IRDirty annotations cannot indicate two memory side
+   effects, which are unfortunately necessary here.  It will possibly lead to
+   losing undefinedness (undefinedness in some inputs might not be propagated
+   to the outputs as it should, in Memcheck).  The correct fix would be to
+   extend IRDirty to represent two memory side effects, but that's quite a
+   bit of work.
+
+   Here's a summary of what this insn does.
+
+   // getReg(RegisterNumber n) returns the value of GPR number 'n'
+
+   // reg1 and reg2 are even
+   void ppno(RegisterNumber reg1, RegisterNumber reg2) {
+
+       switch(getReg(0)) {
+       case 0x0:
+           // Query mode, ignore reg1 and reg2
+           // Write 16 bytes at  getReg(1)
+           break;
+
+       case 0x3:
+           // SHA-512 generate mode, ignore reg2
+
+           // Read 240 bytes at  getReg(1)
+           // Write getReg(reg1 + 1) bytes at  getReg(reg1)
+           // Write some of 240 bytes starting at  getReg(1)
+           break;
+
+       case 0x83:
+           // SHA-512 seed mode, ignore reg1
+
+           // Read some of 240 bytes starting at  getReg(1)
+           // Read getReg(reg2 + 1) bytes at  getReg(reg2)
+           // Write 240 bytes at  getReg(1)
+           break;
+
+       default:
+           // Specification exception, abort execution.
+       }
+   }
+*/
+/* Also known as "prno".
+   If you implement new function codes, please don't forget to update the
+   "s390x_dirtyhelper_PPNO_query" function.
+ */
+static const HChar *
+s390_irgen_PPNO(UChar r1, UChar r2)
+{
+   if (!s390_host_has_msa5) {
+      emulation_failure(EmFail_S390X_ppno);
+      return "ppno";
+   }
+
+   /* These conditions lead to a specification exception */
+   vassert(r1 % 2 == 0);
+   vassert(r2 % 2 == 0);
+   vassert((r1 != 0) && (r2 != 0));
+
+   IRDirty *query, *sha512_gen, *sha512_seed, *sha512_loadparam;
+   IRTemp gpr1num = newTemp(Ity_I64);
+   IRTemp gpr2num = newTemp(Ity_I64);
+
+   IRTemp funcCode = newTemp(Ity_I8);
+   IRTemp is_query = newTemp(Ity_I1);
+   IRTemp is_sha512_gen = newTemp(Ity_I1);
+   IRTemp is_sha512_seed = newTemp(Ity_I1);
+   IRTemp is_sha512 = newTemp(Ity_I1);
+
+   assign(funcCode, unop(Iop_64to8, binop(Iop_And64, get_gpr_dw0(0),
+                                          mkU64(0xffULL))));
+   assign(gpr1num, mkU64(r1));
+   assign(gpr2num, mkU64(r2));
+
+   assign(is_query, binop(Iop_CmpEQ8, mkexpr(funcCode), mkU8(S390_PPNO_QUERY)));
+   assign(is_sha512_gen, binop(Iop_CmpEQ8, mkexpr(funcCode),
+                               mkU8(S390_PPNO_SHA512_GEN)));
+   assign(is_sha512_seed, binop(Iop_CmpEQ8, mkexpr(funcCode),
+                                mkU8(S390_PPNO_SHA512_SEED)));
+   assign(is_sha512, binop(Iop_CmpEQ8,
+                           mkU8(S390_PPNO_SHA512_GEN),
+                           binop(Iop_And8,
+                                 mkexpr(funcCode),
+                                 mkU8(S390_PPNO_SHA512_GEN)
+                                 )
+                           ));
+
+   query = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_query",
+                             &s390x_dirtyhelper_PPNO_query,
+                             mkIRExprVec_3(IRExpr_GSPTR(), mkexpr(gpr1num),
+                                           mkexpr(gpr2num)));
+   query->guard = mkexpr(is_query);
+   query->nFxState = 1;
+   vex_bzero(&query->fxState, sizeof(query->fxState));
+   query->fxState[0].fx     = Ifx_Read;
+   query->fxState[0].offset = S390X_GUEST_OFFSET(guest_r0);
+   query->fxState[0].size   = 2 * sizeof(ULong); /* gpr0 and gpr1 are read */
+   query->mAddr = get_gpr_dw0(1);
+   query->mSize = S390_PPNO_PARAM_BLOCK_SIZE_QUERY;
+   query->mFx   = Ifx_Write;
+
+   IRTemp gen_cc = newTemp(Ity_I64);
+   sha512_gen = unsafeIRDirty_1_N(gen_cc, 0, "s390x_dirtyhelper_PPNO_sha512",
+                                  &s390x_dirtyhelper_PPNO_sha512,
+                                  mkIRExprVec_3(IRExpr_GSPTR(), mkexpr(gpr1num),
+                                                mkexpr(gpr2num)));
+   sha512_gen->guard = mkexpr(is_sha512_gen);
+   sha512_gen->nFxState = 3;
+   vex_bzero(&sha512_gen->fxState, sizeof(sha512_gen->fxState));
+   sha512_gen->fxState[0].fx     = Ifx_Read;
+   sha512_gen->fxState[0].offset = S390X_GUEST_OFFSET(guest_r0);
+   sha512_gen->fxState[0].size   = 2 * sizeof(ULong); /* gpr0 and gpr1 are read */
+   sha512_gen->fxState[1].fx     = Ifx_Read;
+   sha512_gen->fxState[1].offset = S390X_GUEST_OFFSET(guest_r0) + r1 * sizeof(ULong);
+   sha512_gen->fxState[1].size   = sizeof(ULong);
+   sha512_gen->fxState[2].fx     = Ifx_Modify;
+   sha512_gen->fxState[2].offset = S390X_GUEST_OFFSET(guest_r0) + (r1 + 1) * sizeof(ULong);
+   sha512_gen->fxState[2].size   = sizeof(ULong);
+   sha512_gen->mAddr =
get_gpr_dw0(r1); + sha512_gen->mSize = S390_PPNO_MAX_SIZE_SHA512_GEN; + sha512_gen->mFx = Ifx_Write; + + IRTemp unused = newTemp(Ity_I64); + sha512_seed = unsafeIRDirty_1_N(unused, 0, "s390x_dirtyhelper_PPNO_sha512", + &s390x_dirtyhelper_PPNO_sha512, + mkIRExprVec_3(IRExpr_GSPTR(), mkexpr(gpr1num), + mkexpr(gpr2num))); + sha512_seed->guard = mkexpr(is_sha512_seed); + sha512_seed->nFxState = 2; + vex_bzero(&sha512_seed->fxState, sizeof(sha512_seed->fxState)); + sha512_seed->fxState[0].fx = Ifx_Read; + sha512_seed->fxState[0].offset = S390X_GUEST_OFFSET(guest_r0); + sha512_seed->fxState[0].size = 2 * sizeof(ULong); /* gpr0 and gpr1 are read */ + sha512_seed->fxState[1].fx = Ifx_Read; + sha512_seed->fxState[1].offset = S390X_GUEST_OFFSET(guest_r0) + r2 * sizeof(ULong); + sha512_seed->fxState[1].size = 2 * sizeof(ULong); /* r2 and r2 + 1 are read */ + sha512_seed->mAddr = get_gpr_dw0(r2); + sha512_seed->mSize = S390_PPNO_MAX_SIZE_SHA512_SEED; + sha512_seed->mFx = Ifx_Write; + + /* Dummy helper which is used to signal VEX library that memory was loaded */ + sha512_loadparam = + unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_sha512_load_param_block", + &s390x_dirtyhelper_PPNO_sha512_load_param_block, + mkIRExprVec_0()); + sha512_loadparam->guard = mkexpr(is_sha512); + sha512_loadparam->nFxState = 0; + vex_bzero(&sha512_loadparam->fxState, sizeof(sha512_loadparam->fxState)); + sha512_loadparam->mAddr = get_gpr_dw0(1); + sha512_loadparam->mSize = S390_PPNO_PARAM_BLOCK_SIZE_SHA512; + sha512_loadparam->mFx = Ifx_Read; + + IRDirty* sha512_saveparam = + unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_sha512_save_param_block", + &s390x_dirtyhelper_PPNO_sha512_load_param_block, + mkIRExprVec_0()); + sha512_saveparam->guard = mkexpr(is_sha512); + sha512_saveparam->nFxState = 0; + vex_bzero(&sha512_saveparam->fxState, sizeof(sha512_saveparam->fxState)); + sha512_saveparam->mAddr = get_gpr_dw0(1); + sha512_saveparam->mSize = S390_PPNO_PARAM_BLOCK_SIZE_SHA512; + sha512_saveparam->mFx = Ifx_Write; + + stmt(IRStmt_Dirty(query)); + stmt(IRStmt_Dirty(sha512_loadparam)); + stmt(IRStmt_Dirty(sha512_gen)); + stmt(IRStmt_Dirty(sha512_seed)); + stmt(IRStmt_Dirty(sha512_saveparam)); + + IRTemp cc = newTemp(Ity_I64); + assign(cc, + mkite(mkexpr(is_sha512_gen), + mkexpr(gen_cc), + mkU64(0) + ) + ); + + s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), mkexpr(cc), mkU64(0), mkU64(0)); + + return "ppno"; +} + +static const HChar * +s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + /* Check for specification exception */ + vassert(m4 < 3); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFAE; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + + if (s390_vr_is_cs_set(m5)) { + s390_cc_set(cc); + } + + return "vfae"; +} + 
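[All of these string irgens hand the dirty helper a single ULong built via
s390x_vec_op_details_t.  A self-contained round-trip of that convention, not
part of the patch; 'details_t' redeclares the union locally and relies, like
the original, on both sides being compiled with the same bit-field layout:

   #include <assert.h>
   #include <stdint.h>

   typedef union {
      struct {
         unsigned int op : 8;
         unsigned int v1 : 5, v2 : 5, v3 : 5, v4 : 5;
         unsigned int m4 : 4, m5 : 4;
         unsigned int read_only : 1;
         unsigned int reserved : 27;
      };
      uint64_t serialized;
   } details_t;

   int main(void)
   {
      details_t d = { .serialized = 0 };
      d.op = 3;   /* S390_VEC_OP_VFAE */
      d.v1 = 1; d.v2 = 2; d.v3 = 3; d.m5 = 4;
      uint64_t wire = d.serialized;  /* what mkU64(details.serialized) passes */
      details_t u = { .serialized = wire };
      assert(u.op == 3 && u.v1 == 1 && u.m5 == 4);  /* the helper's view */
      return 0;
   }
]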
+static const HChar *
+s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+{
+   IRDirty* d;
+   IRTemp cc = newTemp(Ity_I64);
+
+   /* Check for specification exception */
+   vassert(m4 < 3);
+   vassert((m5 & 0b1100) == 0);
+
+   s390x_vec_op_details_t details = { .serialized = 0ULL };
+   details.op = S390_VEC_OP_VFEE;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.v3 = v3;
+   details.m4 = m4;
+   details.m5 = m5;
+
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+                         &s390x_dirtyhelper_vec_op,
+                         mkIRExprVec_2(IRExpr_GSPTR(),
+                                       mkU64(details.serialized)));
+
+   d->nFxState = 3;
+   vex_bzero(&d->fxState, sizeof(d->fxState));
+   d->fxState[0].fx     = Ifx_Read;
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+   d->fxState[0].size   = sizeof(V128);
+   d->fxState[1].fx     = Ifx_Read;
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+   d->fxState[1].size   = sizeof(V128);
+   d->fxState[2].fx     = Ifx_Write;
+   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[2].size   = sizeof(V128);
+
+   stmt(IRStmt_Dirty(d));
+
+   if (s390_vr_is_cs_set(m5)) {
+      s390_cc_set(cc);
+   }
+
+   return "vfee";
+}
+
+static const HChar *
+s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+{
+   const Bool negateComparison = True;
+   const IRType type = s390_vr_get_type(m4);
+
+   /* Check for specification exception */
+   vassert(m4 < 3);
+   vassert((m5 & 0b1100) == 0);
+
+   static const IROp elementGetters[] = {
+      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4
+   };
+   IROp getter = elementGetters[m4];
+
+   static const IROp elementComparators[] = {
+      Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32
+   };
+   IROp comparator = elementComparators[m4];
+
+   static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32};
+   IROp converter = resultConverter[m4];
+
+   IRTemp isZeroElem;
+
+   IRTemp counter = newTemp(Ity_I64);
+   assign(counter, get_counter_dw0());
+
+   IRTemp arg1 = newTemp(type);
+   assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter))));
+   IRTemp arg2 = newTemp(type);
+   assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter))));
+
+   IRTemp isGoodPair = newTemp(Ity_I1);
+   if(negateComparison) {
+      assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1),
+                                              mkexpr(arg2))));
+   } else {
+      assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2)));
+   }
+
+   if(s390_vr_is_zs_set(m5)) {
+      isZeroElem = newTemp(Ity_I1);
+      assign(isZeroElem, binop(comparator, mkexpr(arg1),
+                               unop(converter, mkU64(0))));
+   }
+
+   static const UChar invalidIndices[] = {16, 8, 4};
+   const UChar invalidIndex = invalidIndices[m4];
+   IRTemp endOfVectorIsReached = newTemp(Ity_I1);
+   assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter),
+                                      mkU64(invalidIndex)));
+
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   IRExpr* shouldBreak = binop(Iop_Or32,
+                               unop(Iop_1Uto32, mkexpr(isGoodPair)),
+                               unop(Iop_1Uto32, mkexpr(endOfVectorIsReached))
+                               );
+   if(s390_vr_is_zs_set(m5)) {
+      shouldBreak = binop(Iop_Or32,
+                          shouldBreak,
+                          unop(Iop_1Uto32, mkexpr(isZeroElem)));
+   }
+   iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0)));
+
+   IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1));
+   if(m4 > 0) {
+      /* We have to return the index of a byte, but the loop above found
+         the index of an element; in general the two differ:
+ if byte elem (m4 == 0) then indexOfByte = indexOfElement + if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement + = indexOfElement << 1 + if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement + = indexOfElement << 2 + */ + foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4)); + } + + IRTemp result = newTemp(Ity_I64); + assign(result, mkite(mkexpr(endOfVectorIsReached), + mkU64(16), + foundIndex)); + put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0))); + + + if (s390_vr_is_cs_set(m5)) { + static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64}; + IROp to64Converter = to64Converters[m4]; + + IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U, + unop(to64Converter, mkexpr(arg1)), + unop(to64Converter, mkexpr(arg2))); + + IRExpr* ccexp = mkite(binop(Iop_CmpEQ32, + unop(Iop_1Uto32, mkexpr(isGoodPair)), + mkU32(1)), + mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)), + mkU64(3)); + + if(s390_vr_is_zs_set(m5)) { + IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2), + unop(converter, mkU64(0))); + IRExpr* bothArgsAreZero = binop(Iop_And32, + unop(Iop_1Uto32, mkexpr(isZeroElem)), + unop(Iop_1Uto32, arg2IsZero)); + ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)), + mkU64(0), + ccexp); + } + IRTemp cc = newTemp(Ity_I64); + assign(cc, ccexp); + + s390_cc_set(cc); + } + + + put_counter_dw0(mkU64(0)); + return "vfene"; +} + +static const HChar * +s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5) +{ + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + /* Check for specification exception */ + vassert(m3 < 3); + vassert((m5 & 0b1110) == 0); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VISTR; + details.v1 = v1; + details.v2 = v2; + details.m4 = m3; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + + if (s390_vr_is_cs_set(m5)) { + s390_cc_set(cc); + } + + return "vistr"; +} + +static const HChar * +s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + /* Check for specification exception */ + vassert(m5 < 3); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VSTRC; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.v4 = v4; + details.m4 = m5; + details.m5 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 4; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) 
+ v1 * sizeof(V128); + d->fxState[3].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + + if (s390_vr_is_cs_set(m6)) { + s390_cc_set(cc); + } + + return "vstrc"; +} + +static const HChar * +s390_irgen_VNC(UChar v1, UChar v2, UChar v3) +{ + put_vr_qw(v1, binop(Iop_AndV128, + get_vr_qw(v2), unop(Iop_NotV128, get_vr_qw(v3))) + ); + + return "vnc"; +} + +static const HChar * +s390_irgen_VA(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, + Iop_Add64x2, Iop_Add128x1 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "va"; +} + +static const HChar * +s390_irgen_VS(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, + Iop_Sub64x2, Iop_Sub128x1 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vs"; +} + +static const HChar * +s390_irgen_VMX(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vmx"; +} + +static const HChar * +s390_irgen_VMXL(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vmxl"; +} + +static const HChar * +s390_irgen_VMN(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vmn"; +} + +static const HChar * +s390_irgen_VMNL(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vmnl"; +} + +static const HChar * +s390_irgen_VAVG(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, Iop_Avg64Sx2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vavg"; +} + +static const HChar * +s390_irgen_VAVGL(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, Iop_Avg64Ux2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vavgl"; +} + +static const HChar * +s390_irgen_VLC(UChar v1, UChar v2, UChar m3) +{ + vassert(m3 < 4); + IRType type = s390_vr_get_type(m3); + put_vr_qw(v1, s390_V128_get_complement(get_vr_qw(v2), type)); + return "vlc"; +} + +static const HChar * +s390_irgen_VLP(UChar v1, UChar v2, UChar m3) +{ + const IROp ops[] = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 }; + vassert(m3 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, unop(ops[m3], get_vr_qw(v2))); + + return "vlp"; +} + +static const HChar * +s390_irgen_VCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (!s390_vr_is_cs_set(m5)) { + const IROp ops[] = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, + Iop_CmpGT64Sx2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + } else { + 
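/* When the CS bit of m5 is set, VCH also has to deliver a
+         condition code, which the pure IR comparison above does not
+         produce, so the work is delegated to s390x_dirtyhelper_vec_op
+         and the cc it returns is committed below. */
+      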
IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCH; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vch"; +} + +static const HChar * +s390_irgen_VCHL(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (!s390_vr_is_cs_set(m5)) { + const IROp ops[] = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, + Iop_CmpGT64Ux2 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + } else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCHL; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vchl"; +} + +static const HChar * +s390_irgen_VCLZ(UChar v1, UChar v2, UChar m3) +{ + const IROp ops[] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4, Iop_Clz64x2 }; + vassert(m3 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, unop(ops[m3], get_vr_qw(v2))); + + return "vclz"; +} + +static const HChar * +s390_irgen_VCTZ(UChar v1, UChar v2, UChar m3) +{ + const IROp ops[] = { Iop_Ctz8x16, Iop_Ctz16x8, Iop_Ctz32x4, Iop_Ctz64x2 }; + vassert(m3 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, unop(ops[m3], get_vr_qw(v2))); + + return "vctz"; +} + +static const HChar * +s390_irgen_VPOPCT(UChar v1, UChar v2, UChar m3) +{ + vassert(m3 == 0); + + put_vr_qw(v1, unop(Iop_Cnt8x16, get_vr_qw(v2))); + + return "vpopct"; +} + +static const HChar * +s390_irgen_VML(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vml"; +} + +static const HChar * +s390_irgen_VMLH(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_MulHi8Ux16, Iop_MulHi16Ux8, Iop_MulHi32Ux4 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3))); + + return "vmlh"; +} + +static const HChar * 
+s390_irgen_VMH(UChar v1, UChar v2, UChar v3, UChar m4)
+{
+   const IROp ops[] = { Iop_MulHi8Sx16, Iop_MulHi16Sx8, Iop_MulHi32Sx4 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   return "vmh";
+}
+
+static const HChar *
+s390_irgen_VME(UChar v1, UChar v2, UChar v3, UChar m4)
+{
+   const IROp ops[] = { Iop_MullEven8Sx16, Iop_MullEven16Sx8, Iop_MullEven32Sx4 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   return "vme";
+}
+
+static const HChar *
+s390_irgen_VMLE(UChar v1, UChar v2, UChar v3, UChar m4)
+{
+   const IROp ops[] = { Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven32Ux4 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   return "vmle";
+}
+
+static const HChar *
+s390_irgen_VESLV(UChar v1, UChar v2, UChar v3, UChar m4)
+{
+   const IROp ops[] = { Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   return "veslv";
+}
+
+static const HChar *
+s390_irgen_VESL(UChar v1, IRTemp op2addr, UChar v3, UChar m4)
+{
+   IRExpr* shift_amount = unop(Iop_64to8, mkexpr(op2addr));
+   const IROp ops[] = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v3), shift_amount));
+
+   return "vesl";
+}
+
+static const HChar *
+s390_irgen_VESRAV(UChar v1, UChar v2, UChar v3, UChar m4)
+{
+   const IROp ops[] = { Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   return "vesrav";
+}
+
+static const HChar *
+s390_irgen_VESRA(UChar v1, IRTemp op2addr, UChar v3, UChar m4)
+{
+   IRExpr* shift_amount = unop(Iop_64to8, mkexpr(op2addr));
+   const IROp ops[] = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v3), shift_amount));
+
+   return "vesra";
+}
+
+static const HChar *
+s390_irgen_VESRLV(UChar v1, UChar v2, UChar v3, UChar m4)
+{
+   const IROp ops[] = { Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   return "vesrlv";
+}
+
+static const HChar *
+s390_irgen_VESRL(UChar v1, IRTemp op2addr, UChar v3, UChar m4)
+{
+   IRExpr* shift_amount = unop(Iop_64to8, mkexpr(op2addr));
+   const IROp ops[] = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v3), shift_amount));
+
+   return "vesrl";
+}
+
+static const HChar *
+s390_irgen_VERLLV(UChar v1, UChar v2, UChar v3, UChar m4)
+{
+   const IROp ops[] = { Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   return "verllv";
+}
+
+static const HChar *
+s390_irgen_VERLL(UChar v1, IRTemp op2addr, UChar v3, UChar m4)
+{
+   /*
+      There are no Iop_RolN?x?? operations,
+      so we have to use the VECTOR x VECTOR variant.
+   */
+   IRExpr* shift_vector = unop(Iop_Dup8x16, unop(Iop_64to8, mkexpr(op2addr)));
+   const IROp ops[] = { Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2 };
+   vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+   put_vr_qw(v1, binop(ops[m4], get_vr_qw(v3), shift_vector));
+
+   return "verll";
+}
+
+static const HChar *
+s390_irgen_VSL(UChar v1, UChar v2, UChar v3)
+{
+   IRTemp shift_amount = newTemp(Ity_I8);
+   assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111)));
+
+   put_vr_qw(v1, binop(Iop_ShlV128, get_vr_qw(v2), mkexpr(shift_amount)));
+   return "vsl";
+}
+
+static const HChar *
+s390_irgen_VSRL(UChar v1, UChar v2, UChar v3)
+{
+   IRTemp shift_amount = newTemp(Ity_I8);
+   assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111)));
+
+   put_vr_qw(v1, binop(Iop_ShrV128, get_vr_qw(v2), mkexpr(shift_amount)));
+   return "vsrl";
+}
+
+static const HChar *
+s390_irgen_VSRA(UChar v1, UChar v2, UChar v3)
+{
+   IRTemp shift_amount = newTemp(Ity_I8);
+   assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111)));
+
+   put_vr_qw(v1, binop(Iop_SarV128, get_vr_qw(v2), mkexpr(shift_amount)));
+   return "vsra";
+}
+
+static const HChar *
+s390_irgen_VERIM(UChar v1, UChar v2, UChar v3, UChar i4, UChar m5)
+{
+   /*
+      There are no Iop_RolN?x?? operations,
+      so we have to use the VECTOR x VECTOR variant.
+   */
+   const IROp ops[] = { Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2 };
+   vassert(m5 < sizeof(ops) / sizeof(ops[0]));
+   IRExpr* shift_vector = unop(Iop_Dup8x16, mkU8(i4));
+   IRExpr* rotated_vector = binop(ops[m5], get_vr_qw(v2), shift_vector);
+
+   /* result = (result & ~mask) | (rotated_vector & mask) */
+   IRExpr* mask = get_vr_qw(v3);
+   IRExpr* result = get_vr_qw(v1);
+   put_vr_qw(v1, s390_V128_bitwiseITE(mask, rotated_vector, result));
+
+   return "verim";
+}
+
+static const HChar *
+s390_irgen_VEC(UChar v1, UChar v2, UChar m3)
+{
+   IRType type = s390_vr_get_type(m3);
+   IRTemp op1 = newTemp(type);
+   IRTemp op2 = newTemp(type);
+
+   switch(type) {
+   case Ity_I8:
+      assign(op1, get_vr_b7(v1));
+      assign(op2, get_vr_b7(v2));
+      break;
+   case Ity_I16:
+      assign(op1, get_vr_hw3(v1));
+      assign(op2, get_vr_hw3(v2));
+      break;
+   case Ity_I32:
+      assign(op1, get_vr_w1(v1));
+      assign(op2, get_vr_w1(v2));
+      break;
+   case Ity_I64:
+      assign(op1, get_vr_dw0(v1));
+      assign(op2, get_vr_dw0(v2));
+      break;
+   default:
+      vpanic("s390_irgen_VEC: unknown type");
+   }
+
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "vec";
+}
+
+static const HChar *
+s390_irgen_VECL(UChar v1, UChar v2, UChar m3)
+{
+   IRType type = s390_vr_get_type(m3);
+   IRTemp op1 = newTemp(type);
+   IRTemp op2 = newTemp(type);
+
+   switch(type) {
+   case Ity_I8:
+      assign(op1, get_vr_b7(v1));
+      assign(op2, get_vr_b7(v2));
+      break;
+   case Ity_I16:
+      assign(op1, get_vr_hw3(v1));
+      assign(op2, get_vr_hw3(v2));
+      break;
+   case Ity_I32:
+      assign(op1, get_vr_w1(v1));
+      assign(op2, get_vr_w1(v2));
+      break;
+   case Ity_I64:
+      assign(op1, get_vr_dw0(v1));
+      assign(op2, get_vr_dw0(v2));
+      break;
+   default:
+      vpanic("s390_irgen_VECL: unknown type");
+   }
+
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "vecl";
+}
+
+static const HChar *
+s390_irgen_VCEQ(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+{
+   if (!s390_vr_is_cs_set(m5)) {
+      const IROp ops[] = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4,
+                           Iop_CmpEQ64x2 };
+      vassert(m4 < sizeof(ops) / sizeof(ops[0]));
+      put_vr_qw(v1, binop(ops[m4], get_vr_qw(v2), get_vr_qw(v3)));
+
+   } else {
+      IRDirty* d;
+      IRTemp cc = newTemp(Ity_I64);
+
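+      /* Same scheme as VCH and VCHL: with the CS bit set, delegate to
+         the dirty helper, which also yields the condition code. */
+      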
s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VCEQ; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vceq"; +} + +static const HChar * +s390_irgen_VSLB(UChar v1, UChar v2, UChar v3) +{ + IRTemp shift_amount = newTemp(Ity_I8); + assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b01111000))); + + put_vr_qw(v1, binop(Iop_ShlV128, get_vr_qw(v2), mkexpr(shift_amount))); + return "vslb"; +} + +static const HChar * +s390_irgen_VSRLB(UChar v1, UChar v2, UChar v3) +{ + IRTemp shift_amount = newTemp(Ity_I8); + assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b01111000))); + + put_vr_qw(v1, binop(Iop_ShrV128, get_vr_qw(v2), mkexpr(shift_amount))); + return "vsrlb"; +} + +static const HChar * +s390_irgen_VSRAB(UChar v1, UChar v2, UChar v3) +{ + IRTemp shift_amount = newTemp(Ity_I8); + assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b01111000))); + + put_vr_qw(v1, binop(Iop_SarV128, get_vr_qw(v2), mkexpr(shift_amount))); + return "vsrab"; +} + +static const HChar * +s390_irgen_VSLDB(UChar v1, UChar v2, UChar v3, UChar i4) +{ + UChar imm = i4 & 0b00001111; + + if (imm == 0) + { + put_vr_qw(v1, get_vr_qw(v2)); + } + else if (imm == 16) + { + put_vr_qw(v1, get_vr_qw(v3)); + } + else + { + put_vr_qw(v1, + binop(Iop_OrV128, + binop(Iop_ShlV128, get_vr_qw(v2), mkU8(imm * 8)), + binop(Iop_ShrV128, get_vr_qw(v3), mkU8((16 - imm) * 8)) + ) + ); + } + + return "vsldb"; +} + +static const HChar * +s390_irgen_VMO(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_MullEven8Sx16, Iop_MullEven16Sx8, + Iop_MullEven32Sx4 }; + UChar shifts[] = { 8, 16, 32 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + IRExpr* result = binop(ops[m4], + binop(Iop_ShlV128, get_vr_qw(v2), mkU8(shifts[m4])), + binop(Iop_ShlV128, get_vr_qw(v3), mkU8(shifts[m4])) + ); + put_vr_qw(v1, result); + + return "vmo"; +} + +static const HChar * +s390_irgen_VMLO(UChar v1, UChar v2, UChar v3, UChar m4) +{ + const IROp ops[] = { Iop_MullEven8Ux16, Iop_MullEven16Ux8, + Iop_MullEven32Ux4 }; + UChar shifts[] = { 8, 16, 32 }; + vassert(m4 < sizeof(ops) / sizeof(ops[0])); + IRExpr* result = binop(ops[m4], + binop(Iop_ShlV128, get_vr_qw(v2), mkU8(shifts[m4])), + binop(Iop_ShlV128, get_vr_qw(v3), mkU8(shifts[m4])) + ); + put_vr_qw(v1, result); + + return "vmlo"; +} + +static const HChar * +s390_irgen_VMAE(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + const IROp mul_ops[] = { Iop_MullEven8Sx16, Iop_MullEven16Sx8, + Iop_MullEven32Sx4 }; + const IROp add_ops[] = { Iop_Add16x8, Iop_Add32x4, Iop_Add64x2}; + vassert(m5 < sizeof(mul_ops) / sizeof(mul_ops[0])); + + IRExpr* mul_result = binop(mul_ops[m5], get_vr_qw(v2), get_vr_qw(v3)); + IRExpr* result = binop(add_ops[m5], mul_result, get_vr_qw(v4)); + put_vr_qw(v1, result); + + 
return "vmae"; +} + +static const HChar * +s390_irgen_VMALE(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + const IROp mul_ops[] = { Iop_MullEven8Ux16, Iop_MullEven16Ux8, + Iop_MullEven32Ux4 }; + const IROp add_ops[] = { Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; + vassert(m5 < sizeof(mul_ops) / sizeof(mul_ops[0])); + + IRExpr* mul_result = binop(mul_ops[m5], get_vr_qw(v2), get_vr_qw(v3)); + IRExpr* result = binop(add_ops[m5], mul_result, get_vr_qw(v4)); + put_vr_qw(v1, result); + + return "vmale"; +} + +static const HChar * +s390_irgen_VMAO(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + const IROp mul_ops[] = { Iop_MullEven8Sx16, Iop_MullEven16Sx8, + Iop_MullEven32Sx4 }; + const IROp add_ops[] = { Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; + UChar shifts[] = { 8, 16, 32 }; + vassert(m5 < sizeof(mul_ops) / sizeof(mul_ops[0])); + + IRExpr* mul_result = + binop(mul_ops[m5], + binop(Iop_ShlV128, get_vr_qw(v2), mkU8(shifts[m5])), + binop(Iop_ShlV128, get_vr_qw(v3), mkU8(shifts[m5]))); + IRExpr* result = binop(add_ops[m5], mul_result, get_vr_qw(v4)); + put_vr_qw(v1, result); + + return "vmao"; +} + +static const HChar * +s390_irgen_VMALO(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + const IROp mul_ops[] = { Iop_MullEven8Ux16, Iop_MullEven16Ux8, + Iop_MullEven32Ux4 }; + const IROp add_ops[] = { Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 }; + UChar shifts[] = { 8, 16, 32 }; + vassert(m5 < sizeof(mul_ops) / sizeof(mul_ops[0])); + + IRExpr* mul_result = binop(mul_ops[m5], + binop(Iop_ShlV128, + get_vr_qw(v2), mkU8(shifts[m5])), + binop(Iop_ShlV128, + get_vr_qw(v3), mkU8(shifts[m5])) + ); + + IRExpr* result = binop(add_ops[m5], mul_result, get_vr_qw(v4)); + put_vr_qw(v1, result); + + return "vmalo"; +} + +static const HChar * +s390_irgen_VMAL(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + const IROp mul_ops[] = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4 }; + const IROp add_ops[] = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 }; + vassert(m5 < sizeof(mul_ops) / sizeof(mul_ops[0])); + + IRExpr* mul_result = binop(mul_ops[m5], get_vr_qw(v2), get_vr_qw(v3)); + IRExpr* result = binop(add_ops[m5], mul_result, get_vr_qw(v4)); + put_vr_qw(v1, result); + + return "vmal"; +} + +static const HChar * +s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4) +{ + IRType type = s390_vr_get_type(m4); + IRExpr* mask; + IRExpr* sum; + switch(type) { + case Ity_I8: + sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2))); + mask = IRExpr_Const(IRConst_V128(0b0001000100010001)); + break; + case Ity_I16: + sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2)); + mask = IRExpr_Const(IRConst_V128(0b0011001100110011)); + break; + default: + vpanic("s390_irgen_VSUM: invalid type "); + } + + IRExpr* addition = binop(Iop_AndV128, get_vr_qw(v3), mask); + put_vr_qw(v1, binop(Iop_Add32x4, sum, addition)); + + return "vsum"; +} + +static const HChar * +s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4) +{ + IRType type = s390_vr_get_type(m4); + IRExpr* mask; + IRExpr* sum; + switch(type) { + case Ity_I16: + sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2))); + mask = IRExpr_Const(IRConst_V128(0b0000001100000011)); + break; + case Ity_I32: + sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2)); + mask = IRExpr_Const(IRConst_V128(0b0000111100001111)); + break; + default: + vpanic("s390_irgen_VSUMG: invalid type "); + } + + IRExpr* addition = binop(Iop_AndV128, get_vr_qw(v3), mask); + put_vr_qw(v1, binop(Iop_Add64x2, sum, addition)); + + return "vsumg"; +} + +static const HChar * +s390_irgen_VSUMQ(UChar v1, 
UChar v2, UChar v3, UChar m4) +{ + IRType type = s390_vr_get_type(m4); + IRExpr* mask; + IRExpr* sum; + switch(type) { + case Ity_I32: + sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2))); + mask = IRExpr_Const(IRConst_V128(0b0000000000001111)); + break; + case Ity_I64: + sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2)); + mask = IRExpr_Const(IRConst_V128(0b0000000011111111)); + break; + default: + vpanic("s390_irgen_VSUMQ: invalid type "); + } + + IRExpr* addition = binop(Iop_AndV128, get_vr_qw(v3), mask); + put_vr_qw(v1, binop(Iop_Add128x1, sum, addition)); + + return "vsumq"; +} + +static const HChar * +s390_irgen_VTM(UChar v1, UChar v2) +{ + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VTM; + details.v2 = v1; + details.v3 = v2; + details.read_only = 1; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + + return "vtm"; +} + +static const HChar * +s390_irgen_VAC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + vassert(m5 == 4); /* specification exception otherwise */ + + IRTemp sum = newTemp(Ity_V128); + assign(sum, binop(Iop_Add128x1, get_vr_qw(v2), get_vr_qw(v3))); + + IRExpr* mask = binop(Iop_64HLtoV128, mkU64(0), mkU64(1)); + IRExpr* carry_in = binop(Iop_AndV128, get_vr_qw(v4), mask); + put_vr_qw(v1, binop(Iop_Add128x1, mkexpr(sum), carry_in)); + + return "vac"; +} + +static const HChar * +s390_irgen_VACC(UChar v1, UChar v2, UChar v3, UChar m4) +{ + IRType type = s390_vr_get_type(m4); + IRExpr* arg1 = get_vr_qw(v2); + IRExpr* arg2 = get_vr_qw(v3); + + put_vr_qw(v1, s390_V128_calculate_carry_out(arg1, arg2, type, False)); + return "vacc"; } static const HChar * -s390_irgen_LOCHI(UChar r1, UChar m3, UShort i2, UChar unused) +s390_irgen_VACCC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) { - next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0))); - put_gpr_w1(r1, mkU32(i2)); + vassert(m5 == 4); /* specification exception otherwise */ + IRExpr* result = + s390_V128_calculate_carry_out_with_carry(get_vr_qw(v2), + get_vr_qw(v3), + get_vr_qw(v4) + ); - return "lochi"; + put_vr_qw(v1, result); + return "vaccc"; } -static const HChar * -s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused) +static const HChar* +s390_irgen_VCKSM(UChar v1, UChar v2, UChar v3) { - next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0))); - put_gpr_dw0(r1, mkU64(i2)); - return "locghi"; + IRTemp sum1 = s390_checksum_add(get_vr_w1(v3), get_vr_w0(v2)); + IRTemp sum2 = s390_checksum_add(mkexpr(sum1), get_vr_w1(v2)); + IRTemp sum3 = s390_checksum_add(mkexpr(sum2), get_vr_w2(v2)); + IRTemp result = s390_checksum_add(mkexpr(sum3), get_vr_w3(v2)); + + put_vr_qw(v1, binop(Iop_64HLtoV128, + unop(Iop_32Uto64, mkexpr(result)), mkU64(0ULL))); + + return "vcksm"; } static const HChar * -s390_irgen_STOCFH(UChar r1, IRTemp op2addr) +s390_irgen_VGFM(UChar v1, UChar v2, UChar v3, UChar m4) { - /* condition is checked in format handler */ - store(mkexpr(op2addr), get_gpr_w1(r1)); + IRDirty* 
d; + IRTemp cc = newTemp(Ity_I64); - return "stocfh"; + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VGFM; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + return "vgfm"; } static const HChar * -s390_irgen_LCBB(UChar r1, IRTemp op2addr, UChar m3) +s390_irgen_VGFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) { - IRTemp op2 = newTemp(Ity_I32); - assign(op2, s390_getCountToBlockBoundary(op2addr, m3)); - put_gpr_w1(r1, mkexpr(op2)); + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); - IRExpr* cc = mkite(binop(Iop_CmpEQ32, mkexpr(op2), mkU32(16)), mkU64(0), mkU64(3)); - s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), cc, mkU64(0), mkU64(0)); + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VGFMA; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.v4 = v4; + details.m4 = m5; - return "lcbb"; -} + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); -/* Regarding the use of - // Dummy helper which is used to signal VEX library that memory was loaded - sha512_loadparam - = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_sha512_load_param_block", - &s390x_dirtyhelper_PPNO_sha512_load_param_block, - mkIRExprVec_0()); + d->nFxState = 4; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[3].size = sizeof(V128); - in the following function (s390_irgen_PPNO). This is a workaround to get - around the fact that IRDirty annotations cannot indicate two memory side - effects, which are unfortunately necessary here. It will possibly lead to - losing undefinedness (undefinedness in some inputs might not be propagated - to the outputs as it shouod, in Memcheck). The correct fix would be to - extend IRDirty to represent two memory side effects, but that's quite a bit - of work. + stmt(IRStmt_Dirty(d)); + return "vgfma"; +} - Here's a summary of what this insn does. 
+static const HChar * +s390_irgen_VSBI(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + vassert(m5 == 4); /* specification exception otherwise */ - // getReg(RegisterNumber n) returns the value of GPR number 'n' + IRExpr* mask = binop(Iop_64HLtoV128, mkU64(0ULL), mkU64(1ULL)); + IRExpr* carry_in = binop(Iop_AndV128, get_vr_qw(v4), mask); - // reg1 and reg2 are even - void ppno(RegisterNumber reg1, RegisterNumber reg2) { + IRTemp sum = newTemp(Ity_V128); + assign(sum, binop(Iop_Add128x1, + get_vr_qw(v2), + unop(Iop_NotV128, get_vr_qw(v3)) + ) + ); - switch(getReg(0)) { - case 0x0: - // Query mode, ignore reg1 and reg2 - // Write 16 bytes at getReg(1) - break; + put_vr_qw(v1, binop(Iop_Add128x1, mkexpr(sum), carry_in)); + return "vsbi"; +} - case 0x3: - // SHA-512 generate mode, ignore reg2 +static const HChar * +s390_irgen_VSCBI(UChar v1, UChar v2, UChar v3, UChar m4) +{ + IRType type = s390_vr_get_type(m4); + IRExpr* arg1 = get_vr_qw(v2); + IRExpr* arg2 = s390_V128_get_complement(get_vr_qw(v3), type); + IRExpr* result = s390_V128_calculate_carry_out(arg1, arg2, type, True); - // Read 240 bytes at getReg(1) - // Write getReg(reg1 + 1) bytes at getReg(reg1) - // Write some of 240 bytes starting at getReg(1) - break; + put_vr_qw(v1, result); + return "vscbi"; +} - case 0x83: - // SHA-512 seed mode, ignore reg1 +static const HChar * +s390_irgen_VSBCBI(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + vassert(m5 == 4); /* specification exception otherwise */ + IRExpr* result = + s390_V128_calculate_carry_out_with_carry(get_vr_qw(v2), + unop(Iop_NotV128, get_vr_qw(v3)), + get_vr_qw(v4)); - // Read some of 240 bytes starting at getReg(1) - // Read getReg(reg2 + 1) bytes at getReg(reg2) - // Write 240 bytes at getReg(1) - break; + put_vr_qw(v1, result); + return "vsbcbi"; +} - default: - // Specification exception, abort execution. - } - } -*/ -/* Also known as "prno" - If you implement new functions please don't forget to update - "s390x_dirtyhelper_PPNO_query" function. 
- */ static const HChar * -s390_irgen_PPNO(UChar r1, UChar r2) +s390_irgen_VMAH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) { - if (!s390_host_has_msa5) { - emulation_failure(EmFail_S390X_ppno); - return "ppno"; - } - - /* Theese conditions lead to specification exception */ - vassert(r1 % 2 == 0); - vassert(r2 % 2 == 0); - vassert((r1 != 0) && (r2 != 0)); + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); - IRDirty *query, *sha512_gen, *sha512_seed, *sha512_loadparam; - IRTemp gpr1num = newTemp(Ity_I64); - IRTemp gpr2num = newTemp(Ity_I64); + /* Check for specification exception */ + vassert(m5 < 3); - IRTemp funcCode = newTemp(Ity_I8); - IRTemp is_query = newTemp(Ity_I1); - IRTemp is_sha512_gen = newTemp(Ity_I1); - IRTemp is_sha512_seed = newTemp(Ity_I1); - IRTemp is_sha512 = newTemp(Ity_I1); + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VMAH; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.v4 = v4; + details.m4 = m5; - assign(funcCode, unop(Iop_64to8, binop(Iop_And64, get_gpr_dw0(0), mkU64(0xffULL)))); - assign(gpr1num, mkU64(r1)); - assign(gpr2num, mkU64(r2)); + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); - assign(is_query, binop(Iop_CmpEQ8, mkexpr(funcCode), mkU8(S390_PPNO_QUERY))); - assign(is_sha512_gen, binop(Iop_CmpEQ8, mkexpr(funcCode), mkU8(S390_PPNO_SHA512_GEN))); - assign(is_sha512_seed, binop(Iop_CmpEQ8, mkexpr(funcCode), mkU8(S390_PPNO_SHA512_SEED))); - assign(is_sha512, binop(Iop_CmpEQ8, - mkU8(S390_PPNO_SHA512_GEN), - binop(Iop_And8, - mkexpr(funcCode), - mkU8(S390_PPNO_SHA512_GEN) - ) - )); + d->nFxState = 4; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[3].size = sizeof(V128); - query = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_query", - &s390x_dirtyhelper_PPNO_query, - mkIRExprVec_3(IRExpr_GSPTR(), mkexpr(gpr1num), mkexpr(gpr2num))); - query->guard = mkexpr(is_query); - query->nFxState = 1; - vex_bzero(&query->fxState, sizeof(query->fxState)); - query->fxState[0].fx = Ifx_Read; - query->fxState[0].offset = S390X_GUEST_OFFSET(guest_r0); - query->fxState[0].size = 2 * sizeof(ULong); /* gpr0 and gpr1 are read */ - query->mAddr = get_gpr_dw0(1); - query->mSize = S390_PPNO_PARAM_BLOCK_SIZE_QUERY; - query->mFx = Ifx_Write; + stmt(IRStmt_Dirty(d)); - IRTemp gen_cc = newTemp(Ity_I64); - sha512_gen = unsafeIRDirty_1_N(gen_cc, 0, "s390x_dirtyhelper_PPNO_sha512", - &s390x_dirtyhelper_PPNO_sha512, - mkIRExprVec_3(IRExpr_GSPTR(), mkexpr(gpr1num), mkexpr(gpr2num))); - sha512_gen->guard = mkexpr(is_sha512_gen); - sha512_gen->nFxState = 3; - vex_bzero(&sha512_gen->fxState, sizeof(sha512_gen->fxState)); - sha512_gen->fxState[0].fx = Ifx_Read; - sha512_gen->fxState[0].offset = S390X_GUEST_OFFSET(guest_r0); - sha512_gen->fxState[0].size = 2 * sizeof(ULong); /* gpr0 and gpr1 are read */ - sha512_gen->fxState[1].fx = Ifx_Read; - sha512_gen->fxState[1].offset = 
S390X_GUEST_OFFSET(guest_r0) + r1 * sizeof(ULong); - sha512_gen->fxState[1].size = sizeof(ULong); - sha512_gen->fxState[2].fx = Ifx_Modify; - sha512_gen->fxState[2].offset = S390X_GUEST_OFFSET(guest_r0) + (r1 + 1) * sizeof(ULong); - sha512_gen->fxState[2].size = sizeof(ULong); - sha512_gen->mAddr = get_gpr_dw0(r1); - sha512_gen->mSize = S390_PPNO_MAX_SIZE_SHA512_GEN; - sha512_gen->mFx = Ifx_Write; + return "vmah"; +} - IRTemp unused = newTemp(Ity_I64); - sha512_seed = unsafeIRDirty_1_N(unused, 0, "s390x_dirtyhelper_PPNO_sha512", - &s390x_dirtyhelper_PPNO_sha512, - mkIRExprVec_3(IRExpr_GSPTR(), mkexpr(gpr1num), mkexpr(gpr2num))); - sha512_seed->guard = mkexpr(is_sha512_seed); - sha512_seed->nFxState = 2; - vex_bzero(&sha512_seed->fxState, sizeof(sha512_seed->fxState)); - sha512_seed->fxState[0].fx = Ifx_Read; - sha512_seed->fxState[0].offset = S390X_GUEST_OFFSET(guest_r0); - sha512_seed->fxState[0].size = 2 * sizeof(ULong); /* gpr0 and gpr1 are read */ - sha512_seed->fxState[1].fx = Ifx_Read; - sha512_seed->fxState[1].offset = S390X_GUEST_OFFSET(guest_r0) + r2 * sizeof(ULong); - sha512_seed->fxState[1].size = 2 * sizeof(ULong); /* r2 and r2 + 1 are read */ - sha512_seed->mAddr = get_gpr_dw0(r2); - sha512_seed->mSize = S390_PPNO_MAX_SIZE_SHA512_SEED; - sha512_seed->mFx = Ifx_Write; +static const HChar * +s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) +{ + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); - /* Dummy helper which is used to signal VEX library that memory was loaded */ - sha512_loadparam = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_sha512_load_param_block", - &s390x_dirtyhelper_PPNO_sha512_load_param_block, - mkIRExprVec_0()); - sha512_loadparam->guard = mkexpr(is_sha512); - sha512_loadparam->nFxState = 0; - vex_bzero(&sha512_loadparam->fxState, sizeof(sha512_loadparam->fxState)); - sha512_loadparam->mAddr = get_gpr_dw0(1); - sha512_loadparam->mSize = S390_PPNO_PARAM_BLOCK_SIZE_SHA512; - sha512_loadparam->mFx = Ifx_Read; + /* Check for specification exception */ + vassert(m5 < 3); - IRDirty* - sha512_saveparam = unsafeIRDirty_0_N(0, "s390x_dirtyhelper_PPNO_sha512_save_param_block", - &s390x_dirtyhelper_PPNO_sha512_load_param_block, - mkIRExprVec_0()); - sha512_saveparam->guard = mkexpr(is_sha512); - sha512_saveparam->nFxState = 0; - vex_bzero(&sha512_saveparam->fxState, sizeof(sha512_saveparam->fxState)); - sha512_saveparam->mAddr = get_gpr_dw0(1); - sha512_saveparam->mSize = S390_PPNO_PARAM_BLOCK_SIZE_SHA512; - sha512_saveparam->mFx = Ifx_Write; + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VMALH; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.v4 = v4; + details.m4 = m5; - stmt(IRStmt_Dirty(query)); - stmt(IRStmt_Dirty(sha512_loadparam)); - stmt(IRStmt_Dirty(sha512_gen)); - stmt(IRStmt_Dirty(sha512_seed)); - stmt(IRStmt_Dirty(sha512_saveparam)); + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); - IRTemp cc = newTemp(Ity_I64); - assign(cc, - mkite(mkexpr(is_sha512_gen), - mkexpr(gen_cc), - mkU64(0) - ) - ); + d->nFxState = 4; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeof(V128); + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset 
= S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[3].size = sizeof(V128); - s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), mkexpr(cc), mkU64(0), mkU64(0)); + stmt(IRStmt_Dirty(d)); - return "ppno"; + return "vmalh"; } /* New insns are added here. @@ -17708,6 +19362,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRR; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int m5 : 4; + unsigned int m6 : 4; + unsigned int : 4; + unsigned int v4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRRd; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -17717,6 +19383,17 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRI; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int : 4; + unsigned int i4 : 8; + unsigned int m5 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRId; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -18253,16 +19930,28 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe70000000027ULL: s390_format_RXE_RRRDR(s390_irgen_LCBB, ovl.fmt.RXE.r1, ovl.fmt.RXE.x2, ovl.fmt.RXE.b2, ovl.fmt.RXE.d2, ovl.fmt.RXE.m3); goto ok; - case 0xe70000000030ULL: /* VESL */ goto unimplemented; - case 0xe70000000033ULL: /* VERLL */ goto unimplemented; + case 0xe70000000030ULL: s390_format_VRS_VRDVM(s390_irgen_VESL, ovl.fmt.VRS.v1, + ovl.fmt.VRS.b2, ovl.fmt.VRS.d2, + ovl.fmt.VRS.v3, ovl.fmt.VRS.m4, + ovl.fmt.VRS.rxb); goto ok; + case 0xe70000000033ULL: s390_format_VRS_VRDVM(s390_irgen_VERLL, ovl.fmt.VRS.v1, + ovl.fmt.VRS.b2, ovl.fmt.VRS.d2, + ovl.fmt.VRS.v3, ovl.fmt.VRS.m4, + ovl.fmt.VRS.rxb); goto ok; case 0xe70000000036ULL: s390_format_VRS_VRDV(s390_irgen_VLM, ovl.fmt.VRS.v1, ovl.fmt.VRS.b2, ovl.fmt.VRS.d2, ovl.fmt.VRS.v3, ovl.fmt.VRS.rxb); goto ok; case 0xe70000000037ULL: s390_format_VRS_VRRD(s390_irgen_VLL, ovl.fmt.VRS.v1, ovl.fmt.VRS.b2, ovl.fmt.VRS.d2, ovl.fmt.VRS.v3, ovl.fmt.VRS.rxb); goto ok; - case 0xe70000000038ULL: /* VESRL */ goto unimplemented; - case 0xe7000000003aULL: /* VESRA */ goto unimplemented; + case 0xe70000000038ULL: s390_format_VRS_VRDVM(s390_irgen_VESRL, ovl.fmt.VRS.v1, + ovl.fmt.VRS.b2, ovl.fmt.VRS.d2, + ovl.fmt.VRS.v3, ovl.fmt.VRS.m4, + ovl.fmt.VRS.rxb); goto ok; + case 0xe7000000003aULL: s390_format_VRS_VRDVM(s390_irgen_VESRA, ovl.fmt.VRS.v1, + ovl.fmt.VRS.b2, ovl.fmt.VRS.d2, + ovl.fmt.VRS.v3, ovl.fmt.VRS.m4, + ovl.fmt.VRS.rxb); goto ok; case 0xe7000000003eULL: s390_format_VRS_VRDV(s390_irgen_VSTM, ovl.fmt.VRS.v1, ovl.fmt.VRS.b2, ovl.fmt.VRS.d2, ovl.fmt.VRS.v3, ovl.fmt.VRS.rxb); goto ok; @@ -18294,12 +19983,20 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe7000000004dULL: s390_format_VRI_VVIM(s390_irgen_VREP, ovl.fmt.VRI.v1, ovl.fmt.VRI.v3, ovl.fmt.VRI.i2, ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb); goto ok; - case 0xe70000000050ULL: /* VPOPCT */ goto unimplemented; - case 0xe70000000052ULL: /* VCTZ */ goto unimplemented; - case 0xe70000000053ULL: /* VCLZ */ goto unimplemented; + case 0xe70000000050ULL: s390_format_VRR_VVM(s390_irgen_VPOPCT, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000052ULL: s390_format_VRR_VVM(s390_irgen_VCTZ, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.rxb); goto ok; + case 
0xe70000000053ULL: s390_format_VRR_VVM(s390_irgen_VCLZ, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.rxb); goto ok; case 0xe70000000056ULL: s390_format_VRR_VV(s390_irgen_VLR, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.rxb); goto ok; - case 0xe7000000005cULL: /* VISTR */ goto unimplemented; + case 0xe7000000005cULL: s390_format_VRR_VVMM(s390_irgen_VISTR, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.m5, ovl.fmt.VRR.rxb); goto ok; case 0xe7000000005fULL: s390_format_VRR_VVM(s390_irgen_VSEG, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -18312,14 +20009,24 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe70000000062ULL: s390_format_VRR_VRR(s390_irgen_VLVGP, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.rxb); goto ok; - case 0xe70000000064ULL: /* VSUM */ goto unimplemented; - case 0xe70000000065ULL: /* VSUMG */ goto unimplemented; - case 0xe70000000066ULL: /* VCKSM */ goto unimplemented; - case 0xe70000000067ULL: /* VSUMQ */ goto unimplemented; + case 0xe70000000064ULL: s390_format_VRR_VVVM(s390_irgen_VSUM, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000065ULL: s390_format_VRR_VVVM(s390_irgen_VSUMG, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000066ULL: s390_format_VRR_VVV(s390_irgen_VCKSM, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000067ULL: s390_format_VRR_VVVM(s390_irgen_VSUMQ, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe70000000068ULL: s390_format_VRR_VVV(s390_irgen_VN, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.rxb); goto ok; - case 0xe70000000069ULL: /* VNC */ goto unimplemented; + case 0xe70000000069ULL: s390_format_VRR_VVV(s390_irgen_VNC, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; case 0xe7000000006aULL: s390_format_VRR_VVV(s390_irgen_VO, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.rxb); goto ok; @@ -18332,26 +20039,64 @@ s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRR.rxb); goto ok; case 0xe7000000006eULL: /* VNN */ goto unimplemented; case 0xe7000000006fULL: /* VOC */ goto unimplemented; - case 0xe70000000070ULL: /* VESLV */ goto unimplemented; - case 0xe70000000072ULL: /* VERIM */ goto unimplemented; - case 0xe70000000073ULL: /* VERLLV */ goto unimplemented; - case 0xe70000000074ULL: /* VSL */ goto unimplemented; - case 0xe70000000075ULL: /* VSLB */ goto unimplemented; - case 0xe70000000077ULL: /* VSLDB */ goto unimplemented; - case 0xe70000000078ULL: /* VESRLV */ goto unimplemented; - case 0xe7000000007aULL: /* VESRAV */ goto unimplemented; - case 0xe7000000007cULL: /* VSRL */ goto unimplemented; - case 0xe7000000007dULL: /* VSRLB */ goto unimplemented; - case 0xe7000000007eULL: /* VSRA */ goto unimplemented; - case 0xe7000000007fULL: /* VSRAB */ goto unimplemented; - case 0xe70000000080ULL: /* VFEE */ goto unimplemented; - case 0xe70000000081ULL: /* VFENE */ goto unimplemented; - case 0xe70000000082ULL: /* VFAE */ goto unimplemented; + case 0xe70000000070ULL: s390_format_VRR_VVVM(s390_irgen_VESLV, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000072ULL: s390_format_VRId_VVVIM(s390_irgen_VERIM, ovl.fmt.VRId.v1, + ovl.fmt.VRId.v2, ovl.fmt.VRId.v3, + ovl.fmt.VRId.i4, ovl.fmt.VRId.m5, + ovl.fmt.VRId.rxb); goto 
ok; + case 0xe70000000073ULL: s390_format_VRR_VVVM(s390_irgen_VERLLV, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000074ULL: s390_format_VRR_VVV(s390_irgen_VSL, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000075ULL: s390_format_VRR_VVV(s390_irgen_VSLB, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000077ULL: s390_format_VRId_VVVI(s390_irgen_VSLDB, ovl.fmt.VRId.v1, + ovl.fmt.VRId.v2, ovl.fmt.VRId.v3, + ovl.fmt.VRId.i4, ovl.fmt.VRId.rxb); goto ok; + case 0xe70000000078ULL: s390_format_VRR_VVVM(s390_irgen_VESRLV, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe7000000007aULL: s390_format_VRR_VVVM(s390_irgen_VESRAV, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe7000000007cULL: s390_format_VRR_VVV(s390_irgen_VSRL, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; + case 0xe7000000007dULL: s390_format_VRR_VVV(s390_irgen_VSRLB, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; + case 0xe7000000007eULL: s390_format_VRR_VVV(s390_irgen_VSRA, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; + case 0xe7000000007fULL: s390_format_VRR_VVV(s390_irgen_VSRAB, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000080ULL: s390_format_VRR_VVVMM(s390_irgen_VFEE, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.m5, + ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000081ULL: s390_format_VRR_VVVMM(s390_irgen_VFENE, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.m5, + ovl.fmt.VRR.rxb); goto ok; + case 0xe70000000082ULL: s390_format_VRR_VVVMM(s390_irgen_VFAE, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.m5, + ovl.fmt.VRR.rxb); goto ok; case 0xe70000000084ULL: s390_format_VRR_VVVM(s390_irgen_VPDI, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe70000000085ULL: /* VBPERM */ goto unimplemented; - case 0xe7000000008aULL: /* VSTRC */ goto unimplemented; + case 0xe7000000008aULL: s390_format_VRRd_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.m6, + ovl.fmt.VRRd.rxb); goto ok; case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -18371,27 +20116,79 @@ s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRR.m4, ovl.fmt.VRR.m5, ovl.fmt.VRR.rxb); goto ok; case 0xe7000000009eULL: /* VFNMS */ goto unimplemented; case 0xe7000000009fULL: /* VFNMA */ goto unimplemented; - case 0xe700000000a1ULL: /* VMLH */ goto unimplemented; - case 0xe700000000a2ULL: /* VML */ goto unimplemented; - case 0xe700000000a3ULL: /* VMH */ goto unimplemented; - case 0xe700000000a4ULL: /* VMLE */ goto unimplemented; - case 0xe700000000a5ULL: /* VMLO */ goto unimplemented; - case 0xe700000000a6ULL: /* VME */ goto unimplemented; - case 0xe700000000a7ULL: /* VMO */ goto unimplemented; - case 0xe700000000a9ULL: /* VMALH */ goto unimplemented; - case 0xe700000000aaULL: /* VMAL */ goto unimplemented; - case 0xe700000000abULL: /* VMAH */ goto unimplemented; - case 0xe700000000acULL: /* VMALE */ goto unimplemented; - case 
0xe700000000adULL: /* VMALO */ goto unimplemented; - case 0xe700000000aeULL: /* VMAE */ goto unimplemented; - case 0xe700000000afULL: /* VMAO */ goto unimplemented; - case 0xe700000000b4ULL: /* VGFM */ goto unimplemented; + case 0xe700000000a1ULL: s390_format_VRR_VVVM(s390_irgen_VMLH, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000a2ULL: s390_format_VRR_VVVM(s390_irgen_VML, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000a3ULL: s390_format_VRR_VVVM(s390_irgen_VMH, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000a4ULL: s390_format_VRR_VVVM(s390_irgen_VMLE, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000a5ULL: s390_format_VRR_VVVM(s390_irgen_VMLO, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000a6ULL: s390_format_VRR_VVVM(s390_irgen_VME, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000a7ULL: s390_format_VRR_VVVM(s390_irgen_VMO, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000a9ULL: s390_format_VRRd_VVVVM(s390_irgen_VMALH, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000aaULL: s390_format_VRRd_VVVVM(s390_irgen_VMAL, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000abULL: s390_format_VRRd_VVVVM(s390_irgen_VMAH, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000acULL: s390_format_VRRd_VVVVM(s390_irgen_VMALE, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000adULL: s390_format_VRRd_VVVVM(s390_irgen_VMALO, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000aeULL: s390_format_VRRd_VVVVM(s390_irgen_VMAE, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000afULL: s390_format_VRRd_VVVVM(s390_irgen_VMAO, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000b4ULL: s390_format_VRR_VVVM(s390_irgen_VGFM, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe700000000b8ULL: /* VMSL */ goto unimplemented; - case 0xe700000000b9ULL: /* VACCC */ goto unimplemented; - case 0xe700000000bbULL: /* VAC */ goto unimplemented; - case 0xe700000000bcULL: /* VGFMA */ goto unimplemented; - case 0xe700000000bdULL: /* VSBCBI */ goto unimplemented; - case 0xe700000000bfULL: /* VSBI */ goto unimplemented; + case 0xe700000000b9ULL: s390_format_VRRd_VVVVM(s390_irgen_VACCC, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000bbULL: s390_format_VRRd_VVVVM(s390_irgen_VAC, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000bcULL: 
s390_format_VRRd_VVVVM(s390_irgen_VGFMA, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000bdULL: s390_format_VRRd_VVVVM(s390_irgen_VSBCBI, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; + case 0xe700000000bfULL: s390_format_VRRd_VVVVM(s390_irgen_VSBI, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.rxb); goto ok; case 0xe700000000c0ULL: /* VCLGD */ goto unimplemented; case 0xe700000000c1ULL: /* VCDLG */ goto unimplemented; case 0xe700000000c2ULL: /* VCGD */ goto unimplemented; @@ -18415,11 +20212,20 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe700000000d7ULL: s390_format_VRR_VVM(s390_irgen_VUPH, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; - case 0xe700000000d8ULL: /* VTM */ goto unimplemented; - case 0xe700000000d9ULL: /* VECL */ goto unimplemented; - case 0xe700000000dbULL: /* VEC */ goto unimplemented; - case 0xe700000000deULL: /* VLC */ goto unimplemented; - case 0xe700000000dfULL: /* VLP */ goto unimplemented; + case 0xe700000000d8ULL: s390_format_VRR_VV(s390_irgen_VTM, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000d9ULL: s390_format_VRR_VVM(s390_irgen_VECL, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000dbULL: s390_format_VRR_VVM(s390_irgen_VEC, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000deULL: s390_format_VRR_VVM(s390_irgen_VLC, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000dfULL: s390_format_VRR_VVM(s390_irgen_VLP, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, + ovl.fmt.VRR.rxb); goto ok; case 0xe700000000e2ULL: /* VFS */ goto unimplemented; case 0xe700000000e3ULL: /* VFA */ goto unimplemented; case 0xe700000000e5ULL: /* VFD */ goto unimplemented; @@ -18429,19 +20235,48 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe700000000ebULL: /* VFCH */ goto unimplemented; case 0xe700000000eeULL: /* VFMIN */ goto unimplemented; case 0xe700000000efULL: /* VFMAX */ goto unimplemented; - case 0xe700000000f0ULL: /* VAVGL */ goto unimplemented; - case 0xe700000000f1ULL: /* VACC */ goto unimplemented; - case 0xe700000000f2ULL: /* VAVG */ goto unimplemented; - case 0xe700000000f3ULL: /* VA */ goto unimplemented; - case 0xe700000000f5ULL: /* VSCBI */ goto unimplemented; - case 0xe700000000f7ULL: /* VS */ goto unimplemented; - case 0xe700000000f8ULL: /* VCEQ */ goto unimplemented; - case 0xe700000000f9ULL: /* VCHL */ goto unimplemented; - case 0xe700000000fbULL: /* VCH */ goto unimplemented; - case 0xe700000000fcULL: /* VMNL */ goto unimplemented; - case 0xe700000000fdULL: /* VMXL */ goto unimplemented; - case 0xe700000000feULL: /* VMN */ goto unimplemented; - case 0xe700000000ffULL: /* VMX */ goto unimplemented; + case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000f1ULL: s390_format_VRR_VVVM(s390_irgen_VACC, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000f2ULL: s390_format_VRR_VVVM(s390_irgen_VAVG, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000f3ULL: 
s390_format_VRR_VVVM(s390_irgen_VA, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000f5ULL: s390_format_VRR_VVVM(s390_irgen_VSCBI, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000f7ULL: s390_format_VRR_VVVM(s390_irgen_VS, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000f8ULL: s390_format_VRR_VVVMM(s390_irgen_VCEQ, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.m5, + ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000f9ULL: s390_format_VRR_VVVMM(s390_irgen_VCHL, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.m5, + ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000fbULL: s390_format_VRR_VVVMM(s390_irgen_VCH, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.m5, + ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000fcULL: s390_format_VRR_VVVM(s390_irgen_VMNL, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000fdULL: s390_format_VRR_VVVM(s390_irgen_VMXL, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000feULL: s390_format_VRR_VVVM(s390_irgen_VMN, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; + case 0xe700000000ffULL: s390_format_VRR_VVVM(s390_irgen_VMX, ovl.fmt.VRR.v1, + ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, + ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xeb0000000004ULL: s390_format_RSY_RRRD(s390_irgen_LMG, ovl.fmt.RSY.r1, ovl.fmt.RSY.r3, ovl.fmt.RSY.b2, ovl.fmt.RSY.dl2, diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 6c35c67246..6c22ac8430 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -1766,6 +1766,20 @@ emit_VRR_VVVV(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4) } +static UChar * +emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)r2) << 32; + the_insn |= ((ULong)r3) << 28; + the_insn |= ((ULong)rxb)<< 8; + + return emit_6bytes(p, the_insn); +} + /*------------------------------------------------------------*/ /*--- Functions to emit particular instructions ---*/ /*------------------------------------------------------------*/ @@ -5713,6 +5727,338 @@ s390_emit_VMRL(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) return emit_VRR_VVVM(p, 0xE70000000060ULL, v1, v2, v3, m4); } +static UChar * +s390_emit_VA(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "va", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000f3ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vs", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000f7ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VNO(UChar *p, UChar v1, UChar v2, UChar v3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, VR), "vno", v1, v2, v3); + + return emit_VRR_VVV(p, 0xE7000000006bULL, v1, v2, v3); +} + +static UChar * +s390_emit_VCH(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags 
& VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vch", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000fbULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VCHL(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vchl", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000f9ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VCLZ(UChar *p, UChar v1, UChar v2, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, UINT), "vclz", v1, v2, m4); + + return emit_VRR_VVM(p, 0xE70000000053ULL, v1, v2, m4); +} + +static UChar * +s390_emit_VCTZ(UChar *p, UChar v1, UChar v2, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, UINT), "vctz", v1, v2, m4); + + return emit_VRR_VVM(p, 0xE70000000052ULL, v1, v2, m4); +} + +static UChar * +s390_emit_VPOPCT(UChar *p, UChar v1, UChar v2, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, UINT), "vpopct", v1, v2, m4); + + return emit_VRR_VVM(p, 0xE70000000050ULL, v1, v2, m4); +} + +static UChar * +s390_emit_VMX(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vmx", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000ffULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VMXL(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vmxl", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000fdULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VMN(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vmn", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000feULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VMNL(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vmnl", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000fcULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VAVG(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vavg", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000f2ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VAVGL(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vavgl", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000f0ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VLP(UChar *p, UChar v1, UChar v2, UChar m3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, UINT), "vlp", v1, v2, m3); + + return emit_VRR_VVM(p, 0xE700000000DFULL, v1, v2, m3); +} + +static UChar * +s390_emit_VMH(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vmh", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000a3ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VMLH(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vmlh", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000a1ULL, v1, 
v2, v3, m4); +} + +static UChar * +s390_emit_VML(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vml", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000a2ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VME(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vme", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000a6ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VMLE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vmle", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE700000000a4ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VESLV(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "veslv", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE70000000070ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VESRAV(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vesrav", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE7000000007aULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VESRLV(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vesrlv", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE70000000078ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VESL(UChar *p, UChar v1, UChar b2, UShort d2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, UDXB, VR, UINT), "vesl", v1, d2, 0, b2, v3, m4); + + return emit_VRS(p, 0xE70000000030ULL, v1, b2, d2, v3, m4); +} + +static UChar * +s390_emit_VESRA(UChar *p, UChar v1, UChar b2, UShort d2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, UDXB, VR, UINT), "vesra", v1, d2, 0, b2, v3, m4); + + return emit_VRS(p, 0xE7000000003aULL, v1, b2, d2, v3, m4); +} + +static UChar * +s390_emit_VESRL(UChar *p, UChar v1, UChar b2, UShort d2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, UDXB, VR, UINT), "vesrl", v1, d2, 0, b2, v3, m4); + + return emit_VRS(p, 0xE70000000038ULL, v1, b2, d2, v3, m4); +} + +static UChar * +s390_emit_VERLLV(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "verllv", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE70000000073ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VSL(UChar *p, UChar v1, UChar v2, UChar v3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, VR), "vsl", v1, v2, v3); + + return emit_VRR_VVV(p, 0xE70000000074ULL, v1, v2, v3); +} + +static UChar * +s390_emit_VSRL(UChar *p, UChar v1, UChar v2, UChar v3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, VR), "vsrl", v1, v2, v3); + + return emit_VRR_VVV(p, 0xE7000000007cULL, v1, v2, v3); +} + +static UChar * +s390_emit_VSRA(UChar *p, UChar v1, UChar v2, UChar v3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, VR), "vsra", v1, v2, v3); + + return emit_VRR_VVV(p, 0xE7000000007eULL, v1, v2, v3); +} + +static UChar * 
+s390_emit_VSLB(UChar *p, UChar v1, UChar v2, UChar v3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, VR), "vslb", v1, v2, v3); + + return emit_VRR_VVV(p, 0xE70000000075ULL, v1, v2, v3); +} + +static UChar * +s390_emit_VSRLB(UChar *p, UChar v1, UChar v2, UChar v3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, VR), "vsrlb", v1, v2, v3); + + return emit_VRR_VVV(p, 0xE7000000007dULL, v1, v2, v3); +} + +static UChar * +s390_emit_VSRAB(UChar *p, UChar v1, UChar v2, UChar v3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, VR, VR), "vsrab", v1, v2, v3); + + return emit_VRR_VVV(p, 0xE7000000007fULL, v1, v2, v3); +} + +static UChar * +s390_emit_VSUM(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vsum", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE70000000064ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VSUMG(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vsumg", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE70000000065ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VSUMQ(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, VR, UINT), "vsumq", v1, v2, v3, m4); + + return emit_VRR_VVVM(p, 0xE70000000067ULL, v1, v2, v3, m4); +} + +static UChar * +s390_emit_VLVGP(UChar *p, UChar v1, UChar r2, UChar r3) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC4(MNM, VR, GPR, GPR), "vlvgp", v1, r2, r3); + + return emit_VRR_VRR(p, 0xE70000000062ULL, v1, r2, r3); +} /*---------------------------------------------------------------*/ /*--- Constructors for the various s390_insn kinds ---*/ @@ -7476,6 +7822,9 @@ s390_insn_as_string(const s390_insn *insn) case S390_INSN_VEC_AMODEOP: switch (insn->variant.vec_amodeop.tag) { case S390_VEC_GET_ELEM: op = "v-vgetelem"; break; + case S390_VEC_ELEM_SHL_INT: op = "v-veshl"; break; + case S390_VEC_ELEM_SHRA_INT: op = "v-veshra"; break; + case S390_VEC_ELEM_SHRL_INT: op = "v-veshrl"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %A", op, insn->variant.vec_amodeop.dst, @@ -7504,6 +7853,36 @@ s390_insn_as_string(const s390_insn *insn) case S390_VEC_AND: op = "v-vand"; break; case S390_VEC_MERGEL: op = "v-vmergel"; break; case S390_VEC_MERGEH: op = "v-vmergeh"; break; + case S390_VEC_NOR: op = "v-vnor"; break; + case S390_VEC_INT_ADD: op = "v-vintadd"; break; + case S390_VEC_INT_SUB: op = "v-vintsub"; break; + case S390_VEC_MAXU: op = "v-vmaxu"; break; + case S390_VEC_MAXS: op = "v-vmaxs"; break; + case S390_VEC_MINU: op = "v-vminu"; break; + case S390_VEC_MINS: op = "v-vmins"; break; + case S390_VEC_AVGU: op = "v-vavgu"; break; + case S390_VEC_AVGS: op = "v-vavgs"; break; + case S390_VEC_COMPARE_GREATERS: op = "v-vcmpgts"; break; + case S390_VEC_COMPARE_GREATERU: op = "v-vcmpgtu"; break; + case S390_VEC_INT_MUL_HIGHS: op = "v-vintmulhis"; break; + case S390_VEC_INT_MUL_HIGHU: op = "v-vintmulhiu"; break; + case S390_VEC_INT_MUL_LOW: op = "v-vintmullo"; break; + case S390_VEC_INT_MUL_EVENS: op = "v-vintmulevens"; break; + case S390_VEC_INT_MUL_EVENU: op = "v-vintmulevenu"; break; + case S390_VEC_ELEM_SHL_V: op = "v-velemshl"; break; + case S390_VEC_ELEM_SHRA_V: op = "v-vshrav"; break; + case S390_VEC_ELEM_SHRL_V: op = "v-vshrlv"; break; + case 
S390_VEC_ELEM_ROLL_V: op = "v-vrollv"; break; + case S390_VEC_SHL_BITS: op = "v-vshlbits"; break; + case S390_VEC_SHRL_BITS: op = "v-vshrlbits"; break; + case S390_VEC_SHRA_BITS: op = "v-vshrabits"; break; + case S390_VEC_SHL_BYTES: op = "v-vshlbytes"; break; + case S390_VEC_SHRL_BYTES: op = "v-vshrlbytes"; break; + case S390_VEC_SHRA_BYTES: op = "v-vshrabytes"; break; + case S390_VEC_PWSUM_W: op = "v-vpwsumw"; break; + case S390_VEC_PWSUM_DW: op = "v-vpwsumdw"; break; + case S390_VEC_PWSUM_QW: op = "v-vpwsumqw"; break; + case S390_VEC_INIT_FROM_GPRS: op = "v-vinitfromgprs"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %R", op, insn->variant.vec_binop.dst, @@ -7884,6 +8263,9 @@ s390_insn_move_emit(UChar *buf, const s390_insn *insn) return s390_emit_LGR(buf, dst, src); if (dst_class == HRcFlt64) return s390_emit_LDR(buf, dst, src); + if (dst_class == HRcVec128) { + return s390_emit_VLR(buf, dst, src); + } } else { if (dst_class == HRcFlt64 && src_class == HRcInt64) { if (insn->size == 4) { @@ -7901,12 +8283,6 @@ s390_insn_move_emit(UChar *buf, const s390_insn *insn) return s390_emit_LGDRw(buf, dst, src); } } - - if (dst_class == HRcVec128 && src_class == HRcVec128) { - if(insn->size == 16) { - return s390_emit_VLR(buf, dst, src); - } - } /* A move between floating point registers and general purpose registers of different size should never occur and indicates an error elsewhere. */ @@ -8634,9 +9010,38 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); return s390_emit_VUPLH(buf, v1, v2, s390_getM_from_size(insn->size)); } + + case S390_VEC_ABS:{ + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VLP(buf, v1, v2, s390_getM_from_size(insn->size)); + } + + case S390_VEC_COUNT_LEADING_ZEROES:{ + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VCLZ(buf, v1, v2, s390_getM_from_size(insn->size)); + } + + case S390_VEC_COUNT_TRAILING_ZEROES:{ + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VCTZ(buf, v1, v2, s390_getM_from_size(insn->size)); } - vpanic("s390_insn_unop_emit"); + case S390_VEC_COUNT_ONES:{ + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VPOPCT(buf, v1, v2, s390_getM_from_size(insn->size)); + } + + default: + vpanic("s390_insn_unop_emit"); + } } @@ -10502,18 +10907,30 @@ s390_insn_profinc_emit(UChar *buf, static UChar * s390_insn_vec_amodeop_emit(UChar *buf, const s390_insn *insn) { - UChar r1 = hregNumber(insn->variant.vec_amodeop.dst); - UChar v1 = hregNumber(insn->variant.vec_amodeop.op1); + UChar v1 = hregNumber(insn->variant.vec_amodeop.dst); + UChar v2 = hregNumber(insn->variant.vec_amodeop.op1); s390_amode* op2 = insn->variant.vec_amodeop.op2; vassert(hregNumber(op2->x) == 0); + vassert(fits_unsigned_12bit(op2->d)); + UChar b = hregNumber(op2->b); UShort d = op2->d; switch (insn->variant.vec_amodeop.tag) { case S390_VEC_GET_ELEM: - return s390_emit_VLGV(buf, r1, b, d, v1, s390_getM_from_size(insn->size)); + return s390_emit_VLGV(buf, v1, b, d, v2, 
s390_getM_from_size(insn->size)); + + case S390_VEC_ELEM_SHL_INT: + return s390_emit_VESL(buf, v1, b, d, v2, s390_getM_from_size(insn->size)); + + case S390_VEC_ELEM_SHRA_INT: + return s390_emit_VESRA(buf, v1, b, d, v2, s390_getM_from_size(insn->size)); + + case S390_VEC_ELEM_SHRL_INT: + return s390_emit_VESRL(buf, v1, b, d, v2, s390_getM_from_size(insn->size)); + default: goto fail; } @@ -10569,9 +10986,72 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) case S390_VEC_AND: return s390_emit_VN(buf, v1, v2, v3); case S390_VEC_MERGEL: - return s390_emit_VMRH(buf, v1, v2, v3, s390_getM_from_size(size)); - case S390_VEC_MERGEH: return s390_emit_VMRL(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_MERGEH: + return s390_emit_VMRH(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_NOR: + return s390_emit_VNO(buf, v1, v2, v3); + case S390_VEC_INT_ADD: + return s390_emit_VA(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_INT_SUB: + return s390_emit_VS(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_MAXU: + return s390_emit_VMXL(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_MAXS: + return s390_emit_VMX(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_MINU: + return s390_emit_VMNL(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_MINS: + return s390_emit_VMN(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_AVGU: + return s390_emit_VAVGL(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_AVGS: + return s390_emit_VAVG(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_COMPARE_GREATERS: + return s390_emit_VCH(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_COMPARE_GREATERU: + return s390_emit_VCHL(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_INT_MUL_HIGHS: + return s390_emit_VMH(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_INT_MUL_HIGHU: + return s390_emit_VMLH(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_INT_MUL_LOW: + return s390_emit_VML(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_INT_MUL_EVENS: + return s390_emit_VME(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_INT_MUL_EVENU: + return s390_emit_VMLE(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_ELEM_SHL_V: + return s390_emit_VESLV(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_ELEM_SHRA_V: + return s390_emit_VESRAV(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_ELEM_SHRL_V: + return s390_emit_VESRLV(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_ELEM_ROLL_V: + return s390_emit_VERLLV(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_SHL_BITS: + return s390_emit_VSL(buf, v1, v2, v3); + case S390_VEC_SHRL_BITS: + return s390_emit_VSRL(buf, v1, v2, v3); + case S390_VEC_SHRA_BITS: + return s390_emit_VSRA(buf, v1, v2, v3); + case S390_VEC_SHL_BYTES: + return s390_emit_VSLB(buf, v1, v2, v3); + case S390_VEC_SHRL_BYTES: + return s390_emit_VSRLB(buf, v1, v2, v3); + case S390_VEC_SHRA_BYTES: + return s390_emit_VSRAB(buf, v1, v2, v3); + case S390_VEC_PWSUM_W: + vassert((size == 1) || (size == 2)); + return s390_emit_VSUM(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_PWSUM_DW: + vassert((size == 2) || (size == 4)); + return s390_emit_VSUMG(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_PWSUM_QW: + vassert((size == 4) || (size == 8)); + return s390_emit_VSUMQ(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_INIT_FROM_GPRS: + return 
s390_emit_VLVGP(buf, v1, v2, v3); default: goto fail; } diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index c88075d5f0..7ea01010e2 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -198,7 +198,11 @@ typedef enum { S390_VEC_FILL, S390_VEC_DUPLICATE, S390_VEC_UNPACKLOWS, - S390_VEC_UNPACKLOWU + S390_VEC_UNPACKLOWU, + S390_VEC_ABS, + S390_VEC_COUNT_LEADING_ZEROES, + S390_VEC_COUNT_TRAILING_ZEROES, + S390_VEC_COUNT_ONES } s390_unop_t; /* The kind of ternary BFP operations */ @@ -337,7 +341,10 @@ typedef enum { /* The vector operations with 2 operands one of them being amode */ typedef enum { - S390_VEC_GET_ELEM + S390_VEC_GET_ELEM, + S390_VEC_ELEM_SHL_INT, + S390_VEC_ELEM_SHRA_INT, + S390_VEC_ELEM_SHRL_INT } s390_vec_amodeop_t; /* The vector operations with three (vector, amode and integer) operands */ @@ -355,7 +362,38 @@ typedef enum { S390_VEC_XOR, S390_VEC_AND, S390_VEC_MERGEL, - S390_VEC_MERGEH + S390_VEC_MERGEH, + S390_VEC_NOR, + S390_VEC_INT_ADD, + S390_VEC_INT_SUB, + S390_VEC_MAXU, + S390_VEC_MAXS, + S390_VEC_MINU, + S390_VEC_MINS, + S390_VEC_AVGU, + S390_VEC_AVGS, + S390_VEC_COMPARE_GREATERS, + S390_VEC_COMPARE_GREATERU, + S390_VEC_INT_MUL_HIGHS, + S390_VEC_INT_MUL_HIGHU, + S390_VEC_INT_MUL_LOW, + S390_VEC_INT_MUL_EVENS, + S390_VEC_INT_MUL_EVENU, + S390_VEC_ELEM_SHL_V, + S390_VEC_ELEM_SHRA_V, + S390_VEC_ELEM_SHRL_V, + S390_VEC_ELEM_ROLL_V, + + /* host_s390_isel depends on this order. */ + S390_VEC_SHL_BITS, S390_VEC_SHL_BYTES, + S390_VEC_SHRL_BITS, S390_VEC_SHRL_BYTES, + S390_VEC_SHRA_BITS, S390_VEC_SHRA_BYTES, + + S390_VEC_PWSUM_W, + S390_VEC_PWSUM_DW, + S390_VEC_PWSUM_QW, + + S390_VEC_INIT_FROM_GPRS, } s390_vec_binop_t; /* The vector operations with three operands */ diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c index dec1259f6e..bc34f90ff3 100644 --- a/VEX/priv/host_s390_isel.c +++ b/VEX/priv/host_s390_isel.c @@ -526,21 +526,36 @@ vec_generate_zeroes(ISelEnv* env) } static HReg -vec_generate_ones(ISelEnv* env) +vec_do_notV128(ISelEnv* env, HReg arg) { HReg dst = newVRegV(env); - addInstr(env, s390_insn_unop(16, S390_VEC_FILL, dst, s390_opnd_imm(0xffff))); + addInstr(env, s390_insn_vec_binop(16, S390_VEC_NOR, dst, arg, arg)); return dst; } -static HReg -vec_do_notV128(ISelEnv* env, HReg arg) +#define IRCONST_IS_EQUAL_U8(arg, val) \ + ( ((arg)->tag == Iex_Const) \ + && ((arg)->Iex.Const.con->tag == Ico_U8) \ + && ((arg)->Iex.Const.con->Ico.U8 == (val)) ) + +/* Returns true if (expr & 0x7 == 0) */ +static Bool +vec_is_bytes_only_shift(const IRExpr* expr) { - HReg dst = newVRegV(env); - addInstr(env, s390_insn_vec_binop(16, S390_VEC_XOR,dst, - arg, vec_generate_ones(env))); - return dst; + const Bool is_good_const = + (expr->tag == Iex_Const) && + ((expr->Iex.Const.con->Ico.U8 & 0b00000111) == 0); + + const Bool good_mask_applied = + (expr->tag == Iex_Binop) && (expr->Iex.Binop.op == Iop_And8) && + (IRCONST_IS_EQUAL_U8(expr->Iex.Binop.arg1, 0b01111000) + || + IRCONST_IS_EQUAL_U8(expr->Iex.Binop.arg2, 0b01111000) + ); + + return is_good_const || good_mask_applied; } +#undef IRCONST_IS_EQUAL_U8 /* Call a helper (clean or dirty) Arguments must satisfy the following conditions: @@ -3687,11 +3702,25 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) UChar size_for_int_arg = 0; HReg dst; HReg reg1; - s390_unop_t vec_op = 0; + s390_unop_t vec_op; IROp op = expr->Iex.Unop.op; IRExpr* arg = expr->Iex.Unop.arg; switch(op) { case Iop_NotV128: + /* Not(Or(arg1, arg2)) -> Nor(arg1, arg2) */ + if(UNLIKELY((arg->tag == Iex_Binop ) 
&& (arg->Iex.Binop.op == Iop_OrV128))) + { + dst = newVRegV(env); + addInstr(env, + s390_insn_vec_binop(16, + S390_VEC_NOR, + dst, + s390_isel_vec_expr(env, arg->Iex.Binop.arg1), + s390_isel_vec_expr(env, arg->Iex.Binop.arg2) + ) + ); + return dst; + } reg1 = s390_isel_vec_expr(env, arg); return vec_do_notV128(env, reg1); @@ -3715,6 +3744,20 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) return vec_do_notV128(env, dst); } + case Iop_CmpNEZ128x1: { + IRExpr* low64 = IRExpr_Unop(Iop_V128to64, arg); + IRExpr* high64 = IRExpr_Unop(Iop_V128HIto64, arg); + IRExpr* both = IRExpr_Binop(Iop_Or64, low64, high64); + IRExpr* anyNonZ = IRExpr_Unop(Iop_CmpNEZ64, both); + IRExpr* anyNonZ64 = IRExpr_Unop(Iop_1Sto64, anyNonZ); + reg1 = s390_isel_int_expr(env, anyNonZ64); + + dst = newVRegV(env); + addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, + dst, reg1, reg1)); + return dst; + } + case Iop_Dup8x16: size = size_for_int_arg = 1; vec_op = S390_VEC_DUPLICATE; @@ -3773,6 +3816,122 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) return dst; } + case Iop_Abs8x16: + size = 1; + vec_op = S390_VEC_ABS; + goto Iop_V_wrk; + case Iop_Abs16x8: + size = 2; + vec_op = S390_VEC_ABS; + goto Iop_V_wrk; + case Iop_Abs32x4: + size = 4; + vec_op = S390_VEC_ABS; + goto Iop_V_wrk; + case Iop_Abs64x2: + size = 8; + vec_op = S390_VEC_ABS; + goto Iop_V_wrk; + + case Iop_Clz8x16: + size = 1; + vec_op = S390_VEC_COUNT_LEADING_ZEROES; + goto Iop_V_wrk; + case Iop_Ctz8x16: + size = 1; + vec_op = S390_VEC_COUNT_TRAILING_ZEROES; + goto Iop_V_wrk; + case Iop_Clz16x8: + size = 2; + vec_op = S390_VEC_COUNT_LEADING_ZEROES; + goto Iop_V_wrk; + case Iop_Ctz16x8: + size = 2; + vec_op = S390_VEC_COUNT_TRAILING_ZEROES; + goto Iop_V_wrk; + case Iop_Clz32x4: + size = 4; + vec_op = S390_VEC_COUNT_LEADING_ZEROES; + goto Iop_V_wrk; + case Iop_Ctz32x4: + size = 4; + vec_op = S390_VEC_COUNT_TRAILING_ZEROES; + goto Iop_V_wrk; + case Iop_Clz64x2: + size = 8; + vec_op = S390_VEC_COUNT_LEADING_ZEROES; + goto Iop_V_wrk; + case Iop_Ctz64x2: + size = 8; + vec_op = S390_VEC_COUNT_TRAILING_ZEROES; + goto Iop_V_wrk; + + case Iop_Cnt8x16: + size = 1; + vec_op = S390_VEC_COUNT_ONES; + goto Iop_V_wrk; + + Iop_V_wrk: { + dst = newVRegV(env); + reg1 = s390_isel_vec_expr(env, arg); + + addInstr(env, + s390_insn_unop(size, vec_op, dst, s390_opnd_reg(reg1))); + return dst; + } + + case Iop_PwAddL8Ux16: { + /* There is no such instruction. We have to emulate it. 
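+ Interleaving the argument with a zero vector widens each byte lane to 16 bits; adding the even-lane and odd-lane results then yields the pairwise sums.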
*/ + IRExpr *even = IRExpr_Binop(Iop_InterleaveEvenLanes8x16, + IRExpr_Const(IRConst_V128(0x0000)), + arg); + IRExpr *odd = IRExpr_Binop(Iop_InterleaveOddLanes8x16, + IRExpr_Const(IRConst_V128(0x0000)), + arg); + dst = s390_isel_vec_expr(env, IRExpr_Binop(Iop_Add16x8, even, odd)); + return dst; + } + + case Iop_PwAddL16Ux8: + if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_PwAddL8Ux16) { + size = 1; + arg = arg->Iex.Unop.arg; + } else { + size = 2; + } + vec_op = S390_VEC_PWSUM_W; + goto Iop_Pairwise_wrk; + + case Iop_PwAddL32Ux4: + if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_PwAddL16Ux8) { + size = 2; + arg = arg->Iex.Unop.arg; + } else { + size = 4; + } + vec_op = S390_VEC_PWSUM_DW; + goto Iop_Pairwise_wrk; + + case Iop_PwAddL64Ux2: + if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_PwAddL32Ux4) { + size = 4; + arg = arg->Iex.Unop.arg; + } else { + size = 8; + } + vec_op = S390_VEC_PWSUM_QW; + goto Iop_Pairwise_wrk; + + Iop_Pairwise_wrk: { + dst = newVRegV(env); + reg1 = s390_isel_vec_expr(env, arg); + + addInstr(env, + s390_insn_vec_binop(size, vec_op, dst, reg1, + vec_generate_zeroes(env))); + return dst; + } + default: goto irreducible; } @@ -3784,6 +3943,7 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) HReg reg1, reg2; IROp op = expr->Iex.Binop.op; s390_vec_binop_t vec_op = 0; + s390_vec_amodeop_t shift_op = 0; IRExpr* arg1 = expr->Iex.Binop.arg1; IRExpr* arg2 = expr->Iex.Binop.arg2; switch(op) { @@ -3874,6 +4034,455 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) vec_op = S390_VEC_MERGEH; goto Iop_VV_wrk; + case Iop_InterleaveEvenLanes8x16: { + /* There is no such instruction. We have to emulate it. */ + IRExpr* mask = IRExpr_Binop(Iop_64HLtoV128, + mkU64(0x0010021204140616ULL), + mkU64(0x08180a1a0c1c0e1eULL)); + HReg reg_mask = s390_isel_vec_expr(env, mask); + reg1 = s390_isel_vec_expr(env, arg1); + reg2 = s390_isel_vec_expr(env, arg2); + + addInstr(env, + s390_insn_vec_triop(16, S390_VEC_PERM, dst, reg1, reg2, + reg_mask) + ); + + return dst; + } + case Iop_InterleaveOddLanes8x16: { + /* There is no such instruction. We have to emulate it. 
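+ A single vector permute (VPERM) with a constant byte-index mask selects the odd-numbered bytes of the two source vectors alternately.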
*/ + IRExpr* mask = IRExpr_Binop(Iop_64HLtoV128, + mkU64(0x0111031305150717ULL), + mkU64(0x09190b1b0d1d0f1fULL)); + HReg reg_mask = s390_isel_vec_expr(env, mask); + reg1 = s390_isel_vec_expr(env, arg1); + reg2 = s390_isel_vec_expr(env, arg2); + + addInstr(env, + s390_insn_vec_triop(16, S390_VEC_PERM, dst, reg1, reg2, reg_mask) + ); + + return dst; + } + + case Iop_CmpEQ8x16: + size = 1; + vec_op = S390_VEC_COMPARE_EQUAL; + goto Iop_VV_wrk; + case Iop_CmpEQ16x8: + size = 2; + vec_op = S390_VEC_COMPARE_EQUAL; + goto Iop_VV_wrk; + case Iop_CmpEQ32x4: + size = 4; + vec_op = S390_VEC_COMPARE_EQUAL; + goto Iop_VV_wrk; + case Iop_CmpEQ64x2: + size = 8; + vec_op = S390_VEC_COMPARE_EQUAL; + goto Iop_VV_wrk; + + case Iop_Add8x16: + size = 1; + vec_op = S390_VEC_INT_ADD; + goto Iop_VV_wrk; + case Iop_Add16x8: + size = 2; + vec_op = S390_VEC_INT_ADD; + goto Iop_VV_wrk; + case Iop_Add32x4: + size = 4; + vec_op = S390_VEC_INT_ADD; + goto Iop_VV_wrk; + case Iop_Add64x2: + size = 8; + vec_op = S390_VEC_INT_ADD; + goto Iop_VV_wrk; + case Iop_Add128x1: + size = 16; + vec_op = S390_VEC_INT_ADD; + goto Iop_VV_wrk; + + case Iop_Sub8x16: + size = 1; + vec_op = S390_VEC_INT_SUB; + goto Iop_VV_wrk; + case Iop_Sub16x8: + size = 2; + vec_op = S390_VEC_INT_SUB; + goto Iop_VV_wrk; + case Iop_Sub32x4: + size = 4; + vec_op = S390_VEC_INT_SUB; + goto Iop_VV_wrk; + case Iop_Sub64x2: + size = 8; + vec_op = S390_VEC_INT_SUB; + goto Iop_VV_wrk; + case Iop_Sub128x1: + size = 16; + vec_op = S390_VEC_INT_SUB; + goto Iop_VV_wrk; + + case Iop_Max8Ux16: + size = 1; + vec_op = S390_VEC_MAXU; + goto Iop_VV_wrk; + case Iop_Max8Sx16: + size = 1; + vec_op = S390_VEC_MAXS; + goto Iop_VV_wrk; + case Iop_Max16Ux8: + size = 2; + vec_op = S390_VEC_MAXU; + goto Iop_VV_wrk; + case Iop_Max16Sx8: + size = 2; + vec_op = S390_VEC_MAXS; + goto Iop_VV_wrk; + case Iop_Max32Ux4: + size = 4; + vec_op = S390_VEC_MAXU; + goto Iop_VV_wrk; + case Iop_Max32Sx4: + size = 4; + vec_op = S390_VEC_MAXS; + goto Iop_VV_wrk; + case Iop_Max64Ux2: + size = 8; + vec_op = S390_VEC_MAXU; + goto Iop_VV_wrk; + case Iop_Max64Sx2: + size = 8; + vec_op = S390_VEC_MAXS; + goto Iop_VV_wrk; + + case Iop_Min8Ux16: + size = 1; + vec_op = S390_VEC_MINU; + goto Iop_VV_wrk; + case Iop_Min8Sx16: + size = 1; + vec_op = S390_VEC_MINS; + goto Iop_VV_wrk; + case Iop_Min16Ux8: + size = 2; + vec_op = S390_VEC_MINU; + goto Iop_VV_wrk; + case Iop_Min16Sx8: + size = 2; + vec_op = S390_VEC_MINS; + goto Iop_VV_wrk; + case Iop_Min32Ux4: + size = 4; + vec_op = S390_VEC_MINU; + goto Iop_VV_wrk; + case Iop_Min32Sx4: + size = 4; + vec_op = S390_VEC_MINS; + goto Iop_VV_wrk; + case Iop_Min64Ux2: + size = 8; + vec_op = S390_VEC_MINU; + goto Iop_VV_wrk; + case Iop_Min64Sx2: + size = 8; + vec_op = S390_VEC_MINS; + goto Iop_VV_wrk; + + case Iop_Avg8Ux16: + size = 1; + vec_op = S390_VEC_AVGU; + goto Iop_VV_wrk; + case Iop_Avg8Sx16: + size = 1; + vec_op = S390_VEC_AVGS; + goto Iop_VV_wrk; + case Iop_Avg16Ux8: + size = 2; + vec_op = S390_VEC_AVGU; + goto Iop_VV_wrk; + case Iop_Avg16Sx8: + size = 2; + vec_op = S390_VEC_AVGS; + goto Iop_VV_wrk; + case Iop_Avg32Ux4: + size = 4; + vec_op = S390_VEC_AVGU; + goto Iop_VV_wrk; + case Iop_Avg32Sx4: + size = 4; + vec_op = S390_VEC_AVGS; + goto Iop_VV_wrk; + case Iop_Avg64Ux2: + size = 8; + vec_op = S390_VEC_AVGU; + goto Iop_VV_wrk; + case Iop_Avg64Sx2: + size = 8; + vec_op = S390_VEC_AVGS; + goto Iop_VV_wrk; + + case Iop_CmpGT8Ux16: + size = 1; + vec_op = S390_VEC_COMPARE_GREATERU; + goto Iop_VV_wrk; + case Iop_CmpGT8Sx16: + size = 1; + vec_op = 
S390_VEC_COMPARE_GREATERS; + goto Iop_VV_wrk; + case Iop_CmpGT16Ux8: + size = 2; + vec_op = S390_VEC_COMPARE_GREATERU; + goto Iop_VV_wrk; + case Iop_CmpGT16Sx8: + size = 2; + vec_op = S390_VEC_COMPARE_GREATERS; + goto Iop_VV_wrk; + case Iop_CmpGT32Ux4: + size = 4; + vec_op = S390_VEC_COMPARE_GREATERU; + goto Iop_VV_wrk; + case Iop_CmpGT32Sx4: + size = 4; + vec_op = S390_VEC_COMPARE_GREATERS; + goto Iop_VV_wrk; + case Iop_CmpGT64Ux2: + size = 8; + vec_op = S390_VEC_COMPARE_GREATERU; + goto Iop_VV_wrk; + case Iop_CmpGT64Sx2: + size = 8; + vec_op = S390_VEC_COMPARE_GREATERS; + goto Iop_VV_wrk; + + case Iop_MulHi8Ux16: + size = 1; + vec_op = S390_VEC_INT_MUL_HIGHU; + goto Iop_VV_wrk; + case Iop_MulHi8Sx16: + size = 1; + vec_op = S390_VEC_INT_MUL_HIGHS; + goto Iop_VV_wrk; + case Iop_MulHi16Ux8: + size = 2; + vec_op = S390_VEC_INT_MUL_HIGHU; + goto Iop_VV_wrk; + case Iop_MulHi16Sx8: + size = 2; + vec_op = S390_VEC_INT_MUL_HIGHS; + goto Iop_VV_wrk; + case Iop_MulHi32Ux4: + size = 4; + vec_op = S390_VEC_INT_MUL_HIGHU; + goto Iop_VV_wrk; + case Iop_MulHi32Sx4: + size = 4; + vec_op = S390_VEC_INT_MUL_HIGHS; + goto Iop_VV_wrk; + + case Iop_Mul8x16: + size = 1; + vec_op = S390_VEC_INT_MUL_LOW; + goto Iop_VV_wrk; + case Iop_Mul16x8: + size = 2; + vec_op = S390_VEC_INT_MUL_LOW; + goto Iop_VV_wrk; + case Iop_Mul32x4: + size = 4; + vec_op = S390_VEC_INT_MUL_LOW; + goto Iop_VV_wrk; + + case Iop_MullEven8Sx16: + size = 1; + vec_op = S390_VEC_INT_MUL_EVENS; + goto Iop_VV_wrk; + case Iop_MullEven8Ux16: + size = 1; + vec_op = S390_VEC_INT_MUL_EVENU; + goto Iop_VV_wrk; + case Iop_MullEven16Sx8: + size = 2; + vec_op = S390_VEC_INT_MUL_EVENS; + goto Iop_VV_wrk; + case Iop_MullEven16Ux8: + size = 2; + vec_op = S390_VEC_INT_MUL_EVENU; + goto Iop_VV_wrk; + case Iop_MullEven32Sx4: + size = 4; + vec_op = S390_VEC_INT_MUL_EVENS; + goto Iop_VV_wrk; + case Iop_MullEven32Ux4: + size = 4; + vec_op = S390_VEC_INT_MUL_EVENU; + goto Iop_VV_wrk; + + case Iop_Shl8x16: + size = 1; + vec_op = S390_VEC_ELEM_SHL_V; + goto Iop_VV_wrk; + case Iop_Shl16x8: + size = 2; + vec_op = S390_VEC_ELEM_SHL_V; + goto Iop_VV_wrk; + case Iop_Shl32x4: + size = 4; + vec_op = S390_VEC_ELEM_SHL_V; + goto Iop_VV_wrk; + case Iop_Shl64x2: + size = 8; + vec_op = S390_VEC_ELEM_SHL_V; + goto Iop_VV_wrk; + + case Iop_Shr8x16: + size = 1; + vec_op = S390_VEC_ELEM_SHRL_V; + goto Iop_VV_wrk; + case Iop_Shr16x8: + size = 2; + vec_op = S390_VEC_ELEM_SHRL_V; + goto Iop_VV_wrk; + case Iop_Shr32x4: + size = 4; + vec_op = S390_VEC_ELEM_SHRL_V; + goto Iop_VV_wrk; + case Iop_Shr64x2: + size = 8; + vec_op = S390_VEC_ELEM_SHRL_V; + goto Iop_VV_wrk; + + case Iop_Sar8x16: + size = 1; + vec_op = S390_VEC_ELEM_SHRA_V; + goto Iop_VV_wrk; + case Iop_Sar16x8: + size = 2; + vec_op = S390_VEC_ELEM_SHRA_V; + goto Iop_VV_wrk; + case Iop_Sar32x4: + size = 4; + vec_op = S390_VEC_ELEM_SHRA_V; + goto Iop_VV_wrk; + case Iop_Sar64x2: + size = 8; + vec_op = S390_VEC_ELEM_SHRA_V; + goto Iop_VV_wrk; + + case Iop_Rol8x16: + size = 1; + vec_op = S390_VEC_ELEM_ROLL_V; + goto Iop_VV_wrk; + case Iop_Rol16x8: + size = 2; + vec_op = S390_VEC_ELEM_ROLL_V; + goto Iop_VV_wrk; + case Iop_Rol32x4: + size = 4; + vec_op = S390_VEC_ELEM_ROLL_V; + goto Iop_VV_wrk; + case Iop_Rol64x2: + size = 8; + vec_op = S390_VEC_ELEM_ROLL_V; + goto Iop_VV_wrk; + + case Iop_ShlN8x16: + size = 1; + shift_op = S390_VEC_ELEM_SHL_INT; + goto Iop_ShiftN_wrk; + case Iop_ShlN16x8: + size = 2; + shift_op = S390_VEC_ELEM_SHL_INT; + goto Iop_ShiftN_wrk; + case Iop_ShlN32x4: + size = 4; + shift_op = S390_VEC_ELEM_SHL_INT; + goto 
Iop_ShiftN_wrk; + case Iop_ShlN64x2: + size = 8; + shift_op = S390_VEC_ELEM_SHL_INT; + goto Iop_ShiftN_wrk; + + case Iop_ShrN8x16: + size = 1; + shift_op = S390_VEC_ELEM_SHRL_INT; + goto Iop_ShiftN_wrk; + case Iop_ShrN16x8: + size = 2; + shift_op = S390_VEC_ELEM_SHRL_INT; + goto Iop_ShiftN_wrk; + case Iop_ShrN32x4: + size = 4; + shift_op = S390_VEC_ELEM_SHRL_INT; + goto Iop_ShiftN_wrk; + case Iop_ShrN64x2: + size = 8; + shift_op = S390_VEC_ELEM_SHRL_INT; + goto Iop_ShiftN_wrk; + + case Iop_SarN8x16: + size = 1; + shift_op = S390_VEC_ELEM_SHRA_INT; + goto Iop_ShiftN_wrk; + case Iop_SarN16x8: + size = 2; + shift_op = S390_VEC_ELEM_SHRA_INT; + goto Iop_ShiftN_wrk; + case Iop_SarN32x4: + size = 4; + shift_op = S390_VEC_ELEM_SHRA_INT; + goto Iop_ShiftN_wrk; + case Iop_SarN64x2: + size = 8; + shift_op = S390_VEC_ELEM_SHRA_INT; + goto Iop_ShiftN_wrk; + + Iop_ShiftN_wrk: { + HReg vec = s390_isel_vec_expr(env, arg1); + s390_amode* number = s390_isel_amode(env,IRExpr_Unop(Iop_8Uto64, arg2)); + + addInstr(env, + s390_insn_vec_amodeop(size, shift_op, dst, vec, number)); + + return dst; + } + + case Iop_ShlV128: + vec_op = S390_VEC_SHL_BITS; + goto Iop_ShiftVV_wrk; + case Iop_ShrV128: + vec_op = S390_VEC_SHRL_BITS; + goto Iop_ShiftVV_wrk; + case Iop_SarV128: + vec_op = S390_VEC_SHRA_BITS; + goto Iop_ShiftVV_wrk; + + Iop_ShiftVV_wrk: { + reg1 = s390_isel_vec_expr(env, arg1); + reg2 = s390_isel_vec_expr(env, IRExpr_Unop(Iop_Dup8x16, arg2)); + + /* Handle special case */ + if (vec_is_bytes_only_shift(arg2)) + { + /* In this case we skip the BITS shift step. */ + addInstr(env, s390_insn_vec_binop(16, (vec_op + 1), + dst, reg1, reg2)); + + return dst; + } + + /* General case (BYTES shift & BITS shift) */ + addInstr(env, s390_insn_vec_binop(16, (vec_op + 1), + dst, reg1, reg2)); + + addInstr(env, s390_insn_vec_binop(16, vec_op, + dst, dst, reg2)); + + return dst; + } + Iop_VV_wrk: { reg1 = s390_isel_vec_expr(env, arg1); reg2 = s390_isel_vec_expr(env, arg2); @@ -3884,6 +4493,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) return dst; } + case Iop_64HLtoV128: + reg1 = s390_isel_int_expr(env, arg1); + reg2 = s390_isel_int_expr(env, arg2); + + addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, + dst, reg1, reg2)); + + return dst; + default: goto irreducible; } diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 15524bffb0..823b6be7dd 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -764,6 +764,7 @@ void ppIROp ( IROp op ) case Iop_CmpNEZ16x8: vex_printf("CmpNEZ16x8"); return; case Iop_CmpNEZ32x4: vex_printf("CmpNEZ32x4"); return; case Iop_CmpNEZ64x2: vex_printf("CmpNEZ64x2"); return; + case Iop_CmpNEZ128x1: vex_printf("CmpNEZ128x1"); return; case Iop_Abs8x16: vex_printf("Abs8x16"); return; case Iop_Abs16x8: vex_printf("Abs16x8"); return; @@ -774,6 +775,7 @@ void ppIROp ( IROp op ) case Iop_Add16x8: vex_printf("Add16x8"); return; case Iop_Add32x4: vex_printf("Add32x4"); return; case Iop_Add64x2: vex_printf("Add64x2"); return; + case Iop_Add128x1: vex_printf("Add128x1"); return; case Iop_QAdd8Ux16: vex_printf("QAdd8Ux16"); return; case Iop_QAdd16Ux8: vex_printf("QAdd16Ux8"); return; case Iop_QAdd32Ux4: vex_printf("QAdd32Ux4"); return; @@ -798,6 +800,7 @@ void ppIROp ( IROp op ) case Iop_PwAddL8Ux16: vex_printf("PwAddL8Ux16"); return; case Iop_PwAddL16Ux8: vex_printf("PwAddL16Ux8"); return; case Iop_PwAddL32Ux4: vex_printf("PwAddL32Ux4"); return; + case Iop_PwAddL64Ux2: vex_printf("PwAddL64Ux2"); return; case Iop_PwAddL8Sx16: vex_printf("PwAddL8Sx16"); return; case 
Iop_PwAddL16Sx8: vex_printf("PwAddL16Sx8"); return; case Iop_PwAddL32Sx4: vex_printf("PwAddL32Sx4"); return; @@ -806,6 +809,7 @@ void ppIROp ( IROp op ) case Iop_Sub16x8: vex_printf("Sub16x8"); return; case Iop_Sub32x4: vex_printf("Sub32x4"); return; case Iop_Sub64x2: vex_printf("Sub64x2"); return; + case Iop_Sub128x1: vex_printf("Sub128x1"); return; case Iop_QSub8Ux16: vex_printf("QSub8Ux16"); return; case Iop_QSub16Ux8: vex_printf("QSub16Ux8"); return; case Iop_QSub32Ux4: vex_printf("QSub32Ux4"); return; @@ -826,8 +830,10 @@ void ppIROp ( IROp op ) case Iop_Mull32Sx2: vex_printf("Mull32Sx2"); return; case Iop_PolynomialMul8x16: vex_printf("PolynomialMul8x16"); return; case Iop_PolynomialMull8x8: vex_printf("PolynomialMull8x8"); return; + case Iop_MulHi8Ux16: vex_printf("MulHi8Ux16"); return; case Iop_MulHi16Ux8: vex_printf("MulHi16Ux8"); return; case Iop_MulHi32Ux4: vex_printf("MulHi32Ux4"); return; + case Iop_MulHi8Sx16: vex_printf("MulHi8Sx16"); return; case Iop_MulHi16Sx8: vex_printf("MulHi16Sx8"); return; case Iop_MulHi32Sx4: vex_printf("MulHi32Sx4"); return; case Iop_QDMulHi16Sx8: vex_printf("QDMulHi16Sx8"); return; @@ -854,9 +860,11 @@ void ppIROp ( IROp op ) case Iop_Avg8Ux16: vex_printf("Avg8Ux16"); return; case Iop_Avg16Ux8: vex_printf("Avg16Ux8"); return; case Iop_Avg32Ux4: vex_printf("Avg32Ux4"); return; + case Iop_Avg64Ux2: vex_printf("Avg64Ux2"); return; case Iop_Avg8Sx16: vex_printf("Avg8Sx16"); return; case Iop_Avg16Sx8: vex_printf("Avg16Sx8"); return; case Iop_Avg32Sx4: vex_printf("Avg32Sx4"); return; + case Iop_Avg64Sx2: vex_printf("Avg64Sx2"); return; case Iop_Max8Sx16: vex_printf("Max8Sx16"); return; case Iop_Max16Sx8: vex_printf("Max16Sx8"); return; @@ -904,6 +912,7 @@ void ppIROp ( IROp op ) case Iop_ShlV128: vex_printf("ShlV128"); return; case Iop_ShrV128: vex_printf("ShrV128"); return; + case Iop_SarV128: vex_printf("SarV128"); return; case Iop_ShlN8x16: vex_printf("ShlN8x16"); return; case Iop_ShlN16x8: vex_printf("ShlN16x8"); return; @@ -1567,6 +1576,7 @@ void ppIRJumpKind ( IRJumpKind kind ) case Ijk_SigTRAP: vex_printf("SigTRAP"); break; case Ijk_SigSEGV: vex_printf("SigSEGV"); break; case Ijk_SigBUS: vex_printf("SigBUS"); break; + case Ijk_SigFPE: vex_printf("SigFPE"); break; case Ijk_SigFPE_IntDiv: vex_printf("SigFPE_IntDiv"); break; case Ijk_SigFPE_IntOvf: vex_printf("SigFPE_IntOvf"); break; case Ijk_Sys_syscall: vex_printf("Sys_syscall"); break; @@ -3038,7 +3048,7 @@ void typeOfPrimop ( IROp op, case Iop_Sub64F0x2: case Iop_AndV128: case Iop_OrV128: case Iop_XorV128: case Iop_Add8x16: case Iop_Add16x8: - case Iop_Add32x4: case Iop_Add64x2: + case Iop_Add32x4: case Iop_Add64x2: case Iop_Add128x1: case Iop_QAdd8Ux16: case Iop_QAdd16Ux8: case Iop_QAdd32Ux4: case Iop_QAdd64Ux2: case Iop_QAdd8Sx16: case Iop_QAdd16Sx8: @@ -3049,7 +3059,7 @@ void typeOfPrimop ( IROp op, case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2: case Iop_PwAdd8x16: case Iop_PwAdd16x8: case Iop_PwAdd32x4: case Iop_Sub8x16: case Iop_Sub16x8: - case Iop_Sub32x4: case Iop_Sub64x2: + case Iop_Sub32x4: case Iop_Sub64x2: case Iop_Sub128x1: case Iop_QSub8Ux16: case Iop_QSub16Ux8: case Iop_QSub32Ux4: case Iop_QSub64Ux2: case Iop_QSub8Sx16: case Iop_QSub16Sx8: @@ -3058,14 +3068,14 @@ void typeOfPrimop ( IROp op, case Iop_PolynomialMul8x16: case Iop_PolynomialMulAdd8x16: case Iop_PolynomialMulAdd16x8: case Iop_PolynomialMulAdd32x4: case Iop_PolynomialMulAdd64x2: - case Iop_MulHi16Ux8: case Iop_MulHi32Ux4: - case Iop_MulHi16Sx8: case Iop_MulHi32Sx4: + case Iop_MulHi8Ux16: case Iop_MulHi16Ux8: 
case Iop_MulHi32Ux4: + case Iop_MulHi8Sx16: case Iop_MulHi16Sx8: case Iop_MulHi32Sx4: case Iop_QDMulHi16Sx8: case Iop_QDMulHi32Sx4: case Iop_QRDMulHi16Sx8: case Iop_QRDMulHi32Sx4: case Iop_MullEven8Ux16: case Iop_MullEven16Ux8: case Iop_MullEven32Ux4: case Iop_MullEven8Sx16: case Iop_MullEven16Sx8: case Iop_MullEven32Sx4: - case Iop_Avg8Ux16: case Iop_Avg16Ux8: case Iop_Avg32Ux4: - case Iop_Avg8Sx16: case Iop_Avg16Sx8: case Iop_Avg32Sx4: + case Iop_Avg8Ux16: case Iop_Avg16Ux8: case Iop_Avg32Ux4: case Iop_Avg64Ux2: + case Iop_Avg8Sx16: case Iop_Avg16Sx8: case Iop_Avg32Sx4: case Iop_Avg64Sx2: case Iop_Max8Sx16: case Iop_Max16Sx8: case Iop_Max32Sx4: case Iop_Max64Sx2: case Iop_Max8Ux16: case Iop_Max16Ux8: case Iop_Max32Ux4: @@ -3144,11 +3154,12 @@ void typeOfPrimop ( IROp op, case Iop_Sqrt32F0x4: case Iop_Sqrt64F0x2: case Iop_CmpNEZ8x16: case Iop_CmpNEZ16x8: - case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2: + case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2: case Iop_CmpNEZ128x1: case Iop_Cnt8x16: case Iop_Clz8x16: case Iop_Clz16x8: case Iop_Clz32x4: case Iop_Clz64x2: case Iop_Cls8x16: case Iop_Cls16x8: case Iop_Cls32x4: case Iop_PwAddL8Ux16: case Iop_PwAddL16Ux8: case Iop_PwAddL32Ux4: + case Iop_PwAddL64Ux2: case Iop_PwAddL8Sx16: case Iop_PwAddL16Sx8: case Iop_PwAddL32Sx4: case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2: case Iop_Reverse32sIn64_x2: @@ -3170,7 +3181,7 @@ void typeOfPrimop ( IROp op, case Iop_BCD128toI128S: UNARY(Ity_V128, Ity_V128); - case Iop_ShlV128: case Iop_ShrV128: + case Iop_ShlV128: case Iop_ShrV128: case Iop_SarV128: case Iop_ShlN8x16: case Iop_ShlN16x8: case Iop_ShlN32x4: case Iop_ShlN64x2: case Iop_ShrN8x16: case Iop_ShrN16x8: diff --git a/VEX/priv/s390_disasm.c b/VEX/priv/s390_disasm.c index f95d7ec3b2..58189f1233 100644 --- a/VEX/priv/s390_disasm.c +++ b/VEX/priv/s390_disasm.c @@ -344,8 +344,8 @@ dvb_operand(HChar *p, UInt d, UInt v, UInt b, Bool displacement_is_signed) are separated by a ','. The command holds the arguments. Each argument is encoded using a 4-bit S390_ARG_xyz value. The first argument is placed in the least significant bits of the command and so on. There are at most - 5 arguments in an insn and a sentinel (S390_ARG_DONE) is needed to identify - the end of the argument list. 6 * 4 = 24 bits are required for the + 7 arguments in an insn and a sentinel (S390_ARG_DONE) is needed to identify + the end of the argument list. 8 * 4 = 32 bits are required for the command. */ void s390_disasm(UInt command, ...) 
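The comment above describes how s390_disasm receives its operand list: operand kinds are packed as 4-bit codes into a single command word, least significant nibble first, terminated by a sentinel. A minimal standalone sketch of that scheme (the enum values and function below are hypothetical stand-ins, not the actual S390_ARG_xyz constants or disassembler code):

    /* Operand kinds packed as 4-bit codes into one 32-bit word; the first
       operand occupies the least significant nibble and a DONE sentinel
       ends the list.  7 operands + sentinel = 8 nibbles = 32 bits. */
    enum { ARG_DONE = 0, ARG_MNM, ARG_GPR, ARG_VR, ARG_UINT };

    #define ENC3(a,b,c)  ((ARG_DONE << 12) | ((c) << 8) | ((b) << 4) | (a))

    static void walk_command(unsigned int command)
    {
       while ((command & 0xF) != ARG_DONE) {
          /* command & 0xF is the kind of the current operand; the real
             code fetches a matching va_arg and prints it here */
          command >>= 4;   /* advance to the next nibble */
       }
    }

This is why raising the maximum from 5 to 7 operands grows the command word from 24 to 32 bits: each extra operand costs one nibble, and the sentinel always takes one more.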
diff --git a/VEX/priv/s390_disasm.h b/VEX/priv/s390_disasm.h index 9d29b9600f..3cccceadfc 100644 --- a/VEX/priv/s390_disasm.h +++ b/VEX/priv/s390_disasm.h @@ -51,6 +51,10 @@ #undef ENC6 #define ENC6(a,b,c,d,e,f) ((P(DONE) << 24) | (P(f) << 20) | (P(e) << 16) | \ (P(d) << 12) | (P(c) << 8) | (P(b) << 4) | P(a)) +#undef ENC7 +#define ENC7(a,b,c,d,e,f,g) ((P(DONE) << 28) | (P(g) << 24) | (P(f) << 20) | \ + (P(e) << 16) | (P(d) << 12) | (P(c) << 8) | \ + (P(b) << 4) | P(a)) /* The different kinds of operands in an asm insn */ enum { diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 4beaabd387..17bcb55840 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -1479,13 +1479,14 @@ typedef Iop_AndV128, Iop_OrV128, Iop_XorV128, /* VECTOR SHIFT (shift amt :: Ity_I8) */ - Iop_ShlV128, Iop_ShrV128, + Iop_ShlV128, Iop_ShrV128, Iop_SarV128, /* MISC (vector integer cmp != 0) */ Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2, + Iop_CmpNEZ128x1, /* ADDITION (normal / U->U sat / S->S sat) */ - Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, + Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, Iop_Add128x1, Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2, Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2, @@ -1500,14 +1501,14 @@ typedef Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2, /* SUBTRACTION (normal / unsigned sat / signed sat) */ - Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2, + Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2, Iop_Sub128x1, Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2, Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2, /* MULTIPLICATION (normal / high half of signed/unsigned) */ Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, - Iop_MulHi16Ux8, Iop_MulHi32Ux4, - Iop_MulHi16Sx8, Iop_MulHi32Sx4, + Iop_MulHi8Ux16, Iop_MulHi16Ux8, Iop_MulHi32Ux4, + Iop_MulHi8Sx16, Iop_MulHi16Sx8, Iop_MulHi32Sx4, /* (widening signed/unsigned of even lanes, with lowest lane=zero) */ Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven32Ux4, Iop_MullEven8Sx16, Iop_MullEven16Sx8, Iop_MullEven32Sx4, @@ -1584,7 +1585,7 @@ typedef Example: Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] where a+b and c+d are unsigned 32-bit values. 
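+ Correspondingly, Iop_PwAddL64Ux2( [a,b] ) = [a+b] where a+b is an unsigned 128-bit value.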
*/ - Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4, + Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4, Iop_PwAddL64Ux2, Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4, /* Other unary pairwise ops */ @@ -1598,8 +1599,8 @@ typedef Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2, /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ - Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, - Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, + Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, Iop_Avg64Ux2, + Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, Iop_Avg64Sx2, /* MIN/MAX */ Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2, diff --git a/VEX/useful/test_main.c b/VEX/useful/test_main.c index 2d24aaf0a0..a7fc06b246 100644 --- a/VEX/useful/test_main.c +++ b/VEX/useful/test_main.c @@ -1416,6 +1416,10 @@ static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at ) return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at)); } +static IRAtom* mkPCast128x1 ( MCEnv* mce, IRAtom* at ) +{ + return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ128x1, at)); +} /* Here's a simple scheme capable of handling ops derived from SSE1 code and while only generating ops that can be efficiently @@ -1631,6 +1635,14 @@ IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) return at; } +static +IRAtom* binary128Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) +{ + IRAtom* at; + at = mkUifUV128(mce, vatom1, vatom2); + at = mkPCast128x1(mce, at); + return at; +} /*------------------------------------------------------------*/ /*--- Generate shadow values from all kinds of IRExprs. ---*/ @@ -1674,6 +1686,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_QSub8Ux16: case Iop_QSub8Sx16: case Iop_Sub8x16: + case Iop_MulHi8Sx16: + case Iop_MulHi8Ux16: case Iop_Min8Ux16: case Iop_Max8Ux16: case Iop_CmpGT8Sx16: @@ -1713,11 +1727,18 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_Sub64x2: case Iop_QSub64Ux2: case Iop_QSub64Sx2: + case Iop_Avg64Ux2: + case Iop_Avg64Sx2: case Iop_Add64x2: case Iop_QAdd64Ux2: case Iop_QAdd64Sx2: return binary64Ix2(mce, vatom1, vatom2); + case Iop_Add128x1: + case Iop_Sub128x1: + case Iop_CmpNEZ128x1: + return binary128Ix1(mce, vatom1, vatom2); + case Iop_QNarrowBin32Sto16Sx8: case Iop_QNarrowBin16Sto8Sx16: case Iop_QNarrowBin16Sto8Ux16: diff --git a/docs/internals/3_13_BUGSTATUS.txt b/docs/internals/3_13_BUGSTATUS.txt index f803c9a8aa..944dde8f83 100644 --- a/docs/internals/3_13_BUGSTATUS.txt +++ b/docs/internals/3_13_BUGSTATUS.txt @@ -758,14 +758,6 @@ Should fold these to constant zero in iropt; awaiting test case === VEX/s390x ========================================================== -(carried over) -366413 s390x: New z13 instructions not implemented - [Per cborntraeger, is not important for 3.12.0] - -385409 s390x: z13 vector integer instructions not implemented - -385410 s390x: z13 vector string instructions not implemented - 385411 s390x: z13 vector floating-point instructions not implemented === VEX/x86 ============================================================ diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c index a1edb9a749..9b2e654902 100644 --- a/memcheck/mc_main.c +++ b/memcheck/mc_main.c @@ -1350,7 +1350,16 @@ void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res, ok |= pessim[j] != V_BITS64_DEFINED; tl_assert(ok); - if (0 == (a & (szB - 1)) && n_addrs_bad < szB) { +# if defined(VGP_s390x_linux) + tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD + /* OK if all loaded bytes are from the same page. 
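+ On s390x vector loads need not be naturally aligned, so instead of demanding alignment we only demand that the access stays within a single 4K page.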
*/ + Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB); +# else + /* OK if the address is aligned by the load size. */ + Bool alignedOK = (0 == (a & (szB - 1))); +# endif + + if (alignedOK && n_addrs_bad < szB) { /* Exemption applies. Use the previously computed pessimising value and return the combined result, but don't flag an addressing error. The pessimising value is Defined for valid diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c index 5ed39ae21e..68a2ab3bb6 100644 --- a/memcheck/mc_translate.c +++ b/memcheck/mc_translate.c @@ -2324,6 +2324,11 @@ static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at ) return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at)); } +static IRAtom* mkPCast128x1 ( MCEnv* mce, IRAtom* at ) +{ + return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ128x1, at)); +} + static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at ) { return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at)); @@ -2933,6 +2938,15 @@ IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) return at; } +static +IRAtom* binary128Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 ) +{ + IRAtom* at; + at = mkUifUV128(mce, vatom1, vatom2); + at = mkPCast128x1(mce, at); + return at; +} + /* --- 64-bit versions --- */ static @@ -3609,6 +3623,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_QShl8x16: case Iop_Add8x16: case Iop_Mul8x16: + case Iop_MulHi8Sx16: + case Iop_MulHi8Ux16: case Iop_PolynomialMul8x16: case Iop_PolynomialMulAdd8x16: return binary8Ix16(mce, vatom1, vatom2); @@ -3660,6 +3676,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_Min32Ux4: case Iop_Min32Sx4: case Iop_Mul32x4: + case Iop_MulHi32Sx4: + case Iop_MulHi32Ux4: case Iop_QDMulHi32Sx4: case Iop_QRDMulHi32Sx4: case Iop_PolynomialMulAdd32x4: @@ -3667,6 +3685,8 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_Sub64x2: case Iop_Add64x2: + case Iop_Avg64Ux2: + case Iop_Avg64Sx2: case Iop_Max64Sx2: case Iop_Max64Ux2: case Iop_Min64Sx2: @@ -3691,6 +3711,11 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_MulI128by10ECarry: return binary64Ix2(mce, vatom1, vatom2); + case Iop_Add128x1: + case Iop_Sub128x1: + case Iop_CmpNEZ128x1: + return binary128Ix1(mce, vatom1, vatom2); + case Iop_QNarrowBin64Sto32Sx4: case Iop_QNarrowBin64Uto32Ux4: case Iop_QNarrowBin32Sto16Sx8: @@ -3998,6 +4023,7 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, binop(op, vatom1, vatom2)); case Iop_ShrV128: + case Iop_SarV128: case Iop_ShlV128: case Iop_I128StoBCD128: /* Same scheme as with all other shifts. 
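+ (The shadow of the shifted value is shifted by the concrete amount, after the amount's own definedness has been checked.)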
Note: 10 Nov 05: @@ -4950,6 +4976,10 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom ) return mkPCast64x2(mce, assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom)))); + case Iop_PwAddL64Ux2: + return mkPCast128x1(mce, + assignNew('V', mce, Ity_V128, unop(op, mkPCast64x2(mce, vatom)))); + case Iop_PwAddL16Ux8: case Iop_PwAddL16Sx8: return mkPCast32x4(mce, diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c index adc7845aee..bfd82fcec9 100644 --- a/memcheck/tests/vbit-test/irops.c +++ b/memcheck/tests/vbit-test/irops.c @@ -712,14 +712,17 @@ static irop_t irops[] = { { DEFOP(Iop_XorV128, UNDEF_UNKNOWN), }, { DEFOP(Iop_ShlV128, UNDEF_UNKNOWN), }, { DEFOP(Iop_ShrV128, UNDEF_UNKNOWN), }, + { DEFOP(Iop_SarV128, UNDEF_UNKNOWN), }, { DEFOP(Iop_CmpNEZ8x16, UNDEF_UNKNOWN), }, { DEFOP(Iop_CmpNEZ16x8, UNDEF_UNKNOWN), }, { DEFOP(Iop_CmpNEZ32x4, UNDEF_UNKNOWN), }, { DEFOP(Iop_CmpNEZ64x2, UNDEF_UNKNOWN), }, + { DEFOP(Iop_CmpNEZ128x1, UNDEF_UNKNOWN), }, { DEFOP(Iop_Add8x16, UNDEF_UNKNOWN), }, { DEFOP(Iop_Add16x8, UNDEF_UNKNOWN), }, { DEFOP(Iop_Add32x4, UNDEF_UNKNOWN), }, { DEFOP(Iop_Add64x2, UNDEF_UNKNOWN), }, + { DEFOP(Iop_Add128x1, UNDEF_UNKNOWN), }, { DEFOP(Iop_QAdd8Ux16, UNDEF_UNKNOWN), }, { DEFOP(Iop_QAdd16Ux8, UNDEF_UNKNOWN), }, { DEFOP(Iop_QAdd32Ux4, UNDEF_UNKNOWN), }, @@ -742,6 +745,7 @@ static irop_t irops[] = { { DEFOP(Iop_Sub16x8, UNDEF_UNKNOWN), }, { DEFOP(Iop_Sub32x4, UNDEF_UNKNOWN), }, { DEFOP(Iop_Sub64x2, UNDEF_UNKNOWN), }, + { DEFOP(Iop_Sub128x1, UNDEF_UNKNOWN), }, { DEFOP(Iop_QSub8Ux16, UNDEF_UNKNOWN), }, { DEFOP(Iop_QSub16Ux8, UNDEF_UNKNOWN), }, { DEFOP(Iop_QSub32Ux4, UNDEF_UNKNOWN), }, @@ -753,8 +757,10 @@ static irop_t irops[] = { { DEFOP(Iop_Mul8x16, UNDEF_UNKNOWN), }, { DEFOP(Iop_Mul16x8, UNDEF_UNKNOWN), }, { DEFOP(Iop_Mul32x4, UNDEF_UNKNOWN), }, + { DEFOP(Iop_MulHi8Ux16, UNDEF_UNKNOWN), }, { DEFOP(Iop_MulHi16Ux8, UNDEF_UNKNOWN), }, { DEFOP(Iop_MulHi32Ux4, UNDEF_UNKNOWN), }, + { DEFOP(Iop_MulHi8Sx16, UNDEF_UNKNOWN), }, { DEFOP(Iop_MulHi16Sx8, UNDEF_UNKNOWN), }, { DEFOP(Iop_MulHi32Sx4, UNDEF_UNKNOWN), }, /* Result of the Iop_MullEvenBxE is 2*BxE/2 */ @@ -785,6 +791,7 @@ static irop_t irops[] = { { DEFOP(Iop_PwAddL8Ux16, UNDEF_UNKNOWN), }, { DEFOP(Iop_PwAddL16Ux8, UNDEF_UNKNOWN), }, { DEFOP(Iop_PwAddL32Ux4, UNDEF_UNKNOWN), }, + { DEFOP(Iop_PwAddL64Ux2, UNDEF_UNKNOWN), }, { DEFOP(Iop_PwAddL8Sx16, UNDEF_UNKNOWN), }, { DEFOP(Iop_PwAddL16Sx8, UNDEF_UNKNOWN), }, { DEFOP(Iop_PwAddL32Sx4, UNDEF_UNKNOWN), }, @@ -795,9 +802,11 @@ static irop_t irops[] = { { DEFOP(Iop_Avg8Ux16, UNDEF_UNKNOWN), }, { DEFOP(Iop_Avg16Ux8, UNDEF_UNKNOWN), }, { DEFOP(Iop_Avg32Ux4, UNDEF_UNKNOWN), }, + { DEFOP(Iop_Avg64Ux2, UNDEF_UNKNOWN), }, { DEFOP(Iop_Avg8Sx16, UNDEF_UNKNOWN), }, { DEFOP(Iop_Avg16Sx8, UNDEF_UNKNOWN), }, { DEFOP(Iop_Avg32Sx4, UNDEF_UNKNOWN), }, + { DEFOP(Iop_Avg64Sx2, UNDEF_UNKNOWN), }, { DEFOP(Iop_Max8Sx16, UNDEF_UNKNOWN), }, { DEFOP(Iop_Max16Sx8, UNDEF_UNKNOWN), }, { DEFOP(Iop_Max32Sx4, UNDEF_UNKNOWN), },