From: Florian Krohm Date: Mon, 20 Feb 2012 15:01:14 +0000 (+0000) Subject: Improve code generation on s390x for assignment of constant X-Git-Tag: svn/VALGRIND_3_8_1^2~201 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4546ae3379d7bb6673f35608f804affc91d750fb;p=thirdparty%2Fvalgrind.git Improve code generation on s390x for assignment of constant values to guest registers. Motivated by the observation that piecing together a 64-bit value requires 4 insns on z900 and 2 insns on newer models. Specifically: (1) Assigning 0 can be done by using XC (2) Assigning a value that differs by a small amount from the value previously assigned can be done using AGSI (Happens a lot for guest IA updates). (3) If the new value differs from the previous one only in the lower word it is sufficient to assign the lower word. (4) If the new value equals the old value the assignment is redundant and can be eliminated. This happens surprisingly often. This buys us somewhere between 5% and 11.8% of insns (as measured on the perf bucket). git-svn-id: svn://svn.valgrind.org/vex/trunk@2258 --- diff --git a/VEX/auxprogs/genoffsets.c b/VEX/auxprogs/genoffsets.c index c371a9d246..e0dd3e92e9 100644 --- a/VEX/auxprogs/genoffsets.c +++ b/VEX/auxprogs/genoffsets.c @@ -169,6 +169,10 @@ void foo ( void ) GENOFFSET(S390X,s390x,SYSNO); GENOFFSET(S390X,s390x,IP_AT_SYSCALL); GENOFFSET(S390X,s390x,fpc); + GENOFFSET(S390X,s390x,CC_OP); + GENOFFSET(S390X,s390x,CC_DEP1); + GENOFFSET(S390X,s390x,CC_DEP2); + GENOFFSET(S390X,s390x,CC_NDEP); } /*--------------------------------------------------------------------*/ diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h index bf4b762803..754ce3d091 100644 --- a/VEX/priv/guest_s390_defs.h +++ b/VEX/priv/guest_s390_defs.h @@ -37,6 +37,7 @@ #include "libvex_ir.h" // IRSB (needed by bb_to_IR.h) #include "libvex.h" // VexArch (needed by bb_to_IR.h) #include "guest_generic_bb_to_IR.h" // DisResult +#include "libvex_guest_s390x.h" // VexGuestS390XState /* Convert one s390 insn to IR. See the type DisOneInstrFn in diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 7319781134..47e3ab5826 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -13593,7 +13593,7 @@ disInstr_S390_WRK(UChar *insn) DisResult disInstr_S390(IRSB *irsb_IN, - Bool put_IP, + Bool put_IP __attribute__((unused)), Bool (*resteerOkFn)(void *, Addr64), Bool resteerCisOk, void *callback_opaque, @@ -13616,10 +13616,9 @@ disInstr_S390(IRSB *irsb_IN, resteer_fn = resteerOkFn; resteer_data = callback_opaque; - /* We may be asked to update the guest IA before going further. */ - if (put_IP) - addStmtToIRSB(irsb, IRStmt_Put(S390X_GUEST_OFFSET(guest_IA), - mkaddr_expr(guest_IA_curr_instr))); + /* Always update the guest IA. See comment in s390_isel_stmt for Ist_Put. */ + addStmtToIRSB(irsb, IRStmt_Put(S390X_GUEST_OFFSET(guest_IA), + mkaddr_expr(guest_IA_curr_instr))); return disInstr_S390_WRK(guest_code + delta); } diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index f086e55d75..2efc105301 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -714,6 +714,8 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn) break; case S390_INSN_MFENCE: + case S390_INSN_GZERO: + case S390_INSN_GADD: break; default: @@ -917,6 +919,8 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn) break; case S390_INSN_MFENCE: + case S390_INSN_GZERO: + case S390_INSN_GADD: break; default: @@ -1115,6 +1119,35 @@ emit_S(UChar *p, UInt op, UChar b2, UShort d2) } +static UChar * +emit_SIY(UChar *p, ULong op, UChar i2, UChar b1, UShort dl1, UChar dh1) +{ + ULong the_insn = op; + + the_insn |= ((ULong)i2) << 32; + the_insn |= ((ULong)b1) << 28; + the_insn |= ((ULong)dl1) << 16; + the_insn |= ((ULong)dh1) << 8; + + return emit_6bytes(p, the_insn); +} + + +static UChar * +emit_SSa(UChar *p, ULong op, UChar l, UChar b1, UShort d1, UChar b2, UShort d2) +{ + ULong the_insn = op; + + the_insn |= ((ULong)l) << 32; + the_insn |= ((ULong)b1) << 28; + the_insn |= ((ULong)d1) << 16; + the_insn |= ((ULong)b2) << 12; + the_insn |= ((ULong)d2) << 0; + + return emit_6bytes(p, the_insn); +} + + /*------------------------------------------------------------*/ /*--- Functions to emit particular instructions ---*/ /*------------------------------------------------------------*/ @@ -1239,6 +1272,18 @@ s390_emit_AGHI(UChar *p, UChar r1, UShort i2) } +static UChar * +s390_emit_AGSI(UChar *p, UChar i2, UChar b1, UShort dl1, UChar dh1) +{ + vassert(s390_host_has_gie); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC3(MNM, INT, SDXB), "agsi", (Int)(Char)i2, dh1, dl1, 0, b1); + + return emit_SIY(p, 0xeb000000007aULL, i2, b1, dl1, dh1); +} + + static UChar * s390_emit_NR(UChar *p, UChar r1, UChar r2) { @@ -1687,6 +1732,16 @@ s390_emit_XILF(UChar *p, UChar r1, UInt i2) } +static UChar * +s390_emit_XC(UChar *p, UInt l, UChar b1, UShort d1, UChar b2, UShort d2) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC3(MNM, UDLB, UDXB), "xc", d1, l, b1, d2, 0, b2); + + return emit_SSa(p, 0xd70000000000ULL, l, b1, d1, b2, d2); +} + + static UChar * s390_emit_FLOGR(UChar *p, UChar r1, UChar r2) { @@ -4406,6 +4461,34 @@ s390_insn_mfence(void) } +s390_insn * +s390_insn_gzero(UChar size, UInt offset) +{ + s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn)); + + insn->tag = S390_INSN_GZERO; + insn->size = size; + insn->variant.gzero.offset = offset; + + return insn; +} + + +s390_insn * +s390_insn_gadd(UChar size, UInt offset, UChar delta, ULong value) +{ + s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn)); + + insn->tag = S390_INSN_GADD; + insn->size = size; + insn->variant.gadd.offset = offset; + insn->variant.gadd.delta = delta; + insn->variant.gadd.value = value; + + return insn; +} + + /*---------------------------------------------------------------*/ /*--- Debug print ---*/ /*---------------------------------------------------------------*/ @@ -4477,6 +4560,10 @@ s390_sprintf(HChar *buf, HChar *fmt, ...) s390_amode_as_string(va_arg(args, s390_amode *))); continue; + case 'G': /* %G = guest state @ offset */ + p += vex_sprintf(p, "guest[%d]", va_arg(args, UInt)); + continue; + case 'C': /* %C = condition code */ p += vex_sprintf(p, "%s", s390_cc_as_string(va_arg(args, s390_cc_t))); continue; @@ -4821,6 +4908,17 @@ s390_insn_as_string(const s390_insn *insn) s390_sprintf(buf, "%M", "v-mfence"); return buf; /* avoid printing "size = ..." which is meaningless */ + case S390_INSN_GZERO: + s390_sprintf(buf, "%M %G", "v-gzero", insn->variant.gzero.offset); + break; + + case S390_INSN_GADD: + s390_sprintf(buf, "%M %G += %I (= %I)", "v-gadd", + insn->variant.gadd.offset, + (Long)(Char)insn->variant.gadd.delta, + insn->variant.gadd.value); + break; + default: goto fail; } @@ -7042,6 +7140,24 @@ s390_insn_mfence_emit(UChar *buf, const s390_insn *insn) } +static UChar * +s390_insn_gzero_emit(UChar *buf, const s390_insn *insn) +{ + return s390_emit_XC(buf, insn->size - 1, + S390_REGNO_GUEST_STATE_POINTER, insn->variant.gzero.offset, + S390_REGNO_GUEST_STATE_POINTER, insn->variant.gzero.offset); +} + + +static UChar * +s390_insn_gadd_emit(UChar *buf, const s390_insn *insn) +{ + return s390_emit_AGSI(buf, insn->variant.gadd.delta, + S390_REGNO_GUEST_STATE_POINTER, + DISP20(insn->variant.gadd.offset)); +} + + Int emit_S390Instr(UChar *buf, Int nbuf, s390_insn *insn, Bool mode64, void *dispatch_unassisted, void *dispatch_assisted) @@ -7159,6 +7275,14 @@ emit_S390Instr(UChar *buf, Int nbuf, s390_insn *insn, Bool mode64, end = s390_insn_mfence_emit(buf, insn); break; + case S390_INSN_GZERO: + end = s390_insn_gzero_emit(buf, insn); + break; + + case S390_INSN_GADD: + end = s390_insn_gadd_emit(buf, insn); + break; + default: vpanic("s390_insn_emit"); } diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index cdce301469..b5dff6827f 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -142,7 +142,9 @@ typedef enum { S390_INSN_BFP128_COMPARE, S390_INSN_BFP128_CONVERT_TO, S390_INSN_BFP128_CONVERT_FROM, - S390_INSN_MFENCE + S390_INSN_MFENCE, + S390_INSN_GZERO, /* Assign zero to a guest register */ + S390_INSN_GADD /* Add a value to a guest register */ } s390_insn_tag; @@ -397,6 +399,14 @@ typedef struct { HReg op2_hi; /* right operand; high part */ HReg op2_lo; /* right operand; low part */ } bfp128_compare; + struct { + UInt offset; + } gzero; + struct { + UInt offset; + UChar delta; + ULong value; /* for debugging only */ + } gadd; } variant; } s390_insn; @@ -447,6 +457,8 @@ s390_insn *s390_insn_bfp128_convert_from(UChar size, s390_bfp_unop_t, HReg dst, HReg op_hi, HReg op_lo, s390_round_t); s390_insn *s390_insn_mfence(void); +s390_insn *s390_insn_gzero(UChar size, UInt offset); +s390_insn *s390_insn_gadd(UChar size, UInt offset, UChar delta, ULong value); UInt s390_insn_emit(UChar *buf, Int nbuf, const s390_insn *insn, void *dispatch); diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c index 2bc23e8833..f86301f04d 100644 --- a/VEX/priv/host_s390_isel.c +++ b/VEX/priv/host_s390_isel.c @@ -34,10 +34,11 @@ #include "libvex_ir.h" #include "libvex.h" #include "libvex_s390x_common.h" +#include "libvex_guest_offsets.h" -#include "ir_match.h" #include "main_util.h" #include "main_globals.h" +#include "guest_s390_defs.h" /* guest_s390x_state_requires_precise_mem_exns */ #include "host_generic_regs.h" #include "host_s390_defs.h" @@ -68,8 +69,27 @@ - The host subarchitecture we are selecting insns for. This is set at the start and does not change. + + - A flag to indicate whether the guest IA has been assigned to. + + - Values of certain guest registers which are often assigned constants. */ +/* Symbolic names for guest registers whose value we're tracking */ +enum { + GUEST_IA, + GUEST_CC_OP, + GUEST_CC_DEP1, + GUEST_CC_DEP2, + GUEST_CC_NDEP, + GUEST_SYSNO, + GUEST_UNKNOWN /* must be the last entry */ +}; + +/* Number of registers we're tracking. */ +#define NUM_TRACKED_REGS GUEST_UNKNOWN + + typedef struct { IRTypeEnv *type_env; @@ -79,10 +99,12 @@ typedef struct { HInstrArray *code; + ULong old_value[NUM_TRACKED_REGS]; UInt vreg_ctr; UInt hwcaps; - + Bool first_IA_assignment; + Bool old_value_valid[NUM_TRACKED_REGS]; } ISelEnv; @@ -96,6 +118,33 @@ static HReg s390_isel_float_expr(ISelEnv *, IRExpr *); static void s390_isel_float128_expr(HReg *, HReg *, ISelEnv *, IRExpr *); +static Int +get_guest_reg(Int offset) +{ + switch (offset) { + case OFFSET_s390x_IA: return GUEST_IA; + case OFFSET_s390x_CC_OP: return GUEST_CC_OP; + case OFFSET_s390x_CC_DEP1: return GUEST_CC_DEP1; + case OFFSET_s390x_CC_DEP2: return GUEST_CC_DEP2; + case OFFSET_s390x_CC_NDEP: return GUEST_CC_NDEP; + case OFFSET_s390x_SYSNO: return GUEST_SYSNO; + + /* Also make sure there is never a partial write to one of + these registers. That would complicate matters. */ + case OFFSET_s390x_IA+1 ... OFFSET_s390x_IA+7: + case OFFSET_s390x_CC_OP+1 ... OFFSET_s390x_CC_OP+7: + case OFFSET_s390x_CC_DEP1+1 ... OFFSET_s390x_CC_DEP1+7: + case OFFSET_s390x_CC_DEP2+1 ... OFFSET_s390x_CC_DEP2+7: + case OFFSET_s390x_CC_NDEP+1 ... OFFSET_s390x_CC_NDEP+7: + vassert("partial update of this guest state register is not allowed"); + break; + + default: break; + } + + return GUEST_UNKNOWN; +} + /* Add an instruction */ static void addInstr(ISelEnv *env, s390_insn *insn) @@ -203,6 +252,16 @@ ulong_fits_signed_20bit(ULong val) } +static __inline__ Bool +ulong_fits_signed_8bit(ULong val) +{ + Long v = val & 0xFFu; + + v = (v << 56) >> 56; /* sign extend */ + + return val == (ULong)v; +} + /* EXPR is an expression that is used as an address. Return an s390_amode for it. */ static s390_amode * @@ -2139,7 +2198,98 @@ s390_isel_stmt(ISelEnv *env, IRStmt *stmt) IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); HReg src; s390_amode *am; + ULong new_value, old_value, difference; + + /* Detect updates to certain guest registers. We track the contents + of those registers as long as they contain constants. If the new + constant is either zero or in the 8-bit neighbourhood of the + current value we can use a memory-to-memory insn to do the update. */ + + Int offset = stmt->Ist.Put.offset; + + /* Check necessary conditions: + (1) must be one of the registers we care about + (2) assigned value must be a constant */ + Int guest_reg = get_guest_reg(offset); + + if (guest_reg == GUEST_UNKNOWN) goto not_special; + + if (guest_reg == GUEST_IA) { + /* If this is the first assignment to the IA reg, don't special case + it. We need to do a full 8-byte assignment here. The reason is + that in case of a redirected translation the guest IA does not + contain the redirected-to address. Instead it contains the + redirected-from address and those can be far apart. So in order to + do incremnetal updates if the IA in the future we need to get the + initial address of the super block correct. */ + if (env->first_IA_assignment) { + env->first_IA_assignment = False; + goto not_special; + } + } + + if (stmt->Ist.Put.data->tag != Iex_Const) { + /* Invalidate guest register contents */ + env->old_value_valid[guest_reg] = False; + goto not_special; + } + /* OK. Necessary conditions are satisfied. */ + + /* Get the old value and update it */ + vassert(tyd == Ity_I64); + + old_value = env->old_value[guest_reg]; + new_value = stmt->Ist.Put.data->Iex.Const.con->Ico.U64; + env->old_value[guest_reg] = new_value; + + Bool old_value_is_valid = env->old_value_valid[guest_reg]; + env->old_value_valid[guest_reg] = True; + + /* If the register already contains the new value, there is nothing + to do here. Unless the guest register requires precise memory + exceptions. */ + if (old_value_is_valid && new_value == old_value) { + if (! guest_s390x_state_requires_precise_mem_exns(offset, offset + 8)) { + return; + } + } + + /* guest register = 0 */ + if (new_value == 0) { + addInstr(env, s390_insn_gzero(sizeofIRType(tyd), offset)); + return; + } + + if (old_value_is_valid == False) goto not_special; + + /* If the new value is in the neighbourhood of the old value + we can use a memory-to-memory insn */ + difference = new_value - old_value; + + if (s390_host_has_gie && ulong_fits_signed_8bit(difference)) { + addInstr(env, s390_insn_gadd(sizeofIRType(tyd), offset, + (difference & 0xFF), new_value)); + return; + } + + /* If the high word is the same it is sufficient to load the low word. + Use R0 as a scratch reg. */ + if ((old_value >> 32) == (new_value >> 32)) { + HReg r0 = make_gpr(env, 0); + HReg gsp = make_gpr(env, S390_REGNO_GUEST_STATE_POINTER); + s390_amode *gam; + + gam = s390_amode_b12(offset + 4, gsp); + addInstr(env, s390_insn_load_immediate(4, r0, + new_value & 0xFFFFFFFF)); + addInstr(env, s390_insn_store(4, gam, r0)); + return; + } + + /* No special case applies... fall through */ + + not_special: am = s390_amode_for_guest_state(stmt->Ist.Put.offset); switch (tyd) { @@ -2230,6 +2380,17 @@ s390_isel_stmt(ISelEnv *env, IRStmt *stmt) IRType retty; IRDirty* d = stmt->Ist.Dirty.details; Bool passBBP; + Int i; + + /* Invalidate tracked values of those guest state registers that are + modified by this helper. */ + for (i = 0; i < d->nFxState; ++i) { + if ((d->fxState[i].fx == Ifx_Write || d->fxState[i].fx == Ifx_Modify)) { + Int guest_reg = get_guest_reg(d->fxState[i].offset); + if (guest_reg != GUEST_UNKNOWN) + env->old_value_valid[guest_reg] = False; + } + } if (d->nFxState == 0) vassert(!d->needsBBP); @@ -2372,6 +2533,13 @@ iselSB_S390(IRSB *bb, VexArch arch_host, VexArchInfo *archinfo_host, /* Copy BB's type env. */ env->type_env = bb->tyenv; + /* Set up data structures for tracking guest register values. */ + env->first_IA_assignment = True; + for (i = 0; i < NUM_TRACKED_REGS; ++i) { + env->old_value[i] = 0; /* just something to have a defined value */ + env->old_value_valid[i] = False; + } + /* Make up an IRTemp -> virtual HReg mapping. This doesn't change as we go along. For some reason types_used has Int type -- but it should be unsigned. Internally we use an unsigned type; so we