From: Florian Krohm Date: Sat, 2 Feb 2013 00:16:58 +0000 (+0000) Subject: s390: Change insn selection to recognize memcpy-like statements. X-Git-Tag: svn/VALGRIND_3_9_0^2~119 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4a1d58e96549030cd9687e83e35c237520ab9701;p=thirdparty%2Fvalgrind.git s390: Change insn selection to recognize memcpy-like statements. Add S390_INSN_MEMCPY and generate MVC for that later on. Saves between 0.1 - 1.5% of insns. Observed runtime differences on the perf bucket were within noise margin. git-svn-id: svn://svn.valgrind.org/vex/trunk@2675 --- diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 97ff1087f4..7b716a026d 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -552,6 +552,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn) addHRegUse(u, HRmWrite, insn->variant.move.dst); break; + case S390_INSN_MEMCPY: + s390_amode_get_reg_usage(u, insn->variant.memcpy.src); + s390_amode_get_reg_usage(u, insn->variant.memcpy.dst); + break; + case S390_INSN_COND_MOVE: s390_opnd_RMI_get_reg_usage(u, insn->variant.cond_move.src); addHRegUse(u, HRmWrite, insn->variant.cond_move.dst); @@ -849,6 +854,11 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn) insn->variant.move.src = lookupHRegRemap(m, insn->variant.move.src); break; + case S390_INSN_MEMCPY: + s390_amode_map_regs(m, insn->variant.memcpy.dst); + s390_amode_map_regs(m, insn->variant.memcpy.src); + break; + case S390_INSN_COND_MOVE: insn->variant.cond_move.dst = lookupHRegRemap(m, insn->variant.cond_move.dst); s390_opnd_RMI_map_regs(m, &insn->variant.cond_move.src); @@ -2822,6 +2832,16 @@ s390_emit_MSGFI(UChar *p, UChar r1, UInt i2) } +static UChar * +s390_emit_MVC(UChar *p, UInt l, UChar b1, UShort d1, UChar b2, UShort d2) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC3(MNM, UDLB, UDXB), "mvc", d1, l, b1, d2, 0, b2); + + return emit_SSa(p, 0xd20000000000ULL, l, b1, d1, b2, d2); +} + + 
static UChar * s390_emit_MVI(UChar *p, UChar i2, UChar b1, UShort d1) { @@ -5171,6 +5191,27 @@ s390_insn_move(UChar size, HReg dst, HReg src) } +s390_insn * +s390_insn_memcpy(UChar size, s390_amode *dst, s390_amode *src) +{ + s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn)); + + /* This insn will be mapped to MVC which requires base register + plus 12-bit displacement */ + vassert(src->tag == S390_AMODE_B12); + vassert(dst->tag == S390_AMODE_B12); + + insn->tag = S390_INSN_MEMCPY; + insn->size = size; + insn->variant.memcpy.src = src; + insn->variant.memcpy.dst = dst; + + vassert(size == 1 || size == 2 || size == 4 || size == 8); + + return insn; +} + + s390_insn * s390_insn_cond_move(UChar size, s390_cc_t cond, HReg dst, s390_opnd_RMI src) { @@ -6269,6 +6310,11 @@ s390_insn_as_string(const s390_insn *insn) insn->variant.move.src); break; + case S390_INSN_MEMCPY: + s390_sprintf(buf, "%M %A,%A", "v-memcpy", insn->variant.memcpy.dst, + insn->variant.memcpy.src); + break; + case S390_INSN_COND_MOVE: s390_sprintf(buf, "%M if (%C) %R,%O", "v-move", insn->variant.cond_move.cond, insn->variant.cond_move.dst, @@ -6963,6 +7009,17 @@ s390_insn_move_emit(UChar *buf, const s390_insn *insn) } +static UChar * +s390_insn_memcpy_emit(UChar *buf, const s390_insn *insn) +{ + s390_amode *dst = insn->variant.memcpy.dst; + s390_amode *src = insn->variant.memcpy.src; + + return s390_emit_MVC(buf, insn->size - 1, hregNumber(dst->b), dst->d, + hregNumber(src->b), src->d); +} + + static UChar * s390_insn_load_immediate_emit(UChar *buf, const s390_insn *insn) { @@ -9433,6 +9490,10 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, s390_insn *insn, end = s390_insn_move_emit(buf, insn); break; + case S390_INSN_MEMCPY: + end = s390_insn_memcpy_emit(buf, insn); + break; + case S390_INSN_COND_MOVE: end = s390_insn_cond_move_emit(buf, insn); break; diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 476cc12b64..5fb5a0d3c6 100644 --- a/VEX/priv/host_s390_defs.h +++ 
b/VEX/priv/host_s390_defs.h @@ -119,6 +119,7 @@ typedef enum { S390_INSN_LOAD, /* load register from memory */ S390_INSN_STORE, /* store register to memory */ S390_INSN_MOVE, /* from register to register */ + S390_INSN_MEMCPY, /* from memory to memory */ S390_INSN_COND_MOVE, /* conditonal "move" to register */ S390_INSN_LOAD_IMMEDIATE, S390_INSN_ALU, @@ -333,6 +334,10 @@ typedef struct { HReg dst; HReg src; } move; + struct { + s390_amode *dst; + s390_amode *src; + } memcpy; struct { s390_cc_t cond; HReg dst; @@ -561,6 +566,7 @@ typedef struct { s390_insn *s390_insn_load(UChar size, HReg dst, s390_amode *src); s390_insn *s390_insn_store(UChar size, s390_amode *dst, HReg src); s390_insn *s390_insn_move(UChar size, HReg dst, HReg src); +s390_insn *s390_insn_memcpy(UChar size, s390_amode *dst, s390_amode *src); s390_insn *s390_insn_cond_move(UChar size, s390_cc_t cond, HReg dst, s390_opnd_RMI src); s390_insn *s390_insn_load_immediate(UChar size, HReg dst, ULong val); diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c index 6af9d7b2af..a1c99d8850 100644 --- a/VEX/priv/host_s390_isel.c +++ b/VEX/priv/host_s390_isel.c @@ -3073,6 +3073,19 @@ s390_isel_stmt(ISelEnv *env, IRStmt *stmt) addInstr(env, s390_insn_mimm(sizeofIRType(tyd), am, value)); return; } + /* Check whether we can use a memcpy here. Currently, the restriction + is that both amodes need to be B12, so MVC can be emitted. + We do not consider a store whose data expression is a load because + we don't want to deal with overlapping locations. 
*/ + /* store(get) never overlaps*/ + if (am->tag == S390_AMODE_B12 && + stmt->Ist.Store.data->tag == Iex_Get) { + UInt offset = stmt->Ist.Store.data->Iex.Get.offset; + s390_amode *from = s390_amode_for_guest_state(offset); + addInstr(env, s390_insn_memcpy(sizeofIRType(tyd), am, from)); + return; + } + /* General case: compile data into a register */ src = s390_isel_int_expr(env, stmt->Ist.Store.data); break; @@ -3182,6 +3195,43 @@ s390_isel_stmt(ISelEnv *env, IRStmt *stmt) addInstr(env, s390_insn_mimm(sizeofIRType(tyd), am, value)); return; } + /* Check whether we can use a memcpy here. Currently, the restriction + is that both amodes need to be B12, so MVC can be emitted. */ + /* put(load) never overlaps */ + if (am->tag == S390_AMODE_B12 && + stmt->Ist.Put.data->tag == Iex_Load) { + if (stmt->Ist.Put.data->Iex.Load.end != Iend_BE) goto stmt_fail; + IRExpr *data = stmt->Ist.Put.data->Iex.Load.addr; + s390_amode *from = s390_isel_amode(env, data); + UInt size = sizeofIRType(tyd); + + if (from->tag == S390_AMODE_B12) { + /* Source can be compiled into a B12 amode. */ + addInstr(env, s390_insn_memcpy(size, am, from)); + return; + } + + src = newVRegI(env); + addInstr(env, s390_insn_load(size, src, from)); + break; + } + /* put(get) */ + if (am->tag == S390_AMODE_B12 && + stmt->Ist.Put.data->tag == Iex_Get) { + UInt put_offset = am->d; + UInt get_offset = stmt->Ist.Put.data->Iex.Get.offset; + UInt size = sizeofIRType(tyd); + /* don't memcpy in case of overlap */ + if (put_offset + size <= get_offset || + get_offset + size <= put_offset) { + s390_amode *from = s390_amode_for_guest_state(get_offset); + addInstr(env, s390_insn_memcpy(size, am, from)); + return; + } + goto no_memcpy_put; + } + /* General case: compile data into a register */ +no_memcpy_put: src = s390_isel_int_expr(env, stmt->Ist.Put.data); break;