From: Andreas Arnez Date: Fri, 5 May 2023 15:48:31 +0000 (+0200) Subject: s390x: Optimize CLC for 1, 2, 4, and 8 bytes X-Git-Tag: VALGRIND_3_22_0~152 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=97d335621a60a3c29ded56ea4a29ae1968ed7c8a;p=thirdparty%2Fvalgrind.git s390x: Optimize CLC for 1, 2, 4, and 8 bytes The CLC instruction compares two memory areas with sizes from 1 up to 256 bytes. Currently Valgrind always implements it with a bytewise loop. Add special handling for the sizes 1, 2, 4, and 8: in those cases, realize CLC with a single 8-, 16-, 32-, or 64-bit integer comparison, respectively. (The length argument holds the instruction's length code, i.e. the byte count minus one, hence the switch cases 0, 1, 3, and 7.) Apart from being a slight optimization, this also improves the diagnostics for uninitialized values since it avoids the manufactured conditional jump that breaks out of the loop over the individual bytes. --- diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 250daeca13..39356e088f 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -12849,11 +12849,28 @@ s390_irgen_TDGXT(UChar r1, IRTemp op2addr) static const HChar * s390_irgen_CLC(UChar length, IRTemp start1, IRTemp start2) { - IRTemp len = newTemp(Ity_I64); - - assign(len, mkU64(length)); - s390_irgen_CLC_EX(len, start1, start2); + IRType ty; + + switch (length) { + case 0: ty = Ity_I8; break; + case 1: ty = Ity_I16; break; + case 3: ty = Ity_I32; break; + case 7: ty = Ity_I64; break; + default: ty = Ity_INVALID; + } + if (ty != Ity_INVALID) { + IRTemp a = newTemp(ty); + IRTemp b = newTemp(ty); + + assign(a, load(ty, mkexpr(start1))); + assign(b, load(ty, mkexpr(start2))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, a, b); + } else { + IRTemp len = newTemp(Ity_I64); + assign(len, mkU64(length)); + s390_irgen_CLC_EX(len, start1, start2); + } return "clc"; }