From: Julian Seward
Date: Sun, 7 Aug 2016 16:42:37 +0000 (+0000)
Subject: Implement SHA1C, SHA1M, SHA1P, SHA1SU0, SHA256H2, SHA256H, SHA256SU1,
X-Git-Tag: svn/VALGRIND_3_12_0^2~25
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4a9cf8c50d9b7e6969df502d3890f55714b41a80;p=thirdparty%2Fvalgrind.git

Implement SHA1C, SHA1M, SHA1P, SHA1SU0, SHA256H2, SHA256H, SHA256SU1,
SHA1H, SHA1SU1, SHA256SU0.

git-svn-id: svn://svn.valgrind.org/vex/trunk@3241
---

diff --git a/VEX/priv/guest_arm_defs.h b/VEX/priv/guest_arm_defs.h
index 34628fd6d2..ed4b7ffe49 100644
--- a/VEX/priv/guest_arm_defs.h
+++ b/VEX/priv/guest_arm_defs.h
@@ -113,25 +113,109 @@ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
 
 /* --- DIRTY HELPERS --- */
 
+/* Confusingly, for the AES insns, the 32-bit ARM docs refer to the
+   one-and-only source register as 'm' whereas the 64-bit docs refer to
+   it as 'n'.  We sidestep that here by just calling it 'arg32_*'. */
+
+extern
+void armg_dirtyhelper_AESE (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     );
+
+extern
+void armg_dirtyhelper_AESD (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     );
+
+extern
+void armg_dirtyhelper_AESMC (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     );
+
+extern
+void armg_dirtyhelper_AESIMC (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     );
+
+extern
+void armg_dirtyhelper_SHA1C (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
+
+extern
+void armg_dirtyhelper_SHA1P (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
+
+extern
+void armg_dirtyhelper_SHA1M (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
+
+extern
+void armg_dirtyhelper_SHA1SU0 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
+
+extern
+void armg_dirtyhelper_SHA256H (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
+
+extern
+void armg_dirtyhelper_SHA256H2 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
+
 extern
-void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
-                             UInt argW3, UInt argW2,
-                             UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA256SU1 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
 
 extern
-void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
-                             UInt argW3, UInt argW2,
-                             UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA1SU1 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
 
 extern
-void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
-                              UInt argW3, UInt argW2,
-                              UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA256SU0 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
 
 extern
-void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
-                               UInt argW3, UInt argW2,
-                               UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA1H (
+        /*OUT*/V128* res,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     );
 
 
 /*---------------------------------------------------------*/
diff --git a/VEX/priv/guest_arm_helpers.c b/VEX/priv/guest_arm_helpers.c
index da6bba80b6..48a3ba46c3 100644
--- a/VEX/priv/guest_arm_helpers.c
+++ b/VEX/priv/guest_arm_helpers.c
@@ -553,50 +553,234 @@ UInt armg_calculate_condition ( UInt cond_n_op /* (ARMCondcode << 4) | cc_op */,
    be at least 8 aligned. */
 /* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
-                             UInt argW3, UInt argW2,
-                             UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESE (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     )
 {
    vassert(0 == (((HWord)res) & (8-1)));
-   ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
-   ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
    arm64g_dirtyhelper_AESE(res, argHi, argLo);
 }
 
 /* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
-                             UInt argW3, UInt argW2,
-                             UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESD (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     )
 {
    vassert(0 == (((HWord)res) & (8-1)));
-   ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
-   ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
    arm64g_dirtyhelper_AESD(res, argHi, argLo);
 }
 
 /* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
-                              UInt argW3, UInt argW2,
-                              UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESMC (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     )
 {
    vassert(0 == (((HWord)res) & (8-1)));
-   ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
-   ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
    arm64g_dirtyhelper_AESMC(res, argHi, argLo);
 }
 
 /* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
-                               UInt argW3, UInt argW2,
-                               UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESIMC (
+        /*OUT*/V128* res,
+        UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+     )
 {
    vassert(0 == (((HWord)res) & (8-1)));
-   ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
-   ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+   ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+   ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
    arm64g_dirtyhelper_AESIMC(res, argHi, argLo);
 }
 
+/* DIRTY HELPERS for the SHA instruction family.  Same comments
+   as for the AES group above apply.
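+   Each V128 operand arrives from generated code as four 32-bit words,
+   most significant word (lane 3) first; the helpers below reassemble
+   each pair of words into hi/lo ULongs and defer to the corresponding
+   arm64 helpers.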
+*/
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA1C (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA1C(res, argDhi, argDlo,
+                            argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA1P (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA1P(res, argDhi, argDlo,
+                            argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA1M (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA1M(res, argDhi, argDlo,
+                            argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA1SU0 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA1SU0(res, argDhi, argDlo,
+                              argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA256H (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA256H(res, argDhi, argDlo,
+                              argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA256H2 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA256H2(res, argDhi, argDlo,
+                               argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA256SU1 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+   ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA256SU1(res, argDhi, argDlo,
+                                argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA1SU1 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA1SU1(res, argDhi, argDlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA256SU0 (
+        /*OUT*/V128* res,
+        UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+   ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA256SU0(res, argDhi, argDlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE */
+void armg_dirtyhelper_SHA1H (
+        /*OUT*/V128* res,
+        UInt argM3, UInt argM2, UInt argM1, UInt argM0
+     )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+   ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+   arm64g_dirtyhelper_SHA1H(res, argMhi, argMlo);
+}
+
+
 /*---------------------------------------------------------------*/
 /*--- Flag-helpers translation-time function specialisers.    ---*/
 /*--- These help iropt specialise calls the above run-time    ---*/
diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c
index 1365d1cfba..f57276186e 100644
--- a/VEX/priv/guest_arm_toIR.c
+++ b/VEX/priv/guest_arm_toIR.c
@@ -12688,7 +12688,7 @@ static Bool decode_V8_instruction (
       vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
    }
 
-   /* ----------- AESD.8 q_q ----------- */
+   /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
    /*
        31        27     23  21 19 17 15 11   7      3
    T1: 1111 1111 1 D 11 sz 00 d 0011 00 M 0 m  AESE Qd, Qm
    A1: 1111 0011 1 D 11 sz 00 d 0011 00 M 0 m  AESE Qd, Qm
@@ -12703,7 +12703,8 @@ A1: 1111 0011 1 D 11 sz 00 d 0011 11 M 0 m  AESIMC Qd, Qm
 
    sz must be 00
-   ARM encoding is in NV space
+   ARM encoding is in NV space.
+   In Thumb mode, we must not be in an IT block.
    */
    {
      UInt regD = 99, regM = 99, opc = 4/*invalid*/;
@@ -12727,6 +12728,9 @@
      if (isT) {
         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
      }
+     /* In ARM mode, this is statically unconditional.  In Thumb mode,
+        this must be dynamically unconditional, and we've SIGILLd if not.
+        In either case we can create unconditional IR. */
      IRTemp op1 = newTemp(Ity_V128);
      IRTemp op2 = newTemp(Ity_V128);
      IRTemp src = newTemp(Ity_V128);
@@ -12769,6 +12773,276 @@
      /* fall through */
    }
 
+   /* ----------- SHA 3-reg insns q_q_q ----------- */
+   /*
+       31   27   23 19 15 11   7   3
+   T1: 1110 1111 0 D 00 n d 1100 N Q M 0 m  SHA1C Qd, Qn, Qm  ix=0
+   A1: 1111 0010 ----------------------------
+
+   T1: 1110 1111 0 D 01 n d 1100 N Q M 0 m  SHA1P Qd, Qn, Qm  ix=1
+   A1: 1111 0010 ----------------------------
+
+   T1: 1110 1111 0 D 10 n d 1100 N Q M 0 m  SHA1M Qd, Qn, Qm  ix=2
+   A1: 1111 0010 ----------------------------
+
+   T1: 1110 1111 0 D 11 n d 1100 N Q M 0 m  SHA1SU0 Qd, Qn, Qm  ix=3
+   A1: 1111 0010 ----------------------------
+   (that's a complete set of 4, based on insn[21,20])
+
+   T1: 1111 1111 0 D 00 n d 1100 N Q M 0 m  SHA256H Qd, Qn, Qm  ix=4
+   A1: 1111 0011 ----------------------------
+
+   T1: 1111 1111 0 D 01 n d 1100 N Q M 0 m  SHA256H2 Qd, Qn, Qm  ix=5
+   A1: 1111 0011 ----------------------------
+
+   T1: 1111 1111 0 D 10 n d 1100 N Q M 0 m  SHA256SU1 Qd, Qn, Qm  ix=6
+   A1: 1111 0011 ----------------------------
+   (3/4 of a complete set of 4, based on insn[21,20])
+
+   Q must be 1.  Same comments about conditionalisation as for the AES
+   group above apply.
+   */
+   {
+      UInt ix = 8; /* invalid */
+      Bool gate = False;
+
+      UInt hi9_sha1   = isT ? BITS9(1,1,1,0,1,1,1,1,0)
+                            : BITS9(1,1,1,1,0,0,1,0,0);
+      UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
+                            : BITS9(1,1,1,1,0,0,1,1,0);
+      if ((INSNA(31,23) == hi9_sha1 || INSNA(31,23) == hi9_sha256)
+          && INSNA(11,8) == BITS4(1,1,0,0)
+          && INSNA(6,6) == 1 && INSNA(4,4) == 0) {
+         ix = INSNA(21,20);
+         if (INSNA(31,23) == hi9_sha256)
+            ix |= 4;
+         if (ix < 7)
+            gate = True;
+      }
+
+      UInt regN = (INSNA(7,7) << 4) | INSNA(19,16);
+      UInt regD = (INSNA(22,22) << 4) | INSNA(15,12);
+      UInt regM = (INSNA(5,5) << 4) | INSNA(3,0);
+      if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
+         gate = False;
+
+      if (gate) {
+         vassert(ix >= 0 && ix < 7);
+         const HChar* inames[7]
+            = { "sha1c", "sha1p", "sha1m", "sha1su0",
+                "sha256h", "sha256h2", "sha256su1" };
+         void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
+                           UInt,UInt,UInt,UInt,UInt,UInt)
+            = { &armg_dirtyhelper_SHA1C,   &armg_dirtyhelper_SHA1P,
+                &armg_dirtyhelper_SHA1M,   &armg_dirtyhelper_SHA1SU0,
+                &armg_dirtyhelper_SHA256H, &armg_dirtyhelper_SHA256H2,
+                &armg_dirtyhelper_SHA256SU1 };
+         const HChar* hnames[7]
+            = { "armg_dirtyhelper_SHA1C",   "armg_dirtyhelper_SHA1P",
+                "armg_dirtyhelper_SHA1M",   "armg_dirtyhelper_SHA1SU0",
+                "armg_dirtyhelper_SHA256H", "armg_dirtyhelper_SHA256H2",
+                "armg_dirtyhelper_SHA256SU1" };
+
+         /* This is a really lame way to implement this, even worse than
+            the arm64 version.  But at least it works. */
+
+         if (isT) {
+            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+         }
+
+         IRTemp vD = newTemp(Ity_V128);
+         IRTemp vN = newTemp(Ity_V128);
+         IRTemp vM = newTemp(Ity_V128);
+         assign(vD, getQReg(regD >> 1));
+         assign(vN, getQReg(regN >> 1));
+         assign(vM, getQReg(regM >> 1));
+
+         IRTemp d32_3, d32_2, d32_1, d32_0;
+         d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
+         breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
+
+         IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
+         n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
+         breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
+
+         IRTemp m32_3, m32_2, m32_1, m32_0;
+         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
+         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
+
+         IRTemp n32_3 = newTemp(Ity_I32);
+         IRTemp n32_2 = newTemp(Ity_I32);
+         IRTemp n32_1 = newTemp(Ity_I32);
+         IRTemp n32_0 = newTemp(Ity_I32);
+
+         /* Mask off any bits of the N register operand that aren't actually
+            needed, so that Memcheck doesn't complain unnecessarily.
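+            SHA1C, SHA1P and SHA1M (ix 0 .. 2) use only the lowest 32
+            bits of N (the scalar hash-value operand), so lanes 3 .. 1
+            are forced to zero for them; SHA1SU0 and the SHA256 insns
+            (ix 3 .. 6) use all four lanes.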
+         */
+         switch (ix) {
+            case 0: case 1: case 2:
+               assign(n32_3, mkU32(0));
+               assign(n32_2, mkU32(0));
+               assign(n32_1, mkU32(0));
+               assign(n32_0, mkexpr(n32_0_pre));
+               break;
+            case 3: case 4: case 5: case 6:
+               assign(n32_3, mkexpr(n32_3_pre));
+               assign(n32_2, mkexpr(n32_2_pre));
+               assign(n32_1, mkexpr(n32_1_pre));
+               assign(n32_0, mkexpr(n32_0_pre));
+               break;
+            default:
+               vassert(0);
+         }
+
+         IRExpr** argvec
+            = mkIRExprVec_13(
+                 IRExpr_VECRET(),
+                 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
+                 mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
+                 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
+              );
+
+         IRTemp res = newTemp(Ity_V128);
+         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+                                          hnames[ix], helpers[ix], argvec );
+         stmt(IRStmt_Dirty(di));
+         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+         DIP("%s.8 q%u, q%u, q%u\n",
+             inames[ix], regD >> 1, regN >> 1, regM >> 1);
+         return True;
+      }
+      /* fall through */
+   }
+
+   /* ----------- SHA1SU1, SHA256SU0 ----------- */
+   /*
+       31   27   23 21 19    15 11   7   3
+   T1: 1111 1111 1 D 11 1010 d 0011 10 M 0 m  SHA1SU1 Qd, Qm
+   A1: 1111 0011 ----------------------------
+
+   T1: 1111 1111 1 D 11 1010 d 0011 11 M 0 m  SHA256SU0 Qd, Qm
+   A1: 1111 0011 ----------------------------
+
+   Same comments about conditionalisation as for the AES group above apply.
+   */
+   {
+      Bool gate = False;
+
+      UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,0);
+      if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,1,0)
+          && INSNA(11,7) == BITS5(0,0,1,1,1) && INSNA(4,4) == 0) {
+         gate = True;
+      }
+
+      UInt regD = (INSNA(22,22) << 4) | INSNA(15,12);
+      UInt regM = (INSNA(5,5) << 4) | INSNA(3,0);
+      if ((regD & 1) == 1 || (regM & 1) == 1)
+         gate = False;
+
+      Bool is_1SU1 = INSNA(6,6) == 0;
+
+      if (gate) {
+         const HChar* iname
+            = is_1SU1 ? "sha1su1" : "sha256su0";
+         void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
+            = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
+                      : &armg_dirtyhelper_SHA256SU0;
+         const HChar* hname
+            = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
+                      : "armg_dirtyhelper_SHA256SU0";
+
+         if (isT) {
+            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+         }
+
+         IRTemp vD = newTemp(Ity_V128);
+         IRTemp vM = newTemp(Ity_V128);
+         assign(vD, getQReg(regD >> 1));
+         assign(vM, getQReg(regM >> 1));
+
+         IRTemp d32_3, d32_2, d32_1, d32_0;
+         d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
+         breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
+
+         IRTemp m32_3, m32_2, m32_1, m32_0;
+         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
+         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
+
+         IRExpr** argvec
+            = mkIRExprVec_9(
+                 IRExpr_VECRET(),
+                 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
+                 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
+              );
+
+         IRTemp res = newTemp(Ity_V128);
+         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+                                          hname, helper, argvec );
+         stmt(IRStmt_Dirty(di));
+         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+         DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
+         return True;
+      }
+      /* fall through */
+   }
+
+   /* ----------- SHA1H ----------- */
+   /*
+       31   27   23 21 19    15 11   7   3
+   T1: 1111 1111 1 D 11 1001 d 0010 11 M 0 m  SHA1H Qd, Qm
+   A1: 1111 0011 ----------------------------
+
+   Same comments about conditionalisation as for the AES group above apply.
+   */
+   {
+      Bool gate = False;
+
+      UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,0);
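+      /* hi9 holds the expected value of insn bits 31..23 for the
+         T1 (Thumb) or A1 (ARM) encoding respectively, per the table
+         above. */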
+      if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,0,1)
+          && INSNA(11,6) == BITS6(0,0,1,0,1,1) && INSNA(4,4) == 0) {
+         gate = True;
+      }
+
+      UInt regD = (INSNA(22,22) << 4) | INSNA(15,12);
+      UInt regM = (INSNA(5,5) << 4) | INSNA(3,0);
+      if ((regD & 1) == 1 || (regM & 1) == 1)
+         gate = False;
+
+      if (gate) {
+         const HChar* iname = "sha1h";
+         void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
+         const HChar* hname = "armg_dirtyhelper_SHA1H";
+
+         if (isT) {
+            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+         }
+
+         IRTemp vM = newTemp(Ity_V128);
+         assign(vM, getQReg(regM >> 1));
+
+         IRTemp m32_3, m32_2, m32_1, m32_0;
+         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
+         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
+         /* m32_3, m32_2, m32_1 are just abandoned.  No harm; iropt will
+            remove them. */
+
+         IRExpr*  zero   = mkU32(0);
+         IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
+                                         zero, zero, zero, mkexpr(m32_0));
+
+         IRTemp res = newTemp(Ity_V128);
+         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+                                          hname, helper, argvec );
+         stmt(IRStmt_Dirty(di));
+         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+         DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
+         return True;
+      }
+      /* fall through */
+   }
+
    /* ---------- Doesn't match anything. ---------- */
    return False;
 
diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c
index a11bc6d269..426f85d62d 100644
--- a/VEX/priv/host_arm_isel.c
+++ b/VEX/priv/host_arm_isel.c
@@ -436,9 +436,9 @@ Bool doHelperCallWithArgsOnStack ( /*OUT*/UInt* stackAdjustAfterCall,
       HReg argVRegs[n_real_args];
       but that makes it impossible to do 'goto's forward past.
       Hence the following kludge. */
-   vassert(n_real_args <= 11);
-   HReg argVRegs[11];
-   for (i = 0; i < 11; i++)
+   vassert(n_real_args <= 12);
+   HReg argVRegs[12];
+   for (i = 0; i < 12; i++)
      argVRegs[i] = INVALID_HREG;
 
   /* Compute args into vregs. */
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 8fdfcab823..cd29630e3e 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -1986,6 +1986,45 @@ IRExpr** mkIRExprVec_8 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
    vec[8] = NULL;
    return vec;
 }
+IRExpr** mkIRExprVec_9 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+                         IRExpr* arg4, IRExpr* arg5, IRExpr* arg6,
+                         IRExpr* arg7, IRExpr* arg8, IRExpr* arg9 ) {
+   IRExpr** vec = LibVEX_Alloc_inline(10 * sizeof(IRExpr*));
+   vec[0] = arg1;
+   vec[1] = arg2;
+   vec[2] = arg3;
+   vec[3] = arg4;
+   vec[4] = arg5;
+   vec[5] = arg6;
+   vec[6] = arg7;
+   vec[7] = arg8;
+   vec[8] = arg9;
+   vec[9] = NULL;
+   return vec;
+}
+IRExpr** mkIRExprVec_13 ( IRExpr* arg1,  IRExpr* arg2,  IRExpr* arg3,
+                          IRExpr* arg4,  IRExpr* arg5,  IRExpr* arg6,
+                          IRExpr* arg7,  IRExpr* arg8,  IRExpr* arg9,
+                          IRExpr* arg10, IRExpr* arg11, IRExpr* arg12,
+                          IRExpr* arg13
+ ) {
+   IRExpr** vec = LibVEX_Alloc_inline(14 * sizeof(IRExpr*));
+   vec[0]  = arg1;
+   vec[1]  = arg2;
+   vec[2]  = arg3;
+   vec[3]  = arg4;
+   vec[4]  = arg5;
+   vec[5]  = arg6;
+   vec[6]  = arg7;
+   vec[7]  = arg8;
+   vec[8]  = arg9;
+   vec[9]  = arg10;
+   vec[10] = arg11;
+   vec[11] = arg12;
+   vec[12] = arg13;
+   vec[13] = NULL;
+   return vec;
+}
 
 
 /* Constructors -- IRDirty */
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 4c0539950d..a945efaeaa 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -2197,7 +2197,12 @@ extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                 IRExpr*, IRExpr* );
 extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                 IRExpr*, IRExpr*, IRExpr* );
 extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
-                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);
+                                IRExpr*, IRExpr*, IRExpr*, IRExpr* );
+extern IRExpr** mkIRExprVec_9 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
+                                IRExpr*, IRExpr*, IRExpr*, IRExpr*, IRExpr* );
+extern IRExpr** mkIRExprVec_13 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
+                                 IRExpr*, IRExpr*, IRExpr*, IRExpr*,
+                                 IRExpr*, IRExpr*, IRExpr*, IRExpr*, IRExpr* );
 
 /* IRExpr copiers:
    - shallowCopy: shallow-copy (ie. create a new vector that shares the