From: Joerg Schmidbauer
Date: Thu, 29 Feb 2024 11:50:05 +0000 (+0100)
Subject: s390x: support CPACF sha3/shake performance improvements
X-Git-Tag: openssl-3.4.0-alpha1~25
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=25f5d7b85f6657cd2f9f1ab7ae87f319d9bafe54;p=thirdparty%2Fopenssl.git

s390x: support CPACF sha3/shake performance improvements

On newer machines the SHA3/SHAKE performance of CPACF instructions KIMD and KLMD can be enhanced by using additional modifier bits. This allows the application to omit initializing the ICV, but also affects the internal processing of the instructions. Performance is mostly gained when processing short messages. The new CPACF feature is backwards compatible with older machines, i.e. the new modifier bits are ignored on older machines. However, to save the ICV initialization, the application must detect the MSA level and omit the ICV initialization only if this feature is supported.

Signed-off-by: Joerg Schmidbauer
Reviewed-by: Paul Dale
Reviewed-by: Tomas Mraz
(Merged from https://github.com/openssl/openssl/pull/25235)
---

diff --git a/crypto/s390x_arch.h b/crypto/s390x_arch.h
index 857e5b56322..2bb82347ffa 100644
--- a/crypto/s390x_arch.h
+++ b/crypto/s390x_arch.h
@@ -191,6 +191,9 @@ extern int OPENSSL_s390xcex;
 # define S390X_KMA_LAAD 0x200
 # define S390X_KMA_HS 0x400
 # define S390X_KDSA_D 0x80
+# define S390X_KIMD_NIP 0x8000
+# define S390X_KLMD_DUFOP 0x4000
+# define S390X_KLMD_NIP 0x8000
 # define S390X_KLMD_PS 0x100
 # define S390X_KMAC_IKP 0x8000
 # define S390X_KMAC_IIMP 0x4000
diff --git a/crypto/s390xcpuid.pl b/crypto/s390xcpuid.pl
index 560a2f09e97..1ff9cab99e3 100755
--- a/crypto/s390xcpuid.pl
+++ b/crypto/s390xcpuid.pl
@@ -308,7 +308,7 @@ s390x_kimd:
     llgfr %r0,$fc
     lgr %r1,$param
-    .long 0xb93e0002 # kimd %r0,%r2
+    .long 0xb93e8002 # kimd %r0,%r2[,M3]
     brc 1,.-4 # pay attention to "partial completion"
     br $ra
@@ -329,7 +329,7 @@ s390x_klmd:
     llgfr %r0,$fc
     l${g} %r1,$stdframe($sp)
-    .long 0xb93f0042
# klmd %r4,%r2
+    .long 0xb93f8042 # klmd %r4,%r2[,M3]
     brc 1,.-4 # pay attention to "partial completion"
     br $ra
diff --git a/crypto/sha/sha3.c b/crypto/sha/sha3.c
index 4d54712168d..56f87133f72 100644
--- a/crypto/sha/sha3.c
+++ b/crypto/sha/sha3.c
@@ -8,13 +8,19 @@
  */
 #include <string.h>
+#if defined(__s390x__) && defined(OPENSSL_CPUID_OBJ)
+# include "crypto/s390x_arch.h"
+#endif
 #include "internal/sha3.h"
 void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r, int next);
 void ossl_sha3_reset(KECCAK1600_CTX *ctx)
 {
-    memset(ctx->A, 0, sizeof(ctx->A));
+#if defined(__s390x__) && defined(OPENSSL_CPUID_OBJ)
+    if (!(OPENSSL_s390xcap_P.stfle[1] & S390X_CAPBIT(S390X_MSA12)))
+#endif
+        memset(ctx->A, 0, sizeof(ctx->A));
     ctx->bufsz = 0;
     ctx->xof_state = XOF_STATE_INIT;
 }
diff --git a/providers/implementations/digests/sha3_prov.c b/providers/implementations/digests/sha3_prov.c
index 5d08e32636f..bfaca10532d 100644
--- a/providers/implementations/digests/sha3_prov.c
+++ b/providers/implementations/digests/sha3_prov.c
@@ -193,26 +193,32 @@ static size_t s390x_sha3_absorb(void *vctx, const void *inp, size_t len)
 {
     KECCAK1600_CTX *ctx = vctx;
     size_t rem = len % ctx->block_size;
+    unsigned int fc;
     if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
         return 0;
+    fc = ctx->pad;
+    fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
     ctx->xof_state = XOF_STATE_ABSORB;
-    s390x_kimd(inp, len - rem, ctx->pad, ctx->A);
+    s390x_kimd(inp, len - rem, fc, ctx->A);
     return rem;
 }
 static int s390x_sha3_final(void *vctx, unsigned char *out, size_t outlen)
 {
     KECCAK1600_CTX *ctx = vctx;
+    unsigned int fc;
     if (!ossl_prov_is_running())
         return 0;
     if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
         return 0;
+    fc = ctx->pad | S390X_KLMD_DUFOP;
+    fc |= ctx->xof_state == XOF_STATE_INIT ?
S390X_KLMD_NIP : 0;
     ctx->xof_state = XOF_STATE_FINAL;
-    s390x_klmd(ctx->buf, ctx->bufsz, NULL, 0, ctx->pad, ctx->A);
+    s390x_klmd(ctx->buf, ctx->bufsz, NULL, 0, fc, ctx->A);
     memcpy(out, ctx->A, outlen);
     return 1;
 }
@@ -220,14 +226,17 @@ static int s390x_sha3_final(void *vctx, unsigned char *out, size_t outlen)
 static int s390x_shake_final(void *vctx, unsigned char *out, size_t outlen)
 {
     KECCAK1600_CTX *ctx = vctx;
+    unsigned int fc;
     if (!ossl_prov_is_running())
         return 0;
     if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
         return 0;
+    fc = ctx->pad | S390X_KLMD_DUFOP;
+    fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KLMD_NIP : 0;
     ctx->xof_state = XOF_STATE_FINAL;
-    s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, ctx->pad, ctx->A);
+    s390x_klmd(ctx->buf, ctx->bufsz, out, outlen, fc, ctx->A);
     return 1;
 }
@@ -277,24 +286,28 @@ static int s390x_keccakc_final(void *vctx, unsigned char *out, size_t outlen,
     size_t bsz = ctx->block_size;
     size_t num = ctx->bufsz;
     size_t needed = outlen;
+    unsigned int fc;
     if (!ossl_prov_is_running())
         return 0;
     if (!(ctx->xof_state == XOF_STATE_INIT || ctx->xof_state == XOF_STATE_ABSORB))
         return 0;
+    fc = ctx->pad;
+    fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
     ctx->xof_state = XOF_STATE_FINAL;
     if (outlen == 0)
         return 1;
     memset(ctx->buf + num, 0, bsz - num);
     ctx->buf[num] = padding;
     ctx->buf[bsz - 1] |= 0x80;
-    s390x_kimd(ctx->buf, bsz, ctx->pad, ctx->A);
+    s390x_kimd(ctx->buf, bsz, fc, ctx->A);
     num = needed > bsz ?
bsz : needed;
     memcpy(out, ctx->A, num);
     needed -= num;
     if (needed > 0)
-        s390x_klmd(NULL, 0, out + bsz, needed, ctx->pad | S390X_KLMD_PS, ctx->A);
+        s390x_klmd(NULL, 0, out + bsz, needed,
+                   ctx->pad | S390X_KLMD_PS | S390X_KLMD_DUFOP, ctx->A);
     return 1;
 }
@@ -314,6 +327,7 @@ static int s390x_keccakc_squeeze(void *vctx, unsigned char *out, size_t outlen,
 {
     KECCAK1600_CTX *ctx = vctx;
     size_t len;
+    unsigned int fc;
     if (!ossl_prov_is_running())
         return 0;
@@ -329,7 +343,9 @@ static int s390x_keccakc_squeeze(void *vctx, unsigned char *out, size_t outlen,
         memset(ctx->buf + ctx->bufsz, 0, len);
         ctx->buf[ctx->bufsz] = padding;
         ctx->buf[ctx->block_size - 1] |= 0x80;
-        s390x_kimd(ctx->buf, ctx->block_size, ctx->pad, ctx->A);
+        fc = ctx->pad;
+        fc |= ctx->xof_state == XOF_STATE_INIT ? S390X_KIMD_NIP : 0;
+        s390x_kimd(ctx->buf, ctx->block_size, fc, ctx->A);
         ctx->bufsz = 0;
         /* reuse ctx->bufsz to count bytes squeezed from current sponge */
     }