From: Elizarova Alina Date: Tue, 1 Apr 2025 10:40:40 +0000 (-0700) Subject: Enable x86-64 SM4 optimizations with SM4 ISA extension X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b12cd40e8bdf74b8eacaf2dc638e43986ae13b8d;p=thirdparty%2Fopenssl.git Enable x86-64 SM4 optimizations with SM4 ISA extension Reviewed-by: Tim Hudson Reviewed-by: Neil Horman Reviewed-by: Paul Yang Reviewed-by: Paul Dale (Merged from https://github.com/openssl/openssl/pull/26664) --- diff --git a/CHANGES.md b/CHANGES.md index e2357a03009..02e7934de18 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -37,6 +37,13 @@ OpenSSL 3.6 *Frederik Wedel-Heinen* + * Enabled x86-64 SM4 optimizations with SM4 ISA Extension available starting + Lunar Lake and Arrow Lake S CPUs. The expected performance improvement is + ~3.6x for sm4-cbc, ~2.9x for sm4-gcm, ~9.2x for sm4-xts, ~5.3x for sm4-ccm + (on average, may vary depending on the data size) on Arrow Lake S. + + *Alina Elizarova* + OpenSSL 3.5 ----------- diff --git a/crypto/sm4/asm/sm4-x86_64.pl b/crypto/sm4/asm/sm4-x86_64.pl new file mode 100644 index 00000000000..9fc40fb96a4 --- /dev/null +++ b/crypto/sm4/asm/sm4-x86_64.pl @@ -0,0 +1,312 @@ +#! /usr/bin/env perl +# Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. +# Copyright (c) 2025, Intel Corporation. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html +# +# +# This module implements support for Intel(R) SM4 instructions +# from Intel(R) Multi-Buffer Crypto for IPsec Library +# (https://github.com/intel/intel-ipsec-mb). +# Original author is Tomasz Kantecki + +# $output is the last argument if it looks like a file (it has an extension) +# $flavour is the first argument if it doesn't look like a file +$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; +$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? 
shift : undef; + +$win64=0; +$win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; +$dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +# Check Intel(R) SM4 instructions support +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx2_sm4_ni = ($1>=2.22); # minimal avx2 supported version, binary translation for SM4 instructions (sub sm4op) is used + $avx2_sm4_ni_native = ($1>=2.42); # support added at GNU asm 2.42 +} + +if (!$avx2_sm4_ni && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9])\.([0-9]+)\.([0-9]+)/) { + my ($major, $minor, $patch) = ($1, $2, $3); + $avx2_sm4_ni = ($major > 2) || ($major == 2 && $minor > 10); # minimal avx2 supported version, binary translation for SM4 instructions (sub sm4op) is used + $avx2_sm4_ni_native = ($major > 2) || ($major == 2 && $minor > 16) || ($major == 2 && $minor == 16 && $patch >= 2); # support added at NASM 2.16.02 +} + +if (!$avx2_sm4_ni && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { + $avx2_sm4_ni = ($2>=7.0); # minimal tested version, binary translation for SM4 instructions (sub sm4op) is used + $avx2_sm4_ni_native = ($2>=17.0); # support added at LLVM 17.0.1 +} + +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"" + or die "can't call $xlate: $!"; +*STDOUT=*OUT; + +$prefix="hw_x86_64_sm4"; + +if ($avx2_sm4_ni>0) { + +$code.= ".text\n"; +{ +# input arguments aliases for set_key +my ($userKey,$key) = ("%rdi","%rsi"); + +# input arguments aliases for encrypt/decrypt +my ($in,$out,$ks) = ("%rdi","%rsi","%rdx"); + +$code.=<<___; +.section .rodata align=64 +.align 16 +SM4_FK: +.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc + +.align 16 +SM4_CK: +.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 +.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 +.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 +.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 +.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 +.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 +.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 +.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 + +IN_SHUFB: +.byte 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04 +.byte 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c +.byte 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04 +.byte 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c + +OUT_SHUFB: +.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 +.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 +.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 +.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + +.text + +# int ${prefix}_set_key(const unsigned char *userKey, SM4_KEY *key) +# +# input: $userKey secret key +# $key round keys +# + +.globl ${prefix}_set_key +.type ${prefix}_set_key,\@function,2 +.align 32 +${prefix}_set_key: +.cfi_startproc + endbranch +# Prolog + push %rbp +.cfi_push %rbp +# Prolog ends here. 
+.Lossl_${prefix}_set_key_seh_prolog_end: + + vmovdqu ($userKey), %xmm0 + vpshufb IN_SHUFB(%rip), %xmm0, %xmm0 + vpxor SM4_FK(%rip), %xmm0, %xmm0 + + vmovdqu SM4_CK(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, ($key) + vmovdqu SM4_CK + 16(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, 16($key) + vmovdqu SM4_CK + 32(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, 32($key) + vmovdqu SM4_CK + 48(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, 48($key) + vmovdqu SM4_CK + 64(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, 64($key) + vmovdqu SM4_CK + 80(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, 80($key) + vmovdqu SM4_CK + 96(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, 96($key) + vmovdqu SM4_CK + 112(%rip), %xmm1 + vsm4key4 %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, 112($key) + + vpxor %xmm0, %xmm0, %xmm0 # clear register + mov \$1, %eax + pop %rbp +.cfi_pop %rbp + ret +.cfi_endproc + +# void ${prefix}_encrypt(const uint8_t *in, uint8_t *out, const SM4_KEY *ks) + +.globl ${prefix}_encrypt +.type ${prefix}_encrypt,\@function,3 +.align 32 +${prefix}_encrypt: +.cfi_startproc + endbranch +# Prolog + push %rbp +.cfi_push %rbp +# Prolog ends here. +.Lossl_${prefix}_encrypt_seh_prolog_end: + + vmovdqu ($in), %xmm0 + vpshufb IN_SHUFB(%rip), %xmm0, %xmm0 + + # note: to simplify binary instructions translation + mov $ks, %r10 + + vsm4rnds4 (%r10), %xmm0, %xmm0 + vsm4rnds4 16(%r10), %xmm0, %xmm0 + vsm4rnds4 32(%r10), %xmm0, %xmm0 + vsm4rnds4 48(%r10), %xmm0, %xmm0 + vsm4rnds4 64(%r10), %xmm0, %xmm0 + vsm4rnds4 80(%r10), %xmm0, %xmm0 + vsm4rnds4 96(%r10), %xmm0, %xmm0 + vsm4rnds4 112(%r10), %xmm0, %xmm0 + + vpshufb OUT_SHUFB(%rip), %xmm0, %xmm0 + vmovdqu %xmm0, ($out) + vpxor %xmm0, %xmm0, %xmm0 # clear register + pop %rbp +.cfi_pop %rbp + ret +.cfi_endproc + +# void ${prefix}_decrypt(const uint8_t *in, uint8_t *out, const SM4_KEY *ks) + +.globl ${prefix}_decrypt +.type ${prefix}_decrypt,\@function,3 +.align 32 +${prefix}_decrypt: +.cfi_startproc + endbranch +# Prolog + push %rbp +.cfi_push %rbp +# Prolog ends here. 
+.Lossl_${prefix}_decrypt_seh_prolog_end: + + vmovdqu ($in), %xmm0 + vpshufb IN_SHUFB(%rip), %xmm0, %xmm0 + + vmovdqu 112($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + vmovdqu 96($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + vmovdqu 80($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + vmovdqu 64($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + vmovdqu 48($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + vmovdqu 32($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + vmovdqu 16($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + vmovdqu ($ks), %xmm1 + vpshufd \$27, %xmm1, %xmm1 + vsm4rnds4 %xmm1, %xmm0, %xmm0 + + vpshufb OUT_SHUFB(%rip), %xmm0, %xmm0 + vmovdqu %xmm0, ($out) + vpxor %xmm0, %xmm0, %xmm0 # clear registers + vpxor %xmm1, %xmm1, %xmm1 + pop %rbp +.cfi_pop %rbp + ret +.cfi_endproc +___ +} + +} else { # fallback for the unsupported configurations with undefined instruction + +$code .= <<___; +.text + +.globl ${prefix}_set_key +.type ${prefix}_set_key,\@abi-omnipotent +${prefix}_set_key: + .byte 0x0f,0x0b # ud2 + ret +.size ${prefix}_set_key, .-${prefix}_set_key + +.globl ${prefix}_encrypt +.type ${prefix}_encrypt,\@abi-omnipotent +${prefix}_encrypt: + .byte 0x0f,0x0b # ud2 + ret +.size ${prefix}_encrypt, .-${prefix}_encrypt + +.globl ${prefix}_decrypt +.type ${prefix}_decrypt,\@abi-omnipotent +${prefix}_decrypt: + .byte 0x0f,0x0b # ud2 + ret +.size ${prefix}_decrypt, .-${prefix}_decrypt +___ +} # avx2_sm4_ni + +if ($avx2_sm4_ni_native > 0) { # SM4 instructions are supported in asm + $code =~ s/\`([^\`]*)\`/eval $1/gem; + print $code; +} else { # binary translation for SM4 instructions + sub sm4op { + my $instr = shift; + my $args = shift; + if ($args =~ /^(.+)\s*#/) { + $args = $1; # drop comment and its leading whitespace + } + if (($instr eq "vsm4key4") && ($args =~ /%xmm(\d{1,2})\s*,\s*%xmm(\d{1,2})\s*,\s*%xmm(\d{1,2})/)) { + my $b1 = sprintf("0x%02x", 0x62 | ((1-int($1/8))<<5) | ((1-int($3/8))<<7) ); + my $b2 = sprintf("0x%02x", 0x02 | (15 - $2 & 15)<<3 ); + my $b3 = sprintf("0x%02x", 0xc0 | ($1 & 7) | (($3 & 7)<<3) ); + return ".byte 0xc4,".$b1.",".$b2.",0xda,".$b3; + } + elsif (($instr eq "vsm4rnds4") && ($args =~ /(\d*)\(([^)]+)\)\s*,\s*%xmm(\d{1,2})\s*,\s*%xmm(\d{1,2})/)) { + my $shift = $1; + my $b3_offset = 0x00; + if ($shift) { + $shift = ",0x".sprintf("%02x", $shift); + $b3_offset = 0x40; + } + my $b1 = sprintf("0x%02x", 0x42 | ((1-int($4/8))<<7) ); + my $b2 = sprintf("0x%02x", 0x03 | (15 - $3 & 15)<<3 ); + my $b3 = sprintf("0x%02x", 0x02 | ($4 & 7)<<3 | $b3_offset ); + return ".byte 0xc4,".$b1.",".$b2.",0xda,".$b3.$shift; + } + elsif (($instr eq "vsm4rnds4") && ($args =~ /%xmm(\d{1,2})\s*,\s*%xmm(\d{1,2})\s*,\s*%xmm(\d{1,2})/)) { + my $b1 = sprintf("0x%02x", 0x62 | ((1-int($1/8))<<5) | ((1-int($3/8))<<7) ); + my $b2 = sprintf("0x%02x", 0x03 | (15 - $2 & 15)<<3 ); + my $b3 = sprintf("0x%02x", 0xc0 | ($1 & 7) | (($3 & 7)<<3) ); + return ".byte 0xc4,".$b1.",".$b2.",0xda,".$b3; + } + return $instr."\t".$args; + } + + foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + s/\b(vsm4[^\s]*)\s+(.*)/sm4op($1,$2)/geo; + print $_,"\n"; + } +} # avx2_sm4_ni_native > 0 + +close STDOUT or die "error closing STDOUT: $!"; diff --git a/crypto/sm4/build.info b/crypto/sm4/build.info index 990797a30e1..2f36b6b595e 100644 --- a/crypto/sm4/build.info +++ b/crypto/sm4/build.info @@ -7,6 +7,9 @@ IF[{- 
!$disabled{asm} -}] $SM4DEF_riscv64=SM4_ASM $SM4ASM_riscv64=sm4-riscv64-zvksed.s + $SM4DEF_x86_64=SM4_ASM + $SM4ASM_x86_64=sm4-x86_64.S + # Now that we have defined all the arch specific variables, use the # appropriate one, and define the appropriate macros IF[$SM4ASM_{- $target{asm_arch} -}] @@ -38,3 +41,4 @@ INCLUDE[sm4-armv8.o]=.. INCLUDE[vpsm4-armv8.o]=.. INCLUDE[vpsm4_ex-armv8.o]=.. GENERATE[sm4-riscv64-zvksed.s]=asm/sm4-riscv64-zvksed.pl +GENERATE[sm4-x86_64.S]=asm/sm4-x86_64.pl diff --git a/include/crypto/sm4_platform.h b/include/crypto/sm4_platform.h index 3df1b4256de..ffb9e08c631 100644 --- a/include/crypto/sm4_platform.h +++ b/include/crypto/sm4_platform.h @@ -50,7 +50,18 @@ void rv64i_zvksed_sm4_encrypt(const unsigned char *in, unsigned char *out, const SM4_KEY *key); void rv64i_zvksed_sm4_decrypt(const unsigned char *in, unsigned char *out, const SM4_KEY *key); -# endif /* RV64 */ +# elif (defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)) +/* Intel x86_64 support */ +# include "internal/cryptlib.h" +# define HWSM4_CAPABLE_X86_64 \ + ((OPENSSL_ia32cap_P[2] & (1 << 5)) && (OPENSSL_ia32cap_P[5] & (1 << 2))) +int hw_x86_64_sm4_set_key(const unsigned char *userKey, SM4_KEY *key); +int hw_x86_64_sm4_set_decryption_key(const unsigned char *userKey, SM4_KEY *key); +void hw_x86_64_sm4_encrypt(const unsigned char *in, unsigned char *out, + const SM4_KEY *key); +void hw_x86_64_sm4_decrypt(const unsigned char *in, unsigned char *out, + const SM4_KEY *key); +# endif # endif /* OPENSSL_CPUID_OBJ */ # if defined(HWSM4_CAPABLE) diff --git a/providers/implementations/ciphers/cipher_sm4_ccm_hw.c b/providers/implementations/ciphers/cipher_sm4_ccm_hw.c index 1c1d60494ab..c228276fe0c 100644 --- a/providers/implementations/ciphers/cipher_sm4_ccm_hw.c +++ b/providers/implementations/ciphers/cipher_sm4_ccm_hw.c @@ -61,6 +61,9 @@ static const PROV_CCM_HW ccm_sm4 = { #if defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64 # include "cipher_sm4_ccm_hw_rv64i.inc" +#elif defined(OPENSSL_CPUID_OBJ) && (defined(__x86_64) || defined(__x86_64__) \ + || defined(_M_AMD64) || defined(_M_X64)) +# include "cipher_sm4_ccm_hw_x86_64.inc" #else const PROV_CCM_HW *ossl_prov_sm4_hw_ccm(size_t keybits) { diff --git a/providers/implementations/ciphers/cipher_sm4_ccm_hw_x86_64.inc b/providers/implementations/ciphers/cipher_sm4_ccm_hw_x86_64.inc new file mode 100644 index 00000000000..6e4e6a3665d --- /dev/null +++ b/providers/implementations/ciphers/cipher_sm4_ccm_hw_x86_64.inc @@ -0,0 +1,41 @@ +/* + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/*- + * x86_64 support for SM4 CCM. 
+ * This file is included by cipher_sm4_ccm_hw.c + */ + +static int hw_x86_64_sm4_ccm_initkey(PROV_CCM_CTX *ctx, + const unsigned char *key, + size_t keylen) +{ + PROV_SM4_CCM_CTX *actx = (PROV_SM4_CCM_CTX *)ctx; + + SM4_HW_CCM_SET_KEY_FN(hw_x86_64_sm4_set_key, + hw_x86_64_sm4_encrypt, NULL, NULL); + return 1; +} + +static const PROV_CCM_HW hw_x86_64_sm4_ccm = { + hw_x86_64_sm4_ccm_initkey, + ossl_ccm_generic_setiv, + ossl_ccm_generic_setaad, + ossl_ccm_generic_auth_encrypt, + ossl_ccm_generic_auth_decrypt, + ossl_ccm_generic_gettag +}; + +const PROV_CCM_HW *ossl_prov_sm4_hw_ccm(size_t keybits) +{ + if (HWSM4_CAPABLE_X86_64) + return &hw_x86_64_sm4_ccm; + else + return &ccm_sm4; +} diff --git a/providers/implementations/ciphers/cipher_sm4_gcm_hw.c b/providers/implementations/ciphers/cipher_sm4_gcm_hw.c index c1e354be45b..3c424383fc5 100644 --- a/providers/implementations/ciphers/cipher_sm4_gcm_hw.c +++ b/providers/implementations/ciphers/cipher_sm4_gcm_hw.c @@ -91,6 +91,9 @@ static const PROV_GCM_HW sm4_gcm = { #if defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64 # include "cipher_sm4_gcm_hw_rv64i.inc" +#elif defined(OPENSSL_CPUID_OBJ) && (defined(__x86_64) || defined(__x86_64__) \ + || defined(_M_AMD64) || defined(_M_X64)) +# include "cipher_sm4_gcm_hw_x86_64.inc" #else const PROV_GCM_HW *ossl_prov_sm4_hw_gcm(size_t keybits) { diff --git a/providers/implementations/ciphers/cipher_sm4_gcm_hw_x86_64.inc b/providers/implementations/ciphers/cipher_sm4_gcm_hw_x86_64.inc new file mode 100644 index 00000000000..2339cb2f5bb --- /dev/null +++ b/providers/implementations/ciphers/cipher_sm4_gcm_hw_x86_64.inc @@ -0,0 +1,42 @@ +/* + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/*- + * x86_64 support for SM4 GCM. 
+ * This file is included by cipher_sm4_gcm_hw.c + */ + +static int hw_x86_64_sm4_gcm_initkey(PROV_GCM_CTX *ctx, + const unsigned char *key, + size_t keylen) +{ + PROV_SM4_GCM_CTX *actx = (PROV_SM4_GCM_CTX *)ctx; + SM4_KEY *ks = &actx->ks.ks; + + SM4_GCM_HW_SET_KEY_CTR_FN(ks, hw_x86_64_sm4_set_key, + hw_x86_64_sm4_encrypt, NULL); + return 1; +} + +static const PROV_GCM_HW hw_x86_64_sm4_gcm = { + hw_x86_64_sm4_gcm_initkey, + ossl_gcm_setiv, + ossl_gcm_aad_update, + hw_gcm_cipher_update, + ossl_gcm_cipher_final, + ossl_gcm_one_shot +}; + +const PROV_GCM_HW *ossl_prov_sm4_hw_gcm(size_t keybits) +{ + if (HWSM4_CAPABLE_X86_64) + return &hw_x86_64_sm4_gcm; + else + return &sm4_gcm; +} diff --git a/providers/implementations/ciphers/cipher_sm4_hw.c b/providers/implementations/ciphers/cipher_sm4_hw.c index 05a83843eb4..4d9a63bb727 100644 --- a/providers/implementations/ciphers/cipher_sm4_hw.c +++ b/providers/implementations/ciphers/cipher_sm4_hw.c @@ -136,6 +136,9 @@ const PROV_CIPHER_HW *ossl_prov_cipher_hw_sm4_##mode(size_t keybits) \ #if defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64 # include "cipher_sm4_hw_rv64i.inc" +#elif defined(OPENSSL_CPUID_OBJ) && (defined(__x86_64) || defined(__x86_64__) \ + || defined(_M_AMD64) || defined(_M_X64)) +# include "cipher_sm4_hw_x86_64.inc" #else /* The generic case */ # define PROV_CIPHER_HW_declare(mode) diff --git a/providers/implementations/ciphers/cipher_sm4_hw_x86_64.inc b/providers/implementations/ciphers/cipher_sm4_hw_x86_64.inc new file mode 100644 index 00000000000..01b7d112684 --- /dev/null +++ b/providers/implementations/ciphers/cipher_sm4_hw_x86_64.inc @@ -0,0 +1,52 @@ +/* + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/*- + * x86_64 SM4 support for modes ecb, cbc, ofb, cfb, ctr. 
+ * This file is included by cipher_sm4_hw.c + */ + +#define cipher_hw_x86_64_sm4_cbc ossl_cipher_hw_generic_cbc +#define cipher_hw_x86_64_sm4_ecb ossl_cipher_hw_generic_ecb +#define cipher_hw_x86_64_sm4_ofb128 ossl_cipher_hw_generic_ofb128 +#define cipher_hw_x86_64_sm4_cfb128 ossl_cipher_hw_generic_cfb128 +#define cipher_hw_x86_64_sm4_ctr ossl_cipher_hw_generic_ctr + +static int cipher_hw_x86_64_sm4_initkey(PROV_CIPHER_CTX *ctx, + const unsigned char *key, + size_t keylen) +{ + PROV_SM4_CTX *sctx = (PROV_SM4_CTX *)ctx; + SM4_KEY *ks = &sctx->ks.ks; + + ctx->ks = ks; + hw_x86_64_sm4_set_key(key, ks); + if (ctx->enc + || (ctx->mode != EVP_CIPH_ECB_MODE + && ctx->mode != EVP_CIPH_CBC_MODE)) { + ctx->block = (block128_f) hw_x86_64_sm4_encrypt; + ctx->stream.cbc = NULL; + } else { + ctx->block = (block128_f) hw_x86_64_sm4_decrypt; + ctx->stream.cbc = NULL; + } + + return 1; +} + +#define PROV_CIPHER_HW_declare(mode) \ + static const PROV_CIPHER_HW hw_x86_64_sm4_##mode = { \ + cipher_hw_x86_64_sm4_initkey, \ + cipher_hw_x86_64_sm4_##mode, \ + cipher_hw_sm4_copyctx \ + }; + +#define PROV_CIPHER_HW_select(mode) \ + if (HWSM4_CAPABLE_X86_64) \ + return &hw_x86_64_sm4_##mode; diff --git a/providers/implementations/ciphers/cipher_sm4_xts_hw.c b/providers/implementations/ciphers/cipher_sm4_xts_hw.c index d147cf1a611..6e94684ecbb 100644 --- a/providers/implementations/ciphers/cipher_sm4_xts_hw.c +++ b/providers/implementations/ciphers/cipher_sm4_xts_hw.c @@ -91,6 +91,9 @@ static const PROV_CIPHER_HW sm4_generic_xts = { #if defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64 # include "cipher_sm4_xts_hw_rv64i.inc" +#elif defined(OPENSSL_CPUID_OBJ) && (defined(__x86_64) || defined(__x86_64__) \ + || defined(_M_AMD64) || defined(_M_X64)) +# include "cipher_sm4_xts_hw_x86_64.inc" #else const PROV_CIPHER_HW *ossl_prov_cipher_hw_sm4_xts(size_t keybits) { diff --git a/providers/implementations/ciphers/cipher_sm4_xts_hw_x86_64.inc b/providers/implementations/ciphers/cipher_sm4_xts_hw_x86_64.inc new file mode 100644 index 00000000000..8428106fae9 --- /dev/null +++ b/providers/implementations/ciphers/cipher_sm4_xts_hw_x86_64.inc @@ -0,0 +1,43 @@ +/* + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/*- + * x86_64 support for SM4 XTS. + * This file is included by cipher_sm4_xts_hw.c + */ + +static int hw_x86_64_sm4_xts_initkey(PROV_CIPHER_CTX *ctx, + const unsigned char *key, + size_t keylen) +{ + PROV_SM4_XTS_CTX *xctx = (PROV_SM4_XTS_CTX *)ctx; + OSSL_xts_stream_fn stream_fn = NULL; + OSSL_xts_stream_fn stream_gb_fn = NULL; + + XTS_SET_KEY_FN(hw_x86_64_sm4_set_key, + hw_x86_64_sm4_set_key, + hw_x86_64_sm4_encrypt, + hw_x86_64_sm4_decrypt, + stream_fn, stream_gb_fn); + return 1; +} + +static const PROV_CIPHER_HW hw_x86_64_sm4_xts = { + hw_x86_64_sm4_xts_initkey, + NULL, + cipher_hw_sm4_xts_copyctx +}; + +const PROV_CIPHER_HW *ossl_prov_cipher_hw_sm4_xts(size_t keybits) +{ + if (HWSM4_CAPABLE_X86_64) + return &hw_x86_64_sm4_xts; + else + return &sm4_generic_xts; +}
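
---

For reference, the HWSM4_CAPABLE_X86_64 macro introduced in include/crypto/sm4_platform.h appears to combine the AVX2 feature bit (CPUID leaf 7, subleaf 0, EBX bit 5, mirrored in OPENSSL_ia32cap_P[2]) with the SM4 ISA extension bit (CPUID leaf 7, subleaf 1, EAX bit 2, mirrored in OPENSSL_ia32cap_P[5]). Below is a minimal standalone sketch of an equivalent runtime probe using the compiler's <cpuid.h> helpers instead of OPENSSL_ia32cap_P; it is not part of this patch, and the helper name has_x86_64_sm4 is purely illustrative.

    #include <cpuid.h>   /* GCC/Clang wrapper around the CPUID instruction */
    #include <stdio.h>

    /*
     * Illustrative helper: returns non-zero when CPUID reports both AVX2
     * and the SM4 instructions, roughly mirroring HWSM4_CAPABLE_X86_64.
     */
    static int has_x86_64_sm4(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* CPUID leaf 7, subleaf 0: EBX bit 5 = AVX2 */
        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
            return 0;
        if (!(ebx & (1u << 5)))
            return 0;

        /* CPUID leaf 7, subleaf 1: EAX bit 2 = SM4 ISA extension */
        if (!__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx))
            return 0;
        return (eax & (1u << 2)) != 0;
    }

    int main(void)
    {
        printf("SM4 ISA extension %s\n",
               has_x86_64_sm4() ? "available" : "not available");
        return 0;
    }

Inside the library the provider code keys off OPENSSL_ia32cap_P, which OpenSSL's own CPUID probe fills in at startup, so a sketch like the one above is only useful for experimenting outside the tree (for example, to confirm that a Lunar Lake or Arrow Lake S machine will actually take the accelerated path).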