For larger data blocks, AES gets performance gains of up to 26.5%,
and SHA3 up to 16.1%. Tested on a Dell 9345 with Snapdragon X1E-80-100.
Detailed data are below.
Before:
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes
AES-128-GCM 176805.90k 677599.17k 1932362.15k 4803628.03k 7696804.52k 8036674.22k
AES-192-GCM 178621.88k 673165.14k 1884515.41k 4690465.11k 7669383.17k 8021562.71k
AES-256-GCM 177187.09k 668302.19k 1843664.13k 4405732.35k 6911937.19k 7201800.19k
sha3-224 71694.30k 286854.87k 686716.67k 784932.86k 922643.11k 926182.06k
sha3-256 71997.70k 288208.13k 688777.98k 786555.22k 865192.62k 874643.46k
sha3-384 72167.35k 287085.80k 487257.00k 639260.33k 673180.33k 675108.18k
sha3-512 71581.52k 287953.09k 378381.31k 436353.02k 471433.22k 472804.01k
After:
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes
AES-128-GCM 177140.20k 674365.06k 1928108.37k 5285980.84k 9581854.72k 10167205.89k
AES-192-GCM 178512.57k 669584.09k 1890147.75k 5052911.96k 8831027.88k 9337787.73k
AES-256-GCM 177685.05k 664798.83k 1844782.17k 4816555.35k 8176435.20k 8592359.42k
sha3-224 71444.47k 286178.99k 747224.32k 911052.46k 1063985.15k 1067712.51k
sha3-256 71188.32k 285318.74k 746304.34k 903716.52k 984566.44k 997430.61k
sha3-384 71715.30k 285965.27k 546631.59k 733781.67k 766448.98k 768262.14k
sha3-512 70853.85k 284883.33k 427553.71k 489698.99k 520415.91k 520743.59k
Change:
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes
AES-128-GCM +0.2 % -0.5 % -0.2 % +10.0 % +24.5 % +26.5 %
AES-192-GCM -0.1 % -0.5 % +0.3 % +7.7 % +15.1 % +16.4 %
AES-256-GCM +0.3 % -0.5 % +0.1 % +9.3 % +18.3 % +19.3 %
sha3-224 -0.3 % -0.2 % +8.8 % +16.1 % +15.3 % +15.3 %
sha3-256 -1.1 % -1.0 % +8.4 % +14.9 % +13.8 % +14.0 %
sha3-384 -0.6 % -0.4 % +12.2 % +14.8 % +13.9 % +13.8 %
sha3-512 -1.0 % -1.1 % +13.0 % +12.2 % +10.4 % +10.1 %
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <ppzgs1@gmail.com>
(Merged from https://github.com/openssl/openssl/pull/27503)
# define ARM_CPU_IMP_ARM 0x41
# define HISI_CPU_IMP 0x48
+# define ARM_CPU_IMP_QCOMM 0x51
# define ARM_CPU_IMP_APPLE 0x61
# define ARM_CPU_IMP_MICROSOFT 0x6D
# define ARM_CPU_IMP_AMPERE 0xC0
# define ARM_CPU_PART_N3 0xD8E
# define ARM_CPU_PART_V3 0xD84
+# define QCOM_CPU_PART_ORYON_X1 0x001
+
# define APPLE_CPU_PART_M1_ICESTORM 0x022
# define APPLE_CPU_PART_M1_FIRESTORM 0x023
# define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024
}
if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N2) ||
+ MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_QCOMM, QCOM_CPU_PART_ORYON_X1) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_COBALT_100) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N3) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO) ||
MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) ||
- MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)) &&
+ MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) ||
+ MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_QCOMM, QCOM_CPU_PART_ORYON_X1)) &&
(OPENSSL_armcap_P & ARMV8_SHA3))
OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
# endif