From d891462ba3f6e39e3696b657dcb69e9869ad9762 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Niels=20M=C3=B6ller?= Date: Sun, 9 Feb 2025 21:01:20 +0100 Subject: [PATCH] Avoid using stxv/lxv instructions in powerpc64/p8 files. --- ChangeLog | 7 +++++ powerpc64/README | 9 ++++++ powerpc64/p8/gcm-aes-decrypt.asm | 47 ++++++++++++++++++-------------- powerpc64/p8/gcm-aes-encrypt.asm | 45 +++++++++++++++++------------- 4 files changed, 69 insertions(+), 39 deletions(-) diff --git a/ChangeLog b/ChangeLog index befcb5c1..2640faab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2025-02-09 Niels Möller + + * powerpc64/p8/gcm-aes-decrypt.asm: Use stxvd2x/lxvd2x rather than + stxv/lxv for save and restore of vector registers, since the + latter instructions are not available on Power8 (ISA v2.07). + * powerpc64/p8/gcm-aes-encrypt.asm: Likewise. + 2024-12-30 Niels Möller * Released Nettle-3.10.1. diff --git a/powerpc64/README b/powerpc64/README index 50859b00..2ac1a49c 100644 --- a/powerpc64/README +++ b/powerpc64/README @@ -93,6 +93,15 @@ be used for storage (obviously "volatile": not preserved if calling other functions). Both ELFv1 [4] and ELFv2 [3] ABIs are the same in this respect. +Instruction variants: + +Power5 supports ISA v2.02 +Power6 supports ISA v2.05 +Power7 (p7 subdirectory) supports ISA v2.06 +Power8 (p8 subdirectory) supports ISA v2.07 +Power9 (p9 subdirectory) supports ISA v3.0 +Power10 (p10 subdirectory) supports ISA v3.1 + [1] http://www.ibm.com/developerworks/linux/library/l-powasm1.html [2] https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture [3] https://openpowerfoundation.org/specifications/64bitelfabi/ diff --git a/powerpc64/p8/gcm-aes-decrypt.asm b/powerpc64/p8/gcm-aes-decrypt.asm index d6be4169..f3c4b125 100644 --- a/powerpc64/p8/gcm-aes-decrypt.asm +++ b/powerpc64/p8/gcm-aes-decrypt.asm @@ -36,12 +36,14 @@ C Register usage: define(`SP', `r1') define(`TOCP', `r2') +C Input arguments. 
define(`HT', `r3') define(`SRND', `r4') define(`SLEN', `r5') define(`SDST', `r6') define(`SSRC', `r7') -define(`RK', `r8') + +define(`RK', `r8') C Round key, also used as temporary in prologue. C r9-r11 used as constant indices. define(`LOOP', `r12') @@ -102,22 +104,28 @@ PROLOGUE(_nettle_gcm_aes_decrypt) sldi SLEN, LOOP, 7 beq end + li r9,1*16 + li r10,2*16 + li r11,3*16 + C 288 byte "protected zone" is sufficient for storage. - stxv VSR(v20), -16(SP) - stxv VSR(v21), -32(SP) - stxv VSR(v22), -48(SP) - stxv VSR(v23), -64(SP) - stxv VSR(v24), -80(SP) - stxv VSR(v25), -96(SP) + subi RK, SP, 64 + stxvd2x VSR(v20), r11, RK + stxvd2x VSR(v21), r10, RK + stxvd2x VSR(v22), r9, RK + stxvd2x VSR(v23), 0, RK + subi RK, SP, 96 + stxvd2x VSR(v24), r9, RK + stxvd2x VSR(v25), 0, RK vxor ZERO,ZERO,ZERO vspltisb CNT1, 1 - vsldoi CNT1, ZERO, CNT1, 1 C counter 1 + vsldoi CNT1, ZERO, CNT1, 1 C counter 1 - DATA_LOAD_VEC(POLY,.polynomial,r9) + DATA_LOAD_VEC(POLY,.polynomial,RK) - li r9,0 - lvsl LE_MASK,0,r9 + li RK,0 + lvsl LE_MASK,0,RK IF_LE(`vspltisb LE_TEMP,0x07') IF_BE(`vspltisb LE_TEMP,0x03') vxor LE_MASK,LE_MASK,LE_TEMP @@ -125,9 +133,6 @@ IF_BE(`vspltisb LE_TEMP,0x03') xxmrghd VSR(POLY_L),VSR(ZERO),VSR(POLY) C load table elements - li r9,1*16 - li r10,2*16 - li r11,3*16 lxvd2x VSR(H1M),0,HT lxvd2x VSR(H1L),r9,HT lxvd2x VSR(H2M),r10,HT @@ -400,12 +405,14 @@ IF_LE(` ') stxvd2x VSR(LASTCNT), 0, HT C store ctr - lxv VSR(v20), -16(SP) - lxv VSR(v21), -32(SP) - lxv VSR(v22), -48(SP) - lxv VSR(v23), -64(SP) - lxv VSR(v24), -80(SP) - lxv VSR(v25), -96(SP) + subi RK, SP, 64 + lxvd2x VSR(v20), r11, RK + lxvd2x VSR(v21), r10, RK + lxvd2x VSR(v22), r9, RK + lxvd2x VSR(v23), 0, RK + subi RK, SP, 96 + lxvd2x VSR(v24), r9, RK + lxvd2x VSR(v25), 0, RK end: mr r3, SLEN diff --git a/powerpc64/p8/gcm-aes-encrypt.asm b/powerpc64/p8/gcm-aes-encrypt.asm index 67ed6f27..3cac6967 100644 --- a/powerpc64/p8/gcm-aes-encrypt.asm +++ b/powerpc64/p8/gcm-aes-encrypt.asm @@ -36,12 +36,14 @@ C Register usage: 
define(`SP', `r1') define(`TOCP', `r2') +C Input arguments. define(`HT', `r3') define(`SRND', `r4') define(`SLEN', `r5') define(`SDST', `r6') define(`SSRC', `r7') -define(`RK', `r8') + +define(`RK', `r8') C Round key, also used as temporary in prologue. C r9-r11 used as constant indices. define(`LOOP', `r12') @@ -102,22 +104,28 @@ PROLOGUE(_nettle_gcm_aes_encrypt) sldi SLEN, LOOP, 7 beq end + li r9,1*16 + li r10,2*16 + li r11,3*16 + C 288 byte "protected zone" is sufficient for storage. - stxv VSR(v20), -16(SP) - stxv VSR(v21), -32(SP) - stxv VSR(v22), -48(SP) - stxv VSR(v23), -64(SP) - stxv VSR(v24), -80(SP) - stxv VSR(v25), -96(SP) + subi RK, SP, 64 + stxvd2x VSR(v20), r11, RK + stxvd2x VSR(v21), r10, RK + stxvd2x VSR(v22), r9, RK + stxvd2x VSR(v23), 0, RK + subi RK, SP, 96 + stxvd2x VSR(v24), r9, RK + stxvd2x VSR(v25), 0, RK vxor ZERO,ZERO,ZERO vspltisb CNT1, 1 vsldoi CNT1, ZERO, CNT1, 1 C counter 1 - DATA_LOAD_VEC(POLY,.polynomial,r9) + DATA_LOAD_VEC(POLY,.polynomial,RK) - li r9,0 - lvsl LE_MASK,0,r9 + li RK,0 + lvsl LE_MASK,0,RK IF_LE(`vspltisb LE_TEMP,0x07') IF_BE(`vspltisb LE_TEMP,0x03') vxor LE_MASK,LE_MASK,LE_TEMP @@ -125,9 +133,6 @@ IF_BE(`vspltisb LE_TEMP,0x03') xxmrghd VSR(POLY_L),VSR(ZERO),VSR(POLY) C load table elements - li r9,1*16 - li r10,2*16 - li r11,3*16 lxvd2x VSR(H1M),0,HT lxvd2x VSR(H1L),r9,HT lxvd2x VSR(H2M),r10,HT @@ -404,12 +409,14 @@ IF_LE(` ') stxvd2x VSR(LASTCNT), 0, HT C store ctr - lxv VSR(v20), -16(SP) - lxv VSR(v21), -32(SP) - lxv VSR(v22), -48(SP) - lxv VSR(v23), -64(SP) - lxv VSR(v24), -80(SP) - lxv VSR(v25), -96(SP) + subi RK, SP, 64 + lxvd2x VSR(v20), r11, RK + lxvd2x VSR(v21), r10, RK + lxvd2x VSR(v22), r9, RK + lxvd2x VSR(v23), 0, RK + subi RK, SP, 96 + lxvd2x VSR(v24), r9, RK + lxvd2x VSR(v25), 0, RK end: mr r3, SLEN -- 2.47.3