From: Samaresh Kumar Singh
Date: Thu, 26 Mar 2026 14:19:22 +0000 (-0500)
Subject: chacha/asm: fix ELFv2 ABI violation in ChaCha20_ctr32_vsx_8x
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9f52ccf3713a137898313f427dc76c2e415d243a;p=thirdparty%2Fopenssl.git

chacha/asm: fix ELFv2 ABI violation in ChaCha20_ctr32_vsx_8x

The 8-block POWER10 ChaCha20 path uses vxxlor to spill VMX values into VSR0-VSR26, which alias FPR0-FPR26. FPR14-FPR31 are callee-saved per the ELFv2 ABI, but the function was never saving or restoring them, silently corrupting 11 FPRs (12 on big-endian) across any call with a buffer larger than 255 bytes. VMX registers v20-v23, also callee-saved, had the same problem.

Fix by increasing the frame size to accommodate save slots for FPR14-FPR16 and FPR18-FPR25 (and FPR26 on BE) and VMX v20-v23, and adding the corresponding stfd/lfd and stvx/lvx pairs in the prologue and epilogue. The VRSAVE save offset is updated to a fixed expression so it stays clear of the new save area.
Fix for the bug #30584 Reviewed-by: Tomas Mraz Reviewed-by: Paul Dale MergeDate: Sat Apr 11 20:06:02 2026 (Merged from https://github.com/openssl/openssl/pull/30587) --- diff --git a/crypto/chacha/asm/chachap10-ppc.pl b/crypto/chacha/asm/chachap10-ppc.pl index ef43a117aeb..60992d67142 100755 --- a/crypto/chacha/asm/chachap10-ppc.pl +++ b/crypto/chacha/asm/chachap10-ppc.pl @@ -501,7 +501,7 @@ my ($xv8,$xv9,$xv10,$xv11,$xv12,$xv13,$xv14,$xv15,$xv16,$xv17) = map("v$_",(8..1 my ($xv18,$xv19,$xv20,$xv21) = map("v$_",(18..21)); my ($xv22,$xv23,$xv24,$xv25,$xv26) = map("v$_",(22..26)); -my $FRAME=$LOCALS+64+9*16; # 8*16 is for v24-v31 offload +my $FRAME=$LOCALS+64+9*16+12*8+4*16; # 8*16 for v24-v31 offload, 12*8 for f14-f26, 4*16 for v20-v23 sub VSX_lane_ROUND_8x { my ($a0,$b0,$c0,$d0,$a4,$b4,$c4,$d4)=@_; @@ -665,7 +665,27 @@ $code.=<<___; addi r11,r11,32 stvx v30,r10,$sp stvx v31,r11,$sp - stw r12,`$FRAME-4`($sp) # save vrsave + stfd f14,`$LOCALS+64+9*16+0*8`($sp) # save FPR14-FPR25 (callee-saved per ELFv2 ABI) + stfd f15,`$LOCALS+64+9*16+1*8`($sp) + stfd f16,`$LOCALS+64+9*16+2*8`($sp) + stfd f18,`$LOCALS+64+9*16+3*8`($sp) + stfd f19,`$LOCALS+64+9*16+4*8`($sp) + stfd f20,`$LOCALS+64+9*16+5*8`($sp) + stfd f21,`$LOCALS+64+9*16+6*8`($sp) + stfd f22,`$LOCALS+64+9*16+7*8`($sp) + stfd f23,`$LOCALS+64+9*16+8*8`($sp) + stfd f24,`$LOCALS+64+9*16+9*8`($sp) + stfd f25,`$LOCALS+64+9*16+10*8`($sp) + be?stfd f26,`$LOCALS+64+9*16+11*8`($sp) # BE only + li r10,`$LOCALS+64+9*16+12*8+15` + li r11,`$LOCALS+64+9*16+12*8+31` + stvx v20,r10,$sp # save VMX v20-v23 (callee-saved per ELFv2 ABI) + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + stvx v23,r11,$sp + stw r12,`$LOCALS+64+9*16-4`($sp) # save vrsave li r12,-4096+63 $PUSH r0, `$FRAME+$LRSAVE`($sp) mtspr 256,r12 # preserve 29 AltiVec registers @@ -1159,7 +1179,27 @@ $code.=<<___; bne Loop_outer_vsx_8x Ldone_vsx_8x: - lwz r12,`$FRAME-4`($sp) # pull vrsave + lwz r12,`$LOCALS+64+9*16-4`($sp) # pull vrsave + lfd 
f14,`$LOCALS+64+9*16+0*8`($sp) # restore FPR14-FPR25 (callee-saved per ELFv2 ABI) + lfd f15,`$LOCALS+64+9*16+1*8`($sp) + lfd f16,`$LOCALS+64+9*16+2*8`($sp) + lfd f18,`$LOCALS+64+9*16+3*8`($sp) + lfd f19,`$LOCALS+64+9*16+4*8`($sp) + lfd f20,`$LOCALS+64+9*16+5*8`($sp) + lfd f21,`$LOCALS+64+9*16+6*8`($sp) + lfd f22,`$LOCALS+64+9*16+7*8`($sp) + lfd f23,`$LOCALS+64+9*16+8*8`($sp) + lfd f24,`$LOCALS+64+9*16+9*8`($sp) + lfd f25,`$LOCALS+64+9*16+10*8`($sp) + be?lfd f26,`$LOCALS+64+9*16+11*8`($sp) # BE only + li r10,`$LOCALS+64+9*16+12*8+15` + li r11,`$LOCALS+64+9*16+12*8+31` + lvx v20,r10,$sp # restore VMX v20-v23 (callee-saved per ELFv2 ABI) + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + lvx v23,r11,$sp li r10,`15+$LOCALS+64` li r11,`31+$LOCALS+64` $POP r0, `$FRAME+$LRSAVE`($sp)