git.ipfire.org Git - thirdparty/openssl.git/commitdiff
chacha/asm: fix ELFv2 ABI violation in ChaCha20_ctr32_vsx_8x
author Samaresh Kumar Singh <ssam3003@gmail.com>
Thu, 26 Mar 2026 14:19:22 +0000 (09:19 -0500)
committer Nikola Pajkovsky <nikolap@openssl.org>
Sat, 11 Apr 2026 20:05:30 +0000 (22:05 +0200)
The 8-block POWER10 ChaCha20 path uses vxxlor to spill VMX values into
VSR0-VSR26, which aliases FPR0-FPR26. FPR14-FPR31 are callee-saved per
the ELFv2 ABI, but the function was never saving or restoring them,
silently corrupting 11 FPRs (12 on big-endian) across any call with a
buffer larger than 255 bytes. VMX registers v20-v23, also
callee-saved, had the same problem.

Fix by increasing the frame size to accommodate save slots for
FPR14-FPR25 (and FPR26 on BE) and VMX v20-v23, and adding the
corresponding stfd/lfd and stvx/lvx pairs in the prologue and
epilogue. The VRSAVE save offset is updated to a fixed expression so
it stays clear of the new save area.

Fix for bug #30584.

Reviewed-by: Tomas Mraz <tomas@openssl.foundation>
Reviewed-by: Paul Dale <paul.dale@oracle.com>
MergeDate: Sat Apr 11 20:06:02 2026
(Merged from https://github.com/openssl/openssl/pull/30587)

crypto/chacha/asm/chachap10-ppc.pl

index ef43a117aeb6fed8a61b5782c2e7d9b3bca32492..60992d67142c0fdb2b22059eacd532d1868ff45c 100755 (executable)
@@ -501,7 +501,7 @@ my ($xv8,$xv9,$xv10,$xv11,$xv12,$xv13,$xv14,$xv15,$xv16,$xv17) = map("v$_",(8..1
 my ($xv18,$xv19,$xv20,$xv21) = map("v$_",(18..21));
 my ($xv22,$xv23,$xv24,$xv25,$xv26) = map("v$_",(22..26));
 
-my $FRAME=$LOCALS+64+9*16;     # 8*16 is for v24-v31 offload
+my $FRAME=$LOCALS+64+9*16+12*8+4*16;   # 8*16 for v24-v31 offload, 12*8 for f14-f26, 4*16 for v20-v23
 
 sub VSX_lane_ROUND_8x {
 my ($a0,$b0,$c0,$d0,$a4,$b4,$c4,$d4)=@_;
@@ -665,7 +665,27 @@ $code.=<<___;
        addi    r11,r11,32
        stvx    v30,r10,$sp
        stvx    v31,r11,$sp
-       stw     r12,`$FRAME-4`($sp)             # save vrsave
+       stfd    f14,`$LOCALS+64+9*16+0*8`($sp)  # save FPR14-FPR25 (callee-saved per ELFv2 ABI)
+       stfd    f15,`$LOCALS+64+9*16+1*8`($sp)
+       stfd    f16,`$LOCALS+64+9*16+2*8`($sp)
+       stfd    f18,`$LOCALS+64+9*16+3*8`($sp)
+       stfd    f19,`$LOCALS+64+9*16+4*8`($sp)
+       stfd    f20,`$LOCALS+64+9*16+5*8`($sp)
+       stfd    f21,`$LOCALS+64+9*16+6*8`($sp)
+       stfd    f22,`$LOCALS+64+9*16+7*8`($sp)
+       stfd    f23,`$LOCALS+64+9*16+8*8`($sp)
+       stfd    f24,`$LOCALS+64+9*16+9*8`($sp)
+       stfd    f25,`$LOCALS+64+9*16+10*8`($sp)
+       be?stfd f26,`$LOCALS+64+9*16+11*8`($sp) # BE only
+       li      r10,`$LOCALS+64+9*16+12*8+15`
+       li      r11,`$LOCALS+64+9*16+12*8+31`
+       stvx    v20,r10,$sp                     # save VMX v20-v23 (callee-saved per ELFv2 ABI)
+       addi    r10,r10,32
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       stvx    v23,r11,$sp
+       stw     r12,`$LOCALS+64+9*16-4`($sp)            # save vrsave
        li      r12,-4096+63
        $PUSH   r0, `$FRAME+$LRSAVE`($sp)
        mtspr   256,r12                         # preserve 29 AltiVec registers
@@ -1159,7 +1179,27 @@ $code.=<<___;
        bne     Loop_outer_vsx_8x
 
 Ldone_vsx_8x:
-       lwz     r12,`$FRAME-4`($sp)             # pull vrsave
+       lwz     r12,`$LOCALS+64+9*16-4`($sp)            # pull vrsave
+       lfd     f14,`$LOCALS+64+9*16+0*8`($sp)  # restore FPR14-FPR25 (callee-saved per ELFv2 ABI)
+       lfd     f15,`$LOCALS+64+9*16+1*8`($sp)
+       lfd     f16,`$LOCALS+64+9*16+2*8`($sp)
+       lfd     f18,`$LOCALS+64+9*16+3*8`($sp)
+       lfd     f19,`$LOCALS+64+9*16+4*8`($sp)
+       lfd     f20,`$LOCALS+64+9*16+5*8`($sp)
+       lfd     f21,`$LOCALS+64+9*16+6*8`($sp)
+       lfd     f22,`$LOCALS+64+9*16+7*8`($sp)
+       lfd     f23,`$LOCALS+64+9*16+8*8`($sp)
+       lfd     f24,`$LOCALS+64+9*16+9*8`($sp)
+       lfd     f25,`$LOCALS+64+9*16+10*8`($sp)
+       be?lfd  f26,`$LOCALS+64+9*16+11*8`($sp) # BE only
+       li      r10,`$LOCALS+64+9*16+12*8+15`
+       li      r11,`$LOCALS+64+9*16+12*8+31`
+       lvx     v20,r10,$sp                     # restore VMX v20-v23 (callee-saved per ELFv2 ABI)
+       addi    r10,r10,32
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       lvx     v23,r11,$sp
        li      r10,`15+$LOCALS+64`
        li      r11,`31+$LOCALS+64`
        $POP    r0, `$FRAME+$LRSAVE`($sp)