git.ipfire.org Git - thirdparty/openssl.git/commitdiff
chacha/asm: save f17 in 8x prologue for contiguous f14-f25 range
author: Samaresh Kumar Singh <ssam3003@gmail.com>
Sat, 28 Mar 2026 19:43:47 +0000 (14:43 -0500)
committer: Nikola Pajkovsky <nikolap@openssl.org>
Sat, 11 Apr 2026 20:12:27 +0000 (22:12 +0200)
f17 is not directly clobbered by any vxxlor in this function, but
saving the full contiguous range f14-f25 is cleaner and avoids any
future ambiguity if the code is modified. Adjust all subsequent FPR
slot offsets and the VMX base offset accordingly, and update the frame
size comment.

Reviewed-by: Tomas Mraz <tomas@openssl.foundation>
Reviewed-by: Paul Dale <paul.dale@oracle.com>
MergeDate: Sat Apr 11 20:12:58 2026
(Merged from https://github.com/openssl/openssl/pull/30587)

crypto/chacha/asm/chachap10-ppc.pl

index 60992d67142c0fdb2b22059eacd532d1868ff45c..10f8a577499018f0b62682ff9beb8e125277be22 100755 (executable)
@@ -501,7 +501,7 @@ my ($xv8,$xv9,$xv10,$xv11,$xv12,$xv13,$xv14,$xv15,$xv16,$xv17) = map("v$_",(8..1
 my ($xv18,$xv19,$xv20,$xv21) = map("v$_",(18..21));
 my ($xv22,$xv23,$xv24,$xv25,$xv26) = map("v$_",(22..26));
 
-my $FRAME=$LOCALS+64+9*16+12*8+4*16;   # 8*16 for v24-v31 offload, 12*8 for f14-f26, 4*16 for v20-v23
+my $FRAME=$LOCALS+64+9*16+13*8+4*16;   # 8*16 for v24-v31 offload, 13*8 for f14-f26, 4*16 for v20-v23
 
 sub VSX_lane_ROUND_8x {
 my ($a0,$b0,$c0,$d0,$a4,$b4,$c4,$d4)=@_;
@@ -665,20 +665,21 @@ $code.=<<___;
        addi    r11,r11,32
        stvx    v30,r10,$sp
        stvx    v31,r11,$sp
-       stfd    f14,`$LOCALS+64+9*16+0*8`($sp)  # save FPR14-FPR25 (callee-saved per ELFv2 ABI)
+       stfd    f14,`$LOCALS+64+9*16+0*8`($sp)  # save FPR14-FPR26 (callee-saved per ELFv2 ABI)
        stfd    f15,`$LOCALS+64+9*16+1*8`($sp)
        stfd    f16,`$LOCALS+64+9*16+2*8`($sp)
-       stfd    f18,`$LOCALS+64+9*16+3*8`($sp)
-       stfd    f19,`$LOCALS+64+9*16+4*8`($sp)
-       stfd    f20,`$LOCALS+64+9*16+5*8`($sp)
-       stfd    f21,`$LOCALS+64+9*16+6*8`($sp)
-       stfd    f22,`$LOCALS+64+9*16+7*8`($sp)
-       stfd    f23,`$LOCALS+64+9*16+8*8`($sp)
-       stfd    f24,`$LOCALS+64+9*16+9*8`($sp)
-       stfd    f25,`$LOCALS+64+9*16+10*8`($sp)
-       be?stfd f26,`$LOCALS+64+9*16+11*8`($sp) # BE only
-       li      r10,`$LOCALS+64+9*16+12*8+15`
-       li      r11,`$LOCALS+64+9*16+12*8+31`
+       stfd    f17,`$LOCALS+64+9*16+3*8`($sp)
+       stfd    f18,`$LOCALS+64+9*16+4*8`($sp)
+       stfd    f19,`$LOCALS+64+9*16+5*8`($sp)
+       stfd    f20,`$LOCALS+64+9*16+6*8`($sp)
+       stfd    f21,`$LOCALS+64+9*16+7*8`($sp)
+       stfd    f22,`$LOCALS+64+9*16+8*8`($sp)
+       stfd    f23,`$LOCALS+64+9*16+9*8`($sp)
+       stfd    f24,`$LOCALS+64+9*16+10*8`($sp)
+       stfd    f25,`$LOCALS+64+9*16+11*8`($sp)
+       be?stfd f26,`$LOCALS+64+9*16+12*8`($sp) # BE only
+       li      r10,`$LOCALS+64+9*16+13*8+15`
+       li      r11,`$LOCALS+64+9*16+13*8+31`
        stvx    v20,r10,$sp                     # save VMX v20-v23 (callee-saved per ELFv2 ABI)
        addi    r10,r10,32
        stvx    v21,r11,$sp
@@ -1180,20 +1181,21 @@ $code.=<<___;
 
 Ldone_vsx_8x:
        lwz     r12,`$LOCALS+64+9*16-4`($sp)            # pull vrsave
-       lfd     f14,`$LOCALS+64+9*16+0*8`($sp)  # restore FPR14-FPR25 (callee-saved per ELFv2 ABI)
+       lfd     f14,`$LOCALS+64+9*16+0*8`($sp)  # restore FPR14-FPR26 (callee-saved per ELFv2 ABI)
        lfd     f15,`$LOCALS+64+9*16+1*8`($sp)
        lfd     f16,`$LOCALS+64+9*16+2*8`($sp)
-       lfd     f18,`$LOCALS+64+9*16+3*8`($sp)
-       lfd     f19,`$LOCALS+64+9*16+4*8`($sp)
-       lfd     f20,`$LOCALS+64+9*16+5*8`($sp)
-       lfd     f21,`$LOCALS+64+9*16+6*8`($sp)
-       lfd     f22,`$LOCALS+64+9*16+7*8`($sp)
-       lfd     f23,`$LOCALS+64+9*16+8*8`($sp)
-       lfd     f24,`$LOCALS+64+9*16+9*8`($sp)
-       lfd     f25,`$LOCALS+64+9*16+10*8`($sp)
-       be?lfd  f26,`$LOCALS+64+9*16+11*8`($sp) # BE only
-       li      r10,`$LOCALS+64+9*16+12*8+15`
-       li      r11,`$LOCALS+64+9*16+12*8+31`
+       lfd     f17,`$LOCALS+64+9*16+3*8`($sp)
+       lfd     f18,`$LOCALS+64+9*16+4*8`($sp)
+       lfd     f19,`$LOCALS+64+9*16+5*8`($sp)
+       lfd     f20,`$LOCALS+64+9*16+6*8`($sp)
+       lfd     f21,`$LOCALS+64+9*16+7*8`($sp)
+       lfd     f22,`$LOCALS+64+9*16+8*8`($sp)
+       lfd     f23,`$LOCALS+64+9*16+9*8`($sp)
+       lfd     f24,`$LOCALS+64+9*16+10*8`($sp)
+       lfd     f25,`$LOCALS+64+9*16+11*8`($sp)
+       be?lfd  f26,`$LOCALS+64+9*16+12*8`($sp) # BE only
+       li      r10,`$LOCALS+64+9*16+13*8+15`
+       li      r11,`$LOCALS+64+9*16+13*8+31`
        lvx     v20,r10,$sp                     # restore VMX v20-v23 (callee-saved per ELFv2 ABI)
        addi    r10,r10,32
        lvx     v21,r11,$sp