ppc64le: Revert "powerpc: Fix performance issues of strcmp power10" (CVE-2025-5702)
author     Carlos O'Donell <carlos@redhat.com>
           Wed, 11 Jun 2025 13:43:50 +0000 (09:43 -0400)
committer  Carlos O'Donell <carlos@redhat.com>
           Mon, 16 Jun 2025 22:02:58 +0000 (18:02 -0400)
This reverts commit 90bcc8721ef82b7378d2b080141228660e862d56

This change is in the chain leading to the final revert that fixes the
CVE, i.e. 3367d8e180848030d1646f088759f02b8dfe0d6f.

Reason for revert: Power10 strcmp clobbers non-volatile vector
registers (Bug 33056)

Tested on ppc64le with no regressions.
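
For context on the clobber named above: on the powerpc64le ELFv2 ABI, vector
registers v20-v31 are non-volatile, so any value a caller keeps in them must
survive a call into libc.  The C sketch below is illustrative only and not
part of this commit; the `acc' variable, the barriers, and the build flags
(-O2 -mcpu=power10) are assumptions.  Whether `acc' is actually assigned to a
non-volatile vector register is up to the compiler's register allocator, so
treat this as a demonstration of the failure mode rather than a guaranteed
reproducer.

/* Illustrative sketch, not part of this commit: the failure mode behind
   Bug 33056.  A value the caller keeps in a callee-saved vector register
   must survive the strcmp call; a strcmp that clobbers v20-v31 can corrupt
   it.  */
#include <altivec.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  vector unsigned char acc = vec_splats ((unsigned char) 0x2a);
  const char *a = "same contents";
  char b[32] = "same contents";

  /* Prevent the compiler from folding the strcmp calls away.  */
  __asm__ volatile ("" : : "r" (b) : "memory");
  /* Hide acc from constant folding and keep it live across the calls,
     which encourages the compiler to place it in a callee-saved vector
     register rather than rematerializing it.  */
  __asm__ volatile ("" : "+v" (acc));

  for (int i = 0; i < 1000; i++)
    if (strcmp (a, b) != 0)
      return 1;

  /* With a strcmp that clobbers non-volatile vector registers, the value
     observed here can differ from the one stored above.  */
  if (vec_any_ne (acc, vec_splats ((unsigned char) 0x2a)))
    {
      puts ("caller's non-volatile vector register was clobbered");
      return 2;
    }
  puts ("caller state preserved");
  return 0;
}

The revert chain addresses this on the library side by going back to an
implementation that leaves the non-volatile vector registers untouched.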

sysdeps/powerpc/powerpc64/le/power10/strcmp.S

index fffa1ee0a97a01805059ea9f1e2fbd51d3d73c69..8a654d8e568f381f5ae5336e4ecc97e1797ceebc 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
@@ -62,7 +62,7 @@
        lxvl      32+v5,reg2,r0;         \
        add       reg1,reg1,len_reg;     \
        add       reg2,reg2,len_reg;     \
-       vcmpnezb  v7,v4,v5;              \
+       vcmpnezb. v7,v4,v5;              \
        vctzlsbb  r6,v7;                 \
        cmpld     cr7,r6,len_reg;        \
        blt       cr7,L(different);      \
 
        .machine  power9
 ENTRY_TOCLESS (STRCMP, 4)
-       andi.   r7,r3,4095
-       andi.   r8,r4,4095
-       cmpldi  cr0,r7,4096-16
-       cmpldi  cr1,r8,4096-16
-       bgt     cr0,L(crosses)
-       bgt     cr1,L(crosses)
-       COMPARE_16(v4,v5,0)
-
-L(crosses):
-       andi.   r7,r3,15
-       subfic  r7,r7,16        /* r7(nalign1) = 16 - (str1 & 15).  */
-       andi.   r9,r4,15
-       subfic  r5,r9,16        /* r5(nalign2) = 16 - (str2 & 15).  */
-       cmpld   cr7,r7,r5
-       beq     cr7,L(same_aligned)
-       blt     cr7,L(nalign1_min)
+       li       r11,16
+       /* eq bit of cr1 used as swap status flag to indicate if
+       source pointers were swapped.  */
+       crclr    4*cr1+eq
+       vspltisb v19,-1
+       andi.    r7,r3,15
+       sub      r7,r11,r7      /* r7(nalign1) = 16 - (str1 & 15).  */
+       andi.    r9,r4,15
+       sub      r5,r11,r9      /* r5(nalign2) = 16 - (str2 & 15).  */
+       cmpld    cr7,r7,r5
+       beq      cr7,L(same_aligned)
+       blt      cr7,L(nalign1_min)
+       /* Swap r3 and r4, and r7 and r5 such that r3 and r7 hold the
+       pointer which is closer to the next 16B boundary so that only
+       one CHECK_N_BYTES is needed before entering the loop below.  */
+       mr       r8,r4
+       mr       r4,r3
+       mr       r3,r8
+       mr       r12,r7
+       mr       r7,r5
+       mr       r5,r12
+       crset    4*cr1+eq       /* Set bit on swapping source pointers.  */
 
-       /* nalign2 is minimum and s2 pointer is aligned.  */
-       CHECK_N_BYTES(r3,r4,r5)
-       /* Are we on the 64B hunk which crosses a page?  */
-       andi.   r10,r3,63       /* Determine offset into 64B hunk.  */
-       andi.   r8,r3,15        /* The offset into the 16B hunk.  */
-       neg     r7,r3
-       andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
-       rlwinm. r7,r7,26,0x3F   /* ((r3-4096))>>6&63.  */
-       beq     L(compare_64_pagecross)
-       mtctr   r7
-       b       L(compare_64B_unaligned)
-
-       /* nalign1 is minimum and s1 pointer is aligned.  */
+       .p2align 5
 L(nalign1_min):
        CHECK_N_BYTES(r3,r4,r7)
-       /* Are we on the 64B hunk which crosses a page?  */
-       andi.   r10,r4,63       /* Determine offset into 64B hunk.  */
-       andi.   r8,r4,15        /* The offset into the 16B hunk.  */
-       neg     r7,r4
-       andi.   r9,r7,15        /* Number of bytes after a 16B cross.  */
-       rlwinm. r7,r7,26,0x3F   /* ((r4-4096))>>6&63.  */
-       beq     L(compare_64_pagecross)
-       mtctr   r7
 
        .p2align 5
-L(compare_64B_unaligned):
-       COMPARE_16(v4,v5,0)
-       COMPARE_16(v4,v5,16)
-       COMPARE_16(v4,v5,32)
-       COMPARE_16(v4,v5,48)
-       addi    r3,r3,64
-       addi    r4,r4,64
-       bdnz    L(compare_64B_unaligned)
+L(s1_aligned):
+       /* r9 and r5 is number of bytes to be read after and before
+        page boundary correspondingly.  */
+       sub     r5,r5,r7
+       subfic  r9,r5,16
+       /* Now let r7 hold the count of quadwords which can be
+       checked without crossing a page boundary. quadword offset is
+       (str2>>4)&0xFF.  */
+       rlwinm  r7,r4,28,0xFF
+       /* Below check is required only for first iteration. For second
+       iteration and beyond, the new loop counter is always 255.  */
+       cmpldi  r7,255
+       beq     L(L3)
+       /* Get the initial loop count by 255-((str2>>4)&0xFF).  */
+       subfic  r11,r7,255
 
-       /* Cross the page boundary of s2, carefully. Only for first
-       iteration we have to get the count of 64B blocks to be checked.
-       From second iteration and beyond, loop counter is always 63.  */
-L(compare_64_pagecross):
-       li      r11, 63
+       .p2align 5
+L(L1):
        mtctr   r11
-       cmpldi  r10,16
-       ble     L(cross_4)
-       cmpldi  r10,32
-       ble     L(cross_3)
-       cmpldi  r10,48
-       ble     L(cross_2)
-L(cross_1):
-       CHECK_N_BYTES(r3,r4,r9)
-       CHECK_N_BYTES(r3,r4,r8)
-       COMPARE_16(v4,v5,0)
-       COMPARE_16(v4,v5,16)
-       COMPARE_16(v4,v5,32)
-       addi    r3,r3,48
-       addi    r4,r4,48
-       b       L(compare_64B_unaligned)
-L(cross_2):
-       COMPARE_16(v4,v5,0)
-       addi    r3,r3,16
-       addi    r4,r4,16
-       CHECK_N_BYTES(r3,r4,r9)
-       CHECK_N_BYTES(r3,r4,r8)
-       COMPARE_16(v4,v5,0)
-       COMPARE_16(v4,v5,16)
-       addi    r3,r3,32
-       addi    r4,r4,32
-       b       L(compare_64B_unaligned)
-L(cross_3):
-       COMPARE_16(v4,v5,0)
-       COMPARE_16(v4,v5,16)
-       addi    r3,r3,32
-       addi    r4,r4,32
-       CHECK_N_BYTES(r3,r4,r9)
-       CHECK_N_BYTES(r3,r4,r8)
-       COMPARE_16(v4,v5,0)
+
+       .p2align 5
+L(L2):
+       COMPARE_16(v4,v5,0)     /* Load 16B blocks using lxv.  */
        addi    r3,r3,16
        addi    r4,r4,16
-       b       L(compare_64B_unaligned)
-L(cross_4):
-       COMPARE_16(v4,v5,0)
-       COMPARE_16(v4,v5,16)
-       COMPARE_16(v4,v5,32)
-       addi    r3,r3,48
-       addi    r4,r4,48
+       bdnz    L(L2)
+       /* Cross the page boundary of s2, carefully.  */
+
+       .p2align 5
+L(L3):
+       CHECK_N_BYTES(r3,r4,r5)
        CHECK_N_BYTES(r3,r4,r9)
-       CHECK_N_BYTES(r3,r4,r8)
-       b       L(compare_64B_unaligned)
+       li      r11,255         /* Load the new loop counter.  */
+       b       L(L1)
 
+       .p2align 5
 L(same_aligned):
        CHECK_N_BYTES(r3,r4,r7)
         /* Align s1 to 32B and adjust s2 address.
@@ -208,7 +168,18 @@ L(16B_aligned_loop):
 
        /* Calculate and return the difference.  */
 L(different):
-       TAIL(v4,v5)
+       vctzlsbb r6,v7
+       vextubrx r5,r6,v4
+       vextubrx r4,r6,v5
+       bt       4*cr1+eq,L(swapped)
+       subf     r3,r4,r5
+       blr
+
+       /* If src pointers were swapped, then swap the
+       indices and calculate the return value.  */
+L(swapped):
+       subf     r3,r5,r4
+       blr
 
        .p2align 5
 L(32B_aligned_loop):
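
As a reading aid for the restored pointer-swap and tail logic above (the cr1
eq flag, L(different), and L(swapped)), here is a short C model of the result
that code must produce.  This is an illustrative sketch, not glibc code:
strcmp_ref, the explicit `swapped' flag, and the alignment test are stand-ins
for the assembly's cr1 bit and the vextubrx/subf sequence.

/* Reference model (a sketch, not the glibc implementation) of the value the
   L(different)/L(swapped) tail must return: the difference of the unsigned
   bytes at the first mismatching or terminating position, with the operands
   flipped back if the source pointers were swapped for alignment.  */
#include <stdio.h>
#include <stdint.h>

static int
strcmp_ref (const char *s1, const char *s2)
{
  const unsigned char *p1 = (const unsigned char *) s1;
  const unsigned char *p2 = (const unsigned char *) s2;
  int swapped = 0;   /* plays the role of the eq bit of cr1  */

  /* Mirror of the entry code: scan through whichever pointer is closer to
     the next 16B boundary, swapping if that is s2.  */
  if (((uintptr_t) p1 & 15) < ((uintptr_t) p2 & 15))
    {
      const unsigned char *t = p1;
      p1 = p2;
      p2 = t;
      swapped = 1;
    }

  while (*p1 != '\0' && *p1 == *p2)
    {
      p1++;
      p2++;
    }

  int diff = (int) *p1 - (int) *p2;   /* L(different): vextubrx + subf  */
  return swapped ? -diff : diff;      /* L(swapped): subtraction reversed  */
}

int
main (void)
{
  printf ("%d %d %d\n",
          strcmp_ref ("abc", "abd") < 0,
          strcmp_ref ("abd", "abc") > 0,
          strcmp_ref ("abc", "abc") == 0);   /* prints: 1 1 1  */
  return 0;
}

The point is that swapping the source pointers for alignment only changes
which operand each loaded byte comes from; the final subtraction has to be
reversed (L(swapped)) so the sign of the result still reflects the original
argument order.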