From: Andreas Schwab
Date: Thu, 5 Jun 2025 08:24:46 +0000 (+0200)
Subject: powerpc: Remove assembler workarounds
X-Git-Tag: glibc-2.42~115
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=eae5bb0f60205e6f709803cc6bba749daf5ece72;p=thirdparty%2Fglibc.git

powerpc: Remove assembler workarounds

Now that we require at least binutils 2.39, support for POWER9 and
POWER10 instructions can be assumed.
---

diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
index f32dc3815f..9c1a41b805 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
@@ -18,26 +18,10 @@
 #include <sysdep.h>
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35. This is used to keep compatibility with older versions. */
-#define VEXTRACTBM(rt,vrb) \
-	.long(((4)<<(32-6)) \
-	      | ((rt)<<(32-11)) \
-	      | ((8)<<(32-16)) \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra) \
-	.long(((6)<<(32-6)) \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11)) \
-	      | ((ra)<<(32-16)) \
-	      | dq)
-
 /* Compare 32 bytes.  */
 #define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
-	LXVP(32+vr1,offset,r3); \
-	LXVP(32+vr2,offset,r4); \
+	lxvp 32+vr1,offset(r3); \
+	lxvp 32+vr2,offset(r4); \
 	vcmpneb. v5,vr1+1,vr2+1; \
 	bne cr6,L(tail_2); \
 	vcmpneb. v4,vr1,vr2; \
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
index 4985a9291b..74f572c2c6 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
@@ -63,22 +63,6 @@
 	blr
 #endif /* USE_AS_RAWMEMCHR */
 
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
-   >= 2.35. This is used to keep compatibility with older versions. */
-#define VEXTRACTBM(rt,vrb) \
-	.long(((4)<<(32-6)) \
-	      | ((rt)<<(32-11)) \
-	      | ((8)<<(32-16)) \
-	      | ((vrb)<<(32-21)) \
-	      | 1602)
-
-#define LXVP(xtp,dq,ra) \
-	.long(((6)<<(32-6)) \
-	      | ((((xtp)-32)>>1)<<(32-10)) \
-	      | ((1)<<(32-11)) \
-	      | ((ra)<<(32-16)) \
-	      | dq)
-
 #define CHECK16(vreg,offset,addr,label) \
 	lxv vreg+32,offset(addr); \
 	vcmpequb. vreg,vreg,v18; \
@@ -88,8 +72,8 @@
    of bytes already checked.  */
 #define CHECK64(offset,addr,label) \
 	li r6,offset; \
-	LXVP(v4+32,offset,addr); \
-	LXVP(v6+32,offset+32,addr); \
+	lxvp v4+32,offset(addr); \
+	lxvp v6+32,offset+32(addr); \
 	RAWMEMCHR_SUBTRACT_VECTORS; \
 	vminub v14,v4,v5; \
 	vminub v15,v6,v7; \
@@ -234,10 +218,10 @@ L(tail_64b):
 	add r5,r5,r6
 
 	/* Extract first bit of each byte.  */
-	VEXTRACTBM(r7,v1)
-	VEXTRACTBM(r8,v2)
-	VEXTRACTBM(r9,v3)
-	VEXTRACTBM(r10,v4)
+	vextractbm r7,v1
+	vextractbm r8,v2
+	vextractbm r9,v3
+	vextractbm r10,v4
 
 	/* Shift each value into their corresponding position.  */
 	sldi r8,r8,16
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
index 83b21c65c3..f0cde8171d 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
@@ -28,21 +28,6 @@
    The implementation uses unaligned doubleword access for first 32 bytes
    as in POWER8 patch and uses vectorised loops after that.  */
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27. Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-			       | ((t)<<(32-11)) \
-			       | ((a)<<(32-16)) \
-			       | ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-			       | ((t)<<(32-11)) \
-			       | ((a)<<(32-16)) \
-			       | ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
    reg3: Permute control vector.  */
@@ -61,10 +46,7 @@
 2: \
 	vperm reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when the minimum required binutils
-   allows it.  */
-
-	.machine power7
+	.machine power9
 ENTRY_TOCLESS (STRCMP, 4)
 	li r0, 0
 
@@ -116,7 +98,7 @@ L(align):
 	/* Both s1 and s2 are unaligned.  */
 	GET16BYTES(v4, r7, v10)
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq cr6, L(match)
 	b L(different)
 
@@ -136,28 +118,28 @@ L(match):
 L(s1_align):
 	lvx v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	bne cr6, L(different)
 
 	lvx v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	bne cr6, L(different)
 
 	lvx v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	bne cr6, L(different)
 
 	lvx v4, r7, r0
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	beq cr6, L(s1_align)
@@ -167,37 +149,37 @@ L(s1_align):
 L(aligned):
 	lvx v4, 0, r7
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	bne cr6, L(different)
 
 	lvx v4, 0, r7
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	bne cr6, L(different)
 
 	lvx v4, 0, r7
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	bne cr6, L(different)
 
 	lvx v4, 0, r7
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	addi r7, r7, 16
 	addi r4, r4, 16
 	beq cr6, L(aligned)
 
 /* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vctzlsbb r6, v7
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf r3, r4, r5
 	extsw r3, r3
 	blr
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
index 60c74ab603..5a25f94900 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
@@ -29,21 +29,6 @@
 # define STRNCMP strncmp
 #endif
 
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
-   to 2.27. Macros are defined below for these newer instructions in order
-   to maintain compatibility. */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
-			       | ((t)<<(32-11)) \
-			       | ((a)<<(32-16)) \
-			       | ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
-			       | ((t)<<(32-11)) \
-			       | ((a)<<(32-16)) \
-			       | ((b)<<(32-21)) )
-
 /* Get 16 bytes for unaligned case.
    reg1: Vector to hold next 16 bytes.
    reg2: Address to read from.
@@ -64,9 +49,7 @@
 2: \
 	vperm reg1, v9, reg1, reg3;
 
-/* TODO: change this to .machine power9 when minimum binutils
-   is upgraded to 2.27. */
-	.machine power7
+	.machine power9
 ENTRY_TOCLESS (STRNCMP, 4)
 	/* Check if size is 0.  */
 	cmpdi cr0, r5, 0
@@ -163,7 +146,7 @@ L(align):
 	clrldi r6, r3, 60
 	subfic r11, r6, 16
 	GET16BYTES(v4, r3, v10)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	beq cr6, L(match)
 	b L(different)
 
@@ -186,7 +169,7 @@ L(match):
 L(s1_align):
 	lvx v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -196,7 +179,7 @@ L(s1_align):
 
 	lvx v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -206,7 +189,7 @@ L(s1_align):
 
 	lvx v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -216,7 +199,7 @@ L(s1_align):
 
 	lvx v4, 0, r3
 	GET16BYTES(v5, r4, v6)
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -228,7 +211,7 @@ L(s1_align):
 L(aligned):
 	lvx v4, 0, r3
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -238,7 +221,7 @@ L(aligned):
 
 	lvx v4, 0, r3
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -248,7 +231,7 @@ L(aligned):
 
 	lvx v4, 0, r3
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -258,7 +241,7 @@ L(aligned):
 
 	lvx v4, 0, r3
 	lvx v5, 0, r4
-	VCMPNEZB(v7, v5, v4)
+	vcmpnezb. v7, v5, v4
 	bne cr6, L(different)
 	cmpldi cr7, r5, 16
 	ble cr7, L(ret0)
@@ -268,11 +251,11 @@ L(aligned):
 	b L(aligned)
 
 /* Calculate and return the difference.  */
 L(different):
-	VCTZLSBB(r6, v7)
+	vctzlsbb r6, v7
 	cmplw cr7, r5, r6
 	ble cr7, L(ret0)
-	VEXTUBRX(r5, r6, v4)
-	VEXTUBRX(r4, r6, v5)
+	vextubrx r5, r6, v4
+	vextubrx r4, r6, v5
 	subf r3, r4, r5
 	extsw r3, r3
 	blr
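
For reference, each removed macro hand-assembled, via .long, the same
instruction word that binutils emits for the corresponding mnemonic, so
substituting the mnemonics does not change the generated code.  Below is a
minimal illustrative sketch in C, not part of the patch, using the field
layout taken from the removed VCMPNEZB macro (VX form: primary opcode 4 in
the top six bits, VRT/VRA/VRB fields, extended opcode 0x507, i.e. 0x107
with the record bit set for the dot form).  It reproduces the word that
"vcmpnezb. v7, v5, v4" assembles to:

    #include <stdint.h>
    #include <stdio.h>

    /* Same expression the removed VCMPNEZB(t,a,b) macro expanded to:
       .long (0x10000507 | (t<<(32-11)) | (a<<(32-16)) | (b<<(32-21))).  */
    static uint32_t
    vcmpnezb_dot (unsigned vt, unsigned va, unsigned vb)
    {
      return 0x10000507u | (vt << 21) | (va << 16) | (vb << 11);
    }

    int
    main (void)
    {
      /* VCMPNEZB(v7, v5, v4) in the old code; prints 0x10e52507,
         the word binutils >= 2.27 emits for "vcmpnezb. v7, v5, v4".  */
      printf ("0x%08x\n", vcmpnezb_dot (7, 5, 4));
      return 0;
    }

The same argument applies to VCTZLSBB, VEXTUBRX, VEXTRACTBM and LXVP,
which is why the cleanup above is purely textual.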