To align a quadword-aligned address to 64 bytes, at most three 16-byte
loads are needed in the worst case, rather than four.
+2017-07-03 Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
+
+ * sysdeps/powerpc/powerpc64/power8/strlen.S: Remove unreachable code.
+ * sysdeps/powerpc/powerpc64/power8/strnlen.S: Likewise.
+
2017-07-01 Florian Weimer <fweimer@redhat.com>
H.J. Lu <hongjiu.lu@intel.com>
addi r9,r9,16
bne cr7,L(dword_zero)
- andi. r10,r9,63
- beq cr0,L(preloop)
- ld r6,8(r4)
- ldu r5,16(r4)
- cmpb r10,r6,r0
- cmpb r11,r5,r0
- or r5,r10,r11
- cmpdi cr7,r5,0
- addi r9,r9,16
- bne cr7,L(dword_zero)
-
andi. r10,r9,63
beq cr0,L(preloop)
ld r6,8(r4)
addi r4,r4,-16 /* Decrement maxlen in 16 bytes. */
bne cr6,L(found_aligning64B) /* If found null bytes. */
- /* Unroll 3x above code block until aligned or find null bytes. */
- andi. r7,r5,63
- beq cr0,L(preloop_64B)
- lvx v1,r5,r6
- vcmpequb. v1,v1,v0
- addi r5,r5,16
- addi r4,r4,-16
- bne cr6,L(found_aligning64B)
-
+ /* Unroll 2x above code block until aligned or find null bytes. */
andi. r7,r5,63
beq cr0,L(preloop_64B)
lvx v1,r5,r6