and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.2d[0]
+ mov synd, vend.d[0]
/* Clear the soff*2 lower bits */
lsl tmp, soff, #1
lsr synd, synd, tmp
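
For context, this hunk is the tail of the first-block handling: the byte-compare results are ANDed with a repeating mask, two ADDP steps narrow 32 bytes of results into one 64-bit syndrome with two bits per byte, and the bits belonging to the soff bytes read before the buffer start are shifted out. A C sketch of the same idea with NEON intrinsics; the names are illustrative, and the 0x01/0x04/0x10/0x40 mask is assumed to be the conventional carry-free pattern these routines use:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Sketch: 64-bit syndrome for a 32-byte block, two bits per byte.
       soff (0..31) counts bytes read before the real buffer start.  */
    static inline uint64_t block_syndrome(const uint8_t *p, uint8_t c,
                                          unsigned soff)
    {
        /* Bytes 0x01,0x04,0x10,0x40 repeating, so the pairwise adds
           below cannot carry between per-byte fields.  */
        uint8x16_t repmask = vreinterpretq_u8_u32(vdupq_n_u32(0x40100401));
        uint8x16_t eq1 = vceqq_u8(vld1q_u8(p),      vdupq_n_u8(c));
        uint8x16_t eq2 = vceqq_u8(vld1q_u8(p + 16), vdupq_n_u8(c));
        eq1 = vandq_u8(eq1, repmask);
        eq2 = vandq_u8(eq2, repmask);
        uint8x16_t t = vpaddq_u8(eq1, eq2);            /* 256->128 */
        t = vpaddq_u8(t, t);                           /* 128->64  */
        uint64_t synd = vgetq_lane_u64(vreinterpretq_u64_u8(t), 0);
        return synd >> (2 * soff);     /* clear the soff*2 lower bits */
    }
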
/* Use a fast check for the termination condition */
orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
addp vend.2d, vend.2d, vend.2d
- mov synd, vend.2d[0]
+ mov synd, vend.d[0]
/* We're not out of data, loop if we haven't found the character */
cbz synd, L(loop)
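
This second hunk is the hot loop's exit test, which skips the repmask AND entirely: OR the two compare results, pairwise-add the two 64-bit halves, and extract lane 0; the scalar is nonzero exactly when some byte matched. The extraction is also where the fix lands: `mov Xd, Vn.d[0]` is the preferred alias of `umov Xd, Vn.d[0]`, and newer assemblers reject the older `Vn.2d[0]` spelling of the element. An intrinsics sketch (names mine):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Sketch of the loop's termination test: did any byte match?  */
    static inline int any_match(uint8x16_t has_chr1, uint8x16_t has_chr2)
    {
        uint8x16_t m = vorrq_u8(has_chr1, has_chr2);    /* orr      */
        uint64x2_t v = vreinterpretq_u64_u8(m);
        v = vpaddq_u64(v, v);         /* addp vend.2d, vend.2d, vend.2d */
        return vgetq_lane_u64(v, 0) != 0;  /* mov synd, vend.d[0]; cbz  */
    }
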
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.2d[0]
+ mov synd, vend.d[0]
/* Only do the clear for the last possible block */
b.hi L(tail)
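
Every hunk in this patch makes the same one-operand change. Moving a 64-bit element of a SIMD register into a general register is UMOV (with MOV as its preferred alias), and its element operand is written `Vn.d[0]`; older binutils also tolerated `Vn.2d[0]`, but current assemblers reject it, which is what the patch repairs. A minimal inline-asm illustration (a hypothetical helper, not from the source; `vgetq_lane_u64()` compiles to the same instruction):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Extract lane 0 using the exact spelling the patch switches to.  */
    static inline uint64_t lane0(uint8x16_t v)
    {
        uint64_t x;
        __asm__("umov %0, %1.d[0]"   /* the old ".2d[0]" form is rejected */
                : "=r"(x) : "w"(v));
        return x;
    }
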
addp vend1.16b, vend1.16b, vend1.16b // 128->64
lsr tmp1, tmp3, tmp1
- mov tmp3, vend1.2d[0]
+ mov tmp3, vend1.d[0]
bic tmp1, tmp3, tmp1 // Mask padding bits.
cbnz tmp1, L(tail)
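
Here the routine has deliberately read a whole 32-byte block even though some of its bytes fall outside the usable range, so the syndrome needs scrubbing: an all-ones value is shifted to cover the bits being discarded, BIC keeps the remainder, and `cbnz` branches to the tail only on a genuine hit. A scalar sketch, with the operand roles as I read them from this hunk (names are mine):

    #include <stdint.h>

    /* Sketch: clear syndrome bits that came from padding bytes.  */
    static inline uint64_t mask_padding(uint64_t synd, unsigned shift)
    {
        uint64_t mask = ~0ULL >> shift;  /* lsr tmp1, const_m1, tmp1 */
        return synd & ~mask;             /* bic tmp1, tmp3, tmp1     */
    }
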
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
orr vend1.16b, vend1.16b, vend2.16b
addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
cbz tmp1, L(loop)
/* Termination condition found. Now need to establish exactly why
   we terminated. */
addp vend1.16b, vend1.16b, vend2.16b // 256->128
addp vend1.16b, vend1.16b, vend1.16b // 128->64
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
L(tail):
sub src, src, #32
rbit tmp1, tmp1
addp vend1.16b, vend1.16b, vend1.16b // 128->64
lsr tmp1, tmp3, tmp1
- mov tmp3, vend1.2d[0]
+ mov tmp3, vend1.d[0]
bic tmp1, tmp3, tmp1 // Mask padding bits.
cbnz tmp1, L(tail)
orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
cbz tmp1, L(loop)
/* Termination condition found. Now need to establish exactly why
   we terminated. */
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
addp vend1.16b, vend1.16b, vend1.16b // 128->64
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
L(tail):
/* Count the trailing zeros, by bit reversing... */
rbit tmp1, tmp1
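
The comment above names the standard AArch64 trick for finding the first set bit: the ISA has CLZ but no count-trailing-zeros instruction, so the syndrome is bit-reversed with RBIT and the leading zeros of the result are counted; with two syndrome bits per byte, halving that count gives the byte index. The C equivalent is a single builtin, which compilers lower to exactly this rbit+clz pair on AArch64:

    #include <stdint.h>

    /* First matching byte in a 2-bits-per-byte syndrome (undefined for
       synd == 0, which the callers rule out with cbz/cbnz first).  */
    static inline unsigned first_match_index(uint64_t synd)
    {
        return (unsigned)__builtin_ctzll(synd) / 2;   /* rbit + clz */
    }
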
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
- mov nul_match, vhas_nul1.2d[0]
+ mov nul_match, vhas_nul1.d[0]
lsl tmp1, tmp1, #1
mov const_m1, #~0
- mov chr_match, vhas_chr1.2d[0]
+ mov chr_match, vhas_chr1.d[0]
lsr tmp3, const_m1, tmp1
bic nul_match, nul_match, tmp3 // Mask padding bits.
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
addp vend1.16b, vend1.16b, vend1.16b // 128->64
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
- mov nul_match, vend1.2d[0]
- mov chr_match, vhas_chr1.2d[0]
+ mov nul_match, vend1.d[0]
+ mov chr_match, vhas_chr1.d[0]
cbz nul_match, L(loop)
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
- mov nul_match, vhas_nul1.2d[0]
+ mov nul_match, vhas_nul1.d[0]
L(tail):
/* Work out exactly where the string ends. */
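
This last tail belongs to strrchr-style code, which keeps two syndromes, one for NUL bytes (nul_match) and one for character hits (chr_match); working out where the string ends decides which hits still count. A scalar reconstruction of that endgame under the same little-endian, 2-bits-per-byte syndrome layout sketched earlier (names mine; the c == '\0' special case is elided):

    #include <stdint.h>
    #include <stddef.h>

    /* Sketch: last occurrence of the character before the terminator.  */
    static inline const char *last_match(const char *blk, uint64_t chr_match,
                                         uint64_t nul_match)
    {
        if (nul_match) {
            uint64_t first_nul = nul_match & -nul_match; /* lowest set bit */
            chr_match &= first_nul - 1;    /* keep hits before the NUL */
        }
        if (chr_match == 0)
            return NULL;                   /* no hit in this block */
        unsigned bit = 63 - (unsigned)__builtin_clzll(chr_match);
        return blk + bit / 2;              /* highest hit, 2 bits per byte */
    }
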