]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
1da177e4 LT |
2 | /* |
3 | * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) | |
4 | * | |
5 | * Finds length of a 0-terminated string. Optimized for the | |
6 | * Alpha architecture: | |
7 | * | |
8 | * - memory accessed as aligned quadwords only | |
9 | * - uses cmpbge to compare 8 bytes in parallel | |
10 | * - does binary search to find 0 byte in last | |
11 | * quadword (HAKMEM needed 12 instructions to | |
12 | * do this instead of the 9 instructions that | |
13 | * binary search needs). | |
14 | */ | |
00fc0e0d | 15 | #include <asm/export.h> |
1da177e4 LT |
16 | .set noreorder |
17 | .set noat | |
18 | ||
19 | .align 3 | |
20 | ||
21 | .globl strlen | |
22 | .ent strlen | |
23 | /* strlen: in $16 = address of a 0-terminated string; out $0 = its length in bytes; scratch $1, $2, $3 */ ||
24 | strlen: | |
25 | ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) | |
26 | lda $2, -1($31) /* $2 = all ones ($31 reads as zero) */ | |
27 | insqh $2, $16, $2 /* $2 = 0xff in every byte lane that lies before the string start, 0 elsewhere */ | |
28 | andnot $16, 7, $0 /* $0 = $16 rounded down to an 8-byte boundary */ | |
29 | or $2, $1, $1 /* force the bytes before the string to non-zero so they cannot be taken for the terminator */ | |
30 | cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 | |
31 | bne $2, found /* terminator is already inside the first quadword */ | |
32 | /* main scan: one aligned quadword per iteration until one contains a zero byte */ ||
33 | loop: ldq $1, 8($0) /* fetch the next aligned quadword */ | |
34 | addq $0, 8, $0 # addr += 8 | |
35 | nop # helps dual issue last two insns | |
36 | cmpbge $31, $1, $2 /* same zero-byte bitmask as for the first quadword */ | |
37 | beq $2, loop /* no zero byte in this quadword: keep scanning */ | |
38 | ||
39 | found: blbs $2, done # make aligned case fast | |
40 | negq $2, $3 /* $3 = -$2 */ | |
41 | and $2, $3, $2 /* keep only the lowest set bit, i.e. the first zero byte */ | |
42 | /* binary search on the single remaining bit; $0 is advanced to the address of the zero byte */ ||
43 | and $2, 0x0f, $1 /* non-zero if the bit is in lanes 0-3 */ | |
44 | addq $0, 4, $3 | |
45 | cmoveq $1, $3, $0 /* otherwise it is in lanes 4-7: $0 += 4 */ | |
46 | ||
47 | and $2, 0x33, $1 /* non-zero if the bit is in the lower pair of its group of four */ | |
48 | addq $0, 2, $3 | |
49 | cmoveq $1, $3, $0 /* otherwise it is in the upper pair: $0 += 2 */ | |
50 | ||
51 | and $2, 0x55, $1 /* non-zero if the bit is in an even lane */ | |
52 | addq $0, 1, $3 | |
53 | cmoveq $1, $3, $0 /* otherwise it is in an odd lane: $0 += 1 */ | |
54 | ||
55 | done: subq $0, $16, $0 /* length = address of the zero byte - start address */ | |
56 | ret $31, ($26) /* return to the address held in $26 */ | |
57 | ||
58 | .end strlen | |
00fc0e0d | 59 | EXPORT_SYMBOL(strlen) |