]>
Commit | Line | Data |
---|---|---|
55e84dc6 | 1 | /* Optimized memrchr implementation using LoongArch LASX instructions. |
dff8da6b | 2 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
55e84dc6 | 3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library. If not, see | |
17 | <https://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #include <sysdep.h> | |
20 | #include <sys/regdef.h> | |
21 | #include <sys/asm.h> | |
22 | ||
23 | #if IS_IN (libc) && !defined __loongarch_soft_float | |
24 | ||
25 | #ifndef MEMRCHR | |
26 | # define MEMRCHR __memrchr_lasx | |
27 | #endif | |
28 | ||
29 | LEAF(MEMRCHR, 6) /* memrchr: a0 = buffer start, a1 = byte value to find, a2 = length in bytes.  Returns in a0 the address of the last matching byte, or 0 when there is none.  The buffer is scanned backwards in 64-byte aligned blocks.  The 6 is presumably log2 of the entry alignment; confirm against the LEAF macro in sysdep.h.  */ | |
30 | beqz a2, L(ret0) /* length 0: return 0 without touching memory */ | |
31 | addi.d a2, a2, -1 /* a2 = n - 1 */ | |
32 | add.d a3, a0, a2 /* a3 = address of the last byte of the buffer */ | |
33 | andi t1, a3, 0x3f /* t1 = offset of that last byte inside its 64-byte block */ | |
34 | ||
35 | bstrins.d a3, zero, 5, 0 /* clear bits 5..0: a3 = start of the aligned 64-byte block holding the last byte */ | |
36 | addi.d t1, t1, 1 /* t1 = bytes from the block start up to and including the last byte (1..64) */ | |
37 | xvld xr0, a3, 0 /* xr0 = bytes 0..31 of the block */ | |
38 | xvld xr1, a3, 32 /* xr1 = bytes 32..63 of the block */ | |
39 | ||
40 | sub.d t2, zero, t1 /* t2 = -t1; as a shift count only the low 6 bits are used, i.e. (64 - t1) mod 64 */ | |
41 | li.d t3, -1 /* t3 = all ones, source for the tail mask here and the head mask at L(end) */ | |
42 | xvreplgr2vr.b xr2, a1 /* xr2 = low byte of a1 replicated into every byte lane */ | |
43 | andi t4, a0, 0x3f /* t4 = offset of the buffer start inside its 64-byte block, consumed at L(end) */ | |
44 | ||
45 | srl.d t2, t3, t2 /* t2 = tail mask with the low t1 bits set, so bytes past the buffer end are ignored */ | |
46 | xvseq.b xr0, xr0, xr2 /* each byte lane becomes 0xff where it equals the target, else 0 */ | |
47 | xvseq.b xr1, xr1, xr2 | |
48 | xvmsknz.b xr0, xr0 /* collapse each 128-bit half into a 16-bit mask of its nonzero bytes */ | |
49 | ||
50 | ||
51 | xvmsknz.b xr1, xr1 | |
52 | xvpickve.w xr3, xr0, 4 /* word 4 holds the mask of the upper 128-bit half; move it to the low word of xr3 */ | |
53 | xvpickve.w xr4, xr1, 4 | |
54 | vilvl.h vr0, vr3, vr0 /* interleave halfwords: low 32 bits of vr0 = match mask for bytes 0..31 */ | |
55 | ||
56 | vilvl.h vr1, vr4, vr1 /* low 32 bits of vr1 = match mask for bytes 32..63 */ | |
57 | vilvl.w vr0, vr1, vr0 /* interleave words: low 64 bits of vr0 = match mask for the whole block */ | |
58 | movfr2gr.d t0, fa0 /* t0 = 64-bit match mask, bit i set when byte i of the block matches */ | |
59 | and t0, t0, t2 /* discard matches located after the buffer end */ | |
60 | ||
61 | bltu a2, t1, L(end) /* n - 1 < t1, i.e. n <= t1: the buffer also starts in this block, finish with the head mask */ | |
62 | bnez t0, L(found) /* match in the last block: compute its address */ | |
63 | bstrins.d a0, zero, 5, 0 /* a0 = aligned block holding the buffer start; the loop stops when a3 reaches it */ | |
64 | L(loop): /* step backwards one aligned 64-byte block per iteration */ | |
65 | xvld xr0, a3, -64 /* low 32 bytes of the preceding block */ | |
66 | ||
67 | xvld xr1, a3, -32 /* high 32 bytes of the preceding block */ | |
68 | addi.d a3, a3, -64 /* a3 = start of the block just loaded */ | |
69 | xvseq.b xr0, xr0, xr2 | |
70 | xvseq.b xr1, xr1, xr2 | |
71 | ||
72 | ||
73 | beq a0, a3, L(out) /* this is the first block of the buffer: it needs the head mask, so handle it at L(out) */ | |
74 | xvmax.bu xr3, xr0, xr1 /* unsigned byte max merges both compare results; nonzero only if some byte matched */ | |
75 | xvseteqz.v fcc0, xr3 /* fcc0 is set when the merged vector is entirely zero */ | |
76 | bcnez fcc0, L(loop) /* no match in this block: keep scanning */ | |
77 | ||
78 | xvmsknz.b xr0, xr0 /* a match exists: build the 64-bit mask the same way as for the last block */ | |
79 | xvmsknz.b xr1, xr1 | |
80 | xvpickve.w xr3, xr0, 4 | |
81 | xvpickve.w xr4, xr1, 4 | |
82 | ||
83 | vilvl.h vr0, vr3, vr0 | |
84 | vilvl.h vr1, vr4, vr1 | |
85 | vilvl.w vr0, vr1, vr0 | |
86 | movfr2gr.d t0, fa0 | |
87 | ||
88 | L(found): /* here t0 is nonzero and a3 is the start of the block it describes */ | |
89 | addi.d a0, a3, 63 /* a0 = address of byte 63 of the block */ | |
90 | clz.d t1, t0 /* leading zeros = distance from byte 63 down to the highest matching byte */ | |
91 | sub.d a0, a0, t1 /* a0 = address of the last match */ | |
92 | jr ra | |
93 | ||
94 | ||
95 | L(out): /* first block of the buffer: build its match mask, then fall through to L(end) */ | |
96 | xvmsknz.b xr0, xr0 | |
97 | xvmsknz.b xr1, xr1 | |
98 | xvpickve.w xr3, xr0, 4 | |
99 | xvpickve.w xr4, xr1, 4 | |
100 | ||
101 | vilvl.h vr0, vr3, vr0 | |
102 | vilvl.h vr1, vr4, vr1 | |
103 | vilvl.w vr0, vr1, vr0 | |
104 | movfr2gr.d t0, fa0 | |
105 | ||
106 | L(end): /* t0 = match mask of the block that contains the buffer start */ | |
107 | sll.d t2, t3, t4 /* head mask: the low t4 bits are cleared because those bytes precede the buffer start */ | |
108 | and t0, t0, t2 /* keep only matches at or after the buffer start */ | |
109 | addi.d a0, a3, 63 | |
110 | clz.d t1, t0 /* only meaningful when t0 is nonzero; the zero case is fixed up by maskeqz below */ | |
111 | ||
112 | sub.d a0, a0, t1 /* candidate address of the last match */ | |
113 | maskeqz a0, a0, t0 /* a0 = 0 when t0 is zero (no match), otherwise a0 is left unchanged */ | |
114 | jr ra | |
115 | L(ret0): /* empty buffer */ | |
116 | move a0, zero /* return 0 */ | |
117 | ||
118 | ||
119 | jr ra | |
120 | END(MEMRCHR) | |
121 | ||
122 | libc_hidden_builtin_def (MEMRCHR) | |
123 | #endif |