]>
Commit | Line | Data |
---|---|---|
dff8da6b | 1 | /* Copyright (C) 1991-2024 Free Software Foundation, Inc. |
6d52618b | 2 | This file is part of the GNU C Library. |
28f540f4 | 3 | |
6d52618b | 4 | The GNU C Library is free software; you can redistribute it and/or |
41bdb6e2 AJ |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
28f540f4 | 8 | |
6d52618b UD |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 12 | Lesser General Public License for more details. |
28f540f4 | 13 | |
41bdb6e2 | 14 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 15 | License along with the GNU C Library; if not, see |
5a82c748 | 16 | <https://www.gnu.org/licenses/>. */ |
28f540f4 | 17 | |
061d137b UD |
18 | /* If you consider tuning this algorithm, you should consult first: |
19 | Engineering a sort function; Jon Bentley and M. Douglas McIlroy; | |
20 | Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ | |
21 | ||
709fbd3e | 22 | #include <errno.h> |
061d137b | 23 | #include <limits.h> |
21d30c77 | 24 | #include <memswap.h> |
28f540f4 RM |
25 | #include <stdlib.h> |
26 | #include <string.h> | |
21d30c77 | 27 | #include <stdbool.h> |
28f540f4 | 28 | |
21d30c77 AZ |
29 | /* Swap SIZE bytes between addresses A and B. These helpers are provided |
30 | along the generic one as an optimization. */ | |
31 | ||
32 | enum swap_type_t | |
33 | { | |
34 | SWAP_WORDS_64, | |
35 | SWAP_WORDS_32, | |
709fbd3e | 36 | SWAP_VOID_ARG, |
21d30c77 AZ |
37 | SWAP_BYTES |
38 | }; | |
39 | ||
709fbd3e AZ |
40 | typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; |
41 | typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; | |
42 | ||
21d30c77 AZ |
43 | static inline void |
44 | swap_words_64 (void * restrict a, void * restrict b, size_t n) | |
45 | { | |
21d30c77 AZ |
46 | do |
47 | { | |
48 | n -= 8; | |
49 | u64_alias_t t = *(u64_alias_t *)(a + n); | |
50 | *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n); | |
51 | *(u64_alias_t *)(b + n) = t; | |
52 | } while (n); | |
53 | } | |
54 | ||
55 | static inline void | |
56 | swap_words_32 (void * restrict a, void * restrict b, size_t n) | |
57 | { | |
21d30c77 AZ |
58 | do |
59 | { | |
60 | n -= 4; | |
61 | u32_alias_t t = *(u32_alias_t *)(a + n); | |
62 | *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n); | |
63 | *(u32_alias_t *)(b + n) = t; | |
64 | } while (n); | |
65 | } | |
66 | ||
67 | /* Replace the indirect call with a serie of if statements. It should help | |
68 | the branch predictor. */ | |
69 | static void | |
70 | do_swap (void * restrict a, void * restrict b, size_t size, | |
71 | enum swap_type_t swap_type) | |
72 | { | |
73 | if (swap_type == SWAP_WORDS_64) | |
74 | swap_words_64 (a, b, size); | |
75 | else if (swap_type == SWAP_WORDS_32) | |
76 | swap_words_32 (a, b, size); | |
77 | else | |
78 | __memswap (a, b, size); | |
79 | } | |
28f540f4 | 80 | |
55364e1f FW |
81 | /* Establish the heap condition at index K, that is, the key at K will |
82 | not be less than either of its children, at 2 * K + 1 and 2 * K + 2 | |
83 | (if they exist). N is the last valid index. */ | |
274a46c9 AZ |
84 | static inline void |
85 | siftdown (void *base, size_t size, size_t k, size_t n, | |
86 | enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg) | |
87 | { | |
55364e1f FW |
88 | /* There can only be a heap condition violation if there are |
89 | children. */ | |
90 | while (2 * k + 1 <= n) | |
274a46c9 | 91 | { |
55364e1f FW |
92 | /* Left child. */ |
93 | size_t j = 2 * k + 1; | |
94 | /* If the right child is larger, use it. */ | |
274a46c9 AZ |
95 | if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0) |
96 | j++; | |
97 | ||
55364e1f | 98 | /* If k is already >= to its children, we are done. */ |
f8cfb683 | 99 | if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0) |
274a46c9 AZ |
100 | break; |
101 | ||
55364e1f | 102 | /* Heal the violation. */ |
274a46c9 | 103 | do_swap (base + (size * j), base + (k * size), size, swap_type); |
55364e1f FW |
104 | |
105 | /* Swapping with j may have introduced a violation at j. Fix | |
106 | it in the next loop iteration. */ | |
274a46c9 AZ |
107 | k = j; |
108 | } | |
109 | } | |
110 | ||
55364e1f | 111 | /* Establish the heap condition for the indices 0 to N (inclusive). */ |
274a46c9 AZ |
112 | static inline void |
113 | heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type, | |
114 | __compar_d_fn_t cmp, void *arg) | |
115 | { | |
55364e1f FW |
116 | /* If n is odd, k = n / 2 has a left child at n, so this is the |
117 | largest index that can have a heap condition violation regarding | |
118 | its children. */ | |
274a46c9 AZ |
119 | size_t k = n / 2; |
120 | while (1) | |
121 | { | |
122 | siftdown (base, size, k, n, swap_type, cmp, arg); | |
123 | if (k-- == 0) | |
124 | break; | |
125 | } | |
126 | } | |
127 | ||
709fbd3e AZ |
128 | static enum swap_type_t |
129 | get_swap_type (void *const pbase, size_t size) | |
130 | { | |
131 | if ((size & (sizeof (uint32_t) - 1)) == 0 | |
132 | && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0) | |
133 | { | |
134 | if (size == sizeof (uint32_t)) | |
135 | return SWAP_WORDS_32; | |
136 | else if (size == sizeof (uint64_t) | |
137 | && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0) | |
138 | return SWAP_WORDS_64; | |
139 | } | |
140 | return SWAP_BYTES; | |
141 | } | |
142 | ||
143 | ||
144 | /* A non-recursive heapsort with worst-case performance of O(nlog n) and | |
145 | worst-case space complexity of O(1). It sorts the array starting at | |
146 | BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback | |
147 | function used to swap elements, and CMP is the function used to compare | |
148 | elements. */ | |
274a46c9 | 149 | static void |
709fbd3e | 150 | heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg) |
274a46c9 | 151 | { |
74d2731a | 152 | if (n == 0) |
274a46c9 AZ |
153 | return; |
154 | ||
709fbd3e AZ |
155 | enum swap_type_t swap_type = get_swap_type (base, size); |
156 | ||
274a46c9 AZ |
157 | /* Build the binary heap, largest value at the base[0]. */ |
158 | heapify (base, size, n, swap_type, cmp, arg); | |
159 | ||
55364e1f | 160 | while (true) |
274a46c9 | 161 | { |
55364e1f FW |
162 | /* Indices 0 .. n contain the binary heap. Extract the largest |
163 | element put it into the final position in the array. */ | |
274a46c9 | 164 | do_swap (base, base + (n * size), size, swap_type); |
55364e1f FW |
165 | |
166 | /* The heap is now one element shorter. */ | |
274a46c9 | 167 | n--; |
55364e1f FW |
168 | if (n == 0) |
169 | break; | |
170 | ||
171 | /* By swapping in elements 0 and the previous value of n (now at | |
172 | n + 1), we likely introduced a heap condition violation. Fix | |
173 | it for the reduced heap. */ | |
274a46c9 AZ |
174 | siftdown (base, size, 0, n, swap_type, cmp, arg); |
175 | } | |
176 | } | |
28f540f4 | 177 | |
709fbd3e AZ |
178 | /* The maximum size in bytes required by mergesort that will be provided |
179 | through a buffer allocated in the stack. */ | |
180 | #define QSORT_STACK_SIZE 1024 | |
181 | ||
182 | /* Elements larger than this value will be sorted through indirect sorting | |
183 | to minimize the need to memory swap calls. */ | |
184 | #define INDIRECT_SORT_SIZE_THRES 32 | |
185 | ||
186 | struct msort_param | |
a035a985 | 187 | { |
709fbd3e AZ |
188 | size_t s; |
189 | enum swap_type_t var; | |
190 | __compar_d_fn_t cmp; | |
191 | void *arg; | |
192 | char *t; | |
193 | }; | |
a035a985 | 194 | |
709fbd3e AZ |
195 | static void |
196 | msort_with_tmp (const struct msort_param *p, void *b, size_t n) | |
197 | { | |
198 | char *b1, *b2; | |
199 | size_t n1, n2; | |
a035a985 | 200 | |
709fbd3e AZ |
201 | if (n <= 1) |
202 | return; | |
a035a985 | 203 | |
709fbd3e AZ |
204 | n1 = n / 2; |
205 | n2 = n - n1; | |
206 | b1 = b; | |
207 | b2 = (char *) b + (n1 * p->s); | |
a035a985 | 208 | |
709fbd3e AZ |
209 | msort_with_tmp (p, b1, n1); |
210 | msort_with_tmp (p, b2, n2); | |
a035a985 | 211 | |
709fbd3e AZ |
212 | char *tmp = p->t; |
213 | const size_t s = p->s; | |
214 | __compar_d_fn_t cmp = p->cmp; | |
215 | void *arg = p->arg; | |
216 | switch (p->var) | |
a035a985 | 217 | { |
709fbd3e AZ |
218 | case SWAP_WORDS_32: |
219 | while (n1 > 0 && n2 > 0) | |
220 | { | |
221 | if (cmp (b1, b2, arg) <= 0) | |
222 | { | |
223 | *(u32_alias_t *) tmp = *(u32_alias_t *) b1; | |
224 | b1 += sizeof (u32_alias_t); | |
225 | --n1; | |
226 | } | |
227 | else | |
228 | { | |
229 | *(u32_alias_t *) tmp = *(u32_alias_t *) b2; | |
230 | b2 += sizeof (u32_alias_t); | |
231 | --n2; | |
232 | } | |
233 | tmp += sizeof (u32_alias_t); | |
234 | } | |
235 | break; | |
236 | case SWAP_WORDS_64: | |
237 | while (n1 > 0 && n2 > 0) | |
238 | { | |
239 | if (cmp (b1, b2, arg) <= 0) | |
240 | { | |
241 | *(u64_alias_t *) tmp = *(u64_alias_t *) b1; | |
242 | b1 += sizeof (u64_alias_t); | |
243 | --n1; | |
244 | } | |
245 | else | |
246 | { | |
247 | *(u64_alias_t *) tmp = *(u64_alias_t *) b2; | |
248 | b2 += sizeof (u64_alias_t); | |
249 | --n2; | |
250 | } | |
251 | tmp += sizeof (u64_alias_t); | |
252 | } | |
253 | break; | |
254 | case SWAP_VOID_ARG: | |
255 | while (n1 > 0 && n2 > 0) | |
256 | { | |
257 | if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) | |
258 | { | |
259 | *(void **) tmp = *(void **) b1; | |
260 | b1 += sizeof (void *); | |
261 | --n1; | |
262 | } | |
263 | else | |
264 | { | |
265 | *(void **) tmp = *(void **) b2; | |
266 | b2 += sizeof (void *); | |
267 | --n2; | |
268 | } | |
269 | tmp += sizeof (void *); | |
270 | } | |
271 | break; | |
272 | default: | |
273 | while (n1 > 0 && n2 > 0) | |
274 | { | |
275 | if (cmp (b1, b2, arg) <= 0) | |
276 | { | |
277 | tmp = (char *) __mempcpy (tmp, b1, s); | |
278 | b1 += s; | |
279 | --n1; | |
280 | } | |
281 | else | |
282 | { | |
283 | tmp = (char *) __mempcpy (tmp, b2, s); | |
284 | b2 += s; | |
285 | --n2; | |
286 | } | |
287 | } | |
288 | break; | |
a035a985 | 289 | } |
28f540f4 | 290 | |
709fbd3e AZ |
291 | if (n1 > 0) |
292 | memcpy (tmp, b1, n1 * s); | |
293 | memcpy (b, p->t, (n - n2) * s); | |
294 | } | |
28f540f4 | 295 | |
709fbd3e AZ |
296 | static void |
297 | __attribute_used__ | |
298 | indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n, | |
299 | size_t s) | |
300 | { | |
301 | /* Indirect sorting. */ | |
302 | char *ip = (char *) b; | |
303 | void **tp = (void **) (p->t + n * sizeof (void *)); | |
304 | void **t = tp; | |
305 | void *tmp_storage = (void *) (tp + n); | |
28f540f4 | 306 | |
709fbd3e AZ |
307 | while ((void *) t < tmp_storage) |
308 | { | |
309 | *t++ = ip; | |
310 | ip += s; | |
311 | } | |
312 | msort_with_tmp (p, p->t + n * sizeof (void *), n); | |
313 | ||
314 | /* tp[0] .. tp[n - 1] is now sorted, copy around entries of | |
315 | the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ | |
316 | char *kp; | |
317 | size_t i; | |
318 | for (i = 0, ip = (char *) b; i < n; i++, ip += s) | |
319 | if ((kp = tp[i]) != ip) | |
320 | { | |
321 | size_t j = i; | |
322 | char *jp = ip; | |
323 | memcpy (tmp_storage, ip, s); | |
324 | ||
325 | do | |
326 | { | |
327 | size_t k = (kp - (char *) b) / s; | |
328 | tp[j] = jp; | |
329 | memcpy (jp, kp, s); | |
330 | j = k; | |
331 | jp = kp; | |
332 | kp = tp[k]; | |
333 | } | |
334 | while (kp != ip); | |
335 | ||
336 | tp[j] = jp; | |
337 | memcpy (jp, tmp_storage, s); | |
338 | } | |
339 | } | |
28f540f4 RM |
340 | |
341 | void | |
03bf8357 AZ |
342 | __qsort_r (void *const pbase, size_t total_elems, size_t size, |
343 | __compar_d_fn_t cmp, void *arg) | |
28f540f4 | 344 | { |
274a46c9 | 345 | if (total_elems <= 1) |
28f540f4 RM |
346 | return; |
347 | ||
709fbd3e AZ |
348 | /* Align to the maximum size used by the swap optimization. */ |
349 | _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE]; | |
350 | size_t total_size = total_elems * size; | |
351 | char *buf; | |
21d30c77 | 352 | |
709fbd3e AZ |
353 | if (size > INDIRECT_SORT_SIZE_THRES) |
354 | total_size = 2 * total_elems * sizeof (void *) + size; | |
274a46c9 | 355 | |
709fbd3e AZ |
356 | if (total_size < sizeof buf) |
357 | buf = tmp; | |
358 | else | |
28f540f4 | 359 | { |
709fbd3e AZ |
360 | int save = errno; |
361 | buf = malloc (total_size); | |
362 | __set_errno (save); | |
363 | if (buf == NULL) | |
364 | { | |
365 | /* Fallback to heapsort in case of memory failure. */ | |
366 | heapsort_r (pbase, total_elems - 1, size, cmp, arg); | |
367 | return; | |
368 | } | |
369 | } | |
370 | ||
371 | if (size > INDIRECT_SORT_SIZE_THRES) | |
372 | { | |
373 | const struct msort_param msort_param = | |
374 | { | |
375 | .s = sizeof (void *), | |
376 | .cmp = cmp, | |
377 | .arg = arg, | |
378 | .var = SWAP_VOID_ARG, | |
379 | .t = buf, | |
380 | }; | |
381 | indirect_msort_with_tmp (&msort_param, pbase, total_elems, size); | |
382 | } | |
383 | else | |
384 | { | |
385 | const struct msort_param msort_param = | |
386 | { | |
387 | .s = size, | |
388 | .cmp = cmp, | |
389 | .arg = arg, | |
390 | .var = get_swap_type (pbase, size), | |
391 | .t = buf, | |
392 | }; | |
393 | msort_with_tmp (&msort_param, pbase, total_elems); | |
28f540f4 RM |
394 | } |
395 | ||
709fbd3e AZ |
396 | if (buf != tmp) |
397 | free (buf); | |
28f540f4 | 398 | } |
03bf8357 AZ |
399 | libc_hidden_def (__qsort_r) |
400 | weak_alias (__qsort_r, qsort_r) | |
401 | ||
402 | void | |
403 | qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) | |
404 | { | |
405 | return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL); | |
406 | } | |
407 | libc_hidden_def (qsort) |