]>
Commit | Line | Data |
---|---|---|
dff8da6b | 1 | /* Copyright (C) 1991-2024 Free Software Foundation, Inc. |
6d52618b | 2 | This file is part of the GNU C Library. |
28f540f4 | 3 | |
6d52618b | 4 | The GNU C Library is free software; you can redistribute it and/or |
41bdb6e2 AJ |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
28f540f4 | 8 | |
6d52618b UD |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 12 | Lesser General Public License for more details. |
28f540f4 | 13 | |
41bdb6e2 | 14 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 15 | License along with the GNU C Library; if not, see |
5a82c748 | 16 | <https://www.gnu.org/licenses/>. */ |
28f540f4 | 17 | |
061d137b UD |
18 | /* If you consider tuning this algorithm, you should consult first: |
19 | Engineering a sort function; Jon Bentley and M. Douglas McIlroy; | |
20 | Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ | |
21 | ||
709fbd3e | 22 | #include <errno.h> |
061d137b | 23 | #include <limits.h> |
21d30c77 | 24 | #include <memswap.h> |
28f540f4 RM |
25 | #include <stdlib.h> |
26 | #include <string.h> | |
21d30c77 | 27 | #include <stdbool.h> |
28f540f4 | 28 | |
21d30c77 AZ |
29 | /* Swap SIZE bytes between addresses A and B. These helpers are provided |
30 | along the generic one as an optimization. */ | |
31 | ||
32 | enum swap_type_t | |
33 | { | |
34 | SWAP_WORDS_64, | |
35 | SWAP_WORDS_32, | |
709fbd3e | 36 | SWAP_VOID_ARG, |
21d30c77 AZ |
37 | SWAP_BYTES |
38 | }; | |
39 | ||
709fbd3e AZ |
40 | typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t; |
41 | typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t; | |
42 | ||
21d30c77 AZ |
43 | /* If this function returns true, elements can be safely copied using word |
44 | loads and stores. Otherwise, it might not be safe. BASE (as an integer) | |
45 | must be a multiple of the word alignment. SIZE must be a multiple of | |
46 | WORDSIZE. Since WORDSIZE must be a multiple of the word alignment, and | |
47 | WORDSIZE is a power of two on all supported platforms, this function for | |
48 | speed merely checks that BASE and SIZE are both multiples of the word | |
49 | size. */ | |
50 | static inline bool | |
51 | is_aligned (const void *base, size_t size, size_t wordsize) | |
52 | { | |
53 | return (((uintptr_t) base | size) & (wordsize - 1)) == 0; | |
54 | } | |
55 | ||
56 | static inline void | |
57 | swap_words_64 (void * restrict a, void * restrict b, size_t n) | |
58 | { | |
21d30c77 AZ |
59 | do |
60 | { | |
61 | n -= 8; | |
62 | u64_alias_t t = *(u64_alias_t *)(a + n); | |
63 | *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n); | |
64 | *(u64_alias_t *)(b + n) = t; | |
65 | } while (n); | |
66 | } | |
67 | ||
68 | static inline void | |
69 | swap_words_32 (void * restrict a, void * restrict b, size_t n) | |
70 | { | |
21d30c77 AZ |
71 | do |
72 | { | |
73 | n -= 4; | |
74 | u32_alias_t t = *(u32_alias_t *)(a + n); | |
75 | *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n); | |
76 | *(u32_alias_t *)(b + n) = t; | |
77 | } while (n); | |
78 | } | |
79 | ||
80 | /* Replace the indirect call with a serie of if statements. It should help | |
81 | the branch predictor. */ | |
82 | static void | |
83 | do_swap (void * restrict a, void * restrict b, size_t size, | |
84 | enum swap_type_t swap_type) | |
85 | { | |
86 | if (swap_type == SWAP_WORDS_64) | |
87 | swap_words_64 (a, b, size); | |
88 | else if (swap_type == SWAP_WORDS_32) | |
89 | swap_words_32 (a, b, size); | |
90 | else | |
91 | __memswap (a, b, size); | |
92 | } | |
28f540f4 | 93 | |
55364e1f FW |
94 | /* Establish the heap condition at index K, that is, the key at K will |
95 | not be less than either of its children, at 2 * K + 1 and 2 * K + 2 | |
96 | (if they exist). N is the last valid index. */ | |
274a46c9 AZ |
97 | static inline void |
98 | siftdown (void *base, size_t size, size_t k, size_t n, | |
99 | enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg) | |
100 | { | |
55364e1f FW |
101 | /* There can only be a heap condition violation if there are |
102 | children. */ | |
103 | while (2 * k + 1 <= n) | |
274a46c9 | 104 | { |
55364e1f FW |
105 | /* Left child. */ |
106 | size_t j = 2 * k + 1; | |
107 | /* If the right child is larger, use it. */ | |
274a46c9 AZ |
108 | if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0) |
109 | j++; | |
110 | ||
55364e1f | 111 | /* If k is already >= to its children, we are done. */ |
f8cfb683 | 112 | if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0) |
274a46c9 AZ |
113 | break; |
114 | ||
55364e1f | 115 | /* Heal the violation. */ |
274a46c9 | 116 | do_swap (base + (size * j), base + (k * size), size, swap_type); |
55364e1f FW |
117 | |
118 | /* Swapping with j may have introduced a violation at j. Fix | |
119 | it in the next loop iteration. */ | |
274a46c9 AZ |
120 | k = j; |
121 | } | |
122 | } | |
123 | ||
55364e1f | 124 | /* Establish the heap condition for the indices 0 to N (inclusive). */ |
274a46c9 AZ |
125 | static inline void |
126 | heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type, | |
127 | __compar_d_fn_t cmp, void *arg) | |
128 | { | |
55364e1f FW |
129 | /* If n is odd, k = n / 2 has a left child at n, so this is the |
130 | largest index that can have a heap condition violation regarding | |
131 | its children. */ | |
274a46c9 AZ |
132 | size_t k = n / 2; |
133 | while (1) | |
134 | { | |
135 | siftdown (base, size, k, n, swap_type, cmp, arg); | |
136 | if (k-- == 0) | |
137 | break; | |
138 | } | |
139 | } | |
140 | ||
709fbd3e AZ |
141 | static enum swap_type_t |
142 | get_swap_type (void *const pbase, size_t size) | |
143 | { | |
144 | if ((size & (sizeof (uint32_t) - 1)) == 0 | |
145 | && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0) | |
146 | { | |
147 | if (size == sizeof (uint32_t)) | |
148 | return SWAP_WORDS_32; | |
149 | else if (size == sizeof (uint64_t) | |
150 | && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0) | |
151 | return SWAP_WORDS_64; | |
152 | } | |
153 | return SWAP_BYTES; | |
154 | } | |
155 | ||
156 | ||
157 | /* A non-recursive heapsort with worst-case performance of O(nlog n) and | |
158 | worst-case space complexity of O(1). It sorts the array starting at | |
159 | BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback | |
160 | function used to swap elements, and CMP is the function used to compare | |
161 | elements. */ | |
274a46c9 | 162 | static void |
709fbd3e | 163 | heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg) |
274a46c9 | 164 | { |
55364e1f | 165 | if (n <= 1) |
274a46c9 AZ |
166 | return; |
167 | ||
709fbd3e AZ |
168 | enum swap_type_t swap_type = get_swap_type (base, size); |
169 | ||
274a46c9 AZ |
170 | /* Build the binary heap, largest value at the base[0]. */ |
171 | heapify (base, size, n, swap_type, cmp, arg); | |
172 | ||
55364e1f | 173 | while (true) |
274a46c9 | 174 | { |
55364e1f FW |
175 | /* Indices 0 .. n contain the binary heap. Extract the largest |
176 | element put it into the final position in the array. */ | |
274a46c9 | 177 | do_swap (base, base + (n * size), size, swap_type); |
55364e1f FW |
178 | |
179 | /* The heap is now one element shorter. */ | |
274a46c9 | 180 | n--; |
55364e1f FW |
181 | if (n == 0) |
182 | break; | |
183 | ||
184 | /* By swapping in elements 0 and the previous value of n (now at | |
185 | n + 1), we likely introduced a heap condition violation. Fix | |
186 | it for the reduced heap. */ | |
274a46c9 AZ |
187 | siftdown (base, size, 0, n, swap_type, cmp, arg); |
188 | } | |
189 | } | |
28f540f4 | 190 | |
709fbd3e AZ |
191 | /* The maximum size in bytes required by mergesort that will be provided |
192 | through a buffer allocated in the stack. */ | |
193 | #define QSORT_STACK_SIZE 1024 | |
194 | ||
195 | /* Elements larger than this value will be sorted through indirect sorting | |
196 | to minimize the need to memory swap calls. */ | |
197 | #define INDIRECT_SORT_SIZE_THRES 32 | |
198 | ||
199 | struct msort_param | |
a035a985 | 200 | { |
709fbd3e AZ |
201 | size_t s; |
202 | enum swap_type_t var; | |
203 | __compar_d_fn_t cmp; | |
204 | void *arg; | |
205 | char *t; | |
206 | }; | |
a035a985 | 207 | |
709fbd3e AZ |
208 | static void |
209 | msort_with_tmp (const struct msort_param *p, void *b, size_t n) | |
210 | { | |
211 | char *b1, *b2; | |
212 | size_t n1, n2; | |
a035a985 | 213 | |
709fbd3e AZ |
214 | if (n <= 1) |
215 | return; | |
a035a985 | 216 | |
709fbd3e AZ |
217 | n1 = n / 2; |
218 | n2 = n - n1; | |
219 | b1 = b; | |
220 | b2 = (char *) b + (n1 * p->s); | |
a035a985 | 221 | |
709fbd3e AZ |
222 | msort_with_tmp (p, b1, n1); |
223 | msort_with_tmp (p, b2, n2); | |
a035a985 | 224 | |
709fbd3e AZ |
225 | char *tmp = p->t; |
226 | const size_t s = p->s; | |
227 | __compar_d_fn_t cmp = p->cmp; | |
228 | void *arg = p->arg; | |
229 | switch (p->var) | |
a035a985 | 230 | { |
709fbd3e AZ |
231 | case SWAP_WORDS_32: |
232 | while (n1 > 0 && n2 > 0) | |
233 | { | |
234 | if (cmp (b1, b2, arg) <= 0) | |
235 | { | |
236 | *(u32_alias_t *) tmp = *(u32_alias_t *) b1; | |
237 | b1 += sizeof (u32_alias_t); | |
238 | --n1; | |
239 | } | |
240 | else | |
241 | { | |
242 | *(u32_alias_t *) tmp = *(u32_alias_t *) b2; | |
243 | b2 += sizeof (u32_alias_t); | |
244 | --n2; | |
245 | } | |
246 | tmp += sizeof (u32_alias_t); | |
247 | } | |
248 | break; | |
249 | case SWAP_WORDS_64: | |
250 | while (n1 > 0 && n2 > 0) | |
251 | { | |
252 | if (cmp (b1, b2, arg) <= 0) | |
253 | { | |
254 | *(u64_alias_t *) tmp = *(u64_alias_t *) b1; | |
255 | b1 += sizeof (u64_alias_t); | |
256 | --n1; | |
257 | } | |
258 | else | |
259 | { | |
260 | *(u64_alias_t *) tmp = *(u64_alias_t *) b2; | |
261 | b2 += sizeof (u64_alias_t); | |
262 | --n2; | |
263 | } | |
264 | tmp += sizeof (u64_alias_t); | |
265 | } | |
266 | break; | |
267 | case SWAP_VOID_ARG: | |
268 | while (n1 > 0 && n2 > 0) | |
269 | { | |
270 | if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0) | |
271 | { | |
272 | *(void **) tmp = *(void **) b1; | |
273 | b1 += sizeof (void *); | |
274 | --n1; | |
275 | } | |
276 | else | |
277 | { | |
278 | *(void **) tmp = *(void **) b2; | |
279 | b2 += sizeof (void *); | |
280 | --n2; | |
281 | } | |
282 | tmp += sizeof (void *); | |
283 | } | |
284 | break; | |
285 | default: | |
286 | while (n1 > 0 && n2 > 0) | |
287 | { | |
288 | if (cmp (b1, b2, arg) <= 0) | |
289 | { | |
290 | tmp = (char *) __mempcpy (tmp, b1, s); | |
291 | b1 += s; | |
292 | --n1; | |
293 | } | |
294 | else | |
295 | { | |
296 | tmp = (char *) __mempcpy (tmp, b2, s); | |
297 | b2 += s; | |
298 | --n2; | |
299 | } | |
300 | } | |
301 | break; | |
a035a985 | 302 | } |
28f540f4 | 303 | |
709fbd3e AZ |
304 | if (n1 > 0) |
305 | memcpy (tmp, b1, n1 * s); | |
306 | memcpy (b, p->t, (n - n2) * s); | |
307 | } | |
28f540f4 | 308 | |
709fbd3e AZ |
309 | static void |
310 | __attribute_used__ | |
311 | indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n, | |
312 | size_t s) | |
313 | { | |
314 | /* Indirect sorting. */ | |
315 | char *ip = (char *) b; | |
316 | void **tp = (void **) (p->t + n * sizeof (void *)); | |
317 | void **t = tp; | |
318 | void *tmp_storage = (void *) (tp + n); | |
28f540f4 | 319 | |
709fbd3e AZ |
320 | while ((void *) t < tmp_storage) |
321 | { | |
322 | *t++ = ip; | |
323 | ip += s; | |
324 | } | |
325 | msort_with_tmp (p, p->t + n * sizeof (void *), n); | |
326 | ||
327 | /* tp[0] .. tp[n - 1] is now sorted, copy around entries of | |
328 | the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ | |
329 | char *kp; | |
330 | size_t i; | |
331 | for (i = 0, ip = (char *) b; i < n; i++, ip += s) | |
332 | if ((kp = tp[i]) != ip) | |
333 | { | |
334 | size_t j = i; | |
335 | char *jp = ip; | |
336 | memcpy (tmp_storage, ip, s); | |
337 | ||
338 | do | |
339 | { | |
340 | size_t k = (kp - (char *) b) / s; | |
341 | tp[j] = jp; | |
342 | memcpy (jp, kp, s); | |
343 | j = k; | |
344 | jp = kp; | |
345 | kp = tp[k]; | |
346 | } | |
347 | while (kp != ip); | |
348 | ||
349 | tp[j] = jp; | |
350 | memcpy (jp, tmp_storage, s); | |
351 | } | |
352 | } | |
28f540f4 RM |
353 | |
354 | void | |
03bf8357 AZ |
355 | __qsort_r (void *const pbase, size_t total_elems, size_t size, |
356 | __compar_d_fn_t cmp, void *arg) | |
28f540f4 | 357 | { |
274a46c9 | 358 | if (total_elems <= 1) |
28f540f4 RM |
359 | return; |
360 | ||
709fbd3e AZ |
361 | /* Align to the maximum size used by the swap optimization. */ |
362 | _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE]; | |
363 | size_t total_size = total_elems * size; | |
364 | char *buf; | |
21d30c77 | 365 | |
709fbd3e AZ |
366 | if (size > INDIRECT_SORT_SIZE_THRES) |
367 | total_size = 2 * total_elems * sizeof (void *) + size; | |
274a46c9 | 368 | |
709fbd3e AZ |
369 | if (total_size < sizeof buf) |
370 | buf = tmp; | |
371 | else | |
28f540f4 | 372 | { |
709fbd3e AZ |
373 | int save = errno; |
374 | buf = malloc (total_size); | |
375 | __set_errno (save); | |
376 | if (buf == NULL) | |
377 | { | |
378 | /* Fallback to heapsort in case of memory failure. */ | |
379 | heapsort_r (pbase, total_elems - 1, size, cmp, arg); | |
380 | return; | |
381 | } | |
382 | } | |
383 | ||
384 | if (size > INDIRECT_SORT_SIZE_THRES) | |
385 | { | |
386 | const struct msort_param msort_param = | |
387 | { | |
388 | .s = sizeof (void *), | |
389 | .cmp = cmp, | |
390 | .arg = arg, | |
391 | .var = SWAP_VOID_ARG, | |
392 | .t = buf, | |
393 | }; | |
394 | indirect_msort_with_tmp (&msort_param, pbase, total_elems, size); | |
395 | } | |
396 | else | |
397 | { | |
398 | const struct msort_param msort_param = | |
399 | { | |
400 | .s = size, | |
401 | .cmp = cmp, | |
402 | .arg = arg, | |
403 | .var = get_swap_type (pbase, size), | |
404 | .t = buf, | |
405 | }; | |
406 | msort_with_tmp (&msort_param, pbase, total_elems); | |
28f540f4 RM |
407 | } |
408 | ||
709fbd3e AZ |
409 | if (buf != tmp) |
410 | free (buf); | |
28f540f4 | 411 | } |
03bf8357 AZ |
412 | libc_hidden_def (__qsort_r) |
413 | weak_alias (__qsort_r, qsort_r) | |
414 | ||
415 | void | |
416 | qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) | |
417 | { | |
418 | return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL); | |
419 | } | |
420 | libc_hidden_def (qsort) |