]> git.ipfire.org Git - thirdparty/glibc.git/blame - stdlib/qsort.c
stdlib: Fix heapsort for cases with exactly two elements
[thirdparty/glibc.git] / stdlib / qsort.c
CommitLineData
dff8da6b 1/* Copyright (C) 1991-2024 Free Software Foundation, Inc.
6d52618b 2 This file is part of the GNU C Library.
28f540f4 3
6d52618b 4 The GNU C Library is free software; you can redistribute it and/or
41bdb6e2
AJ
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
28f540f4 8
6d52618b
UD
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41bdb6e2 12 Lesser General Public License for more details.
28f540f4 13
41bdb6e2 14 You should have received a copy of the GNU Lesser General Public
59ba27a6 15 License along with the GNU C Library; if not, see
5a82c748 16 <https://www.gnu.org/licenses/>. */
28f540f4 17
061d137b
UD
18/* If you consider tuning this algorithm, you should consult first:
19 Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
20 Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
21
709fbd3e 22#include <errno.h>
061d137b 23#include <limits.h>
21d30c77 24#include <memswap.h>
28f540f4
RM
25#include <stdlib.h>
26#include <string.h>
21d30c77 27#include <stdbool.h>
28f540f4 28
21d30c77
AZ
29/* Swap SIZE bytes between addresses A and B. These helpers are provided
30 along the generic one as an optimization. */
31
32enum swap_type_t
33 {
34 SWAP_WORDS_64,
35 SWAP_WORDS_32,
709fbd3e 36 SWAP_VOID_ARG,
21d30c77
AZ
37 SWAP_BYTES
38 };
39
709fbd3e
AZ
40typedef uint32_t __attribute__ ((__may_alias__)) u32_alias_t;
41typedef uint64_t __attribute__ ((__may_alias__)) u64_alias_t;
42
21d30c77
AZ
43/* If this function returns true, elements can be safely copied using word
44 loads and stores. Otherwise, it might not be safe. BASE (as an integer)
45 must be a multiple of the word alignment. SIZE must be a multiple of
46 WORDSIZE. Since WORDSIZE must be a multiple of the word alignment, and
47 WORDSIZE is a power of two on all supported platforms, this function for
48 speed merely checks that BASE and SIZE are both multiples of the word
49 size. */
50static inline bool
51is_aligned (const void *base, size_t size, size_t wordsize)
52{
53 return (((uintptr_t) base | size) & (wordsize - 1)) == 0;
54}
55
56static inline void
57swap_words_64 (void * restrict a, void * restrict b, size_t n)
58{
21d30c77
AZ
59 do
60 {
61 n -= 8;
62 u64_alias_t t = *(u64_alias_t *)(a + n);
63 *(u64_alias_t *)(a + n) = *(u64_alias_t *)(b + n);
64 *(u64_alias_t *)(b + n) = t;
65 } while (n);
66}
67
68static inline void
69swap_words_32 (void * restrict a, void * restrict b, size_t n)
70{
21d30c77
AZ
71 do
72 {
73 n -= 4;
74 u32_alias_t t = *(u32_alias_t *)(a + n);
75 *(u32_alias_t *)(a + n) = *(u32_alias_t *)(b + n);
76 *(u32_alias_t *)(b + n) = t;
77 } while (n);
78}
79
80/* Replace the indirect call with a serie of if statements. It should help
81 the branch predictor. */
82static void
83do_swap (void * restrict a, void * restrict b, size_t size,
84 enum swap_type_t swap_type)
85{
86 if (swap_type == SWAP_WORDS_64)
87 swap_words_64 (a, b, size);
88 else if (swap_type == SWAP_WORDS_32)
89 swap_words_32 (a, b, size);
90 else
91 __memswap (a, b, size);
92}
28f540f4 93
55364e1f
FW
94/* Establish the heap condition at index K, that is, the key at K will
95 not be less than either of its children, at 2 * K + 1 and 2 * K + 2
96 (if they exist). N is the last valid index. */
274a46c9
AZ
97static inline void
98siftdown (void *base, size_t size, size_t k, size_t n,
99 enum swap_type_t swap_type, __compar_d_fn_t cmp, void *arg)
100{
55364e1f
FW
101 /* There can only be a heap condition violation if there are
102 children. */
103 while (2 * k + 1 <= n)
274a46c9 104 {
55364e1f
FW
105 /* Left child. */
106 size_t j = 2 * k + 1;
107 /* If the right child is larger, use it. */
274a46c9
AZ
108 if (j < n && cmp (base + (j * size), base + ((j + 1) * size), arg) < 0)
109 j++;
110
55364e1f 111 /* If k is already >= to its children, we are done. */
f8cfb683 112 if (j == k || cmp (base + (k * size), base + (j * size), arg) >= 0)
274a46c9
AZ
113 break;
114
55364e1f 115 /* Heal the violation. */
274a46c9 116 do_swap (base + (size * j), base + (k * size), size, swap_type);
55364e1f
FW
117
118 /* Swapping with j may have introduced a violation at j. Fix
119 it in the next loop iteration. */
274a46c9
AZ
120 k = j;
121 }
122}
123
55364e1f 124/* Establish the heap condition for the indices 0 to N (inclusive). */
274a46c9
AZ
125static inline void
126heapify (void *base, size_t size, size_t n, enum swap_type_t swap_type,
127 __compar_d_fn_t cmp, void *arg)
128{
55364e1f
FW
129 /* If n is odd, k = n / 2 has a left child at n, so this is the
130 largest index that can have a heap condition violation regarding
131 its children. */
274a46c9
AZ
132 size_t k = n / 2;
133 while (1)
134 {
135 siftdown (base, size, k, n, swap_type, cmp, arg);
136 if (k-- == 0)
137 break;
138 }
139}
140
709fbd3e
AZ
141static enum swap_type_t
142get_swap_type (void *const pbase, size_t size)
143{
144 if ((size & (sizeof (uint32_t) - 1)) == 0
145 && ((uintptr_t) pbase) % __alignof__ (uint32_t) == 0)
146 {
147 if (size == sizeof (uint32_t))
148 return SWAP_WORDS_32;
149 else if (size == sizeof (uint64_t)
150 && ((uintptr_t) pbase) % __alignof__ (uint64_t) == 0)
151 return SWAP_WORDS_64;
152 }
153 return SWAP_BYTES;
154}
155
156
157/* A non-recursive heapsort with worst-case performance of O(nlog n) and
158 worst-case space complexity of O(1). It sorts the array starting at
159 BASE with n + 1 elements of SIZE bytes. The SWAP_TYPE is the callback
160 function used to swap elements, and CMP is the function used to compare
161 elements. */
274a46c9 162static void
709fbd3e 163heapsort_r (void *base, size_t n, size_t size, __compar_d_fn_t cmp, void *arg)
274a46c9 164{
74d2731a 165 if (n == 0)
274a46c9
AZ
166 return;
167
709fbd3e
AZ
168 enum swap_type_t swap_type = get_swap_type (base, size);
169
274a46c9
AZ
170 /* Build the binary heap, largest value at the base[0]. */
171 heapify (base, size, n, swap_type, cmp, arg);
172
55364e1f 173 while (true)
274a46c9 174 {
55364e1f
FW
175 /* Indices 0 .. n contain the binary heap. Extract the largest
176 element put it into the final position in the array. */
274a46c9 177 do_swap (base, base + (n * size), size, swap_type);
55364e1f
FW
178
179 /* The heap is now one element shorter. */
274a46c9 180 n--;
55364e1f
FW
181 if (n == 0)
182 break;
183
184 /* By swapping in elements 0 and the previous value of n (now at
185 n + 1), we likely introduced a heap condition violation. Fix
186 it for the reduced heap. */
274a46c9
AZ
187 siftdown (base, size, 0, n, swap_type, cmp, arg);
188 }
189}
28f540f4 190
709fbd3e
AZ
191/* The maximum size in bytes required by mergesort that will be provided
192 through a buffer allocated in the stack. */
193#define QSORT_STACK_SIZE 1024
194
195/* Elements larger than this value will be sorted through indirect sorting
196 to minimize the need to memory swap calls. */
197#define INDIRECT_SORT_SIZE_THRES 32
198
199struct msort_param
a035a985 200{
709fbd3e
AZ
201 size_t s;
202 enum swap_type_t var;
203 __compar_d_fn_t cmp;
204 void *arg;
205 char *t;
206};
a035a985 207
709fbd3e
AZ
208static void
209msort_with_tmp (const struct msort_param *p, void *b, size_t n)
210{
211 char *b1, *b2;
212 size_t n1, n2;
a035a985 213
709fbd3e
AZ
214 if (n <= 1)
215 return;
a035a985 216
709fbd3e
AZ
217 n1 = n / 2;
218 n2 = n - n1;
219 b1 = b;
220 b2 = (char *) b + (n1 * p->s);
a035a985 221
709fbd3e
AZ
222 msort_with_tmp (p, b1, n1);
223 msort_with_tmp (p, b2, n2);
a035a985 224
709fbd3e
AZ
225 char *tmp = p->t;
226 const size_t s = p->s;
227 __compar_d_fn_t cmp = p->cmp;
228 void *arg = p->arg;
229 switch (p->var)
a035a985 230 {
709fbd3e
AZ
231 case SWAP_WORDS_32:
232 while (n1 > 0 && n2 > 0)
233 {
234 if (cmp (b1, b2, arg) <= 0)
235 {
236 *(u32_alias_t *) tmp = *(u32_alias_t *) b1;
237 b1 += sizeof (u32_alias_t);
238 --n1;
239 }
240 else
241 {
242 *(u32_alias_t *) tmp = *(u32_alias_t *) b2;
243 b2 += sizeof (u32_alias_t);
244 --n2;
245 }
246 tmp += sizeof (u32_alias_t);
247 }
248 break;
249 case SWAP_WORDS_64:
250 while (n1 > 0 && n2 > 0)
251 {
252 if (cmp (b1, b2, arg) <= 0)
253 {
254 *(u64_alias_t *) tmp = *(u64_alias_t *) b1;
255 b1 += sizeof (u64_alias_t);
256 --n1;
257 }
258 else
259 {
260 *(u64_alias_t *) tmp = *(u64_alias_t *) b2;
261 b2 += sizeof (u64_alias_t);
262 --n2;
263 }
264 tmp += sizeof (u64_alias_t);
265 }
266 break;
267 case SWAP_VOID_ARG:
268 while (n1 > 0 && n2 > 0)
269 {
270 if ((*cmp) (*(const void **) b1, *(const void **) b2, arg) <= 0)
271 {
272 *(void **) tmp = *(void **) b1;
273 b1 += sizeof (void *);
274 --n1;
275 }
276 else
277 {
278 *(void **) tmp = *(void **) b2;
279 b2 += sizeof (void *);
280 --n2;
281 }
282 tmp += sizeof (void *);
283 }
284 break;
285 default:
286 while (n1 > 0 && n2 > 0)
287 {
288 if (cmp (b1, b2, arg) <= 0)
289 {
290 tmp = (char *) __mempcpy (tmp, b1, s);
291 b1 += s;
292 --n1;
293 }
294 else
295 {
296 tmp = (char *) __mempcpy (tmp, b2, s);
297 b2 += s;
298 --n2;
299 }
300 }
301 break;
a035a985 302 }
28f540f4 303
709fbd3e
AZ
304 if (n1 > 0)
305 memcpy (tmp, b1, n1 * s);
306 memcpy (b, p->t, (n - n2) * s);
307}
28f540f4 308
709fbd3e
AZ
309static void
310__attribute_used__
311indirect_msort_with_tmp (const struct msort_param *p, void *b, size_t n,
312 size_t s)
313{
314 /* Indirect sorting. */
315 char *ip = (char *) b;
316 void **tp = (void **) (p->t + n * sizeof (void *));
317 void **t = tp;
318 void *tmp_storage = (void *) (tp + n);
28f540f4 319
709fbd3e
AZ
320 while ((void *) t < tmp_storage)
321 {
322 *t++ = ip;
323 ip += s;
324 }
325 msort_with_tmp (p, p->t + n * sizeof (void *), n);
326
327 /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
328 the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
329 char *kp;
330 size_t i;
331 for (i = 0, ip = (char *) b; i < n; i++, ip += s)
332 if ((kp = tp[i]) != ip)
333 {
334 size_t j = i;
335 char *jp = ip;
336 memcpy (tmp_storage, ip, s);
337
338 do
339 {
340 size_t k = (kp - (char *) b) / s;
341 tp[j] = jp;
342 memcpy (jp, kp, s);
343 j = k;
344 jp = kp;
345 kp = tp[k];
346 }
347 while (kp != ip);
348
349 tp[j] = jp;
350 memcpy (jp, tmp_storage, s);
351 }
352}
28f540f4
RM
353
354void
03bf8357
AZ
355__qsort_r (void *const pbase, size_t total_elems, size_t size,
356 __compar_d_fn_t cmp, void *arg)
28f540f4 357{
274a46c9 358 if (total_elems <= 1)
28f540f4
RM
359 return;
360
709fbd3e
AZ
361 /* Align to the maximum size used by the swap optimization. */
362 _Alignas (uint64_t) char tmp[QSORT_STACK_SIZE];
363 size_t total_size = total_elems * size;
364 char *buf;
21d30c77 365
709fbd3e
AZ
366 if (size > INDIRECT_SORT_SIZE_THRES)
367 total_size = 2 * total_elems * sizeof (void *) + size;
274a46c9 368
709fbd3e
AZ
369 if (total_size < sizeof buf)
370 buf = tmp;
371 else
28f540f4 372 {
709fbd3e
AZ
373 int save = errno;
374 buf = malloc (total_size);
375 __set_errno (save);
376 if (buf == NULL)
377 {
378 /* Fallback to heapsort in case of memory failure. */
379 heapsort_r (pbase, total_elems - 1, size, cmp, arg);
380 return;
381 }
382 }
383
384 if (size > INDIRECT_SORT_SIZE_THRES)
385 {
386 const struct msort_param msort_param =
387 {
388 .s = sizeof (void *),
389 .cmp = cmp,
390 .arg = arg,
391 .var = SWAP_VOID_ARG,
392 .t = buf,
393 };
394 indirect_msort_with_tmp (&msort_param, pbase, total_elems, size);
395 }
396 else
397 {
398 const struct msort_param msort_param =
399 {
400 .s = size,
401 .cmp = cmp,
402 .arg = arg,
403 .var = get_swap_type (pbase, size),
404 .t = buf,
405 };
406 msort_with_tmp (&msort_param, pbase, total_elems);
28f540f4
RM
407 }
408
709fbd3e
AZ
409 if (buf != tmp)
410 free (buf);
28f540f4 411}
03bf8357
AZ
412libc_hidden_def (__qsort_r)
413weak_alias (__qsort_r, qsort_r)
414
415void
416qsort (void *b, size_t n, size_t s, __compar_fn_t cmp)
417{
418 return __qsort_r (b, n, s, (__compar_d_fn_t) cmp, NULL);
419}
420libc_hidden_def (qsort)