/* Test and benchmark of a couple of parallel sorting algorithms.
   Copyright (C) 2008-2022 Free Software Foundation, Inc.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
18 /* { dg-additional-options "-Wno-deprecated-declarations" } */
/* verify: report the timing of the sort labelled NAME and check its output.
   Reads the current wall-clock time with omp_get_wtime, prints NAME and
   the time elapsed since STIME, then walks ARRAY from index 1 up to
   COUNT - 1 and prints "<NAME>: incorrectly sorted" when an element is
   smaller than its predecessor.
   NOTE(review): the return type, the braces, the declaration of i and
   several numbered lines are absent from this extract, so any further
   action taken on a mismatch cannot be confirmed from here.  */
32 verify (const char *name
, double stime
, int *array
, int count
)
35 double etime
= omp_get_wtime ();
37 printf ("%s: %g\n", name
, etime
- stime
);
38 for (i
= 1; i
< count
; i
++)
39 if (array
[i
] < array
[i
- 1])
41 printf ("%s: incorrectly sorted\n", name
);
/* insertsort: insertion-style pass over ARRAY for indices S + 1 through E
   inclusive (the outer loop runs while i <= e).  The inner loop steps j
   downward, stopping once j has reached S or array[j] is no longer
   greater than val, and moves each larger element one slot to the right.
   NOTE(review): the statements that initialise val and j and that store
   val into its final slot are not present in this extract; presumably
   val = array[i] and j = i before the while loop -- confirm against the
   complete file.  */
47 insertsort (int *array
, int s
, int e
)
50 for (i
= s
+ 1; i
<= e
; i
++)
54 while (j
-- > s
&& val
< array
[j
])
55 array
[j
+ 1] = array
[j
];
/* STACK_SIZE is the element count of the arr member declared just below:
   4 * CHAR_BIT * sizeof (int).
   NOTE(review): the expansion is not wrapped in parentheses.  As an array
   bound or as the right operand of < this is harmless, but an expression
   such as x / STACK_SIZE or x % STACK_SIZE would group incorrectly;
   consider (4 * CHAR_BIT * sizeof (int)).
   NOTE(review): the enclosing declaration (presumably struct
   int_pair_stack) is not visible in this extract.  */
69 #define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
70 struct int_pair arr
[STACK_SIZE
];
/* init_int_pair_stack: reset STACK by pointing its top member at the
   first element of its arr member.
   NOTE(review): return type and braces are missing from this extract.  */
74 init_int_pair_stack (struct int_pair_stack
*stack
)
76 stack
->top
= &stack
->arr
[0];
/* push_int_pair_stack (STACK, LO, HI).
   NOTE(review): only the parameter list survives in this extract; the
   body is not visible.  Presumably it stores the pair (LO, HI) at the
   top of STACK and advances top -- confirm against the complete file.
   No capacity check can be confirmed from here.  */
80 push_int_pair_stack (struct int_pair_stack
*stack
, int lo
, int hi
)
/* pop_int_pair_stack (STACK, LO, HI).
   NOTE(review): only the parameter list survives in this extract; the
   body is not visible.  Presumably it removes the most recently pushed
   pair from STACK and writes it through LO and HI -- confirm against
   the complete file.  No emptiness check can be confirmed from here.  */
88 pop_int_pair_stack (struct int_pair_stack
*stack
, int *lo
, int *hi
)
/* size_int_pair_stack: return the distance, in elements, between
   STACK's top pointer and the first element of its arr member, i.e. the
   pointer difference top - &arr[0].
   NOTE(review): return type and braces are missing from this extract.  */
96 size_int_pair_stack (struct int_pair_stack
*stack
)
98 return stack
->top
- &stack
->arr
[0];
/* Architecture-selected statement:
     - i386 / x86_64: call __builtin_ia32_pause.
     - ia64: asm "hint @pause".
     - SPARC with __arch64__ or __sparc_v9__: asm "membar #LoadLoad".
     - last asm: empty template.
   Every asm statement here is volatile and lists "memory" as a clobber.
   NOTE(review): the header of the enclosing function and the
   preprocessor lines that would introduce and close the last branch
   (presumably #else / #endif) are missing from this extract.  */
104 #if defined __i386__ || defined __x86_64__
105 __builtin_ia32_pause ();
106 #elif defined __ia64__
107 __asm
volatile ("hint @pause" : : : "memory");
108 #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
109 __asm
volatile ("membar #LoadLoad" : : : "memory");
111 __asm
volatile ("" : : : "memory");
/* swap (ARRAY, A, B).
   NOTE(review): only the parameter list survives in this extract; the
   body is not visible.  Presumably it exchanges ARRAY[A] and ARRAY[B] --
   confirm against the complete file.  */
116 swap (int *array
, int a
, int b
)
/* choose_pivot: compute mid = (lo + hi) / 2 and conditionally call swap
   on the positions LO, mid and HI of ARRAY:
     - if array[mid] < array[lo], swap LO and mid;
     - if array[hi] < array[mid], swap mid and HI;
     - if array[mid] < array[lo], swap LO and mid.
   NOTE(review): this looks like it leaves array[lo], array[mid] and
   array[hi] in non-decreasing order -- confirm.  The nesting of the
   second and third tests and the return statement are not visible in
   this extract; the caller assigns the result to a variable named
   pivot.
   NOTE(review): lo + hi is computed in int and could overflow for very
   large index values.  */
124 choose_pivot (int *array
, int lo
, int hi
)
126 int mid
= (lo
+ hi
) / 2;
128 if (array
[mid
] < array
[lo
])
129 swap (array
, lo
, mid
);
130 if (array
[hi
] < array
[mid
])
132 swap (array
, mid
, hi
);
133 if (array
[mid
] < array
[lo
])
134 swap (array
, lo
, mid
);
/* partition: obtain a pivot value from choose_pivot (ARRAY, LO, HI), then
   pre-increment left past elements smaller than the pivot, pre-decrement
   right past elements larger than the pivot, and call swap on positions
   left and right.
   NOTE(review): the initialisation of left and right, the enclosing
   loop, its termination test and the return statement are missing from
   this extract.  Callers in this file assign the result to mid and
   recurse on LO..mid - 1 and mid..HI, so the result is presumably the
   split index -- confirm against the complete file.  */
140 partition (int *array
, int lo
, int hi
)
142 int pivot
= choose_pivot (array
, lo
, hi
);
148 while (array
[++left
] < pivot
);
149 while (array
[--right
] > pivot
);
152 swap (array
, left
, right
);
/* sort1: sort ARRAY inside one OpenMP parallel region, using a
   lock-protected global_stack of index ranges plus one local_stack per
   thread.

   What this extract shows:
     - lock and global_stack are initialised before the parallel region
       and lock is destroyed after it;
     - thread 0 records omp_get_num_threads () in num_threads;
     - a range with hi - lo < THRESHOLD is passed to insertsort,
       otherwise it is split with partition;
     - when the local stack is empty the thread takes lock and inspects
       or pops global_stack; otherwise it pops its local stack;
     - a pending range (next_lo, next_hi) goes to global_stack, under
       lock, only while that stack holds fewer than num_threads - 1
       entries and fewer than STACK_SIZE entries; otherwise it goes to
       the local stack.

   NOTE(review): many lines are missing from this extract (loop frame,
   idle/busy bookkeeping, the rest of the while condition, the
   assignments to next_lo / next_hi, and the declarations of lock, size
   and num_threads), so termination and work hand-off cannot be
   confirmed from here.  */
158 sort1 (int *array
, int count
)
161 struct int_pair_stack global_stack
;
165 omp_init_lock (&lock
);
166 init_int_pair_stack (&global_stack
);
167 #pragma omp parallel firstprivate (array, count)
169 int lo
= 0, hi
= 0, mid
, next_lo
, next_hi
;
171 struct int_pair_stack local_stack
;
173 init_int_pair_stack (&local_stack
);
174 if (omp_get_thread_num () == 0)
176 num_threads
= omp_get_num_threads ();
/* Small ranges are finished directly with insertsort.  */
183 if (hi
- lo
< THRESHOLD
)
185 insertsort (array
, lo
, hi
);
/* Local stack empty: look at the shared stack while holding the lock.  */
190 if (size_int_pair_stack (&local_stack
) == 0)
193 omp_set_lock (&lock
);
194 if (size_int_pair_stack (&global_stack
) == 0)
200 omp_unset_lock (&lock
);
203 omp_unset_lock (&lock
);
/* NOTE(review): the remainder of this while condition and its body are
   missing from the extract; the size test here appears to run without
   the lock held.  */
205 while (size_int_pair_stack (&global_stack
) == 0
212 pop_int_pair_stack (&global_stack
, &lo
, &hi
);
213 omp_unset_lock (&lock
);
217 pop_int_pair_stack (&local_stack
, &lo
, &hi
);
220 mid
= partition (array
, lo
, hi
);
/* Compares the sizes of the two sides of the split; the branches that
   presumably assign next_lo / next_hi are not visible.  */
221 if (mid
- lo
< hi
- mid
)
234 if (next_hi
- next_lo
< THRESHOLD
)
235 insertsort (array
, next_lo
, next_hi
);
/* The size test is made once here, apparently before the lock is taken,
   and then repeated on a fresh reading after omp_set_lock.  */
238 if (size_int_pair_stack (&global_stack
) < num_threads
- 1)
242 omp_set_lock (&lock
);
243 size
= size_int_pair_stack (&global_stack
);
244 if (size
< num_threads
- 1 && size
< STACK_SIZE
)
245 push_int_pair_stack (&global_stack
, next_lo
, next_hi
);
247 push_int_pair_stack (&local_stack
, next_lo
, next_hi
);
248 omp_unset_lock (&lock
);
251 push_int_pair_stack (&local_stack
, next_lo
, next_hi
);
255 omp_destroy_lock (&lock
);
/* sort2_1: recursive worker for sort2 over the inclusive range LO..HI of
   ARRAY.
     - A range with hi - lo < THRESHOLD is passed to insertsort.
     - Otherwise the range is split with partition into LO..mid - 1 and
       mid..HI.
     - When *BUSY >= NUM_THREADS both halves are sorted by direct
       recursive calls.
     - Otherwise a nested parallel region with num_threads (2) is
       opened: thread 0 takes LO..mid - 1, and the second call
       (presumably in the else branch, i.e. the other thread) takes
       mid..HI.
   NOTE(review): the return type, the early returns, the declaration of
   mid and any updates of *BUSY are missing from this extract, so how
   the busy counter is maintained cannot be confirmed from here.  */
259 sort2_1 (int *array
, int lo
, int hi
, int num_threads
, int *busy
)
263 if (hi
- lo
< THRESHOLD
)
265 insertsort (array
, lo
, hi
);
269 mid
= partition (array
, lo
, hi
);
271 if (*busy
>= num_threads
)
273 sort2_1 (array
, lo
, mid
- 1, num_threads
, busy
);
274 sort2_1 (array
, mid
, hi
, num_threads
, busy
);
281 #pragma omp parallel num_threads (2) \
282 firstprivate (array, lo, hi, mid, num_threads, busy)
284 if (omp_get_thread_num () == 0)
285 sort2_1 (array
, lo
, mid
- 1, num_threads
, busy
);
288 sort2_1 (array
, mid
, hi
, num_threads
, busy
);
/* sort2: entry point for the nested-parallelism variant.  Stores
   omp_get_num_threads () in num_threads inside an
   "omp single nowait" construct, then calls sort2_1 on the whole range
   0..COUNT - 1, passing num_threads and the address of busy.
   NOTE(review): the declarations of num_threads and busy, the enclosing
   parallel region (if any) and any nesting setup are missing from this
   extract.  */
296 sort2 (int *array
, int count
)
302 #pragma omp single nowait
303 num_threads
= omp_get_num_threads ();
305 sort2_1 (array
, 0, count
- 1, num_threads
, &busy
);
308 #if _OPENMP >= 200805
/* sort3_1: recursive worker for sort3 over the inclusive range LO..HI of
   ARRAY; only compiled when _OPENMP >= 200805.  A range with
   hi - lo < THRESHOLD is passed to insertsort; otherwise the range is
   split with partition and the function recurses on LO..mid - 1 and
   mid..HI.
   NOTE(review): the return type, the early return, the declaration of
   mid and any OpenMP directive attached to the recursive calls are
   missing from this extract.  */
310 sort3_1 (int *array
, int lo
, int hi
)
314 if (hi
- lo
< THRESHOLD
)
316 insertsort (array
, lo
, hi
);
320 mid
= partition (array
, lo
, hi
);
322 sort3_1 (array
, lo
, mid
- 1);
323 sort3_1 (array
, mid
, hi
);
/* sort3: entry point for the third variant; calls sort3_1 on the whole
   range 0..COUNT - 1 of ARRAY.
   NOTE(review): whatever OpenMP constructs surround this call are
   missing from this extract.  */
327 sort3 (int *array
, int count
)
331 sort3_1 (array
, 0, count
- 1);
/* main: benchmark driver.
     - COUNT defaults to 1000000 and is overwritten with
       strtoul (argv[1], NULL, 0).
     - Two int buffers of COUNT elements are allocated; on failure
       "allocation failure" is printed.
     - unsorted is filled with values from rand and the OpenMP thread
       count is printed.
     - For sort1, sort2 and (when _OPENMP >= 200805) sort3: unsorted is
       copied into sorted, the start time is taken with omp_get_wtime,
       the sort runs, and the result is handed to verify.
   NOTE(review): the guard on argv[1], the parallel region around the
   "single" construct, the declaration of stime, what follows the
   allocation-failure message and the end of the function are not
   visible in this extract.
   NOTE(review): the unsigned long result of strtoul is narrowed to int
   with no range or parse-error check visible here, so a negative or
   very large COUNT could reach malloc (count * sizeof (int)).  */
336 main (int argc
, char **argv
)
338 int i
, count
= 1000000;
340 int *unsorted
, *sorted
, num_threads
;
342 count
= strtoul (argv
[1], NULL
, 0);
344 unsorted
= malloc (count
* sizeof (int));
345 sorted
= malloc (count
* sizeof (int));
346 if (unsorted
== NULL
|| sorted
== NULL
)
348 puts ("allocation failure");
/* Every variant below sorts a fresh copy of this same input.  */
353 for (i
= 0; i
< count
; i
++)
354 unsorted
[i
] = rand ();
359 #pragma omp single nowait
360 num_threads
= omp_get_num_threads ();
361 printf ("Threads: %d\n", num_threads
);
363 memcpy (sorted
, unsorted
, count
* sizeof (int));
364 stime
= omp_get_wtime ();
365 sort1 (sorted
, count
);
366 verify ("sort1", stime
, sorted
, count
);
368 memcpy (sorted
, unsorted
, count
* sizeof (int));
369 stime
= omp_get_wtime ();
370 sort2 (sorted
, count
);
371 verify ("sort2", stime
, sorted
, count
);
373 #if _OPENMP >= 200805
374 memcpy (sorted
, unsorted
, count
* sizeof (int));
375 stime
= omp_get_wtime ();
376 sort3 (sorted
, count
);
377 verify ("sort3", stime
, sorted
, count
);