]>
Commit | Line | Data |
---|---|---|
acf0174b | 1 | /* Affinity tests. |
83ffe9cd | 2 | Copyright (C) 2013-2023 Free Software Foundation, Inc. |
acf0174b JJ |
3 | |
4 | GCC is free software; you can redistribute it and/or modify it under | |
5 | the terms of the GNU General Public License as published by the Free | |
6 | Software Foundation; either version 3, or (at your option) any later | |
7 | version. | |
8 | ||
9 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 | for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with GCC; see the file COPYING3. If not see | |
16 | <http://www.gnu.org/licenses/>. */ | |
17 | ||
18 | /* { dg-do run } */ | |
19 | /* { dg-set-target-env-var OMP_PROC_BIND "false" } */ | |
10508db8 KCY |
20 | /* { dg-additional-options "-Wno-deprecated-declarations" } */ |
21 | /* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl -Wno-deprecated-declarations" { target *-*-linux* } } */ | |
acf0174b JJ |
22 | |
23 | #ifndef _GNU_SOURCE | |
24 | #define _GNU_SOURCE | |
25 | #endif | |
26 | #include "config.h" | |
acf0174b JJ |
27 | #include <omp.h> |
28 | #include <stdio.h> | |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | #include <unistd.h> | |
32 | ||
33 | #ifdef DO_FORK | |
34 | #include <signal.h> | |
c1d62412 | 35 | #include <sys/wait.h> |
acf0174b JJ |
36 | #endif |
37 | #ifdef HAVE_PTHREAD_AFFINITY_NP | |
38 | #include <sched.h> | |
39 | #include <pthread.h> | |
40 | #ifdef INTERPOSE_GETAFFINITY | |
41 | #include <dlfcn.h> | |
42 | #endif | |
43 | #endif | |
44 | ||
/* A contiguous run of CPUs: LEN consecutive CPU numbers beginning at
   START.  The first table entry below uses { -1, -1 }; code that checks
   a place treats a START of -1 as "nothing to verify".  */
struct place
{
  int start;
  int len;
};

/* One OMP_PLACES test case.  NAME is the string exported as OMP_PLACES,
   COUNT is the number of leading entries of PLACES that are filled in,
   and PLACES lists the expected places in order.  */
struct places
{
  const char *name;
  int count;
  struct place places[8];
};

/* Table of test cases.  Entry 0 (empty name) is the "no expectation"
   entry; lookups by name scan from the last entry down and stop at
   index 0 when nothing matches.  */
struct places places_array[] = {
  { .name = "",
    .count = 1,
    .places = { { -1, -1 } } },
  { .name = "{0}:8",
    .count = 8,
    .places = { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 },
		{ 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
  { .name = "{7,6}:2:-3",
    .count = 2,
    .places = { { 6, 2 }, { 3, 2 } } },
  { .name = "{6,7}:4:-2,!{2,3}",
    .count = 3,
    .places = { { 6, 2 }, { 4, 2 }, { 0, 2 } } },
  { .name = "{1}:7:1",
    .count = 7,
    .places = { { 1, 1 }, { 2, 1 }, { 3, 1 },
		{ 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
  { .name = "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}",
    .count = 5,
    .places = { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
  { .name = "1,2,{2,3,!2},3,3,!3,!{5:3:-1,!4,!5},{4},5,!4,!5,"
	    "1:2,!{1},!2,7:3:-2,!{5},!7,!3",
    .count = 3,
    .places = { { 1, 1 }, { 2, 1 }, { 3, 1 } } }
};
70 | ||
/* Number of consecutive CPUs, counted from CPU 0, that were set in the
   affinity mask seen by the pthread_getaffinity_np wrapper.  Remains 0
   when that wrapper is not compiled in or has not recorded a mask.  */
unsigned long contig_cpucount = 0;

/* The cpusetsize argument of the wrapper call that recorded
   contig_cpucount; 0 when nothing has been recorded.  */
unsigned long min_cpusetsize = 0;
73 | ||
74 | #if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \ | |
75 | && defined (CPU_ALLOC_SIZE) | |
76 | ||
77 | #if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY) | |
78 | int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *); | |
79 | ||
80 | int | |
81 | pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset) | |
82 | { | |
83 | int ret; | |
84 | unsigned long i, max; | |
85 | if (orig_getaffinity_np == NULL) | |
86 | { | |
87 | orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *)) | |
88 | dlsym (RTLD_NEXT, "pthread_getaffinity_np"); | |
89 | if (orig_getaffinity_np == NULL) | |
90 | exit (0); | |
91 | } | |
92 | ret = orig_getaffinity_np (thread, cpusetsize, cpuset); | |
93 | if (ret != 0) | |
94 | return ret; | |
95 | if (contig_cpucount == 0) | |
96 | { | |
97 | max = 8 * cpusetsize; | |
98 | for (i = 0; i < max; i++) | |
99 | if (!CPU_ISSET_S (i, cpusetsize, cpuset)) | |
100 | break; | |
101 | contig_cpucount = i; | |
102 | min_cpusetsize = cpusetsize; | |
103 | } | |
104 | return ret; | |
105 | } | |
106 | #endif | |
107 | ||
108 | void | |
109 | print_affinity (struct place p) | |
110 | { | |
111 | static unsigned long size; | |
112 | if (size == 0) | |
113 | { | |
114 | if (min_cpusetsize) | |
115 | size = min_cpusetsize; | |
116 | else | |
117 | { | |
118 | size = sysconf (_SC_NPROCESSORS_CONF); | |
119 | size = CPU_ALLOC_SIZE (size); | |
120 | if (size < sizeof (cpu_set_t)) | |
121 | size = sizeof (cpu_set_t); | |
122 | } | |
123 | } | |
ae246f7f | 124 | cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size); |
acf0174b JJ |
125 | if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0) |
126 | { | |
127 | unsigned long i, len, max = 8 * size; | |
128 | int notfirst = 0, unexpected = 1; | |
129 | ||
130 | printf (" bound to {"); | |
131 | for (i = 0, len = 0; i < max; i++) | |
132 | if (CPU_ISSET_S (i, size, cpusetp)) | |
133 | { | |
134 | if (len == 0) | |
135 | { | |
136 | if (notfirst) | |
137 | { | |
138 | unexpected = 1; | |
139 | printf (","); | |
140 | } | |
141 | else if (i == (unsigned long) p.start) | |
142 | unexpected = 0; | |
143 | notfirst = 1; | |
144 | printf ("%lu", i); | |
145 | } | |
146 | ++len; | |
147 | } | |
148 | else | |
149 | { | |
150 | if (len && len != (unsigned long) p.len) | |
151 | unexpected = 1; | |
152 | if (len > 1) | |
153 | printf (":%lu", len); | |
154 | len = 0; | |
155 | } | |
156 | if (len && len != (unsigned long) p.len) | |
157 | unexpected = 1; | |
158 | if (len > 1) | |
159 | printf (":%lu", len); | |
160 | printf ("}"); | |
161 | if (p.start != -1 && unexpected) | |
162 | { | |
163 | printf (", expected {%d", p.start); | |
164 | if (p.len != 1) | |
165 | printf (":%d", p.len); | |
166 | printf ("} instead"); | |
167 | } | |
168 | else if (p.start != -1) | |
169 | printf (", verified"); | |
170 | } | |
171 | } | |
172 | #else | |
173 | void | |
174 | print_affinity (struct place p) | |
175 | { | |
176 | (void) p.start; | |
177 | (void) p.len; | |
178 | } | |
179 | #endif | |
180 | ||
181 | ||
182 | int | |
183 | main () | |
184 | { | |
185 | char *env_proc_bind = getenv ("OMP_PROC_BIND"); | |
186 | int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0; | |
187 | int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0; | |
188 | int test_spread_master_close | |
432de084 TB |
189 | = (env_proc_bind |
190 | && (strcmp (env_proc_bind, "spread,master,close") == 0 | |
191 | || strcmp (env_proc_bind, "spread,primary,close") == 0)); | |
acf0174b JJ |
192 | char *env_places = getenv ("OMP_PLACES"); |
193 | int test_places = 0; | |
194 | ||
432de084 TB |
195 | if (omp_proc_bind_master != omp_proc_bind_primary) |
196 | abort (); | |
197 | ||
acf0174b JJ |
198 | #ifdef DO_FORK |
199 | if (env_places == NULL && contig_cpucount >= 8 && test_false | |
200 | && getenv ("GOMP_AFFINITY") == NULL) | |
201 | { | |
202 | int i, j, status; | |
203 | pid_t pid; | |
432de084 | 204 | for (j = 0; j < 3; j++) |
acf0174b | 205 | { |
432de084 TB |
206 | if (setenv ("OMP_PROC_BIND", |
207 | j > 1 ? "spread,primary,close" | |
208 | : (j ? "spread,master,close" : "true"), 1) < 0) | |
acf0174b JJ |
209 | break; |
210 | for (i = sizeof (places_array) / sizeof (places_array[0]) - 1; | |
211 | i; --i) | |
212 | { | |
213 | if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0) | |
214 | break; | |
215 | pid = fork (); | |
216 | if (pid == -1) | |
217 | break; | |
218 | if (pid == 0) | |
219 | { | |
220 | execl ("/proc/self/exe", "affinity-1.exe", NULL); | |
221 | _exit (1); | |
222 | } | |
223 | if (waitpid (pid, &status, 0) < 0) | |
224 | break; | |
225 | if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT) | |
226 | abort (); | |
227 | else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) | |
228 | break; | |
229 | } | |
230 | if (i) | |
231 | break; | |
232 | } | |
233 | } | |
234 | #endif | |
235 | ||
236 | int first = 1; | |
237 | if (env_proc_bind) | |
238 | { | |
239 | printf ("OMP_PROC_BIND='%s'", env_proc_bind); | |
240 | first = 0; | |
241 | } | |
242 | if (env_places) | |
243 | printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places); | |
244 | printf ("\n"); | |
245 | ||
246 | if (env_places && contig_cpucount >= 8 | |
247 | && (test_true || test_spread_master_close)) | |
248 | { | |
249 | for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1; | |
250 | test_places; --test_places) | |
251 | if (strcmp (env_places, places_array[test_places].name) == 0) | |
252 | break; | |
253 | } | |
254 | ||
255 | #define verify(if_true, if_s_m_c) \ | |
256 | if (test_false && omp_get_proc_bind () != omp_proc_bind_false) \ | |
257 | abort (); \ | |
258 | if (test_true && omp_get_proc_bind () != if_true) \ | |
259 | abort (); \ | |
260 | if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c) \ | |
261 | abort (); | |
262 | ||
263 | verify (omp_proc_bind_true, omp_proc_bind_spread); | |
264 | ||
265 | printf ("Initial thread"); | |
266 | print_affinity (places_array[test_places].places[0]); | |
267 | printf ("\n"); | |
268 | omp_set_nested (1); | |
269 | omp_set_dynamic (0); | |
270 | ||
271 | #pragma omp parallel if (0) | |
272 | { | |
273 | verify (omp_proc_bind_true, omp_proc_bind_master); | |
274 | #pragma omp parallel if (0) | |
275 | { | |
276 | verify (omp_proc_bind_true, omp_proc_bind_close); | |
277 | #pragma omp parallel if (0) | |
278 | { | |
279 | verify (omp_proc_bind_true, omp_proc_bind_close); | |
280 | } | |
281 | #pragma omp parallel if (0) proc_bind (spread) | |
282 | { | |
283 | verify (omp_proc_bind_spread, omp_proc_bind_spread); | |
284 | } | |
285 | } | |
286 | #pragma omp parallel if (0) proc_bind (master) | |
287 | { | |
288 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
289 | #pragma omp parallel if (0) | |
290 | { | |
291 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
292 | } | |
293 | #pragma omp parallel if (0) proc_bind (spread) | |
294 | { | |
295 | verify (omp_proc_bind_spread, omp_proc_bind_spread); | |
296 | } | |
297 | } | |
298 | } | |
299 | ||
300 | /* True/spread */ | |
301 | #pragma omp parallel num_threads (4) | |
302 | { | |
303 | verify (omp_proc_bind_true, omp_proc_bind_master); | |
304 | #pragma omp critical | |
305 | { | |
306 | struct place p = places_array[0].places[0]; | |
307 | int thr = omp_get_thread_num (); | |
308 | printf ("#1 thread %d", thr); | |
309 | if (omp_get_num_threads () == 4 && test_spread_master_close) | |
310 | switch (places_array[test_places].count) | |
311 | { | |
312 | case 8: | |
313 | /* T = 4, P = 8, each subpartition has 2 places. */ | |
314 | case 7: | |
315 | /* T = 4, P = 7, each subpartition has 2 places, but | |
316 | last partition, which has just one place. */ | |
317 | p = places_array[test_places].places[2 * thr]; | |
318 | break; | |
319 | case 5: | |
320 | /* T = 4, P = 5, first subpartition has 2 places, the | |
321 | rest just one. */ | |
322 | p = places_array[test_places].places[thr ? 1 + thr : 0]; | |
323 | break; | |
324 | case 3: | |
325 | /* T = 4, P = 3, unit sized subpartitions, first gets | |
326 | thr0 and thr3, second thr1, third thr2. */ | |
327 | p = places_array[test_places].places[thr == 3 ? 0 : thr]; | |
328 | break; | |
329 | case 2: | |
330 | /* T = 4, P = 2, unit sized subpartitions, each with | |
331 | 2 threads. */ | |
332 | p = places_array[test_places].places[thr / 2]; | |
333 | break; | |
334 | } | |
335 | print_affinity (p); | |
336 | printf ("\n"); | |
337 | } | |
338 | #pragma omp barrier | |
339 | if (omp_get_thread_num () == 3) | |
340 | { | |
341 | /* True/spread, true/master. */ | |
342 | #pragma omp parallel num_threads (3) | |
343 | { | |
344 | verify (omp_proc_bind_true, omp_proc_bind_close); | |
345 | #pragma omp critical | |
346 | { | |
347 | struct place p = places_array[0].places[0]; | |
348 | int thr = omp_get_thread_num (); | |
349 | printf ("#1,#1 thread 3,%d", thr); | |
350 | if (omp_get_num_threads () == 3 && test_spread_master_close) | |
351 | /* Outer is spread, inner master, so just bind to the | |
352 | place of the master thread, which is thr 3 above. */ | |
353 | switch (places_array[test_places].count) | |
354 | { | |
355 | case 8: | |
356 | case 7: | |
357 | p = places_array[test_places].places[6]; | |
358 | break; | |
359 | case 5: | |
360 | p = places_array[test_places].places[4]; | |
361 | break; | |
362 | case 3: | |
363 | p = places_array[test_places].places[0]; | |
364 | break; | |
365 | case 2: | |
366 | p = places_array[test_places].places[1]; | |
367 | break; | |
368 | } | |
369 | print_affinity (p); | |
370 | printf ("\n"); | |
371 | } | |
372 | } | |
373 | /* True/spread, spread. */ | |
374 | #pragma omp parallel num_threads (5) proc_bind (spread) | |
375 | { | |
376 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
377 | #pragma omp critical | |
378 | { | |
379 | struct place p = places_array[0].places[0]; | |
380 | int thr = omp_get_thread_num (); | |
381 | printf ("#1,#2 thread 3,%d", thr); | |
382 | if (omp_get_num_threads () == 5 && test_spread_master_close) | |
383 | /* Outer is spread, inner spread. */ | |
384 | switch (places_array[test_places].count) | |
385 | { | |
386 | case 8: | |
387 | /* T = 5, P = 2, unit sized subpartitions. */ | |
388 | p = places_array[test_places].places[thr == 4 ? 6 | |
389 | : 6 + thr / 2]; | |
390 | break; | |
391 | /* The rest are T = 5, P = 1. */ | |
392 | case 7: | |
393 | p = places_array[test_places].places[6]; | |
394 | break; | |
395 | case 5: | |
396 | p = places_array[test_places].places[4]; | |
397 | break; | |
398 | case 3: | |
399 | p = places_array[test_places].places[0]; | |
400 | break; | |
401 | case 2: | |
402 | p = places_array[test_places].places[1]; | |
403 | break; | |
404 | } | |
405 | print_affinity (p); | |
406 | printf ("\n"); | |
407 | } | |
408 | #pragma omp barrier | |
409 | if (omp_get_thread_num () == 3) | |
410 | { | |
411 | /* True/spread, spread, close. */ | |
412 | #pragma omp parallel num_threads (5) proc_bind (close) | |
413 | { | |
414 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
415 | #pragma omp critical | |
416 | { | |
417 | struct place p = places_array[0].places[0]; | |
418 | int thr = omp_get_thread_num (); | |
419 | printf ("#1,#2,#1 thread 3,3,%d", thr); | |
420 | if (omp_get_num_threads () == 5 && test_spread_master_close) | |
421 | /* Outer is spread, inner spread, innermost close. */ | |
422 | switch (places_array[test_places].count) | |
423 | { | |
424 | /* All are T = 5, P = 1. */ | |
425 | case 8: | |
426 | p = places_array[test_places].places[7]; | |
427 | break; | |
428 | case 7: | |
429 | p = places_array[test_places].places[6]; | |
430 | break; | |
431 | case 5: | |
432 | p = places_array[test_places].places[4]; | |
433 | break; | |
434 | case 3: | |
435 | p = places_array[test_places].places[0]; | |
436 | break; | |
437 | case 2: | |
438 | p = places_array[test_places].places[1]; | |
439 | break; | |
440 | } | |
441 | print_affinity (p); | |
442 | printf ("\n"); | |
443 | } | |
444 | } | |
445 | } | |
446 | } | |
447 | /* True/spread, master. */ | |
448 | #pragma omp parallel num_threads (4) proc_bind(master) | |
449 | { | |
450 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
451 | #pragma omp critical | |
452 | { | |
453 | struct place p = places_array[0].places[0]; | |
454 | int thr = omp_get_thread_num (); | |
455 | printf ("#1,#3 thread 3,%d", thr); | |
456 | if (omp_get_num_threads () == 4 && test_spread_master_close) | |
457 | /* Outer is spread, inner master, so just bind to the | |
458 | place of the master thread, which is thr 3 above. */ | |
459 | switch (places_array[test_places].count) | |
460 | { | |
461 | case 8: | |
462 | case 7: | |
463 | p = places_array[test_places].places[6]; | |
464 | break; | |
465 | case 5: | |
466 | p = places_array[test_places].places[4]; | |
467 | break; | |
468 | case 3: | |
469 | p = places_array[test_places].places[0]; | |
470 | break; | |
471 | case 2: | |
472 | p = places_array[test_places].places[1]; | |
473 | break; | |
474 | } | |
475 | print_affinity (p); | |
476 | printf ("\n"); | |
477 | } | |
478 | } | |
479 | /* True/spread, close. */ | |
480 | #pragma omp parallel num_threads (6) proc_bind (close) | |
481 | { | |
482 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
483 | #pragma omp critical | |
484 | { | |
485 | struct place p = places_array[0].places[0]; | |
486 | int thr = omp_get_thread_num (); | |
487 | printf ("#1,#4 thread 3,%d", thr); | |
488 | if (omp_get_num_threads () == 6 && test_spread_master_close) | |
489 | /* Outer is spread, inner close. */ | |
490 | switch (places_array[test_places].count) | |
491 | { | |
492 | case 8: | |
493 | /* T = 6, P = 2, unit sized subpartitions. */ | |
494 | p = places_array[test_places].places[6 + thr / 3]; | |
495 | break; | |
496 | /* The rest are T = 6, P = 1. */ | |
497 | case 7: | |
498 | p = places_array[test_places].places[6]; | |
499 | break; | |
500 | case 5: | |
501 | p = places_array[test_places].places[4]; | |
502 | break; | |
503 | case 3: | |
504 | p = places_array[test_places].places[0]; | |
505 | break; | |
506 | case 2: | |
507 | p = places_array[test_places].places[1]; | |
508 | break; | |
509 | } | |
510 | print_affinity (p); | |
511 | printf ("\n"); | |
512 | } | |
513 | } | |
514 | } | |
515 | } | |
516 | ||
517 | /* Spread. */ | |
518 | #pragma omp parallel num_threads (5) proc_bind(spread) | |
519 | { | |
520 | verify (omp_proc_bind_spread, omp_proc_bind_master); | |
521 | #pragma omp critical | |
522 | { | |
523 | struct place p = places_array[0].places[0]; | |
524 | int thr = omp_get_thread_num (); | |
525 | printf ("#2 thread %d", thr); | |
526 | if (omp_get_num_threads () == 5 | |
527 | && (test_spread_master_close || test_true)) | |
528 | switch (places_array[test_places].count) | |
529 | { | |
530 | case 8: | |
531 | /* T = 5, P = 8, first 3 subpartitions have 2 places, last | |
532 | 2 one place. */ | |
533 | p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr]; | |
534 | break; | |
535 | case 7: | |
536 | /* T = 5, P = 7, first 2 subpartitions have 2 places, last | |
537 | 3 one place. */ | |
538 | p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr]; | |
539 | break; | |
540 | case 5: | |
541 | /* T = 5, P = 5, unit sized subpartitions, each one with one | |
542 | thread. */ | |
543 | p = places_array[test_places].places[thr]; | |
544 | break; | |
545 | case 3: | |
546 | /* T = 5, P = 3, unit sized subpartitions, first gets | |
547 | thr0 and thr3, second thr1 and thr4, third thr2. */ | |
548 | p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; | |
549 | break; | |
550 | case 2: | |
551 | /* T = 5, P = 2, unit sized subpartitions, first with | |
552 | thr{0,1,4} and second with thr{2,3}. */ | |
553 | p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; | |
554 | break; | |
555 | } | |
556 | print_affinity (p); | |
557 | printf ("\n"); | |
558 | } | |
559 | #pragma omp barrier | |
560 | if (omp_get_thread_num () == 3) | |
561 | { | |
562 | int pp = 0; | |
563 | switch (places_array[test_places].count) | |
564 | { | |
565 | case 8: pp = 6; break; | |
566 | case 7: pp = 5; break; | |
567 | case 5: pp = 3; break; | |
568 | case 2: pp = 1; break; | |
569 | } | |
570 | /* Spread, spread/master. */ | |
571 | #pragma omp parallel num_threads (3) firstprivate (pp) | |
572 | { | |
573 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
574 | #pragma omp critical | |
575 | { | |
576 | struct place p = places_array[0].places[0]; | |
577 | int thr = omp_get_thread_num (); | |
578 | printf ("#2,#1 thread 3,%d", thr); | |
579 | if (test_spread_master_close || test_true) | |
580 | /* Outer is spread, inner spread resp. master, but we have | |
581 | just unit sized partitions. */ | |
582 | p = places_array[test_places].places[pp]; | |
583 | print_affinity (p); | |
584 | printf ("\n"); | |
585 | } | |
586 | } | |
587 | /* Spread, spread. */ | |
588 | #pragma omp parallel num_threads (5) proc_bind (spread) \ | |
589 | firstprivate (pp) | |
590 | { | |
591 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
592 | #pragma omp critical | |
593 | { | |
594 | struct place p = places_array[0].places[0]; | |
595 | int thr = omp_get_thread_num (); | |
596 | printf ("#2,#2 thread 3,%d", thr); | |
597 | if (test_spread_master_close || test_true) | |
598 | /* Outer is spread, inner spread, but we have | |
599 | just unit sized partitions. */ | |
600 | p = places_array[test_places].places[pp]; | |
601 | print_affinity (p); | |
602 | printf ("\n"); | |
603 | } | |
604 | } | |
605 | /* Spread, master. */ | |
606 | #pragma omp parallel num_threads (4) proc_bind(master) \ | |
607 | firstprivate(pp) | |
608 | { | |
609 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
610 | #pragma omp critical | |
611 | { | |
612 | struct place p = places_array[0].places[0]; | |
613 | int thr = omp_get_thread_num (); | |
614 | printf ("#2,#3 thread 3,%d", thr); | |
615 | if (test_spread_master_close || test_true) | |
616 | /* Outer is spread, inner master, but we have | |
617 | just unit sized partitions. */ | |
618 | p = places_array[test_places].places[pp]; | |
619 | print_affinity (p); | |
620 | printf ("\n"); | |
621 | } | |
622 | } | |
623 | /* Spread, close. */ | |
624 | #pragma omp parallel num_threads (6) proc_bind (close) \ | |
625 | firstprivate (pp) | |
626 | { | |
627 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
628 | #pragma omp critical | |
629 | { | |
630 | struct place p = places_array[0].places[0]; | |
631 | int thr = omp_get_thread_num (); | |
632 | printf ("#2,#4 thread 3,%d", thr); | |
633 | if (test_spread_master_close || test_true) | |
634 | /* Outer is spread, inner close, but we have | |
635 | just unit sized partitions. */ | |
636 | p = places_array[test_places].places[pp]; | |
637 | print_affinity (p); | |
638 | printf ("\n"); | |
639 | } | |
640 | } | |
641 | } | |
642 | } | |
643 | ||
644 | /* Master. */ | |
645 | #pragma omp parallel num_threads (3) proc_bind(master) | |
646 | { | |
647 | verify (omp_proc_bind_master, omp_proc_bind_master); | |
648 | #pragma omp critical | |
649 | { | |
650 | struct place p = places_array[0].places[0]; | |
651 | int thr = omp_get_thread_num (); | |
652 | printf ("#3 thread %d", thr); | |
653 | if (test_spread_master_close || test_true) | |
654 | p = places_array[test_places].places[0]; | |
655 | print_affinity (p); | |
656 | printf ("\n"); | |
657 | } | |
658 | #pragma omp barrier | |
659 | if (omp_get_thread_num () == 2) | |
660 | { | |
661 | /* Master, master. */ | |
662 | #pragma omp parallel num_threads (4) | |
663 | { | |
664 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
665 | #pragma omp critical | |
666 | { | |
667 | struct place p = places_array[0].places[0]; | |
668 | int thr = omp_get_thread_num (); | |
669 | printf ("#3,#1 thread 2,%d", thr); | |
670 | if (test_spread_master_close || test_true) | |
671 | /* Outer is master, inner is master. */ | |
672 | p = places_array[test_places].places[0]; | |
673 | print_affinity (p); | |
674 | printf ("\n"); | |
675 | } | |
676 | } | |
677 | /* Master, spread. */ | |
678 | #pragma omp parallel num_threads (4) proc_bind (spread) | |
679 | { | |
680 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
681 | #pragma omp critical | |
682 | { | |
683 | struct place p = places_array[0].places[0]; | |
684 | int thr = omp_get_thread_num (); | |
685 | printf ("#3,#2 thread 2,%d", thr); | |
686 | if (omp_get_num_threads () == 4 | |
687 | && (test_spread_master_close || test_true)) | |
688 | /* Outer is master, inner is spread. */ | |
689 | switch (places_array[test_places].count) | |
690 | { | |
691 | case 8: | |
692 | /* T = 4, P = 8, each subpartition has 2 places. */ | |
693 | case 7: | |
694 | /* T = 4, P = 7, each subpartition has 2 places, but | |
695 | last partition, which has just one place. */ | |
696 | p = places_array[test_places].places[2 * thr]; | |
697 | break; | |
698 | case 5: | |
699 | /* T = 4, P = 5, first subpartition has 2 places, the | |
700 | rest just one. */ | |
701 | p = places_array[test_places].places[thr ? 1 + thr : 0]; | |
702 | break; | |
703 | case 3: | |
704 | /* T = 4, P = 3, unit sized subpartitions, first gets | |
705 | thr0 and thr3, second thr1, third thr2. */ | |
706 | p = places_array[test_places].places[thr == 3 ? 0 : thr]; | |
707 | break; | |
708 | case 2: | |
709 | /* T = 4, P = 2, unit sized subpartitions, each with | |
710 | 2 threads. */ | |
711 | p = places_array[test_places].places[thr / 2]; | |
712 | break; | |
713 | } | |
714 | print_affinity (p); | |
715 | printf ("\n"); | |
716 | } | |
717 | #pragma omp barrier | |
718 | if (omp_get_thread_num () == 0) | |
719 | { | |
720 | /* Master, spread, close. */ | |
721 | #pragma omp parallel num_threads (5) proc_bind (close) | |
722 | { | |
723 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
724 | #pragma omp critical | |
725 | { | |
726 | struct place p = places_array[0].places[0]; | |
727 | int thr = omp_get_thread_num (); | |
728 | printf ("#3,#2,#1 thread 2,0,%d", thr); | |
729 | if (omp_get_num_threads () == 5 | |
730 | && (test_spread_master_close || test_true)) | |
731 | /* Outer is master, inner spread, innermost close. */ | |
732 | switch (places_array[test_places].count) | |
733 | { | |
734 | /* First 3 are T = 5, P = 2. */ | |
735 | case 8: | |
736 | case 7: | |
737 | case 5: | |
738 | p = places_array[test_places].places[(thr & 2) / 2]; | |
739 | break; | |
740 | /* All the rest are T = 5, P = 1. */ | |
741 | case 3: | |
742 | case 2: | |
743 | p = places_array[test_places].places[0]; | |
744 | break; | |
745 | } | |
746 | print_affinity (p); | |
747 | printf ("\n"); | |
748 | } | |
749 | } | |
750 | } | |
751 | #pragma omp barrier | |
752 | if (omp_get_thread_num () == 3) | |
753 | { | |
754 | /* Master, spread, close. */ | |
755 | #pragma omp parallel num_threads (5) proc_bind (close) | |
756 | { | |
757 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
758 | #pragma omp critical | |
759 | { | |
760 | struct place p = places_array[0].places[0]; | |
761 | int thr = omp_get_thread_num (); | |
762 | printf ("#3,#2,#2 thread 2,3,%d", thr); | |
763 | if (omp_get_num_threads () == 5 | |
764 | && (test_spread_master_close || test_true)) | |
765 | /* Outer is master, inner spread, innermost close. */ | |
766 | switch (places_array[test_places].count) | |
767 | { | |
768 | case 8: | |
769 | /* T = 5, P = 2. */ | |
770 | p = places_array[test_places].places[6 | |
771 | + (thr & 2) / 2]; | |
772 | break; | |
773 | /* All the rest are T = 5, P = 1. */ | |
774 | case 7: | |
775 | p = places_array[test_places].places[6]; | |
776 | break; | |
777 | case 5: | |
778 | p = places_array[test_places].places[4]; | |
779 | break; | |
780 | case 3: | |
781 | p = places_array[test_places].places[0]; | |
782 | break; | |
783 | case 2: | |
784 | p = places_array[test_places].places[1]; | |
785 | break; | |
786 | } | |
787 | print_affinity (p); | |
788 | printf ("\n"); | |
789 | } | |
790 | } | |
791 | } | |
792 | } | |
793 | /* Master, master. */ | |
794 | #pragma omp parallel num_threads (4) proc_bind(master) | |
795 | { | |
796 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
797 | #pragma omp critical | |
798 | { | |
799 | struct place p = places_array[0].places[0]; | |
800 | int thr = omp_get_thread_num (); | |
801 | printf ("#3,#3 thread 2,%d", thr); | |
802 | if (test_spread_master_close || test_true) | |
803 | /* Outer is master, inner master. */ | |
804 | p = places_array[test_places].places[0]; | |
805 | print_affinity (p); | |
806 | printf ("\n"); | |
807 | } | |
808 | } | |
809 | /* Master, close. */ | |
810 | #pragma omp parallel num_threads (6) proc_bind (close) | |
811 | { | |
812 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
813 | #pragma omp critical | |
814 | { | |
815 | struct place p = places_array[0].places[0]; | |
816 | int thr = omp_get_thread_num (); | |
817 | printf ("#3,#4 thread 2,%d", thr); | |
818 | if (omp_get_num_threads () == 6 | |
819 | && (test_spread_master_close || test_true)) | |
820 | switch (places_array[test_places].count) | |
821 | { | |
822 | case 8: | |
823 | /* T = 6, P = 8. */ | |
824 | case 7: | |
825 | /* T = 6, P = 7. */ | |
826 | p = places_array[test_places].places[thr]; | |
827 | break; | |
828 | case 5: | |
829 | /* T = 6, P = 5. thr{0,5} go into the first place. */ | |
830 | p = places_array[test_places].places[thr == 5 ? 0 : thr]; | |
831 | break; | |
832 | case 3: | |
833 | /* T = 6, P = 3, two threads into each place. */ | |
834 | p = places_array[test_places].places[thr / 2]; | |
835 | break; | |
836 | case 2: | |
837 | /* T = 6, P = 2, 3 threads into each place. */ | |
838 | p = places_array[test_places].places[thr / 3]; | |
839 | break; | |
840 | } | |
841 | print_affinity (p); | |
842 | printf ("\n"); | |
843 | } | |
844 | } | |
845 | } | |
846 | } | |
847 | ||
848 | #pragma omp parallel num_threads (5) proc_bind(close) | |
849 | { | |
850 | verify (omp_proc_bind_close, omp_proc_bind_master); | |
851 | #pragma omp critical | |
852 | { | |
853 | struct place p = places_array[0].places[0]; | |
854 | int thr = omp_get_thread_num (); | |
855 | printf ("#4 thread %d", thr); | |
856 | if (omp_get_num_threads () == 5 | |
857 | && (test_spread_master_close || test_true)) | |
858 | switch (places_array[test_places].count) | |
859 | { | |
860 | case 8: | |
861 | /* T = 5, P = 8. */ | |
862 | case 7: | |
863 | /* T = 5, P = 7. */ | |
864 | case 5: | |
865 | /* T = 5, P = 5. */ | |
866 | p = places_array[test_places].places[thr]; | |
867 | break; | |
868 | case 3: | |
869 | /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second, | |
870 | thr2 in third. */ | |
871 | p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; | |
872 | break; | |
873 | case 2: | |
874 | /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second. */ | |
875 | p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; | |
876 | break; | |
877 | } | |
878 | print_affinity (p); | |
879 | printf ("\n"); | |
880 | } | |
881 | #pragma omp barrier | |
882 | if (omp_get_thread_num () == 2) | |
883 | { | |
884 | int pp = 0; | |
885 | switch (places_array[test_places].count) | |
886 | { | |
887 | case 8: | |
888 | case 7: | |
889 | case 5: | |
890 | case 3: | |
891 | pp = 2; | |
892 | break; | |
893 | case 2: | |
894 | pp = 1; | |
895 | break; | |
896 | } | |
897 | /* Close, close/master. */ | |
898 | #pragma omp parallel num_threads (4) firstprivate (pp) | |
899 | { | |
900 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
901 | #pragma omp critical | |
902 | { | |
903 | struct place p = places_array[0].places[0]; | |
904 | int thr = omp_get_thread_num (); | |
905 | printf ("#4,#1 thread 2,%d", thr); | |
906 | if (test_spread_master_close) | |
907 | /* Outer is close, inner is master. */ | |
908 | p = places_array[test_places].places[pp]; | |
909 | else if (omp_get_num_threads () == 4 && test_true) | |
910 | /* Outer is close, inner is close. */ | |
911 | switch (places_array[test_places].count) | |
912 | { | |
913 | case 8: | |
914 | /* T = 4, P = 8. */ | |
915 | case 7: | |
916 | /* T = 4, P = 7. */ | |
917 | p = places_array[test_places].places[2 + thr]; | |
918 | break; | |
919 | case 5: | |
920 | /* T = 4, P = 5. There is wrap-around for thr3. */ | |
921 | p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr]; | |
922 | break; | |
923 | case 3: | |
924 | /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2 | |
925 | into p1. */ | |
926 | p = places_array[test_places].places[(2 + thr) % 3]; | |
927 | break; | |
928 | case 2: | |
929 | /* T = 4, P = 2, 2 threads into each place. */ | |
930 | p = places_array[test_places].places[1 - thr / 2]; | |
931 | break; | |
932 | } | |
933 | ||
934 | print_affinity (p); | |
935 | printf ("\n"); | |
936 | } | |
937 | } | |
938 | /* Close, spread. */ | |
939 | #pragma omp parallel num_threads (4) proc_bind (spread) | |
940 | { | |
941 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
942 | #pragma omp critical | |
943 | { | |
944 | struct place p = places_array[0].places[0]; | |
945 | int thr = omp_get_thread_num (); | |
946 | printf ("#4,#2 thread 2,%d", thr); | |
947 | if (omp_get_num_threads () == 4 | |
948 | && (test_spread_master_close || test_true)) | |
949 | /* Outer is close, inner is spread. */ | |
950 | switch (places_array[test_places].count) | |
951 | { | |
952 | case 8: | |
953 | /* T = 4, P = 8, each subpartition has 2 places. */ | |
954 | case 7: | |
955 | /* T = 4, P = 7, each subpartition has 2 places, but | |
956 | last partition, which has just one place. */ | |
957 | p = places_array[test_places].places[thr == 3 ? 0 | |
958 | : 2 + 2 * thr]; | |
959 | break; | |
960 | case 5: | |
961 | /* T = 4, P = 5, first subpartition has 2 places, the | |
962 | rest just one. */ | |
963 | p = places_array[test_places].places[thr == 3 ? 0 | |
964 | : 2 + thr]; | |
965 | break; | |
966 | case 3: | |
967 | /* T = 4, P = 3, unit sized subpartitions, third gets | |
968 | thr0 and thr3, first thr1, second thr2. */ | |
969 | p = places_array[test_places].places[thr == 0 ? 2 : thr - 1]; | |
970 | break; | |
971 | case 2: | |
972 | /* T = 4, P = 2, unit sized subpartitions, each with | |
973 | 2 threads. */ | |
974 | p = places_array[test_places].places[1 - thr / 2]; | |
975 | break; | |
976 | } | |
977 | print_affinity (p); | |
978 | printf ("\n"); | |
979 | } | |
980 | #pragma omp barrier | |
981 | if (omp_get_thread_num () == 0) | |
982 | { | |
983 | /* Close, spread, close. */ | |
984 | #pragma omp parallel num_threads (5) proc_bind (close) | |
985 | { | |
986 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
987 | #pragma omp critical | |
988 | { | |
989 | struct place p = places_array[0].places[0]; | |
990 | int thr = omp_get_thread_num (); | |
991 | printf ("#4,#2,#1 thread 2,0,%d", thr); | |
992 | if (omp_get_num_threads () == 5 | |
993 | && (test_spread_master_close || test_true)) | |
994 | /* Outer is close, inner spread, innermost close. */ | |
995 | switch (places_array[test_places].count) | |
996 | { | |
997 | case 8: | |
998 | case 7: | |
999 | /* T = 5, P = 2. */ | |
1000 | p = places_array[test_places].places[2 | |
1001 | + (thr & 2) / 2]; | |
1002 | break; | |
1003 | /* All the rest are T = 5, P = 1. */ | |
1004 | case 5: | |
1005 | case 3: | |
1006 | p = places_array[test_places].places[2]; | |
1007 | break; | |
1008 | case 2: | |
1009 | p = places_array[test_places].places[1]; | |
1010 | break; | |
1011 | } | |
1012 | print_affinity (p); | |
1013 | printf ("\n"); | |
1014 | } | |
1015 | } | |
1016 | } | |
1017 | #pragma omp barrier | |
1018 | if (omp_get_thread_num () == 2) | |
1019 | { | |
1020 | /* Close, spread, close. */ | |
1021 | #pragma omp parallel num_threads (5) proc_bind (close) | |
1022 | { | |
1023 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
1024 | #pragma omp critical | |
1025 | { | |
1026 | struct place p = places_array[0].places[0]; | |
1027 | int thr = omp_get_thread_num (); | |
1028 | printf ("#4,#2,#2 thread 2,2,%d", thr); | |
1029 | if (omp_get_num_threads () == 5 | |
1030 | && (test_spread_master_close || test_true)) | |
1031 | /* Outer is close, inner spread, innermost close. */ | |
1032 | switch (places_array[test_places].count) | |
1033 | { | |
1034 | case 8: | |
1035 | /* T = 5, P = 2. */ | |
1036 | p = places_array[test_places].places[6 | |
1037 | + (thr & 2) / 2]; | |
1038 | break; | |
1039 | /* All the rest are T = 5, P = 1. */ | |
1040 | case 7: | |
1041 | p = places_array[test_places].places[6]; | |
1042 | break; | |
1043 | case 5: | |
1044 | p = places_array[test_places].places[4]; | |
1045 | break; | |
1046 | case 3: | |
1047 | p = places_array[test_places].places[1]; | |
1048 | break; | |
1049 | case 2: | |
1050 | p = places_array[test_places].places[0]; | |
1051 | break; | |
1052 | } | |
1053 | print_affinity (p); | |
1054 | printf ("\n"); | |
1055 | } | |
1056 | } | |
1057 | } | |
1058 | #pragma omp barrier | |
1059 | if (omp_get_thread_num () == 3) | |
1060 | { | |
1061 | /* Close, spread, close. */ | |
1062 | #pragma omp parallel num_threads (5) proc_bind (close) | |
1063 | { | |
1064 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
1065 | #pragma omp critical | |
1066 | { | |
1067 | struct place p = places_array[0].places[0]; | |
1068 | int thr = omp_get_thread_num (); | |
1069 | printf ("#4,#2,#3 thread 2,3,%d", thr); | |
1070 | if (omp_get_num_threads () == 5 | |
1071 | && (test_spread_master_close || test_true)) | |
1072 | /* Outer is close, inner spread, innermost close. */ | |
1073 | switch (places_array[test_places].count) | |
1074 | { | |
1075 | case 8: | |
1076 | case 7: | |
1077 | case 5: | |
1078 | /* T = 5, P = 2. */ | |
1079 | p = places_array[test_places].places[(thr & 2) / 2]; | |
1080 | break; | |
1081 | /* All the rest are T = 5, P = 1. */ | |
1082 | case 3: | |
1083 | p = places_array[test_places].places[2]; | |
1084 | break; | |
1085 | case 2: | |
1086 | p = places_array[test_places].places[0]; | |
1087 | break; | |
1088 | } | |
1089 | print_affinity (p); | |
1090 | printf ("\n"); | |
1091 | } | |
1092 | } | |
1093 | } | |
1094 | } | |
1095 | /* Close, master. */ | |
1096 | #pragma omp parallel num_threads (4) proc_bind(master) \ | |
1097 | firstprivate (pp) | |
1098 | { | |
1099 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
1100 | #pragma omp critical | |
1101 | { | |
1102 | struct place p = places_array[0].places[0]; | |
1103 | int thr = omp_get_thread_num (); | |
1104 | printf ("#4,#3 thread 2,%d", thr); | |
1105 | if (test_spread_master_close || test_true) | |
1106 | /* Outer is close, inner master. */ | |
1107 | p = places_array[test_places].places[pp]; | |
1108 | print_affinity (p); | |
1109 | printf ("\n"); | |
1110 | } | |
1111 | } | |
1112 | /* Close, close. */ | |
1113 | #pragma omp parallel num_threads (6) proc_bind (close) | |
1114 | { | |
1115 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
1116 | #pragma omp critical | |
1117 | { | |
1118 | struct place p = places_array[0].places[0]; | |
1119 | int thr = omp_get_thread_num (); | |
1120 | printf ("#4,#4 thread 2,%d", thr); | |
1121 | if (omp_get_num_threads () == 6 | |
1122 | && (test_spread_master_close || test_true)) | |
1123 | switch (places_array[test_places].count) | |
1124 | { | |
1125 | case 8: | |
1126 | /* T = 6, P = 8. */ | |
1127 | p = places_array[test_places].places[2 + thr]; | |
1128 | break; | |
1129 | case 7: | |
1130 | /* T = 6, P = 7. */ | |
1131 | p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr]; | |
1132 | break; | |
1133 | case 5: | |
1134 | /* T = 6, P = 5. thr{0,5} go into the third place. */ | |
1135 | p = places_array[test_places].places[thr >= 3 ? thr - 3 | |
1136 | : 2 + thr]; | |
1137 | break; | |
1138 | case 3: | |
1139 | /* T = 6, P = 3, two threads into each place. */ | |
1140 | p = places_array[test_places].places[thr < 2 ? 2 | |
1141 | : thr / 2 - 1]; | |
1142 | break; | |
1143 | case 2: | |
1144 | /* T = 6, P = 2, 3 threads into each place. */ | |
1145 | p = places_array[test_places].places[1 - thr / 3]; | |
1146 | break; | |
1147 | } | |
1148 | print_affinity (p); | |
1149 | printf ("\n"); | |
1150 | } | |
1151 | } | |
1152 | } | |
1153 | } | |
1154 | ||
1155 | return 0; | |
1156 | } |