]>
Commit | Line | Data |
---|---|---|
bc7bff74 | 1 | /* Affinity tests. |
f1717362 | 2 | Copyright (C) 2013-2016 Free Software Foundation, Inc. |
bc7bff74 | 3 | |
4 | GCC is free software; you can redistribute it and/or modify it under | |
5 | the terms of the GNU General Public License as published by the Free | |
6 | Software Foundation; either version 3, or (at your option) any later | |
7 | version. | |
8 | ||
9 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
10 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 | for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with GCC; see the file COPYING3. If not see | |
16 | <http://www.gnu.org/licenses/>. */ | |
17 | ||
18 | /* { dg-do run } */ | |
19 | /* { dg-set-target-env-var OMP_PROC_BIND "false" } */ | |
20 | /* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */ | |
21 | ||
22 | #ifndef _GNU_SOURCE | |
23 | #define _GNU_SOURCE | |
24 | #endif | |
25 | #include "config.h" | |
bc7bff74 | 26 | #include <omp.h> |
27 | #include <stdio.h> | |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | #include <unistd.h> | |
31 | ||
32 | #ifdef DO_FORK | |
33 | #include <signal.h> | |
542ae458 | 34 | #include <sys/wait.h> |
bc7bff74 | 35 | #endif |
36 | #ifdef HAVE_PTHREAD_AFFINITY_NP | |
37 | #include <sched.h> | |
38 | #include <pthread.h> | |
39 | #ifdef INTERPOSE_GETAFFINITY | |
40 | #include <dlfcn.h> | |
41 | #endif | |
42 | #endif | |
43 | ||
/* A contiguous run of CPUs: LEN consecutive CPU numbers beginning at
   START.  A START of -1 means there is no expectation to verify.  */
struct place
{
  int start;
  int len;
};

/* One OMP_PLACES test case: NAME is the string put into OMP_PLACES,
   COUNT is the number of places it describes and PLACES lists them
   in order.  Unused trailing PLACES entries are zero-initialized.  */
struct places
{
  char name[40];
  int count;
  struct place places[8];
};

/* Entry 0 is the "nothing to verify" case; the remaining entries are
   the OMP_PLACES settings exercised by the test.  */
struct places places_array[] = {
  {
    .name = "",
    .count = 1,
    .places = { { -1, -1 } }
  },
  {
    .name = "{0}:8",
    .count = 8,
    .places = { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 },
                { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } }
  },
  {
    .name = "{7,6}:2:-3",
    .count = 2,
    .places = { { 6, 2 }, { 3, 2 } }
  },
  {
    .name = "{6,7}:4:-2,!{2,3}",
    .count = 3,
    .places = { { 6, 2 }, { 4, 2 }, { 0, 2 } }
  },
  {
    .name = "{1}:7:1",
    .count = 7,
    .places = { { 1, 1 }, { 2, 1 }, { 3, 1 },
                { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } }
  },
  {
    .name = "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}",
    .count = 5,
    .places = { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } }
  }
};
66 | ||
/* Number of consecutive CPUs, counting up from CPU 0, found set in the
   affinity mask by the interposed pthread_getaffinity_np; stays 0 until
   that wrapper has recorded a value.  */
unsigned long contig_cpucount;

/* The cpusetsize argument of the pthread_getaffinity_np call that
   recorded contig_cpucount; 0 while nothing has been recorded.  */
unsigned long min_cpusetsize;
69 | ||
70 | #if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \ | |
71 | && defined (CPU_ALLOC_SIZE) | |
72 | ||
73 | #if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY) | |
74 | int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *); | |
75 | ||
76 | int | |
77 | pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset) | |
78 | { | |
79 | int ret; | |
80 | unsigned long i, max; | |
81 | if (orig_getaffinity_np == NULL) | |
82 | { | |
83 | orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *)) | |
84 | dlsym (RTLD_NEXT, "pthread_getaffinity_np"); | |
85 | if (orig_getaffinity_np == NULL) | |
86 | exit (0); | |
87 | } | |
88 | ret = orig_getaffinity_np (thread, cpusetsize, cpuset); | |
89 | if (ret != 0) | |
90 | return ret; | |
91 | if (contig_cpucount == 0) | |
92 | { | |
93 | max = 8 * cpusetsize; | |
94 | for (i = 0; i < max; i++) | |
95 | if (!CPU_ISSET_S (i, cpusetsize, cpuset)) | |
96 | break; | |
97 | contig_cpucount = i; | |
98 | min_cpusetsize = cpusetsize; | |
99 | } | |
100 | return ret; | |
101 | } | |
102 | #endif | |
103 | ||
104 | void | |
105 | print_affinity (struct place p) | |
106 | { | |
107 | static unsigned long size; | |
108 | if (size == 0) | |
109 | { | |
110 | if (min_cpusetsize) | |
111 | size = min_cpusetsize; | |
112 | else | |
113 | { | |
114 | size = sysconf (_SC_NPROCESSORS_CONF); | |
115 | size = CPU_ALLOC_SIZE (size); | |
116 | if (size < sizeof (cpu_set_t)) | |
117 | size = sizeof (cpu_set_t); | |
118 | } | |
119 | } | |
17587b2a | 120 | cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size); |
bc7bff74 | 121 | if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0) |
122 | { | |
123 | unsigned long i, len, max = 8 * size; | |
124 | int notfirst = 0, unexpected = 1; | |
125 | ||
126 | printf (" bound to {"); | |
127 | for (i = 0, len = 0; i < max; i++) | |
128 | if (CPU_ISSET_S (i, size, cpusetp)) | |
129 | { | |
130 | if (len == 0) | |
131 | { | |
132 | if (notfirst) | |
133 | { | |
134 | unexpected = 1; | |
135 | printf (","); | |
136 | } | |
137 | else if (i == (unsigned long) p.start) | |
138 | unexpected = 0; | |
139 | notfirst = 1; | |
140 | printf ("%lu", i); | |
141 | } | |
142 | ++len; | |
143 | } | |
144 | else | |
145 | { | |
146 | if (len && len != (unsigned long) p.len) | |
147 | unexpected = 1; | |
148 | if (len > 1) | |
149 | printf (":%lu", len); | |
150 | len = 0; | |
151 | } | |
152 | if (len && len != (unsigned long) p.len) | |
153 | unexpected = 1; | |
154 | if (len > 1) | |
155 | printf (":%lu", len); | |
156 | printf ("}"); | |
157 | if (p.start != -1 && unexpected) | |
158 | { | |
159 | printf (", expected {%d", p.start); | |
160 | if (p.len != 1) | |
161 | printf (":%d", p.len); | |
162 | printf ("} instead"); | |
163 | } | |
164 | else if (p.start != -1) | |
165 | printf (", verified"); | |
166 | } | |
167 | } | |
168 | #else | |
169 | void | |
170 | print_affinity (struct place p) | |
171 | { | |
172 | (void) p.start; | |
173 | (void) p.len; | |
174 | } | |
175 | #endif | |
176 | ||
177 | ||
178 | int | |
179 | main () | |
180 | { | |
181 | char *env_proc_bind = getenv ("OMP_PROC_BIND"); | |
182 | int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0; | |
183 | int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0; | |
184 | int test_spread_master_close | |
185 | = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0; | |
186 | char *env_places = getenv ("OMP_PLACES"); | |
187 | int test_places = 0; | |
188 | ||
189 | #ifdef DO_FORK | |
190 | if (env_places == NULL && contig_cpucount >= 8 && test_false | |
191 | && getenv ("GOMP_AFFINITY") == NULL) | |
192 | { | |
193 | int i, j, status; | |
194 | pid_t pid; | |
195 | for (j = 0; j < 2; j++) | |
196 | { | |
197 | if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1) | |
198 | < 0) | |
199 | break; | |
200 | for (i = sizeof (places_array) / sizeof (places_array[0]) - 1; | |
201 | i; --i) | |
202 | { | |
203 | if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0) | |
204 | break; | |
205 | pid = fork (); | |
206 | if (pid == -1) | |
207 | break; | |
208 | if (pid == 0) | |
209 | { | |
210 | execl ("/proc/self/exe", "affinity-1.exe", NULL); | |
211 | _exit (1); | |
212 | } | |
213 | if (waitpid (pid, &status, 0) < 0) | |
214 | break; | |
215 | if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT) | |
216 | abort (); | |
217 | else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) | |
218 | break; | |
219 | } | |
220 | if (i) | |
221 | break; | |
222 | } | |
223 | } | |
224 | #endif | |
225 | ||
226 | int first = 1; | |
227 | if (env_proc_bind) | |
228 | { | |
229 | printf ("OMP_PROC_BIND='%s'", env_proc_bind); | |
230 | first = 0; | |
231 | } | |
232 | if (env_places) | |
233 | printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places); | |
234 | printf ("\n"); | |
235 | ||
236 | if (env_places && contig_cpucount >= 8 | |
237 | && (test_true || test_spread_master_close)) | |
238 | { | |
239 | for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1; | |
240 | test_places; --test_places) | |
241 | if (strcmp (env_places, places_array[test_places].name) == 0) | |
242 | break; | |
243 | } | |
244 | ||
245 | #define verify(if_true, if_s_m_c) \ | |
246 | if (test_false && omp_get_proc_bind () != omp_proc_bind_false) \ | |
247 | abort (); \ | |
248 | if (test_true && omp_get_proc_bind () != if_true) \ | |
249 | abort (); \ | |
250 | if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c) \ | |
251 | abort (); | |
252 | ||
253 | verify (omp_proc_bind_true, omp_proc_bind_spread); | |
254 | ||
255 | printf ("Initial thread"); | |
256 | print_affinity (places_array[test_places].places[0]); | |
257 | printf ("\n"); | |
258 | omp_set_nested (1); | |
259 | omp_set_dynamic (0); | |
260 | ||
261 | #pragma omp parallel if (0) | |
262 | { | |
263 | verify (omp_proc_bind_true, omp_proc_bind_master); | |
264 | #pragma omp parallel if (0) | |
265 | { | |
266 | verify (omp_proc_bind_true, omp_proc_bind_close); | |
267 | #pragma omp parallel if (0) | |
268 | { | |
269 | verify (omp_proc_bind_true, omp_proc_bind_close); | |
270 | } | |
271 | #pragma omp parallel if (0) proc_bind (spread) | |
272 | { | |
273 | verify (omp_proc_bind_spread, omp_proc_bind_spread); | |
274 | } | |
275 | } | |
276 | #pragma omp parallel if (0) proc_bind (master) | |
277 | { | |
278 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
279 | #pragma omp parallel if (0) | |
280 | { | |
281 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
282 | } | |
283 | #pragma omp parallel if (0) proc_bind (spread) | |
284 | { | |
285 | verify (omp_proc_bind_spread, omp_proc_bind_spread); | |
286 | } | |
287 | } | |
288 | } | |
289 | ||
290 | /* True/spread */ | |
291 | #pragma omp parallel num_threads (4) | |
292 | { | |
293 | verify (omp_proc_bind_true, omp_proc_bind_master); | |
294 | #pragma omp critical | |
295 | { | |
296 | struct place p = places_array[0].places[0]; | |
297 | int thr = omp_get_thread_num (); | |
298 | printf ("#1 thread %d", thr); | |
299 | if (omp_get_num_threads () == 4 && test_spread_master_close) | |
300 | switch (places_array[test_places].count) | |
301 | { | |
302 | case 8: | |
303 | /* T = 4, P = 8, each subpartition has 2 places. */ | |
304 | case 7: | |
305 | /* T = 4, P = 7, each subpartition has 2 places, but | |
306 | last partition, which has just one place. */ | |
307 | p = places_array[test_places].places[2 * thr]; | |
308 | break; | |
309 | case 5: | |
310 | /* T = 4, P = 5, first subpartition has 2 places, the | |
311 | rest just one. */ | |
312 | p = places_array[test_places].places[thr ? 1 + thr : 0]; | |
313 | break; | |
314 | case 3: | |
315 | /* T = 4, P = 3, unit sized subpartitions, first gets | |
316 | thr0 and thr3, second thr1, third thr2. */ | |
317 | p = places_array[test_places].places[thr == 3 ? 0 : thr]; | |
318 | break; | |
319 | case 2: | |
320 | /* T = 4, P = 2, unit sized subpartitions, each with | |
321 | 2 threads. */ | |
322 | p = places_array[test_places].places[thr / 2]; | |
323 | break; | |
324 | } | |
325 | print_affinity (p); | |
326 | printf ("\n"); | |
327 | } | |
328 | #pragma omp barrier | |
329 | if (omp_get_thread_num () == 3) | |
330 | { | |
331 | /* True/spread, true/master. */ | |
332 | #pragma omp parallel num_threads (3) | |
333 | { | |
334 | verify (omp_proc_bind_true, omp_proc_bind_close); | |
335 | #pragma omp critical | |
336 | { | |
337 | struct place p = places_array[0].places[0]; | |
338 | int thr = omp_get_thread_num (); | |
339 | printf ("#1,#1 thread 3,%d", thr); | |
340 | if (omp_get_num_threads () == 3 && test_spread_master_close) | |
341 | /* Outer is spread, inner master, so just bind to the | |
342 | place of the master thread, which is thr 3 above. */ | |
343 | switch (places_array[test_places].count) | |
344 | { | |
345 | case 8: | |
346 | case 7: | |
347 | p = places_array[test_places].places[6]; | |
348 | break; | |
349 | case 5: | |
350 | p = places_array[test_places].places[4]; | |
351 | break; | |
352 | case 3: | |
353 | p = places_array[test_places].places[0]; | |
354 | break; | |
355 | case 2: | |
356 | p = places_array[test_places].places[1]; | |
357 | break; | |
358 | } | |
359 | print_affinity (p); | |
360 | printf ("\n"); | |
361 | } | |
362 | } | |
363 | /* True/spread, spread. */ | |
364 | #pragma omp parallel num_threads (5) proc_bind (spread) | |
365 | { | |
366 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
367 | #pragma omp critical | |
368 | { | |
369 | struct place p = places_array[0].places[0]; | |
370 | int thr = omp_get_thread_num (); | |
371 | printf ("#1,#2 thread 3,%d", thr); | |
372 | if (omp_get_num_threads () == 5 && test_spread_master_close) | |
373 | /* Outer is spread, inner spread. */ | |
374 | switch (places_array[test_places].count) | |
375 | { | |
376 | case 8: | |
377 | /* T = 5, P = 2, unit sized subpartitions. */ | |
378 | p = places_array[test_places].places[thr == 4 ? 6 | |
379 | : 6 + thr / 2]; | |
380 | break; | |
381 | /* The rest are T = 5, P = 1. */ | |
382 | case 7: | |
383 | p = places_array[test_places].places[6]; | |
384 | break; | |
385 | case 5: | |
386 | p = places_array[test_places].places[4]; | |
387 | break; | |
388 | case 3: | |
389 | p = places_array[test_places].places[0]; | |
390 | break; | |
391 | case 2: | |
392 | p = places_array[test_places].places[1]; | |
393 | break; | |
394 | } | |
395 | print_affinity (p); | |
396 | printf ("\n"); | |
397 | } | |
398 | #pragma omp barrier | |
399 | if (omp_get_thread_num () == 3) | |
400 | { | |
401 | /* True/spread, spread, close. */ | |
402 | #pragma omp parallel num_threads (5) proc_bind (close) | |
403 | { | |
404 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
405 | #pragma omp critical | |
406 | { | |
407 | struct place p = places_array[0].places[0]; | |
408 | int thr = omp_get_thread_num (); | |
409 | printf ("#1,#2,#1 thread 3,3,%d", thr); | |
410 | if (omp_get_num_threads () == 5 && test_spread_master_close) | |
411 | /* Outer is spread, inner spread, innermost close. */ | |
412 | switch (places_array[test_places].count) | |
413 | { | |
414 | /* All are T = 5, P = 1. */ | |
415 | case 8: | |
416 | p = places_array[test_places].places[7]; | |
417 | break; | |
418 | case 7: | |
419 | p = places_array[test_places].places[6]; | |
420 | break; | |
421 | case 5: | |
422 | p = places_array[test_places].places[4]; | |
423 | break; | |
424 | case 3: | |
425 | p = places_array[test_places].places[0]; | |
426 | break; | |
427 | case 2: | |
428 | p = places_array[test_places].places[1]; | |
429 | break; | |
430 | } | |
431 | print_affinity (p); | |
432 | printf ("\n"); | |
433 | } | |
434 | } | |
435 | } | |
436 | } | |
437 | /* True/spread, master. */ | |
438 | #pragma omp parallel num_threads (4) proc_bind(master) | |
439 | { | |
440 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
441 | #pragma omp critical | |
442 | { | |
443 | struct place p = places_array[0].places[0]; | |
444 | int thr = omp_get_thread_num (); | |
445 | printf ("#1,#3 thread 3,%d", thr); | |
446 | if (omp_get_num_threads () == 4 && test_spread_master_close) | |
447 | /* Outer is spread, inner master, so just bind to the | |
448 | place of the master thread, which is thr 3 above. */ | |
449 | switch (places_array[test_places].count) | |
450 | { | |
451 | case 8: | |
452 | case 7: | |
453 | p = places_array[test_places].places[6]; | |
454 | break; | |
455 | case 5: | |
456 | p = places_array[test_places].places[4]; | |
457 | break; | |
458 | case 3: | |
459 | p = places_array[test_places].places[0]; | |
460 | break; | |
461 | case 2: | |
462 | p = places_array[test_places].places[1]; | |
463 | break; | |
464 | } | |
465 | print_affinity (p); | |
466 | printf ("\n"); | |
467 | } | |
468 | } | |
469 | /* True/spread, close. */ | |
470 | #pragma omp parallel num_threads (6) proc_bind (close) | |
471 | { | |
472 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
473 | #pragma omp critical | |
474 | { | |
475 | struct place p = places_array[0].places[0]; | |
476 | int thr = omp_get_thread_num (); | |
477 | printf ("#1,#4 thread 3,%d", thr); | |
478 | if (omp_get_num_threads () == 6 && test_spread_master_close) | |
479 | /* Outer is spread, inner close. */ | |
480 | switch (places_array[test_places].count) | |
481 | { | |
482 | case 8: | |
483 | /* T = 6, P = 2, unit sized subpartitions. */ | |
484 | p = places_array[test_places].places[6 + thr / 3]; | |
485 | break; | |
486 | /* The rest are T = 6, P = 1. */ | |
487 | case 7: | |
488 | p = places_array[test_places].places[6]; | |
489 | break; | |
490 | case 5: | |
491 | p = places_array[test_places].places[4]; | |
492 | break; | |
493 | case 3: | |
494 | p = places_array[test_places].places[0]; | |
495 | break; | |
496 | case 2: | |
497 | p = places_array[test_places].places[1]; | |
498 | break; | |
499 | } | |
500 | print_affinity (p); | |
501 | printf ("\n"); | |
502 | } | |
503 | } | |
504 | } | |
505 | } | |
506 | ||
507 | /* Spread. */ | |
508 | #pragma omp parallel num_threads (5) proc_bind(spread) | |
509 | { | |
510 | verify (omp_proc_bind_spread, omp_proc_bind_master); | |
511 | #pragma omp critical | |
512 | { | |
513 | struct place p = places_array[0].places[0]; | |
514 | int thr = omp_get_thread_num (); | |
515 | printf ("#2 thread %d", thr); | |
516 | if (omp_get_num_threads () == 5 | |
517 | && (test_spread_master_close || test_true)) | |
518 | switch (places_array[test_places].count) | |
519 | { | |
520 | case 8: | |
521 | /* T = 5, P = 8, first 3 subpartitions have 2 places, last | |
522 | 2 one place. */ | |
523 | p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr]; | |
524 | break; | |
525 | case 7: | |
526 | /* T = 5, P = 7, first 2 subpartitions have 2 places, last | |
527 | 3 one place. */ | |
528 | p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr]; | |
529 | break; | |
530 | case 5: | |
531 | /* T = 5, P = 5, unit sized subpartitions, each one with one | |
532 | thread. */ | |
533 | p = places_array[test_places].places[thr]; | |
534 | break; | |
535 | case 3: | |
536 | /* T = 5, P = 3, unit sized subpartitions, first gets | |
537 | thr0 and thr3, second thr1 and thr4, third thr2. */ | |
538 | p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; | |
539 | break; | |
540 | case 2: | |
541 | /* T = 5, P = 2, unit sized subpartitions, first with | |
542 | thr{0,1,4} and second with thr{2,3}. */ | |
543 | p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; | |
544 | break; | |
545 | } | |
546 | print_affinity (p); | |
547 | printf ("\n"); | |
548 | } | |
549 | #pragma omp barrier | |
550 | if (omp_get_thread_num () == 3) | |
551 | { | |
552 | int pp = 0; | |
553 | switch (places_array[test_places].count) | |
554 | { | |
555 | case 8: pp = 6; break; | |
556 | case 7: pp = 5; break; | |
557 | case 5: pp = 3; break; | |
558 | case 2: pp = 1; break; | |
559 | } | |
560 | /* Spread, spread/master. */ | |
561 | #pragma omp parallel num_threads (3) firstprivate (pp) | |
562 | { | |
563 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
564 | #pragma omp critical | |
565 | { | |
566 | struct place p = places_array[0].places[0]; | |
567 | int thr = omp_get_thread_num (); | |
568 | printf ("#2,#1 thread 3,%d", thr); | |
569 | if (test_spread_master_close || test_true) | |
570 | /* Outer is spread, inner spread resp. master, but we have | |
571 | just unit sized partitions. */ | |
572 | p = places_array[test_places].places[pp]; | |
573 | print_affinity (p); | |
574 | printf ("\n"); | |
575 | } | |
576 | } | |
577 | /* Spread, spread. */ | |
578 | #pragma omp parallel num_threads (5) proc_bind (spread) \ | |
579 | firstprivate (pp) | |
580 | { | |
581 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
582 | #pragma omp critical | |
583 | { | |
584 | struct place p = places_array[0].places[0]; | |
585 | int thr = omp_get_thread_num (); | |
586 | printf ("#2,#2 thread 3,%d", thr); | |
587 | if (test_spread_master_close || test_true) | |
588 | /* Outer is spread, inner spread, but we have | |
589 | just unit sized partitions. */ | |
590 | p = places_array[test_places].places[pp]; | |
591 | print_affinity (p); | |
592 | printf ("\n"); | |
593 | } | |
594 | } | |
595 | /* Spread, master. */ | |
596 | #pragma omp parallel num_threads (4) proc_bind(master) \ | |
597 | firstprivate(pp) | |
598 | { | |
599 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
600 | #pragma omp critical | |
601 | { | |
602 | struct place p = places_array[0].places[0]; | |
603 | int thr = omp_get_thread_num (); | |
604 | printf ("#2,#3 thread 3,%d", thr); | |
605 | if (test_spread_master_close || test_true) | |
606 | /* Outer is spread, inner master, but we have | |
607 | just unit sized partitions. */ | |
608 | p = places_array[test_places].places[pp]; | |
609 | print_affinity (p); | |
610 | printf ("\n"); | |
611 | } | |
612 | } | |
613 | /* Spread, close. */ | |
614 | #pragma omp parallel num_threads (6) proc_bind (close) \ | |
615 | firstprivate (pp) | |
616 | { | |
617 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
618 | #pragma omp critical | |
619 | { | |
620 | struct place p = places_array[0].places[0]; | |
621 | int thr = omp_get_thread_num (); | |
622 | printf ("#2,#4 thread 3,%d", thr); | |
623 | if (test_spread_master_close || test_true) | |
624 | /* Outer is spread, inner close, but we have | |
625 | just unit sized partitions. */ | |
626 | p = places_array[test_places].places[pp]; | |
627 | print_affinity (p); | |
628 | printf ("\n"); | |
629 | } | |
630 | } | |
631 | } | |
632 | } | |
633 | ||
634 | /* Master. */ | |
635 | #pragma omp parallel num_threads (3) proc_bind(master) | |
636 | { | |
637 | verify (omp_proc_bind_master, omp_proc_bind_master); | |
638 | #pragma omp critical | |
639 | { | |
640 | struct place p = places_array[0].places[0]; | |
641 | int thr = omp_get_thread_num (); | |
642 | printf ("#3 thread %d", thr); | |
643 | if (test_spread_master_close || test_true) | |
644 | p = places_array[test_places].places[0]; | |
645 | print_affinity (p); | |
646 | printf ("\n"); | |
647 | } | |
648 | #pragma omp barrier | |
649 | if (omp_get_thread_num () == 2) | |
650 | { | |
651 | /* Master, master. */ | |
652 | #pragma omp parallel num_threads (4) | |
653 | { | |
654 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
655 | #pragma omp critical | |
656 | { | |
657 | struct place p = places_array[0].places[0]; | |
658 | int thr = omp_get_thread_num (); | |
659 | printf ("#3,#1 thread 2,%d", thr); | |
660 | if (test_spread_master_close || test_true) | |
661 | /* Outer is master, inner is master. */ | |
662 | p = places_array[test_places].places[0]; | |
663 | print_affinity (p); | |
664 | printf ("\n"); | |
665 | } | |
666 | } | |
667 | /* Master, spread. */ | |
668 | #pragma omp parallel num_threads (4) proc_bind (spread) | |
669 | { | |
670 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
671 | #pragma omp critical | |
672 | { | |
673 | struct place p = places_array[0].places[0]; | |
674 | int thr = omp_get_thread_num (); | |
675 | printf ("#3,#2 thread 2,%d", thr); | |
676 | if (omp_get_num_threads () == 4 | |
677 | && (test_spread_master_close || test_true)) | |
678 | /* Outer is master, inner is spread. */ | |
679 | switch (places_array[test_places].count) | |
680 | { | |
681 | case 8: | |
682 | /* T = 4, P = 8, each subpartition has 2 places. */ | |
683 | case 7: | |
684 | /* T = 4, P = 7, each subpartition has 2 places, but | |
685 | last partition, which has just one place. */ | |
686 | p = places_array[test_places].places[2 * thr]; | |
687 | break; | |
688 | case 5: | |
689 | /* T = 4, P = 5, first subpartition has 2 places, the | |
690 | rest just one. */ | |
691 | p = places_array[test_places].places[thr ? 1 + thr : 0]; | |
692 | break; | |
693 | case 3: | |
694 | /* T = 4, P = 3, unit sized subpartitions, first gets | |
695 | thr0 and thr3, second thr1, third thr2. */ | |
696 | p = places_array[test_places].places[thr == 3 ? 0 : thr]; | |
697 | break; | |
698 | case 2: | |
699 | /* T = 4, P = 2, unit sized subpartitions, each with | |
700 | 2 threads. */ | |
701 | p = places_array[test_places].places[thr / 2]; | |
702 | break; | |
703 | } | |
704 | print_affinity (p); | |
705 | printf ("\n"); | |
706 | } | |
707 | #pragma omp barrier | |
708 | if (omp_get_thread_num () == 0) | |
709 | { | |
710 | /* Master, spread, close. */ | |
711 | #pragma omp parallel num_threads (5) proc_bind (close) | |
712 | { | |
713 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
714 | #pragma omp critical | |
715 | { | |
716 | struct place p = places_array[0].places[0]; | |
717 | int thr = omp_get_thread_num (); | |
718 | printf ("#3,#2,#1 thread 2,0,%d", thr); | |
719 | if (omp_get_num_threads () == 5 | |
720 | && (test_spread_master_close || test_true)) | |
721 | /* Outer is master, inner spread, innermost close. */ | |
722 | switch (places_array[test_places].count) | |
723 | { | |
724 | /* First 3 are T = 5, P = 2. */ | |
725 | case 8: | |
726 | case 7: | |
727 | case 5: | |
728 | p = places_array[test_places].places[(thr & 2) / 2]; | |
729 | break; | |
730 | /* All the rest are T = 5, P = 1. */ | |
731 | case 3: | |
732 | case 2: | |
733 | p = places_array[test_places].places[0]; | |
734 | break; | |
735 | } | |
736 | print_affinity (p); | |
737 | printf ("\n"); | |
738 | } | |
739 | } | |
740 | } | |
741 | #pragma omp barrier | |
742 | if (omp_get_thread_num () == 3) | |
743 | { | |
744 | /* Master, spread, close. */ | |
745 | #pragma omp parallel num_threads (5) proc_bind (close) | |
746 | { | |
747 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
748 | #pragma omp critical | |
749 | { | |
750 | struct place p = places_array[0].places[0]; | |
751 | int thr = omp_get_thread_num (); | |
752 | printf ("#3,#2,#2 thread 2,3,%d", thr); | |
753 | if (omp_get_num_threads () == 5 | |
754 | && (test_spread_master_close || test_true)) | |
755 | /* Outer is master, inner spread, innermost close. */ | |
756 | switch (places_array[test_places].count) | |
757 | { | |
758 | case 8: | |
759 | /* T = 5, P = 2. */ | |
760 | p = places_array[test_places].places[6 | |
761 | + (thr & 2) / 2]; | |
762 | break; | |
763 | /* All the rest are T = 5, P = 1. */ | |
764 | case 7: | |
765 | p = places_array[test_places].places[6]; | |
766 | break; | |
767 | case 5: | |
768 | p = places_array[test_places].places[4]; | |
769 | break; | |
770 | case 3: | |
771 | p = places_array[test_places].places[0]; | |
772 | break; | |
773 | case 2: | |
774 | p = places_array[test_places].places[1]; | |
775 | break; | |
776 | } | |
777 | print_affinity (p); | |
778 | printf ("\n"); | |
779 | } | |
780 | } | |
781 | } | |
782 | } | |
783 | /* Master, master. */ | |
784 | #pragma omp parallel num_threads (4) proc_bind(master) | |
785 | { | |
786 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
787 | #pragma omp critical | |
788 | { | |
789 | struct place p = places_array[0].places[0]; | |
790 | int thr = omp_get_thread_num (); | |
791 | printf ("#3,#3 thread 2,%d", thr); | |
792 | if (test_spread_master_close || test_true) | |
793 | /* Outer is master, inner master. */ | |
794 | p = places_array[test_places].places[0]; | |
795 | print_affinity (p); | |
796 | printf ("\n"); | |
797 | } | |
798 | } | |
799 | /* Master, close. */ | |
800 | #pragma omp parallel num_threads (6) proc_bind (close) | |
801 | { | |
802 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
803 | #pragma omp critical | |
804 | { | |
805 | struct place p = places_array[0].places[0]; | |
806 | int thr = omp_get_thread_num (); | |
807 | printf ("#3,#4 thread 2,%d", thr); | |
808 | if (omp_get_num_threads () == 6 | |
809 | && (test_spread_master_close || test_true)) | |
810 | switch (places_array[test_places].count) | |
811 | { | |
812 | case 8: | |
813 | /* T = 6, P = 8. */ | |
814 | case 7: | |
815 | /* T = 6, P = 7. */ | |
816 | p = places_array[test_places].places[thr]; | |
817 | break; | |
818 | case 5: | |
819 | /* T = 6, P = 5. thr{0,5} go into the first place. */ | |
820 | p = places_array[test_places].places[thr == 5 ? 0 : thr]; | |
821 | break; | |
822 | case 3: | |
823 | /* T = 6, P = 3, two threads into each place. */ | |
824 | p = places_array[test_places].places[thr / 2]; | |
825 | break; | |
826 | case 2: | |
827 | /* T = 6, P = 2, 3 threads into each place. */ | |
828 | p = places_array[test_places].places[thr / 3]; | |
829 | break; | |
830 | } | |
831 | print_affinity (p); | |
832 | printf ("\n"); | |
833 | } | |
834 | } | |
835 | } | |
836 | } | |
837 | ||
838 | #pragma omp parallel num_threads (5) proc_bind(close) | |
839 | { | |
840 | verify (omp_proc_bind_close, omp_proc_bind_master); | |
841 | #pragma omp critical | |
842 | { | |
843 | struct place p = places_array[0].places[0]; | |
844 | int thr = omp_get_thread_num (); | |
845 | printf ("#4 thread %d", thr); | |
846 | if (omp_get_num_threads () == 5 | |
847 | && (test_spread_master_close || test_true)) | |
848 | switch (places_array[test_places].count) | |
849 | { | |
850 | case 8: | |
851 | /* T = 5, P = 8. */ | |
852 | case 7: | |
853 | /* T = 5, P = 7. */ | |
854 | case 5: | |
855 | /* T = 5, P = 5. */ | |
856 | p = places_array[test_places].places[thr]; | |
857 | break; | |
858 | case 3: | |
859 | /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second, | |
860 | thr2 in third. */ | |
861 | p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; | |
862 | break; | |
863 | case 2: | |
864 | /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second. */ | |
865 | p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; | |
866 | break; | |
867 | } | |
868 | print_affinity (p); | |
869 | printf ("\n"); | |
870 | } | |
871 | #pragma omp barrier | |
872 | if (omp_get_thread_num () == 2) | |
873 | { | |
874 | int pp = 0; | |
875 | switch (places_array[test_places].count) | |
876 | { | |
877 | case 8: | |
878 | case 7: | |
879 | case 5: | |
880 | case 3: | |
881 | pp = 2; | |
882 | break; | |
883 | case 2: | |
884 | pp = 1; | |
885 | break; | |
886 | } | |
887 | /* Close, close/master. */ | |
888 | #pragma omp parallel num_threads (4) firstprivate (pp) | |
889 | { | |
890 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
891 | #pragma omp critical | |
892 | { | |
893 | struct place p = places_array[0].places[0]; | |
894 | int thr = omp_get_thread_num (); | |
895 | printf ("#4,#1 thread 2,%d", thr); | |
896 | if (test_spread_master_close) | |
897 | /* Outer is close, inner is master. */ | |
898 | p = places_array[test_places].places[pp]; | |
899 | else if (omp_get_num_threads () == 4 && test_true) | |
900 | /* Outer is close, inner is close. */ | |
901 | switch (places_array[test_places].count) | |
902 | { | |
903 | case 8: | |
904 | /* T = 4, P = 8. */ | |
905 | case 7: | |
906 | /* T = 4, P = 7. */ | |
907 | p = places_array[test_places].places[2 + thr]; | |
908 | break; | |
909 | case 5: | |
910 | /* T = 4, P = 5. There is wrap-around for thr3. */ | |
911 | p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr]; | |
912 | break; | |
913 | case 3: | |
914 | /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2 | |
915 | into p1. */ | |
916 | p = places_array[test_places].places[(2 + thr) % 3]; | |
917 | break; | |
918 | case 2: | |
919 | /* T = 4, P = 2, 2 threads into each place. */ | |
920 | p = places_array[test_places].places[1 - thr / 2]; | |
921 | break; | |
922 | } | |
923 | ||
924 | print_affinity (p); | |
925 | printf ("\n"); | |
926 | } | |
927 | } | |
928 | /* Close, spread. */ | |
929 | #pragma omp parallel num_threads (4) proc_bind (spread) | |
930 | { | |
931 | verify (omp_proc_bind_spread, omp_proc_bind_close); | |
932 | #pragma omp critical | |
933 | { | |
934 | struct place p = places_array[0].places[0]; | |
935 | int thr = omp_get_thread_num (); | |
936 | printf ("#4,#2 thread 2,%d", thr); | |
937 | if (omp_get_num_threads () == 4 | |
938 | && (test_spread_master_close || test_true)) | |
939 | /* Outer is close, inner is spread. */ | |
940 | switch (places_array[test_places].count) | |
941 | { | |
942 | case 8: | |
943 | /* T = 4, P = 8, each subpartition has 2 places. */ | |
944 | case 7: | |
945 | /* T = 4, P = 7, each subpartition has 2 places, but | |
946 | last partition, which has just one place. */ | |
947 | p = places_array[test_places].places[thr == 3 ? 0 | |
948 | : 2 + 2 * thr]; | |
949 | break; | |
950 | case 5: | |
951 | /* T = 4, P = 5, first subpartition has 2 places, the | |
952 | rest just one. */ | |
953 | p = places_array[test_places].places[thr == 3 ? 0 | |
954 | : 2 + thr]; | |
955 | break; | |
956 | case 3: | |
957 | /* T = 4, P = 3, unit sized subpartitions, third gets | |
958 | thr0 and thr3, first thr1, second thr2. */ | |
959 | p = places_array[test_places].places[thr == 0 ? 2 : thr - 1]; | |
960 | break; | |
961 | case 2: | |
962 | /* T = 4, P = 2, unit sized subpartitions, each with | |
963 | 2 threads. */ | |
964 | p = places_array[test_places].places[1 - thr / 2]; | |
965 | break; | |
966 | } | |
967 | print_affinity (p); | |
968 | printf ("\n"); | |
969 | } | |
970 | #pragma omp barrier | |
971 | if (omp_get_thread_num () == 0) | |
972 | { | |
973 | /* Close, spread, close. */ | |
974 | #pragma omp parallel num_threads (5) proc_bind (close) | |
975 | { | |
976 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
977 | #pragma omp critical | |
978 | { | |
979 | struct place p = places_array[0].places[0]; | |
980 | int thr = omp_get_thread_num (); | |
981 | printf ("#4,#2,#1 thread 2,0,%d", thr); | |
982 | if (omp_get_num_threads () == 5 | |
983 | && (test_spread_master_close || test_true)) | |
984 | /* Outer is close, inner spread, innermost close. */ | |
985 | switch (places_array[test_places].count) | |
986 | { | |
987 | case 8: | |
988 | case 7: | |
989 | /* T = 5, P = 2. */ | |
990 | p = places_array[test_places].places[2 | |
991 | + (thr & 2) / 2]; | |
992 | break; | |
993 | /* All the rest are T = 5, P = 1. */ | |
994 | case 5: | |
995 | case 3: | |
996 | p = places_array[test_places].places[2]; | |
997 | break; | |
998 | case 2: | |
999 | p = places_array[test_places].places[1]; | |
1000 | break; | |
1001 | } | |
1002 | print_affinity (p); | |
1003 | printf ("\n"); | |
1004 | } | |
1005 | } | |
1006 | } | |
1007 | #pragma omp barrier | |
1008 | if (omp_get_thread_num () == 2) | |
1009 | { | |
1010 | /* Close, spread, close. */ | |
1011 | #pragma omp parallel num_threads (5) proc_bind (close) | |
1012 | { | |
1013 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
1014 | #pragma omp critical | |
1015 | { | |
1016 | struct place p = places_array[0].places[0]; | |
1017 | int thr = omp_get_thread_num (); | |
1018 | printf ("#4,#2,#2 thread 2,2,%d", thr); | |
1019 | if (omp_get_num_threads () == 5 | |
1020 | && (test_spread_master_close || test_true)) | |
1021 | /* Outer is close, inner spread, innermost close. */ | |
1022 | switch (places_array[test_places].count) | |
1023 | { | |
1024 | case 8: | |
1025 | /* T = 5, P = 2. */ | |
1026 | p = places_array[test_places].places[6 | |
1027 | + (thr & 2) / 2]; | |
1028 | break; | |
1029 | /* All the rest are T = 5, P = 1. */ | |
1030 | case 7: | |
1031 | p = places_array[test_places].places[6]; | |
1032 | break; | |
1033 | case 5: | |
1034 | p = places_array[test_places].places[4]; | |
1035 | break; | |
1036 | case 3: | |
1037 | p = places_array[test_places].places[1]; | |
1038 | break; | |
1039 | case 2: | |
1040 | p = places_array[test_places].places[0]; | |
1041 | break; | |
1042 | } | |
1043 | print_affinity (p); | |
1044 | printf ("\n"); | |
1045 | } | |
1046 | } | |
1047 | } | |
1048 | #pragma omp barrier | |
1049 | if (omp_get_thread_num () == 3) | |
1050 | { | |
1051 | /* Close, spread, close. */ | |
1052 | #pragma omp parallel num_threads (5) proc_bind (close) | |
1053 | { | |
1054 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
1055 | #pragma omp critical | |
1056 | { | |
1057 | struct place p = places_array[0].places[0]; | |
1058 | int thr = omp_get_thread_num (); | |
1059 | printf ("#4,#2,#3 thread 2,3,%d", thr); | |
1060 | if (omp_get_num_threads () == 5 | |
1061 | && (test_spread_master_close || test_true)) | |
1062 | /* Outer is close, inner spread, innermost close. */ | |
1063 | switch (places_array[test_places].count) | |
1064 | { | |
1065 | case 8: | |
1066 | case 7: | |
1067 | case 5: | |
1068 | /* T = 5, P = 2. */ | |
1069 | p = places_array[test_places].places[(thr & 2) / 2]; | |
1070 | break; | |
1071 | /* All the rest are T = 5, P = 1. */ | |
1072 | case 3: | |
1073 | p = places_array[test_places].places[2]; | |
1074 | break; | |
1075 | case 2: | |
1076 | p = places_array[test_places].places[0]; | |
1077 | break; | |
1078 | } | |
1079 | print_affinity (p); | |
1080 | printf ("\n"); | |
1081 | } | |
1082 | } | |
1083 | } | |
1084 | } | |
1085 | /* Close, master. */ | |
1086 | #pragma omp parallel num_threads (4) proc_bind(master) \ | |
1087 | firstprivate (pp) | |
1088 | { | |
1089 | verify (omp_proc_bind_master, omp_proc_bind_close); | |
1090 | #pragma omp critical | |
1091 | { | |
1092 | struct place p = places_array[0].places[0]; | |
1093 | int thr = omp_get_thread_num (); | |
1094 | printf ("#4,#3 thread 2,%d", thr); | |
1095 | if (test_spread_master_close || test_true) | |
1096 | /* Outer is close, inner master. */ | |
1097 | p = places_array[test_places].places[pp]; | |
1098 | print_affinity (p); | |
1099 | printf ("\n"); | |
1100 | } | |
1101 | } | |
1102 | /* Close, close. */ | |
1103 | #pragma omp parallel num_threads (6) proc_bind (close) | |
1104 | { | |
1105 | verify (omp_proc_bind_close, omp_proc_bind_close); | |
1106 | #pragma omp critical | |
1107 | { | |
1108 | struct place p = places_array[0].places[0]; | |
1109 | int thr = omp_get_thread_num (); | |
1110 | printf ("#4,#4 thread 2,%d", thr); | |
1111 | if (omp_get_num_threads () == 6 | |
1112 | && (test_spread_master_close || test_true)) | |
1113 | switch (places_array[test_places].count) | |
1114 | { | |
1115 | case 8: | |
1116 | /* T = 6, P = 8. */ | |
1117 | p = places_array[test_places].places[2 + thr]; | |
1118 | break; | |
1119 | case 7: | |
1120 | /* T = 6, P = 7. */ | |
1121 | p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr]; | |
1122 | break; | |
1123 | case 5: | |
1124 | /* T = 6, P = 5. thr{0,5} go into the third place. */ | |
1125 | p = places_array[test_places].places[thr >= 3 ? thr - 3 | |
1126 | : 2 + thr]; | |
1127 | break; | |
1128 | case 3: | |
1129 | /* T = 6, P = 3, two threads into each place. */ | |
1130 | p = places_array[test_places].places[thr < 2 ? 2 | |
1131 | : thr / 2 - 1]; | |
1132 | break; | |
1133 | case 2: | |
1134 | /* T = 6, P = 2, 3 threads into each place. */ | |
1135 | p = places_array[test_places].places[1 - thr / 3]; | |
1136 | break; | |
1137 | } | |
1138 | print_affinity (p); | |
1139 | printf ("\n"); | |
1140 | } | |
1141 | } | |
1142 | } | |
1143 | } | |
1144 | ||
1145 | return 0; | |
1146 | } |