/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
   Contributed by Alexander Monakov <amonakov@ispras.ru>

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is an NVPTX specific implementation of a barrier synchronization
   mechanism for libgomp.  This type is private to the library.  This
   implementation uses atomic instructions and the bar.sync instruction.  */
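
/* Note on the "32 * bar->total" operands used throughout: on this target
   each libgomp thread occupies a full warp of 32 hardware threads, while
   bar.sync counts individual hardware threads.  Barrier resource 1 is
   used, presumably to stay clear of other uses of barrier 0.  */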

#include <limits.h>
#include "libgomp.h"


void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      bar->awaited = bar->total;
      __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
                        MEMMODEL_RELEASE);
    }
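  /* The bar.sync blocks until every warp has arrived and, pairing with the
     release store above, makes the new generation visible to all of them.  */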
  asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}

void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

/* Like gomp_barrier_wait, except that if the encountering thread
   is not the last one to hit the barrier, it returns immediately.
   The intended usage is that a thread which intends to gomp_barrier_destroy
   this barrier calls gomp_barrier_wait, while all other threads
   call gomp_barrier_wait_last.  When gomp_barrier_wait returns,
   the barrier can be safely destroyed.  */

void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* Deferring to gomp_barrier_wait does not use the optimization opportunity
     allowed by the interface contract for all-but-last participants.  The
     original implementation in config/linux/bar.c handles this better.  */
  gomp_barrier_wait (bar);
}

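/* COUNT is ignored on this target: bar.sync always synchronizes, and
   thereby releases, every participating warp at once, so waiters looping
   in gomp_team_barrier_wait_end are woken simply by joining their
   bar.sync.  */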
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}

void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
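      /* If tasks are queued, the last arriver starts processing them and
         then waits alongside the other threads; otherwise it advances the
         generation and releases everyone immediately.  */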
      if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          state &= ~BAR_CANCELLED;
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
          asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
          return;
        }
    }

  generation = state;
  state &= ~BAR_CANCELLED;
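  /* Wait until the generation advances past the one we arrived in,
     synchronizing with the other warps each round and helping with any
     tasks that become pending in the meantime.  */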
  do
    {
      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          gomp_barrier_handle_tasks (state);
          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
        }
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);
}

void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

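/* Like gomp_team_barrier_wait, but for the team's final barrier; the last
   thread to arrive additionally resets the AWAITED_FINAL counter consumed
   by gomp_barrier_wait_final_start.  */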
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    bar->awaited_final = bar->total;
  gomp_team_barrier_wait_end (bar, state);
}

bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
                                   gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      /* BAR_CANCELLED should never be set in state here, because
         cancellation means that at least one of the threads has been
         cancelled, thus on a cancellable barrier we should never see
         all threads arrive.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
          asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
          return false;
        }
    }

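  /* If the barrier had already been cancelled when we arrived, report the
     cancellation without waiting for the other threads.  */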
  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;

  generation = state;
  do
    {
      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
        return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          gomp_barrier_handle_tasks (state);
          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
        }
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);

  return false;
}

bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  return gomp_team_barrier_wait_cancel_end (bar,
                                            gomp_barrier_wait_cancel_start (bar));
}

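/* Mark the team barrier as cancelled, setting the flag under the task lock
   so the update cannot race with concurrent task handling, then wake all
   waiters so they observe BAR_CANCELLED and return.  */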
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  gomp_mutex_lock (&team->task_lock);
  if (team->barrier.generation & BAR_CANCELLED)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);
  gomp_team_barrier_wake (&team->barrier, INT_MAX);
}