libgomp: Ensure memory sync after performing tasks

author Matthew Malcolmson <mmalcolmson@nvidia.com>

Tue, 20 Jan 2026 03:54:51 +0000 (03:54 +0000)

committer Prathamesh Kulkarni <prathameshk@nvidia.com>

Tue, 20 Jan 2026 03:54:51 +0000 (03:54 +0000)
author Matthew Malcolmson <mmalcolmson@nvidia.com>
Tue, 20 Jan 2026 03:54:51 +0000 (03:54 +0000)
committer Prathamesh Kulkarni <prathameshk@nvidia.com>
Tue, 20 Jan 2026 03:54:51 +0000 (03:54 +0000)
diff --git a/libgomp/config/gcn/bar.c b/libgomp/config/gcn/bar.c

index 10c3f5d13623a2cbbef216e02248ada692b02982..3045587f0f3c61ed2d29c1810eda1c96523762b6 100644 (file)
--- a/libgomp/config/gcn/bar.c
+++ b/libgomp/config/gcn/bar.c
@@ -89,7 +89,9 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
  
        bar->awaited = bar->total;
        team->work_share_cancelled = 0;
-      if (__builtin_expect (team->task_count, 0))
+      unsigned task_count
+       = __atomic_load_n (&team->task_count, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (task_count, 0))
         {
           gomp_barrier_handle_tasks (state);
           state &= ~BAR_WAS_LAST;
@@ -164,7 +166,9 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
  
        bar->awaited = bar->total;
        team->work_share_cancelled = 0;
-      if (__builtin_expect (team->task_count, 0))
+      unsigned task_count
+       = __atomic_load_n (&team->task_count, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (task_count, 0))
         {
           gomp_barrier_handle_tasks (state);
           state &= ~BAR_WAS_LAST;
diff --git a/libgomp/config/gcn/bar.h b/libgomp/config/gcn/bar.h

index 0507efb7d2d1320154aac22731a4784a23ea69ae..6e838ff54a89fc98d9b272f25f295d132d8b365c 100644 (file)
--- a/libgomp/config/gcn/bar.h
+++ b/libgomp/config/gcn/bar.h
@@ -162,7 +162,10 @@ gomp_team_barrier_cancelled (gomp_barrier_t *bar)
  static inline void
  gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
  {
-  bar->generation = (state & -BAR_INCR) + BAR_INCR;
+  /* Need the atomic store for acquire-release synchronisation with the
+     load in `gomp_team_barrier_wait_{cancel_,}end`.  See PR122356.  */
+  __atomic_store_n (&bar->generation, (state & -BAR_INCR) + BAR_INCR,
+                   MEMMODEL_RELEASE);
  }
  
  static inline bool
diff --git a/libgomp/config/linux/bar.c b/libgomp/config/linux/bar.c

index 2a1b052b11e6ccd569ac2a936b636002366ebf1b..bbdfc8963918a24eb5932989a4c012a8a7aa24cb 100644 (file)
--- a/libgomp/config/linux/bar.c
+++ b/libgomp/config/linux/bar.c
@@ -90,7 +90,9 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
  
        bar->awaited = bar->total;
        team->work_share_cancelled = 0;
-      if (__builtin_expect (team->task_count, 0))
+      unsigned task_count
+       = __atomic_load_n (&team->task_count, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (task_count, 0))
         {
           gomp_barrier_handle_tasks (state);
           state &= ~BAR_WAS_LAST;
@@ -154,7 +156,9 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
  
        bar->awaited = bar->total;
        team->work_share_cancelled = 0;
-      if (__builtin_expect (team->task_count, 0))
+      unsigned task_count
+       = __atomic_load_n (&team->task_count, MEMMODEL_ACQUIRE);
+      if (__builtin_expect (task_count, 0))
         {
           gomp_barrier_handle_tasks (state);
           state &= ~BAR_WAS_LAST;
diff --git a/libgomp/config/linux/bar.h b/libgomp/config/linux/bar.h

index b1fff01105a71b79e4329a1fd74a597be26be122..4dc0d3cca994a5ec5ef403f35be4365a1b18f08f 100644 (file)
--- a/libgomp/config/linux/bar.h
+++ b/libgomp/config/linux/bar.h
@@ -162,7 +162,10 @@ gomp_team_barrier_cancelled (gomp_barrier_t *bar)
  static inline void
  gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
  {
-  bar->generation = (state & -BAR_INCR) + BAR_INCR;
+  /* Need the atomic store for acquire-release synchronisation with the
+     load in `gomp_team_barrier_wait_{cancel_,}end`.  See PR122356.  */
+  __atomic_store_n (&bar->generation, (state & -BAR_INCR) + BAR_INCR,
+                   MEMMODEL_RELEASE);
  }
  
  static inline bool
diff --git a/libgomp/config/posix/bar.c b/libgomp/config/posix/bar.c

index ce69905ba6744c1e3b90fa36587fcdd6715608cf..c46659bd26456e25de0a2f51c0aa095fc3472aed 100644 (file)
--- a/libgomp/config/posix/bar.c
+++ b/libgomp/config/posix/bar.c
@@ -123,7 +123,9 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
        struct gomp_team *team = thr->ts.team;
  
        team->work_share_cancelled = 0;
-      if (team->task_count)
+      unsigned task_count
+       = __atomic_load_n (&team->task_count, MEMMODEL_ACQUIRE);
+      if (task_count)
         {
           gomp_barrier_handle_tasks (state);
           if (n > 0)
@@ -185,7 +187,9 @@ gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
        struct gomp_team *team = thr->ts.team;
  
        team->work_share_cancelled = 0;
-      if (team->task_count)
+      unsigned task_count
+       = __atomic_load_n (&team->task_count, MEMMODEL_ACQUIRE);
+      if (task_count)
         {
           gomp_barrier_handle_tasks (state);
           if (n > 0)
diff --git a/libgomp/config/posix/bar.h b/libgomp/config/posix/bar.h

index 5a175c228c260d37b0952b75fd5e0367cfc44041..026daca793d5524afbc45854106e96776ab8384f 100644 (file)
--- a/libgomp/config/posix/bar.h
+++ b/libgomp/config/posix/bar.h
@@ -152,7 +152,10 @@ gomp_team_barrier_cancelled (gomp_barrier_t *bar)
  static inline void
  gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
  {
-  bar->generation = (state & -BAR_INCR) + BAR_INCR;
+  /* Need the atomic store for acquire-release synchronisation with the
+     load in `gomp_team_barrier_wait_{cancel_,}end`.  See PR122356.  */
+  __atomic_store_n (&bar->generation, (state & -BAR_INCR) + BAR_INCR,
+                   MEMMODEL_RELEASE);
  }
  
  static inline bool
diff --git a/libgomp/config/rtems/bar.h b/libgomp/config/rtems/bar.h

index 61fa91f300f2e94999bc02b218c18a37997df15b..80fb1cd3be87ab4ec06bc6273a4c13fd088fab63 100644 (file)
--- a/libgomp/config/rtems/bar.h
+++ b/libgomp/config/rtems/bar.h
@@ -164,7 +164,10 @@ gomp_team_barrier_cancelled (gomp_barrier_t *bar)
  static inline void
  gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state)
  {
-  bar->generation = (state & -BAR_INCR) + BAR_INCR;
+  /* Need the atomic store for acquire-release synchronisation with the
+     load in `gomp_team_barrier_wait_{cancel_,}end`.  See PR122356.  */
+  __atomic_store_n (&bar->generation, (state & -BAR_INCR) + BAR_INCR,
+                   MEMMODEL_RELEASE);
  }
  
  static inline bool
diff --git a/libgomp/task.c b/libgomp/task.c

index 554636aadd5c8b6e08cc7179fe4c17aad9c56722..cbba28516e3fa074c83bd84e94eababb34285e33 100644 (file)
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -1702,7 +1702,13 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state)
               if (do_wake > new_tasks)
                 do_wake = new_tasks;
             }
-         --team->task_count;
+         /* Need to use RELEASE to sync with barrier read outside of the
+            tasking code (See PR122356).  Only care when decrementing to zero
+            because that's what the barrier cares about.  */
+         if (team->task_count == 1)
+           __atomic_store_n (&team->task_count, 0, MEMMODEL_RELEASE);
+         else
+           team->task_count--;
         }
      }
  }
diff --git a/libgomp/testsuite/libgomp.c/pr122356.c b/libgomp/testsuite/libgomp.c/pr122356.c

new file mode 100644 (file)

index 0000000..7687951
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr122356.c
@@ -0,0 +1,40 @@
+#include <omp.h>
+
+void abort ();
+
+#define NUM_THREADS 8
+unsigned full_data[NUM_THREADS] = {0};
+#pragma omp declare target enter(full_data)
+
+void
+test ()
+{
+#pragma omp parallel num_threads(8)
+  {
+#pragma omp for
+    for (int i = 0; i < 10; i++)
+#pragma omp task
+      {
+       full_data[omp_get_thread_num ()] += 1;
+      }
+#pragma omp barrier
+
+    unsigned total = 0;
+    for (int i = 0; i < NUM_THREADS; i++)
+      total += full_data[i];
+
+    if (total != 10)
+      abort ();
+  }
+}
+#pragma omp declare target enter(test)
+
+
+int
+main ()
+{
+  test ();
+
+  #pragma omp target
+    test ();
+}
author	Matthew Malcolmson <mmalcolmson@nvidia.com>
	Tue, 20 Jan 2026 03:54:51 +0000 (03:54 +0000)
committer	Prathamesh Kulkarni <prathameshk@nvidia.com>
	Tue, 20 Jan 2026 03:54:51 +0000 (03:54 +0000)
libgomp/config/gcn/bar.c		patch \| blob \| blame \| history
libgomp/config/gcn/bar.h		patch \| blob \| blame \| history
libgomp/config/linux/bar.c		patch \| blob \| blame \| history
libgomp/config/linux/bar.h		patch \| blob \| blame \| history
libgomp/config/posix/bar.c		patch \| blob \| blame \| history
libgomp/config/posix/bar.h		patch \| blob \| blame \| history
libgomp/config/rtems/bar.h		patch \| blob \| blame \| history
libgomp/task.c		patch \| blob \| blame \| history
libgomp/testsuite/libgomp.c/pr122356.c	[new file with mode: 0644]	patch \| blob