git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
workqueue: Fix spurious data race in __flush_work()
authorTejun Heo <tj@kernel.org>
Mon, 5 Aug 2024 19:37:25 +0000 (09:37 -1000)
committerTejun Heo <tj@kernel.org>
Tue, 6 Aug 2024 04:33:56 +0000 (18:33 -1000)
When flushing a work item for cancellation, __flush_work() knows that it
exclusively owns the work item through its PENDING bit. 134874e2eee9
("workqueue: Allow cancel_work_sync() and disable_work() from atomic
contexts on BH work items") added a read of @work->data to determine whether
to use busy wait for BH work items that are being canceled. While the read
is safe when @from_cancel, @work->data was read before testing @from_cancel
to simplify code structure:

data = *work_data_bits(work);
if (from_cancel &&
    !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) {

While the data read was never used when !@from_cancel, the unconditional
access could trigger KCSAN data race detection spuriously:

  ==================================================================
  BUG: KCSAN: data-race in __flush_work / __flush_work

  write to 0xffff8881223aa3e8 of 8 bytes by task 3998 on cpu 0:
   instrument_write include/linux/instrumented.h:41 [inline]
   ___set_bit include/asm-generic/bitops/instrumented-non-atomic.h:28 [inline]
   insert_wq_barrier kernel/workqueue.c:3790 [inline]
   start_flush_work kernel/workqueue.c:4142 [inline]
   __flush_work+0x30b/0x570 kernel/workqueue.c:4178
   flush_work kernel/workqueue.c:4229 [inline]
   ...

  read to 0xffff8881223aa3e8 of 8 bytes by task 50 on cpu 1:
   __flush_work+0x42a/0x570 kernel/workqueue.c:4188
   flush_work kernel/workqueue.c:4229 [inline]
   flush_delayed_work+0x66/0x70 kernel/workqueue.c:4251
   ...

  value changed: 0x0000000000400000 -> 0xffff88810006c00d

Reorganize the code so that @from_cancel is tested before @work->data is
accessed. Because the only problem was the spurious KCSAN detection — the
read itself was always safe — this shouldn't need READ_ONCE() or other
access qualifiers.

No functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: syzbot+b3e4f2f51ed645fd5df2@syzkaller.appspotmail.com
Fixes: 134874e2eee9 ("workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items")
Link: http://lkml.kernel.org/r/000000000000ae429e061eea2157@google.com
Cc: Jens Axboe <axboe@kernel.dk>
kernel/workqueue.c

index d56bd2277e58e842a2df0fa2ecd0138fab6d330f..ef174d8c1f63913ab8fabfa647571effc28cf5d6 100644 (file)
@@ -4166,7 +4166,6 @@ already_gone:
 static bool __flush_work(struct work_struct *work, bool from_cancel)
 {
        struct wq_barrier barr;
-       unsigned long data;
 
        if (WARN_ON(!wq_online))
                return false;
@@ -4184,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
         * was queued on a BH workqueue, we also know that it was running in the
         * BH context and thus can be busy-waited.
         */
-       data = *work_data_bits(work);
-       if (from_cancel &&
-           !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) {
-               /*
-                * On RT, prevent a live lock when %current preempted soft
-                * interrupt processing or prevents ksoftirqd from running by
-                * keeping flipping BH. If the BH work item runs on a different
-                * CPU then this has no effect other than doing the BH
-                * disable/enable dance for nothing. This is copied from
-                * kernel/softirq.c::tasklet_unlock_spin_wait().
-                */
-               while (!try_wait_for_completion(&barr.done)) {
-                       if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
-                               local_bh_disable();
-                               local_bh_enable();
-                       } else {
-                               cpu_relax();
+       if (from_cancel) {
+               unsigned long data = *work_data_bits(work);
+
+               if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) &&
+                   (data & WORK_OFFQ_BH)) {
+                       /*
+                        * On RT, prevent a live lock when %current preempted
+                        * soft interrupt processing or prevents ksoftirqd from
+                        * running by keeping flipping BH. If the BH work item
+                        * runs on a different CPU then this has no effect other
+                        * than doing the BH disable/enable dance for nothing.
+                        * This is copied from
+                        * kernel/softirq.c::tasklet_unlock_spin_wait().
+                        */
+                       while (!try_wait_for_completion(&barr.done)) {
+                               if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+                                       local_bh_disable();
+                                       local_bh_enable();
+                               } else {
+                                       cpu_relax();
+                               }
                        }
+                       goto out_destroy;
                }
-       } else {
-               wait_for_completion(&barr.done);
        }
 
+       wait_for_completion(&barr.done);
+
+out_destroy:
        destroy_work_on_stack(&barr.work);
        return true;
 }