]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob
66a8b7680cb5d45f2a545bd5c284b39d386244a2
[thirdparty/kernel/stable-queue.git] /
1 From de35994ecd2dd6148ab5a6c5050a1670a04dec77 Mon Sep 17 00:00:00 2001
2 From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
3 Date: Thu, 19 Dec 2024 09:30:30 +0000
4 Subject: workqueue: Do not warn when cancelling WQ_MEM_RECLAIM work from !WQ_MEM_RECLAIM worker
5 MIME-Version: 1.0
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
8
9 From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
10
11 commit de35994ecd2dd6148ab5a6c5050a1670a04dec77 upstream.
12
13 After commit
14 746ae46c1113 ("drm/sched: Mark scheduler work queues with WQ_MEM_RECLAIM")
15 amdgpu started seeing the following warning:
16
17 [ ] workqueue: WQ_MEM_RECLAIM sdma0:drm_sched_run_job_work [gpu_sched] is flushing !WQ_MEM_RECLAIM events:amdgpu_device_delay_enable_gfx_off [amdgpu]
18 ...
19 [ ] Workqueue: sdma0 drm_sched_run_job_work [gpu_sched]
20 ...
21 [ ] Call Trace:
22 [ ] <TASK>
23 ...
24 [ ] ? check_flush_dependency+0xf5/0x110
25 ...
26 [ ] cancel_delayed_work_sync+0x6e/0x80
27 [ ] amdgpu_gfx_off_ctrl+0xab/0x140 [amdgpu]
28 [ ] amdgpu_ring_alloc+0x40/0x50 [amdgpu]
29 [ ] amdgpu_ib_schedule+0xf4/0x810 [amdgpu]
30 [ ] ? drm_sched_run_job_work+0x22c/0x430 [gpu_sched]
31 [ ] amdgpu_job_run+0xaa/0x1f0 [amdgpu]
32 [ ] drm_sched_run_job_work+0x257/0x430 [gpu_sched]
33 [ ] process_one_work+0x217/0x720
34 ...
35 [ ] </TASK>
36
37 The intent of the verification done in check_flush_dependency is to ensure
38 forward progress during memory reclaim, by flagging cases when either a
39 memory reclaim process, or a memory reclaim work item is flushed from a
40 context not marked as memory reclaim safe.
41
42 This is correct when flushing, but when called from the
43 cancel(_delayed)_work_sync() paths it is a false positive because work is
44 either already running, or will not be running at all. Therefore
45 cancelling it is safe and we can relax the warning criteria by letting the
46 helper know of the calling context.
47
48 Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
49 Fixes: fca839c00a12 ("workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue")
50 References: 746ae46c1113 ("drm/sched: Mark scheduler work queues with WQ_MEM_RECLAIM")
51 Cc: Tejun Heo <tj@kernel.org>
52 Cc: Peter Zijlstra <peterz@infradead.org>
53 Cc: Lai Jiangshan <jiangshanlai@gmail.com>
54 Cc: Alex Deucher <alexander.deucher@amd.com>
55 Cc: Christian König <christian.koenig@amd.com>
56 Cc: Matthew Brost <matthew.brost@intel.com>
57 Cc: <stable@vger.kernel.org> # v4.5+
58 Signed-off-by: Tejun Heo <tj@kernel.org>
59 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
60 ---
61 kernel/workqueue.c | 22 +++++++++++++---------
62 1 file changed, 13 insertions(+), 9 deletions(-)
63
64 --- a/kernel/workqueue.c
65 +++ b/kernel/workqueue.c
66 @@ -3680,23 +3680,27 @@ void workqueue_softirq_dead(unsigned int
67 * check_flush_dependency - check for flush dependency sanity
68 * @target_wq: workqueue being flushed
69 * @target_work: work item being flushed (NULL for workqueue flushes)
70 + * @from_cancel: are we called from the work cancel path
71 *
72 * %current is trying to flush the whole @target_wq or @target_work on it.
73 - * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
74 - * reclaiming memory or running on a workqueue which doesn't have
75 - * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
76 - * a deadlock.
77 + * If this is not the cancel path (which implies work being flushed is either
78 + * already running, or will not be at all), check if @target_wq doesn't have
79 + * %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running
80 + * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward-
81 + * progress guarantee leading to a deadlock.
82 */
83 static void check_flush_dependency(struct workqueue_struct *target_wq,
84 - struct work_struct *target_work)
85 + struct work_struct *target_work,
86 + bool from_cancel)
87 {
88 - work_func_t target_func = target_work ? target_work->func : NULL;
89 + work_func_t target_func;
90 struct worker *worker;
91
92 - if (target_wq->flags & WQ_MEM_RECLAIM)
93 + if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM)
94 return;
95
96 worker = current_wq_worker();
97 + target_func = target_work ? target_work->func : NULL;
98
99 WARN_ONCE(current->flags & PF_MEMALLOC,
100 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
101 @@ -3966,7 +3970,7 @@ void __flush_workqueue(struct workqueue_
102 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
103 }
104
105 - check_flush_dependency(wq, NULL);
106 + check_flush_dependency(wq, NULL, false);
107
108 mutex_unlock(&wq->mutex);
109
110 @@ -4141,7 +4145,7 @@ static bool start_flush_work(struct work
111 }
112
113 wq = pwq->wq;
114 - check_flush_dependency(wq, work);
115 + check_flush_dependency(wq, work, from_cancel);
116
117 insert_wq_barrier(pwq, barr, work, worker);
118 raw_spin_unlock_irq(&pool->lock);