From 9038678dd40f550a0b3e0b7dfe992630739ae8cc Mon Sep 17 00:00:00 2001
From: John Sperbeck <jsperbeck@google.com>
Date: Tue, 7 May 2019 18:43:20 -0700
Subject: percpu: remove spurious lock dependency between percpu and sched

[ Upstream commit 198790d9a3aeaef5792d33a560020861126edc22 ]

In free_percpu() we sometimes call pcpu_schedule_balance_work() to
queue a work item (which does a wakeup) while holding pcpu_lock.
This creates an unnecessary lock dependency between pcpu_lock and
the scheduler's pi_lock. There are other places where we call
pcpu_schedule_balance_work() without holding pcpu_lock, and this case
doesn't need to be different.

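The fix follows a common kernel pattern: under the lock, only record
that balance work is needed, and issue the wakeup after the lock has
been dropped. A minimal sketch of that pattern (example_free(),
struct example and should_schedule() are illustrative names only, not
the actual mm/percpu.c code):

static void example_free(struct example *e)
{
	unsigned long flags;
	bool need_work = false;

	spin_lock_irqsave(&e->lock, flags);
	/* only record the decision while the lock is held */
	if (should_schedule(e))
		need_work = true;
	spin_unlock_irqrestore(&e->lock, flags);

	/* the wakeup now happens with no spinlock held */
	if (need_work)
		schedule_work(&e->work);
}

Deferring the call is safe because, as noted above,
pcpu_schedule_balance_work() does not itself require pcpu_lock.
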
Moving the call outside the lock prevents the following lockdep splat
when running tools/testing/selftests/bpf/{test_maps,test_progs} in
sequence with lockdep enabled:

======================================================
WARNING: possible circular locking dependency detected
5.1.0-dbg-DEV #1 Not tainted
------------------------------------------------------
kworker/23:255/18872 is trying to acquire lock:
000000000bc79290 (&(&pool->lock)->rlock){-.-.}, at: __queue_work+0xb2/0x520

but task is already holding lock:
00000000e3e7a6aa (pcpu_lock){..-.}, at: free_percpu+0x36/0x260

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #4 (pcpu_lock){..-.}:
       lock_acquire+0x9e/0x180
       _raw_spin_lock_irqsave+0x3a/0x50
       pcpu_alloc+0xfa/0x780
       __alloc_percpu_gfp+0x12/0x20
       alloc_htab_elem+0x184/0x2b0
       __htab_percpu_map_update_elem+0x252/0x290
       bpf_percpu_hash_update+0x7c/0x130
       __do_sys_bpf+0x1912/0x1be0
       __x64_sys_bpf+0x1a/0x20
       do_syscall_64+0x59/0x400
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

-> #3 (&htab->buckets[i].lock){....}:
       lock_acquire+0x9e/0x180
       _raw_spin_lock_irqsave+0x3a/0x50
       htab_map_update_elem+0x1af/0x3a0

-> #2 (&rq->lock){-.-.}:
       lock_acquire+0x9e/0x180
       _raw_spin_lock+0x2f/0x40
       task_fork_fair+0x37/0x160
       sched_fork+0x211/0x310
       copy_process.part.43+0x7b1/0x2160
       _do_fork+0xda/0x6b0
       kernel_thread+0x29/0x30
       rest_init+0x22/0x260
       arch_call_rest_init+0xe/0x10
       start_kernel+0x4fd/0x520
       x86_64_start_reservations+0x24/0x26
       x86_64_start_kernel+0x6f/0x72
       secondary_startup_64+0xa4/0xb0

-> #1 (&p->pi_lock){-.-.}:
       lock_acquire+0x9e/0x180
       _raw_spin_lock_irqsave+0x3a/0x50
       try_to_wake_up+0x41/0x600
       wake_up_process+0x15/0x20
       create_worker+0x16b/0x1e0
       workqueue_init+0x279/0x2ee
       kernel_init_freeable+0xf7/0x288
       kernel_init+0xf/0x180
       ret_from_fork+0x24/0x30

-> #0 (&(&pool->lock)->rlock){-.-.}:
       __lock_acquire+0x101f/0x12a0
       lock_acquire+0x9e/0x180
       _raw_spin_lock+0x2f/0x40
       __queue_work+0xb2/0x520
       queue_work_on+0x38/0x80
       free_percpu+0x221/0x260
       pcpu_freelist_destroy+0x11/0x20
       stack_map_free+0x2a/0x40
       bpf_map_free_deferred+0x3c/0x50
       process_one_work+0x1f7/0x580
       worker_thread+0x54/0x410
       kthread+0x10f/0x150
       ret_from_fork+0x24/0x30

other info that might help us debug this:

Chain exists of:
  &(&pool->lock)->rlock --> &htab->buckets[i].lock --> pcpu_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(pcpu_lock);
                               lock(&htab->buckets[i].lock);
                               lock(pcpu_lock);
  lock(&(&pool->lock)->rlock);

 *** DEADLOCK ***

3 locks held by kworker/23:255/18872:
#0: 00000000b36a6e16 ((wq_completion)events){+.+.},
    at: process_one_work+0x17a/0x580
#1: 00000000dfd966f0 ((work_completion)(&map->work)){+.+.},
    at: process_one_work+0x17a/0x580
#2: 00000000e3e7a6aa (pcpu_lock){..-.},
    at: free_percpu+0x36/0x260

stack backtrace:
CPU: 23 PID: 18872 Comm: kworker/23:255 Not tainted 5.1.0-dbg-DEV #1
Hardware name: ...
Workqueue: events bpf_map_free_deferred
Call Trace:
 dump_stack+0x67/0x95
 print_circular_bug.isra.38+0x1c6/0x220
 check_prev_add.constprop.50+0x9f6/0xd20
 __lock_acquire+0x101f/0x12a0
 lock_acquire+0x9e/0x180
 _raw_spin_lock+0x2f/0x40
 __queue_work+0xb2/0x520
 queue_work_on+0x38/0x80
 free_percpu+0x221/0x260
 pcpu_freelist_destroy+0x11/0x20
 stack_map_free+0x2a/0x40
 bpf_map_free_deferred+0x3c/0x50
 process_one_work+0x1f7/0x580
 worker_thread+0x54/0x410
 kthread+0x10f/0x150
 ret_from_fork+0x24/0x30

Signed-off-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 mm/percpu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index 41e58f3d8fbf..c66149ce1fe6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1721,6 +1721,7 @@ void free_percpu(void __percpu *ptr)
 	struct pcpu_chunk *chunk;
 	unsigned long flags;
 	int off;
+	bool need_balance = false;
 
 	if (!ptr)
 		return;
@@ -1742,7 +1743,7 @@ void free_percpu(void __percpu *ptr)
 
 		list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
 			if (pos != chunk) {
-				pcpu_schedule_balance_work();
+				need_balance = true;
 				break;
 			}
 	}
@@ -1750,6 +1751,9 @@ void free_percpu(void __percpu *ptr)
 	trace_percpu_free_percpu(chunk->base_addr, off, ptr);
 
 	spin_unlock_irqrestore(&pcpu_lock, flags);
+
+	if (need_balance)
+		pcpu_schedule_balance_work();
 }
 EXPORT_SYMBOL_GPL(free_percpu);
 
--
2.20.1
