]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.2.8/net_sched-fix-qdisc_tree_decrease_qlen-races.patch
4.14-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 4.2.8 / net_sched-fix-qdisc_tree_decrease_qlen-races.patch
1 From foo@baz Fri Dec 11 11:38:35 EST 2015
2 From: Eric Dumazet <edumazet@google.com>
3 Date: Tue, 1 Dec 2015 20:08:51 -0800
4 Subject: net_sched: fix qdisc_tree_decrease_qlen() races
5
6 From: Eric Dumazet <edumazet@google.com>
7
8 [ Upstream commit 4eaf3b84f2881c9c028f1d5e76c52ab575fe3a66 ]
9
10 qdisc_tree_decrease_qlen() suffers from two problems on multiqueue
11 devices.
12
13 One problem is that it updates sch->q.qlen and sch->qstats.drops
14 on the mq/mqprio root qdisc, while it should not : Daniele
15 reported underflows errors :
16 [ 681.774821] PAX: sch->q.qlen: 0 n: 1
17 [ 681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head;
18 [ 681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G O 4.2.6.201511282239-1-grsec #1
19 [ 681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015
20 [ 681.774956] ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c
21 [ 681.774959] ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b
22 [ 681.774960] ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001
23 [ 681.774962] Call Trace:
24 [ 681.774967] [<ffffffffa95d2810>] dump_stack+0x4c/0x7f
25 [ 681.774970] [<ffffffffa91a44f4>] report_size_overflow+0x34/0x50
26 [ 681.774972] [<ffffffffa94d17e2>] qdisc_tree_decrease_qlen+0x152/0x160
27 [ 681.774976] [<ffffffffc02694b1>] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel]
28 [ 681.774978] [<ffffffffc02680a0>] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel]
29 [ 681.774980] [<ffffffffa94cd92d>] __qdisc_run+0x4d/0x1d0
30 [ 681.774983] [<ffffffffa949b2b2>] net_tx_action+0xc2/0x160
31 [ 681.774985] [<ffffffffa90664c1>] __do_softirq+0xf1/0x200
32 [ 681.774987] [<ffffffffa90665ee>] run_ksoftirqd+0x1e/0x30
33 [ 681.774989] [<ffffffffa90896b0>] smpboot_thread_fn+0x150/0x260
34 [ 681.774991] [<ffffffffa9089560>] ? sort_range+0x40/0x40
35 [ 681.774992] [<ffffffffa9085fe4>] kthread+0xe4/0x100
36 [ 681.774994] [<ffffffffa9085f00>] ? kthread_worker_fn+0x170/0x170
37 [ 681.774995] [<ffffffffa95d8d1e>] ret_from_fork+0x3e/0x70
38
39 mq/mqprio have their own ways to report qlen/drops by folding stats on
40 all their queues, with appropriate locking.
41
42 A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup()
43 without proper locking : concurrent qdisc updates could corrupt the list
44 that qdisc_match_from_root() parses to find a qdisc given its handle.
45
46 Fix first problem adding a TCQ_F_NOPARENT qdisc flag that
47 qdisc_tree_decrease_qlen() can use to abort its tree traversal,
48 as soon as it meets a mq/mqprio qdisc children.
49
50 Second problem can be fixed by RCU protection.
51 Qdisc are already freed after RCU grace period, so qdisc_list_add() and
52 qdisc_list_del() simply have to use appropriate rcu list variants.
53
54 A future patch will add a per struct netdev_queue list anchor, so that
55 qdisc_tree_decrease_qlen() can have more efficient lookups.
56
57 Reported-by: Daniele Fucini <dfucini@gmail.com>
58 Signed-off-by: Eric Dumazet <edumazet@google.com>
59 Cc: Cong Wang <cwang@twopensource.com>
60 Cc: Jamal Hadi Salim <jhs@mojatatu.com>
61 Signed-off-by: David S. Miller <davem@davemloft.net>
62 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
63 ---
64 include/net/sch_generic.h | 3 +++
65 net/sched/sch_api.c | 27 ++++++++++++++++++---------
66 net/sched/sch_generic.c | 2 +-
67 net/sched/sch_mq.c | 4 ++--
68 net/sched/sch_mqprio.c | 4 ++--
69 5 files changed, 26 insertions(+), 14 deletions(-)
70
71 --- a/include/net/sch_generic.h
72 +++ b/include/net/sch_generic.h
73 @@ -61,6 +61,9 @@ struct Qdisc {
74 */
75 #define TCQ_F_WARN_NONWC (1 << 16)
76 #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */
77 +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy :
78 + * qdisc_tree_decrease_qlen() should stop.
79 + */
80 u32 limit;
81 const struct Qdisc_ops *ops;
82 struct qdisc_size_table __rcu *stab;
83 --- a/net/sched/sch_api.c
84 +++ b/net/sched/sch_api.c
85 @@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
86 }
87
88 /* We know handle. Find qdisc among all qdisc's attached to device
89 - (root qdisc, all its children, children of children etc.)
90 + * (root qdisc, all its children, children of children etc.)
91 + * Note: caller either uses rtnl or rcu_read_lock()
92 */
93
94 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
95 @@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_ro
96 root->handle == handle)
97 return root;
98
99 - list_for_each_entry(q, &root->list, list) {
100 + list_for_each_entry_rcu(q, &root->list, list) {
101 if (q->handle == handle)
102 return q;
103 }
104 @@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
105 struct Qdisc *root = qdisc_dev(q)->qdisc;
106
107 WARN_ON_ONCE(root == &noop_qdisc);
108 - list_add_tail(&q->list, &root->list);
109 + ASSERT_RTNL();
110 + list_add_tail_rcu(&q->list, &root->list);
111 }
112 }
113 EXPORT_SYMBOL(qdisc_list_add);
114
115 void qdisc_list_del(struct Qdisc *q)
116 {
117 - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
118 - list_del(&q->list);
119 + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
120 + ASSERT_RTNL();
121 + list_del_rcu(&q->list);
122 + }
123 }
124 EXPORT_SYMBOL(qdisc_list_del);
125
126 @@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdi
127 if (n == 0)
128 return;
129 drops = max_t(int, n, 0);
130 + rcu_read_lock();
131 while ((parentid = sch->parent)) {
132 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
133 - return;
134 + break;
135
136 + if (sch->flags & TCQ_F_NOPARENT)
137 + break;
138 + /* TODO: perform the search on a per txq basis */
139 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
140 if (sch == NULL) {
141 - WARN_ON(parentid != TC_H_ROOT);
142 - return;
143 + WARN_ON_ONCE(parentid != TC_H_ROOT);
144 + break;
145 }
146 cops = sch->ops->cl_ops;
147 if (cops->qlen_notify) {
148 @@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdi
149 sch->q.qlen -= n;
150 __qdisc_qstats_drop(sch, drops);
151 }
152 + rcu_read_unlock();
153 }
154 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
155
156 @@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, str
157 }
158 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
159 if (!netif_is_multiqueue(dev))
160 - sch->flags |= TCQ_F_ONETXQUEUE;
161 + sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
162 }
163
164 sch->handle = handle;
165 --- a/net/sched/sch_generic.c
166 +++ b/net/sched/sch_generic.c
167 @@ -743,7 +743,7 @@ static void attach_one_default_qdisc(str
168 return;
169 }
170 if (!netif_is_multiqueue(dev))
171 - qdisc->flags |= TCQ_F_ONETXQUEUE;
172 + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
173 }
174 dev_queue->qdisc_sleeping = qdisc;
175 }
176 --- a/net/sched/sch_mq.c
177 +++ b/net/sched/sch_mq.c
178 @@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, st
179 if (qdisc == NULL)
180 goto err;
181 priv->qdiscs[ntx] = qdisc;
182 - qdisc->flags |= TCQ_F_ONETXQUEUE;
183 + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
184 }
185
186 sch->flags |= TCQ_F_MQROOT;
187 @@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, u
188
189 *old = dev_graft_qdisc(dev_queue, new);
190 if (new)
191 - new->flags |= TCQ_F_ONETXQUEUE;
192 + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
193 if (dev->flags & IFF_UP)
194 dev_activate(dev);
195 return 0;
196 --- a/net/sched/sch_mqprio.c
197 +++ b/net/sched/sch_mqprio.c
198 @@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch
199 goto err;
200 }
201 priv->qdiscs[i] = qdisc;
202 - qdisc->flags |= TCQ_F_ONETXQUEUE;
203 + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
204 }
205
206 /* If the mqprio options indicate that hardware should own
207 @@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sc
208 *old = dev_graft_qdisc(dev_queue, new);
209
210 if (new)
211 - new->flags |= TCQ_F_ONETXQUEUE;
212 + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
213
214 if (dev->flags & IFF_UP)
215 dev_activate(dev);