97bb1c57 AF |
1 | From: Eric Dumazet <edumazet@google.com> |
2 | Date: Fri, 11 May 2012 09:30:50 +0000 | |
3 | Subject: [PATCH 3/7] fq_codel: Fair Queue Codel AQM | |
4 | ||
5 | commit 4b549a2ef4bef9965d97cbd992ba67930cd3e0fe upstream. | |
6 | ||
7 | Fair Queue Codel packet scheduler | |
8 | ||
9 | Principles : | |
10 | ||
11 | - Packets are classified (internal classifier or external) on flows. | |
12 | - This is a Stochastic model (as we use a hash, several flows might | |
13 | be hashed on same slot) | |
14 | - Each flow has a CoDel managed queue. | |
15 | - Flows are linked onto two (Round Robin) lists, | |
16 | so that new flows have priority on old ones. | |
17 | ||
18 | - For a given flow, packets are not reordered (CoDel uses a FIFO) | |
19 | - head drops only. | |
20 | - ECN capability is on by default. | |
21 | - Very low memory footprint (64 bytes per flow) | |
22 | ||
23 | tc qdisc ... fq_codel [ limit PACKETS ] [ flows number ] | |
24 | [ target TIME ] [ interval TIME ] [ noecn ] | |
25 | [ quantum BYTES ] | |
26 | ||
27 | defaults : 1024 flows, 10240 packets limit, quantum : device MTU | |
28 | target : 5ms (CoDel default) | |
29 | interval : 100ms (CoDel default) | |
30 | ||
31 | Impressive results on load : | |
32 | ||
33 | class htb 1:1 root leaf 10: prio 0 quantum 1514 rate 200000Kbit ceil 200000Kbit burst 1475b/8 mpu 0b overhead 0b cburst 1475b/8 mpu 0b overhead 0b level 0 | |
34 | Sent 43304920109 bytes 33063109 pkt (dropped 0, overlimits 0 requeues 0) | |
35 | rate 201691Kbit 28595pps backlog 0b 312p requeues 0 | |
36 | lended: 33063109 borrowed: 0 giants: 0 | |
37 | tokens: -912 ctokens: -912 | |
38 | ||
39 | class fq_codel 10:1735 parent 10: | |
40 | (dropped 1292, overlimits 0 requeues 0) | |
41 | backlog 15140b 10p requeues 0 | |
42 | deficit 1514 count 1 lastcount 1 ldelay 7.1ms | |
43 | class fq_codel 10:4524 parent 10: | |
44 | (dropped 1291, overlimits 0 requeues 0) | |
45 | backlog 16654b 11p requeues 0 | |
46 | deficit 1514 count 1 lastcount 1 ldelay 7.1ms | |
47 | class fq_codel 10:4e74 parent 10: | |
48 | (dropped 1290, overlimits 0 requeues 0) | |
49 | backlog 6056b 4p requeues 0 | |
50 | deficit 1514 count 1 lastcount 1 ldelay 6.4ms dropping drop_next 92.0ms | |
51 | class fq_codel 10:628a parent 10: | |
52 | (dropped 1289, overlimits 0 requeues 0) | |
53 | backlog 7570b 5p requeues 0 | |
54 | deficit 1514 count 1 lastcount 1 ldelay 5.4ms dropping drop_next 90.9ms | |
55 | class fq_codel 10:a4b3 parent 10: | |
56 | (dropped 302, overlimits 0 requeues 0) | |
57 | backlog 16654b 11p requeues 0 | |
58 | deficit 1514 count 1 lastcount 1 ldelay 7.1ms | |
59 | class fq_codel 10:c3c2 parent 10: | |
60 | (dropped 1284, overlimits 0 requeues 0) | |
61 | backlog 13626b 9p requeues 0 | |
62 | deficit 1514 count 1 lastcount 1 ldelay 5.9ms | |
63 | class fq_codel 10:d331 parent 10: | |
64 | (dropped 299, overlimits 0 requeues 0) | |
65 | backlog 15140b 10p requeues 0 | |
66 | deficit 1514 count 1 lastcount 1 ldelay 7.0ms | |
67 | class fq_codel 10:d526 parent 10: | |
68 | (dropped 12160, overlimits 0 requeues 0) | |
69 | backlog 35870b 211p requeues 0 | |
70 | deficit 1508 count 12160 lastcount 1 ldelay 15.3ms dropping drop_next 247us | |
71 | class fq_codel 10:e2c6 parent 10: | |
72 | (dropped 1288, overlimits 0 requeues 0) | |
73 | backlog 15140b 10p requeues 0 | |
74 | deficit 1514 count 1 lastcount 1 ldelay 7.1ms | |
75 | class fq_codel 10:eab5 parent 10: | |
76 | (dropped 1285, overlimits 0 requeues 0) | |
77 | backlog 16654b 11p requeues 0 | |
78 | deficit 1514 count 1 lastcount 1 ldelay 5.9ms | |
79 | class fq_codel 10:f220 parent 10: | |
80 | (dropped 1289, overlimits 0 requeues 0) | |
81 | backlog 15140b 10p requeues 0 | |
82 | deficit 1514 count 1 lastcount 1 ldelay 7.1ms | |
83 | ||
84 | qdisc htb 1: root refcnt 6 r2q 10 default 1 direct_packets_stat 0 ver 3.17 | |
85 | Sent 43331086547 bytes 33092812 pkt (dropped 0, overlimits 66063544 requeues 71) | |
86 | rate 201697Kbit 28602pps backlog 0b 260p requeues 71 | |
87 | qdisc fq_codel 10: parent 1:1 limit 10240p flows 65536 target 5.0ms interval 100.0ms ecn | |
88 | Sent 43331086547 bytes 33092812 pkt (dropped 949359, overlimits 0 requeues 0) | |
89 | rate 201697Kbit 28602pps backlog 189352b 260p requeues 0 | |
90 | maxpacket 1514 drop_overlimit 0 new_flow_count 5582 ecn_mark 125593 | |
91 | new_flows_len 0 old_flows_len 11 | |
92 | ||
93 | PING 172.30.42.18 (172.30.42.18) 56(84) bytes of data. | |
94 | 64 bytes from 172.30.42.18: icmp_req=1 ttl=64 time=0.227 ms | |
95 | 64 bytes from 172.30.42.18: icmp_req=2 ttl=64 time=0.165 ms | |
96 | 64 bytes from 172.30.42.18: icmp_req=3 ttl=64 time=0.166 ms | |
97 | 64 bytes from 172.30.42.18: icmp_req=4 ttl=64 time=0.151 ms | |
98 | 64 bytes from 172.30.42.18: icmp_req=5 ttl=64 time=0.164 ms | |
99 | 64 bytes from 172.30.42.18: icmp_req=6 ttl=64 time=0.172 ms | |
100 | 64 bytes from 172.30.42.18: icmp_req=7 ttl=64 time=0.175 ms | |
101 | 64 bytes from 172.30.42.18: icmp_req=8 ttl=64 time=0.183 ms | |
102 | 64 bytes from 172.30.42.18: icmp_req=9 ttl=64 time=0.158 ms | |
103 | 64 bytes from 172.30.42.18: icmp_req=10 ttl=64 time=0.200 ms | |
104 | ||
105 | 10 packets transmitted, 10 received, 0% packet loss, time 8999ms | |
106 | rtt min/avg/max/mdev = 0.151/0.176/0.227/0.022 ms | |
107 | ||
108 | Much better than SFQ because of priority given to new flows, and fast | |
109 | path dirtying less cache lines. | |
110 | ||
111 | Signed-off-by: Eric Dumazet <edumazet@google.com> | |
112 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
113 | --- | |
114 | include/linux/pkt_sched.h | 54 ++++ | |
115 | net/sched/Kconfig | 11 + | |
116 | net/sched/Makefile | 1 + | |
117 | net/sched/sch_fq_codel.c | 624 +++++++++++++++++++++++++++++++++++++++++++++ | |
118 | 4 files changed, 690 insertions(+) | |
119 | create mode 100644 net/sched/sch_fq_codel.c | |
120 | ||
121 | diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h | |
122 | index cde56c2..32aef0a 100644 | |
123 | --- a/include/linux/pkt_sched.h | |
124 | +++ b/include/linux/pkt_sched.h | |
125 | @@ -681,4 +681,58 @@ struct tc_codel_xstats { | |
126 | __u32 dropping; /* are we in dropping state ? */ | |
127 | }; | |
128 | ||
129 | +/* FQ_CODEL */ | |
130 | + | |
131 | +enum { | |
132 | + TCA_FQ_CODEL_UNSPEC, | |
133 | + TCA_FQ_CODEL_TARGET, | |
134 | + TCA_FQ_CODEL_LIMIT, | |
135 | + TCA_FQ_CODEL_INTERVAL, | |
136 | + TCA_FQ_CODEL_ECN, | |
137 | + TCA_FQ_CODEL_FLOWS, | |
138 | + TCA_FQ_CODEL_QUANTUM, | |
139 | + __TCA_FQ_CODEL_MAX | |
140 | +}; | |
141 | + | |
142 | +#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) | |
143 | + | |
144 | +enum { | |
145 | + TCA_FQ_CODEL_XSTATS_QDISC, | |
146 | + TCA_FQ_CODEL_XSTATS_CLASS, | |
147 | +}; | |
148 | + | |
149 | +struct tc_fq_codel_qd_stats { | |
150 | + __u32 maxpacket; /* largest packet we've seen so far */ | |
151 | + __u32 drop_overlimit; /* number of time max qdisc | |
152 | + * packet limit was hit | |
153 | + */ | |
154 | + __u32 ecn_mark; /* number of packets we ECN marked | |
155 | + * instead of being dropped | |
156 | + */ | |
157 | + __u32 new_flow_count; /* number of time packets | |
158 | + * created a 'new flow' | |
159 | + */ | |
160 | + __u32 new_flows_len; /* count of flows in new list */ | |
161 | + __u32 old_flows_len; /* count of flows in old list */ | |
162 | +}; | |
163 | + | |
164 | +struct tc_fq_codel_cl_stats { | |
165 | + __s32 deficit; | |
166 | + __u32 ldelay; /* in-queue delay seen by most recently | |
167 | + * dequeued packet | |
168 | + */ | |
169 | + __u32 count; | |
170 | + __u32 lastcount; | |
171 | + __u32 dropping; | |
172 | + __s32 drop_next; | |
173 | +}; | |
174 | + | |
175 | +struct tc_fq_codel_xstats { | |
176 | + __u32 type; | |
177 | + union { | |
178 | + struct tc_fq_codel_qd_stats qdisc_stats; | |
179 | + struct tc_fq_codel_cl_stats class_stats; | |
180 | + }; | |
181 | +}; | |
182 | + | |
183 | #endif | |
184 | diff --git a/net/sched/Kconfig b/net/sched/Kconfig | |
185 | index fadd252..e7a8976 100644 | |
186 | --- a/net/sched/Kconfig | |
187 | +++ b/net/sched/Kconfig | |
188 | @@ -261,6 +261,17 @@ config NET_SCH_CODEL | |
189 | ||
190 | If unsure, say N. | |
191 | ||
192 | +config NET_SCH_FQ_CODEL | |
193 | + tristate "Fair Queue Controlled Delay AQM (FQ_CODEL)" | |
194 | + help | |
195 | + Say Y here if you want to use the FQ Controlled Delay (FQ_CODEL) | |
196 | + packet scheduling algorithm. | |
197 | + | |
198 | + To compile this driver as a module, choose M here: the module | |
199 | + will be called sch_fq_codel. | |
200 | + | |
201 | + If unsure, say N. | |
202 | + | |
203 | config NET_SCH_INGRESS | |
204 | tristate "Ingress Qdisc" | |
205 | depends on NET_CLS_ACT | |
206 | diff --git a/net/sched/Makefile b/net/sched/Makefile | |
207 | index 30fab03..5940a19 100644 | |
208 | --- a/net/sched/Makefile | |
209 | +++ b/net/sched/Makefile | |
210 | @@ -38,6 +38,7 @@ obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o | |
211 | obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o | |
212 | obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o | |
213 | obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o | |
214 | +obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o | |
215 | ||
216 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o | |
217 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o | |
218 | diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c | |
219 | new file mode 100644 | |
220 | index 0000000..a7b3754 | |
221 | --- /dev/null | |
222 | +++ b/net/sched/sch_fq_codel.c | |
223 | @@ -0,0 +1,624 @@ | |
224 | +/* | |
225 | + * Fair Queue CoDel discipline | |
226 | + * | |
227 | + * This program is free software; you can redistribute it and/or | |
228 | + * modify it under the terms of the GNU General Public License | |
229 | + * as published by the Free Software Foundation; either version | |
230 | + * 2 of the License, or (at your option) any later version. | |
231 | + * | |
232 | + * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> | |
233 | + */ | |
234 | + | |
235 | +#include <linux/module.h> | |
236 | +#include <linux/types.h> | |
237 | +#include <linux/kernel.h> | |
238 | +#include <linux/jiffies.h> | |
239 | +#include <linux/string.h> | |
240 | +#include <linux/in.h> | |
241 | +#include <linux/errno.h> | |
242 | +#include <linux/init.h> | |
243 | +#include <linux/skbuff.h> | |
244 | +#include <linux/jhash.h> | |
245 | +#include <linux/slab.h> | |
246 | +#include <linux/vmalloc.h> | |
247 | +#include <net/netlink.h> | |
248 | +#include <net/pkt_sched.h> | |
249 | +#include <net/flow_keys.h> | |
250 | +#include <net/codel.h> | |
251 | + | |
252 | +/* Fair Queue CoDel. | |
253 | + * | |
254 | + * Principles : | |
255 | + * Packets are classified (internal classifier or external) on flows. | |
256 | + * This is a Stochastic model (as we use a hash, several flows | |
257 | + * might be hashed on same slot) | |
258 | + * Each flow has a CoDel managed queue. | |
259 | + * Flows are linked onto two (Round Robin) lists, | |
260 | + * so that new flows have priority on old ones. | |
261 | + * | |
262 | + * For a given flow, packets are not reordered (CoDel uses a FIFO) | |
263 | + * head drops only. | |
264 | + * ECN capability is on by default. | |
265 | + * Low memory footprint (64 bytes per flow) | |
266 | + */ | |
267 | + | |
268 | +struct fq_codel_flow { | |
269 | + struct sk_buff *head; | |
270 | + struct sk_buff *tail; | |
271 | + struct list_head flowchain; | |
272 | + int deficit; | |
273 | + u32 dropped; /* number of drops (or ECN marks) on this flow */ | |
274 | + struct codel_vars cvars; | |
275 | +}; /* please try to keep this structure <= 64 bytes */ | |
276 | + | |
277 | +struct fq_codel_sched_data { | |
278 | + struct tcf_proto *filter_list; /* optional external classifier */ | |
279 | + struct fq_codel_flow *flows; /* Flows table [flows_cnt] */ | |
280 | + u32 *backlogs; /* backlog table [flows_cnt] */ | |
281 | + u32 flows_cnt; /* number of flows */ | |
282 | + u32 perturbation; /* hash perturbation */ | |
283 | + u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ | |
284 | + struct codel_params cparams; | |
285 | + struct codel_stats cstats; | |
286 | + u32 drop_overlimit; | |
287 | + u32 new_flow_count; | |
288 | + | |
289 | + struct list_head new_flows; /* list of new flows */ | |
290 | + struct list_head old_flows; /* list of old flows */ | |
291 | +}; | |
292 | + | |
293 | +static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q, | |
294 | + const struct sk_buff *skb) | |
295 | +{ | |
296 | + struct flow_keys keys; | |
297 | + unsigned int hash; | |
298 | + | |
299 | + skb_flow_dissect(skb, &keys); | |
300 | + hash = jhash_3words((__force u32)keys.dst, | |
301 | + (__force u32)keys.src ^ keys.ip_proto, | |
302 | + (__force u32)keys.ports, q->perturbation); | |
303 | + return ((u64)hash * q->flows_cnt) >> 32; | |
304 | +} | |
305 | + | |
306 | +static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, | |
307 | + int *qerr) | |
308 | +{ | |
309 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
310 | + struct tcf_result res; | |
311 | + int result; | |
312 | + | |
313 | + if (TC_H_MAJ(skb->priority) == sch->handle && | |
314 | + TC_H_MIN(skb->priority) > 0 && | |
315 | + TC_H_MIN(skb->priority) <= q->flows_cnt) | |
316 | + return TC_H_MIN(skb->priority); | |
317 | + | |
318 | + if (!q->filter_list) | |
319 | + return fq_codel_hash(q, skb) + 1; | |
320 | + | |
321 | + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | |
322 | + result = tc_classify(skb, q->filter_list, &res); | |
323 | + if (result >= 0) { | |
324 | +#ifdef CONFIG_NET_CLS_ACT | |
325 | + switch (result) { | |
326 | + case TC_ACT_STOLEN: | |
327 | + case TC_ACT_QUEUED: | |
328 | + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | |
329 | + case TC_ACT_SHOT: | |
330 | + return 0; | |
331 | + } | |
332 | +#endif | |
333 | + if (TC_H_MIN(res.classid) <= q->flows_cnt) | |
334 | + return TC_H_MIN(res.classid); | |
335 | + } | |
336 | + return 0; | |
337 | +} | |
338 | + | |
339 | +/* helper functions : might be changed when/if skb use a standard list_head */ | |
340 | + | |
341 | +/* remove one skb from head of slot queue */ | |
342 | +static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow) | |
343 | +{ | |
344 | + struct sk_buff *skb = flow->head; | |
345 | + | |
346 | + flow->head = skb->next; | |
347 | + skb->next = NULL; | |
348 | + return skb; | |
349 | +} | |
350 | + | |
351 | +/* add skb to flow queue (tail add) */ | |
352 | +static inline void flow_queue_add(struct fq_codel_flow *flow, | |
353 | + struct sk_buff *skb) | |
354 | +{ | |
355 | + if (flow->head == NULL) | |
356 | + flow->head = skb; | |
357 | + else | |
358 | + flow->tail->next = skb; | |
359 | + flow->tail = skb; | |
360 | + skb->next = NULL; | |
361 | +} | |
362 | + | |
363 | +static unsigned int fq_codel_drop(struct Qdisc *sch) | |
364 | +{ | |
365 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
366 | + struct sk_buff *skb; | |
367 | + unsigned int maxbacklog = 0, idx = 0, i, len; | |
368 | + struct fq_codel_flow *flow; | |
369 | + | |
370 | + /* Queue is full! Find the fat flow and drop packet from it. | |
371 | + * This might sound expensive, but with 1024 flows, we scan | |
372 | + * 4KB of memory, and we dont need to handle a complex tree | |
373 | + * in fast path (packet queue/enqueue) with many cache misses. | |
374 | + */ | |
375 | + for (i = 0; i < q->flows_cnt; i++) { | |
376 | + if (q->backlogs[i] > maxbacklog) { | |
377 | + maxbacklog = q->backlogs[i]; | |
378 | + idx = i; | |
379 | + } | |
380 | + } | |
381 | + flow = &q->flows[idx]; | |
382 | + skb = dequeue_head(flow); | |
383 | + len = qdisc_pkt_len(skb); | |
384 | + q->backlogs[idx] -= len; | |
385 | + kfree_skb(skb); | |
386 | + sch->q.qlen--; | |
387 | + sch->qstats.drops++; | |
388 | + sch->qstats.backlog -= len; | |
389 | + flow->dropped++; | |
390 | + return idx; | |
391 | +} | |
392 | + | |
393 | +static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |
394 | +{ | |
395 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
396 | + unsigned int idx; | |
397 | + struct fq_codel_flow *flow; | |
398 | + int uninitialized_var(ret); | |
399 | + | |
400 | + idx = fq_codel_classify(skb, sch, &ret); | |
401 | + if (idx == 0) { | |
402 | + if (ret & __NET_XMIT_BYPASS) | |
403 | + sch->qstats.drops++; | |
404 | + kfree_skb(skb); | |
405 | + return ret; | |
406 | + } | |
407 | + idx--; | |
408 | + | |
409 | + codel_set_enqueue_time(skb); | |
410 | + flow = &q->flows[idx]; | |
411 | + flow_queue_add(flow, skb); | |
412 | + q->backlogs[idx] += qdisc_pkt_len(skb); | |
413 | + sch->qstats.backlog += qdisc_pkt_len(skb); | |
414 | + | |
415 | + if (list_empty(&flow->flowchain)) { | |
416 | + list_add_tail(&flow->flowchain, &q->new_flows); | |
417 | + codel_vars_init(&flow->cvars); | |
418 | + q->new_flow_count++; | |
419 | + flow->deficit = q->quantum; | |
420 | + flow->dropped = 0; | |
421 | + } | |
422 | + if (++sch->q.qlen < sch->limit) | |
423 | + return NET_XMIT_SUCCESS; | |
424 | + | |
425 | + q->drop_overlimit++; | |
426 | + /* Return Congestion Notification only if we dropped a packet | |
427 | + * from this flow. | |
428 | + */ | |
429 | + if (fq_codel_drop(sch) == idx) | |
430 | + return NET_XMIT_CN; | |
431 | + | |
432 | + /* As we dropped a packet, better let upper stack know this */ | |
433 | + qdisc_tree_decrease_qlen(sch, 1); | |
434 | + return NET_XMIT_SUCCESS; | |
435 | +} | |
436 | + | |
437 | +/* This is the specific function called from codel_dequeue() | |
438 | + * to dequeue a packet from queue. Note: backlog is handled in | |
439 | + * codel, we dont need to reduce it here. | |
440 | + */ | |
441 | +static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch) | |
442 | +{ | |
443 | + struct fq_codel_flow *flow; | |
444 | + struct sk_buff *skb = NULL; | |
445 | + | |
446 | + flow = container_of(vars, struct fq_codel_flow, cvars); | |
447 | + if (flow->head) { | |
448 | + skb = dequeue_head(flow); | |
449 | + sch->qstats.backlog -= qdisc_pkt_len(skb); | |
450 | + sch->q.qlen--; | |
451 | + } | |
452 | + return skb; | |
453 | +} | |
454 | + | |
455 | +static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) | |
456 | +{ | |
457 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
458 | + struct sk_buff *skb; | |
459 | + struct fq_codel_flow *flow; | |
460 | + struct list_head *head; | |
461 | + u32 prev_drop_count, prev_ecn_mark; | |
462 | + | |
463 | +begin: | |
464 | + head = &q->new_flows; | |
465 | + if (list_empty(head)) { | |
466 | + head = &q->old_flows; | |
467 | + if (list_empty(head)) | |
468 | + return NULL; | |
469 | + } | |
470 | + flow = list_first_entry(head, struct fq_codel_flow, flowchain); | |
471 | + | |
472 | + if (flow->deficit <= 0) { | |
473 | + flow->deficit += q->quantum; | |
474 | + list_move_tail(&flow->flowchain, &q->old_flows); | |
475 | + goto begin; | |
476 | + } | |
477 | + | |
478 | + prev_drop_count = q->cstats.drop_count; | |
479 | + prev_ecn_mark = q->cstats.ecn_mark; | |
480 | + | |
481 | + skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, | |
482 | + dequeue, &q->backlogs[flow - q->flows]); | |
483 | + | |
484 | + flow->dropped += q->cstats.drop_count - prev_drop_count; | |
485 | + flow->dropped += q->cstats.ecn_mark - prev_ecn_mark; | |
486 | + | |
487 | + if (!skb) { | |
488 | + /* force a pass through old_flows to prevent starvation */ | |
489 | + if ((head == &q->new_flows) && !list_empty(&q->old_flows)) | |
490 | + list_move_tail(&flow->flowchain, &q->old_flows); | |
491 | + else | |
492 | + list_del_init(&flow->flowchain); | |
493 | + goto begin; | |
494 | + } | |
495 | + qdisc_bstats_update(sch, skb); | |
496 | + flow->deficit -= qdisc_pkt_len(skb); | |
497 | + /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, | |
498 | + * or HTB crashes. Defer it for next round. | |
499 | + */ | |
500 | + if (q->cstats.drop_count && sch->q.qlen) { | |
501 | + qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); | |
502 | + q->cstats.drop_count = 0; | |
503 | + } | |
504 | + return skb; | |
505 | +} | |
506 | + | |
507 | +static void fq_codel_reset(struct Qdisc *sch) | |
508 | +{ | |
509 | + struct sk_buff *skb; | |
510 | + | |
511 | + while ((skb = fq_codel_dequeue(sch)) != NULL) | |
512 | + kfree_skb(skb); | |
513 | +} | |
514 | + | |
515 | +static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { | |
516 | + [TCA_FQ_CODEL_TARGET] = { .type = NLA_U32 }, | |
517 | + [TCA_FQ_CODEL_LIMIT] = { .type = NLA_U32 }, | |
518 | + [TCA_FQ_CODEL_INTERVAL] = { .type = NLA_U32 }, | |
519 | + [TCA_FQ_CODEL_ECN] = { .type = NLA_U32 }, | |
520 | + [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, | |
521 | + [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, | |
522 | +}; | |
523 | + | |
524 | +static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) | |
525 | +{ | |
526 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
527 | + struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; | |
528 | + int err; | |
529 | + | |
530 | + if (!opt) | |
531 | + return -EINVAL; | |
532 | + | |
533 | + err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy); | |
534 | + if (err < 0) | |
535 | + return err; | |
536 | + if (tb[TCA_FQ_CODEL_FLOWS]) { | |
537 | + if (q->flows) | |
538 | + return -EINVAL; | |
539 | + q->flows_cnt = nla_get_u32(tb[TCA_FQ_CODEL_FLOWS]); | |
540 | + if (!q->flows_cnt || | |
541 | + q->flows_cnt > 65536) | |
542 | + return -EINVAL; | |
543 | + } | |
544 | + sch_tree_lock(sch); | |
545 | + | |
546 | + if (tb[TCA_FQ_CODEL_TARGET]) { | |
547 | + u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]); | |
548 | + | |
549 | + q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT; | |
550 | + } | |
551 | + | |
552 | + if (tb[TCA_FQ_CODEL_INTERVAL]) { | |
553 | + u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); | |
554 | + | |
555 | + q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT; | |
556 | + } | |
557 | + | |
558 | + if (tb[TCA_FQ_CODEL_LIMIT]) | |
559 | + sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]); | |
560 | + | |
561 | + if (tb[TCA_FQ_CODEL_ECN]) | |
562 | + q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]); | |
563 | + | |
564 | + if (tb[TCA_FQ_CODEL_QUANTUM]) | |
565 | + q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); | |
566 | + | |
567 | + while (sch->q.qlen > sch->limit) { | |
568 | + struct sk_buff *skb = fq_codel_dequeue(sch); | |
569 | + | |
570 | + kfree_skb(skb); | |
571 | + q->cstats.drop_count++; | |
572 | + } | |
573 | + qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); | |
574 | + q->cstats.drop_count = 0; | |
575 | + | |
576 | + sch_tree_unlock(sch); | |
577 | + return 0; | |
578 | +} | |
579 | + | |
580 | +static void *fq_codel_zalloc(size_t sz) | |
581 | +{ | |
582 | + void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN); | |
583 | + | |
584 | + if (!ptr) | |
585 | + ptr = vzalloc(sz); | |
586 | + return ptr; | |
587 | +} | |
588 | + | |
589 | +static void fq_codel_free(void *addr) | |
590 | +{ | |
591 | + if (addr) { | |
592 | + if (is_vmalloc_addr(addr)) | |
593 | + vfree(addr); | |
594 | + else | |
595 | + kfree(addr); | |
596 | + } | |
597 | +} | |
598 | + | |
599 | +static void fq_codel_destroy(struct Qdisc *sch) | |
600 | +{ | |
601 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
602 | + | |
603 | + tcf_destroy_chain(&q->filter_list); | |
604 | + fq_codel_free(q->backlogs); | |
605 | + fq_codel_free(q->flows); | |
606 | +} | |
607 | + | |
608 | +static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) | |
609 | +{ | |
610 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
611 | + int i; | |
612 | + | |
613 | + sch->limit = 10*1024; | |
614 | + q->flows_cnt = 1024; | |
615 | + q->quantum = psched_mtu(qdisc_dev(sch)); | |
616 | + q->perturbation = net_random(); | |
617 | + INIT_LIST_HEAD(&q->new_flows); | |
618 | + INIT_LIST_HEAD(&q->old_flows); | |
619 | + codel_params_init(&q->cparams); | |
620 | + codel_stats_init(&q->cstats); | |
621 | + q->cparams.ecn = true; | |
622 | + | |
623 | + if (opt) { | |
624 | + int err = fq_codel_change(sch, opt); | |
625 | + if (err) | |
626 | + return err; | |
627 | + } | |
628 | + | |
629 | + if (!q->flows) { | |
630 | + q->flows = fq_codel_zalloc(q->flows_cnt * | |
631 | + sizeof(struct fq_codel_flow)); | |
632 | + if (!q->flows) | |
633 | + return -ENOMEM; | |
634 | + q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32)); | |
635 | + if (!q->backlogs) { | |
636 | + fq_codel_free(q->flows); | |
637 | + return -ENOMEM; | |
638 | + } | |
639 | + for (i = 0; i < q->flows_cnt; i++) { | |
640 | + struct fq_codel_flow *flow = q->flows + i; | |
641 | + | |
642 | + INIT_LIST_HEAD(&flow->flowchain); | |
643 | + } | |
644 | + } | |
645 | + if (sch->limit >= 1) | |
646 | + sch->flags |= TCQ_F_CAN_BYPASS; | |
647 | + else | |
648 | + sch->flags &= ~TCQ_F_CAN_BYPASS; | |
649 | + return 0; | |
650 | +} | |
651 | + | |
652 | +static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) | |
653 | +{ | |
654 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
655 | + struct nlattr *opts; | |
656 | + | |
657 | + opts = nla_nest_start(skb, TCA_OPTIONS); | |
658 | + if (opts == NULL) | |
659 | + goto nla_put_failure; | |
660 | + | |
661 | + if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET, | |
662 | + codel_time_to_us(q->cparams.target)) || | |
663 | + nla_put_u32(skb, TCA_FQ_CODEL_LIMIT, | |
664 | + sch->limit) || | |
665 | + nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL, | |
666 | + codel_time_to_us(q->cparams.interval)) || | |
667 | + nla_put_u32(skb, TCA_FQ_CODEL_ECN, | |
668 | + q->cparams.ecn) || | |
669 | + nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM, | |
670 | + q->quantum) || | |
671 | + nla_put_u32(skb, TCA_FQ_CODEL_FLOWS, | |
672 | + q->flows_cnt)) | |
673 | + goto nla_put_failure; | |
674 | + | |
675 | + nla_nest_end(skb, opts); | |
676 | + return skb->len; | |
677 | + | |
678 | +nla_put_failure: | |
679 | + return -1; | |
680 | +} | |
681 | + | |
682 | +static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | |
683 | +{ | |
684 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
685 | + struct tc_fq_codel_xstats st = { | |
686 | + .type = TCA_FQ_CODEL_XSTATS_QDISC, | |
687 | + .qdisc_stats.maxpacket = q->cstats.maxpacket, | |
688 | + .qdisc_stats.drop_overlimit = q->drop_overlimit, | |
689 | + .qdisc_stats.ecn_mark = q->cstats.ecn_mark, | |
690 | + .qdisc_stats.new_flow_count = q->new_flow_count, | |
691 | + }; | |
692 | + struct list_head *pos; | |
693 | + | |
694 | + list_for_each(pos, &q->new_flows) | |
695 | + st.qdisc_stats.new_flows_len++; | |
696 | + | |
697 | + list_for_each(pos, &q->old_flows) | |
698 | + st.qdisc_stats.old_flows_len++; | |
699 | + | |
700 | + return gnet_stats_copy_app(d, &st, sizeof(st)); | |
701 | +} | |
702 | + | |
703 | +static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg) | |
704 | +{ | |
705 | + return NULL; | |
706 | +} | |
707 | + | |
708 | +static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid) | |
709 | +{ | |
710 | + return 0; | |
711 | +} | |
712 | + | |
713 | +static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent, | |
714 | + u32 classid) | |
715 | +{ | |
716 | + /* we cannot bypass queue discipline anymore */ | |
717 | + sch->flags &= ~TCQ_F_CAN_BYPASS; | |
718 | + return 0; | |
719 | +} | |
720 | + | |
721 | +static void fq_codel_put(struct Qdisc *q, unsigned long cl) | |
722 | +{ | |
723 | +} | |
724 | + | |
725 | +static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl) | |
726 | +{ | |
727 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
728 | + | |
729 | + if (cl) | |
730 | + return NULL; | |
731 | + return &q->filter_list; | |
732 | +} | |
733 | + | |
734 | +static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl, | |
735 | + struct sk_buff *skb, struct tcmsg *tcm) | |
736 | +{ | |
737 | + tcm->tcm_handle |= TC_H_MIN(cl); | |
738 | + return 0; | |
739 | +} | |
740 | + | |
741 | +static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, | |
742 | + struct gnet_dump *d) | |
743 | +{ | |
744 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
745 | + u32 idx = cl - 1; | |
746 | + struct gnet_stats_queue qs = { 0 }; | |
747 | + struct tc_fq_codel_xstats xstats; | |
748 | + | |
749 | + if (idx < q->flows_cnt) { | |
750 | + const struct fq_codel_flow *flow = &q->flows[idx]; | |
751 | + const struct sk_buff *skb = flow->head; | |
752 | + | |
753 | + memset(&xstats, 0, sizeof(xstats)); | |
754 | + xstats.type = TCA_FQ_CODEL_XSTATS_CLASS; | |
755 | + xstats.class_stats.deficit = flow->deficit; | |
756 | + xstats.class_stats.ldelay = | |
757 | + codel_time_to_us(flow->cvars.ldelay); | |
758 | + xstats.class_stats.count = flow->cvars.count; | |
759 | + xstats.class_stats.lastcount = flow->cvars.lastcount; | |
760 | + xstats.class_stats.dropping = flow->cvars.dropping; | |
761 | + if (flow->cvars.dropping) { | |
762 | + codel_tdiff_t delta = flow->cvars.drop_next - | |
763 | + codel_get_time(); | |
764 | + | |
765 | + xstats.class_stats.drop_next = (delta >= 0) ? | |
766 | + codel_time_to_us(delta) : | |
767 | + -codel_time_to_us(-delta); | |
768 | + } | |
769 | + while (skb) { | |
770 | + qs.qlen++; | |
771 | + skb = skb->next; | |
772 | + } | |
773 | + qs.backlog = q->backlogs[idx]; | |
774 | + qs.drops = flow->dropped; | |
775 | + } | |
776 | + if (gnet_stats_copy_queue(d, &qs) < 0) | |
777 | + return -1; | |
778 | + if (idx < q->flows_cnt) | |
779 | + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); | |
780 | + return 0; | |
781 | +} | |
782 | + | |
783 | +static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |
784 | +{ | |
785 | + struct fq_codel_sched_data *q = qdisc_priv(sch); | |
786 | + unsigned int i; | |
787 | + | |
788 | + if (arg->stop) | |
789 | + return; | |
790 | + | |
791 | + for (i = 0; i < q->flows_cnt; i++) { | |
792 | + if (list_empty(&q->flows[i].flowchain) || | |
793 | + arg->count < arg->skip) { | |
794 | + arg->count++; | |
795 | + continue; | |
796 | + } | |
797 | + if (arg->fn(sch, i + 1, arg) < 0) { | |
798 | + arg->stop = 1; | |
799 | + break; | |
800 | + } | |
801 | + arg->count++; | |
802 | + } | |
803 | +} | |
804 | + | |
805 | +static const struct Qdisc_class_ops fq_codel_class_ops = { | |
806 | + .leaf = fq_codel_leaf, | |
807 | + .get = fq_codel_get, | |
808 | + .put = fq_codel_put, | |
809 | + .tcf_chain = fq_codel_find_tcf, | |
810 | + .bind_tcf = fq_codel_bind, | |
811 | + .unbind_tcf = fq_codel_put, | |
812 | + .dump = fq_codel_dump_class, | |
813 | + .dump_stats = fq_codel_dump_class_stats, | |
814 | + .walk = fq_codel_walk, | |
815 | +}; | |
816 | + | |
817 | +static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { | |
818 | + .cl_ops = &fq_codel_class_ops, | |
819 | + .id = "fq_codel", | |
820 | + .priv_size = sizeof(struct fq_codel_sched_data), | |
821 | + .enqueue = fq_codel_enqueue, | |
822 | + .dequeue = fq_codel_dequeue, | |
823 | + .peek = qdisc_peek_dequeued, | |
824 | + .drop = fq_codel_drop, | |
825 | + .init = fq_codel_init, | |
826 | + .reset = fq_codel_reset, | |
827 | + .destroy = fq_codel_destroy, | |
828 | + .change = fq_codel_change, | |
829 | + .dump = fq_codel_dump, | |
830 | + .dump_stats = fq_codel_dump_stats, | |
831 | + .owner = THIS_MODULE, | |
832 | +}; | |
833 | + | |
834 | +static int __init fq_codel_module_init(void) | |
835 | +{ | |
836 | + return register_qdisc(&fq_codel_qdisc_ops); | |
837 | +} | |
838 | + | |
839 | +static void __exit fq_codel_module_exit(void) | |
840 | +{ | |
841 | + unregister_qdisc(&fq_codel_qdisc_ops); | |
842 | +} | |
843 | + | |
844 | +module_init(fq_codel_module_init) | |
845 | +module_exit(fq_codel_module_exit) | |
846 | +MODULE_AUTHOR("Eric Dumazet"); | |
847 | +MODULE_LICENSE("GPL"); | |
848 | -- | |
849 | 1.7.10 | |
850 |