From: Patrick McHardy Date: Wed, 28 Mar 2007 19:31:36 +0000 (+0200) Subject: [NET_SCHED]: Fix endless loops caused by inaccurate qlen counters X-Git-Tag: v2.6.16.46-rc1~19 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=39740872332b2dd28b479d9d8333c42c32465953;p=thirdparty%2Fkernel%2Fstable.git [NET_SCHED]: Fix endless loops caused by inaccurate qlen counters There are multiple problems related to qlen adjustment that can lead to an upper qdisc getting out of sync with the real number of packets queued, leading to endless dequeueing attempts by the upper layer code. All qdiscs must maintain an accurate q.qlen counter. There are basically two groups of operations affecting the qlen: operations that propagate down the tree (enqueue, dequeue, requeue, drop, reset) beginning at the root qdisc and operations only affecting a subtree or single qdisc (change, graft, delete class). Since qlen changes during operations from the second group don't propagate to ancestor qdiscs, their qlen values become desynchronized. This patch adds a function to propagate qlen changes up the qdisc tree, optionally calling a callback function to perform qdisc-internal maintenance when the child qdisc is deactivated, and converts all qdiscs to use this where necessary. Signed-off-by: Patrick McHardy Signed-off-by: Adrian Bunk --- diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7b6ec99867157..66101a9ccd2b5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,6 +61,7 @@ struct Qdisc_class_ops int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **); struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); + void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ unsigned long (*get)(struct Qdisc *, u32 classid); @@ -173,9 +174,10 @@ extern void dev_activate(struct net_device *dev); extern void dev_deactivate(struct net_device *dev); extern void qdisc_reset(struct Qdisc *qdisc); extern void qdisc_destroy(struct Qdisc *qdisc); +extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops); extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, - struct Qdisc_ops *ops); + struct Qdisc_ops *ops, u32 parentid); static inline void tcf_destroy(struct tcf_proto *tp) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9e315bcbc0726..c7f17dd08c9e9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -192,21 +192,28 @@ int unregister_qdisc(struct Qdisc_ops *qops) (root qdisc, all its children, children of children etc.) */ -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; - read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { - if (q->handle == handle) { - read_unlock(&qdisc_tree_lock); + if (q->handle == handle) return q; - } } - read_unlock(&qdisc_tree_lock); return NULL; } + +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) +{ + struct Qdisc *q; + + read_lock(&qdisc_tree_lock); + q = __qdisc_lookup(dev, handle); + read_unlock(&qdisc_tree_lock); + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -349,6 +356,26 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc) return oqdisc; } +void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) +{ + struct Qdisc_class_ops *cops; + unsigned long cl; + u32 parentid; + + if (n == 0) + return; + while ((parentid = sch->parent)) { + sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + cops = sch->ops->cl_ops; + if (cops->qlen_notify) { + cl = cops->get(sch, parentid); + cops->qlen_notify(sch, cl); + cops->put(sch, cl); + } + sch->q.qlen -= n; + } +} +EXPORT_SYMBOL(qdisc_tree_decrease_qlen); /* Graft qdisc "new" to class "classid" of qdisc "parent" or to device "dev". diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 93ebce40acac6..6c14971a0ddb7 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -317,7 +317,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, } memset(flow,0,sizeof(*flow)); flow->filter_list = NULL; - if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops))) + if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid))) flow->q = &noop_qdisc; DPRINTK("atm_tc_change: qdisc %p\n",flow->q); flow->sock = sock; @@ -577,7 +577,8 @@ static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt) DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); p->flows = &p->link; - if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops))) + if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops, + sch->handle))) p->link.q = &noop_qdisc; DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q); p->link.filter_list = NULL; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6cd81708bf710..9cda4064741ce 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1430,7 +1430,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.sibling = &q->link; q->link.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle))) q->link.q = &noop_qdisc; q->link.priority = TC_CBQ_MAXPRIO-1; @@ -1675,7 +1676,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl) { if (new == NULL) { - if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL) + if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid)) == NULL) return -ENOBUFS; } else { #ifdef CONFIG_NET_CLS_POLICE @@ -1686,7 +1688,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = cl->q; cl->q = new; - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); @@ -1934,7 +1936,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) + if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) cl->q = &noop_qdisc; cl->classid = classid; cl->tparent = parent; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 13e0e7b3856bf..3e0e5b91cb4b5 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -89,15 +89,16 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle); if (new == NULL) new = &noop_qdisc; } sch_tree_lock(sch); *old = xchg(&p->q, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; @@ -388,7 +389,7 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt) p->default_index = default_index; p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]); - p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0326752bd0ec1..7814562aa4c43 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -440,13 +440,15 @@ errout: return ERR_PTR(-err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, + unsigned int parentid) { struct Qdisc *sch; sch = qdisc_alloc(dev, ops); if (IS_ERR(sch)) goto errout; + sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) return sch; @@ -510,7 +512,8 @@ void dev_activate(struct net_device *dev) if (dev->qdisc_sleeping == &noop_qdisc) { struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops); + qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, + TC_H_ROOT); if (qdisc == NULL) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 91132f6871d76..9aef64e560ae9 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -958,11 +958,7 @@ hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) unsigned int len = cl->qdisc->q.qlen; qdisc_reset(cl->qdisc); - if (len > 0) { - update_vf(cl, 0, 0); - set_passive(cl); - sch->q.qlen -= len; - } + qdisc_tree_decrease_qlen(cl->qdisc, len); } static void @@ -1140,7 +1136,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->classid = classid; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; cl->stats_lock = &sch->dev->queue_lock; @@ -1202,10 +1198,12 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - list_del(&cl->hlist); list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); + hfsc_purge_queue(sch, cl); + list_del(&cl->hlist); + if (--cl->refcnt == 0) hfsc_destroy_class(sch, cl); @@ -1273,7 +1271,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid); if (new == NULL) new = &noop_qdisc; } @@ -1296,6 +1295,17 @@ hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } +static void +hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (cl->qdisc->q.qlen == 0) { + update_vf(cl, 0, 0); + set_passive(cl); + } +} + static unsigned long hfsc_get_class(struct Qdisc *sch, u32 classid) { @@ -1516,7 +1526,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) q->root.refcnt = 1; q->root.classid = sch->handle; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; q->root.stats_lock = &sch->dev->queue_lock; @@ -1779,6 +1790,7 @@ static struct Qdisc_class_ops hfsc_class_ops = { .delete = hfsc_delete_class, .graft = hfsc_graft_class, .leaf = hfsc_class_leaf, + .qlen_notify = hfsc_qlen_notify, .get = hfsc_get_class, .put = hfsc_put_class, .bind_tcf = hfsc_bind_tcf, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3ec95df4a85ed..f04e0a875c987 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1385,16 +1385,13 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct htb_class *cl = (struct htb_class*)arg; if (cl && !cl->level) { - if (new == NULL && (new = qdisc_create_dflt(sch->dev, - &pfifo_qdisc_ops)) == NULL) + if (new == NULL && + (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + cl->classid)) == NULL) return -ENOBUFS; sch_tree_lock(sch); if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { - if (cl->prio_activity) - htb_deactivate (qdisc_priv(sch),cl); - - /* TODO: is it correct ? Why CBQ doesn't do it ? */ - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); } sch_tree_unlock(sch); @@ -1409,6 +1406,14 @@ static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) return (cl && !cl->level) ? cl->un.leaf.q : NULL; } +static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) +{ + struct htb_class *cl = (struct htb_class *)arg; + + if (cl->un.leaf.q->q.qlen == 0) + htb_deactivate(qdisc_priv(sch), cl); +} + static unsigned long htb_get(struct Qdisc *sch, u32 classid) { #ifdef HTB_DEBUG @@ -1434,10 +1439,10 @@ static void htb_destroy_filters(struct tcf_proto **fl) static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); + HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0); if (!cl->level) { BUG_TRAP(cl->un.leaf.q); - sch->q.qlen -= cl->un.leaf.q->q.qlen; qdisc_destroy(cl->un.leaf.q); } qdisc_put_rtab(cl->rate); @@ -1489,6 +1494,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class*)arg; + unsigned int qlen; + HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); // TODO: why don't allow to delete subtree ? references ? does @@ -1498,7 +1505,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) return -EBUSY; sch_tree_lock(sch); - + + if (!cl->level) { + qlen = cl->un.leaf.q->q.qlen; + qdisc_reset(cl->un.leaf.q); + qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); + } + /* delete from hash and active; remainder in destroy_class */ list_del_init(&cl->hlist); if (cl->prio_activity) @@ -1576,11 +1589,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { + unsigned int qlen = parent->un.leaf.q->q.qlen; + /* turn parent into inner node */ - sch->q.qlen -= parent->un.leaf.q->q.qlen; + qdisc_reset(parent->un.leaf.q); + qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); qdisc_destroy (parent->un.leaf.q); if (parent->prio_activity) htb_deactivate (q,parent); @@ -1721,6 +1737,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) static struct Qdisc_class_ops htb_class_ops = { .graft = htb_graft, .leaf = htb_leaf, + .qlen_notify = htb_qlen_notify, .get = htb_get, .put = htb_put, .change = htb_change_class, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ba52832048374..7c71a78775d8a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -571,7 +571,8 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) q->timer.function = netem_watchdog; q->timer.data = (unsigned long) sch; - q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); + q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); return -ENOMEM; @@ -658,8 +659,8 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = xchg(&q->qdisc, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 1641db33a9940..337a050895ace 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -223,21 +223,27 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt) for (i=q->bands; iqueues[i], &noop_qdisc); - if (child != &noop_qdisc) + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, child->q.qlen); qdisc_destroy(child); + } } sch_tree_unlock(sch); for (i=0; ibands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child; - child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); if (child) { sch_tree_lock(sch); child = xchg(&q->queues[i], child); - if (child != &noop_qdisc) + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, + child->q.qlen); qdisc_destroy(child); + } sch_tree_unlock(sch); } } @@ -295,7 +301,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = q->queues[band]; q->queues[band] = new; - sch->q.qlen -= (*old)->q.qlen; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); sch_tree_unlock(sch); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 86d8da0cbd027..a7dfcdb94459e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -389,6 +389,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); struct tc_sfq_qopt *ctl = RTA_DATA(opt); + unsigned int qlen; if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) return -EINVAL; @@ -399,8 +400,10 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt) if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH); + qlen = sch->q.qlen; while (sch->q.qlen >= q->limit-1) sfq_drop(sch); + qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); del_timer(&q->perturb_timer); if (q->perturb_period) { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index cb9711ea8c6ca..539b08f93c3f6 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -274,12 +274,14 @@ static void tbf_reset(struct Qdisc* sch) del_timer(&q->wd_timer); } -static struct Qdisc *tbf_create_dflt_qdisc(struct net_device *dev, u32 limit) +static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) { - struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops); + struct Qdisc *q; struct rtattr *rta; int ret; + q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 1)); if (q) { rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (rta) { @@ -342,12 +344,15 @@ static int tbf_change(struct Qdisc* sch, struct rtattr *opt) goto done; if (q->qdisc == &noop_qdisc) { - if ((child = tbf_create_dflt_qdisc(sch->dev, qopt->limit)) == NULL) + if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL) goto done; } sch_tree_lock(sch); - if (child) q->qdisc = child; + if (child) { + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_destroy(xchg(&q->qdisc, child)); + } q->limit = qopt->limit; q->mtu = qopt->mtu; q->max_size = max_size; @@ -449,8 +454,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, sch_tree_lock(sch); *old = xchg(&q->qdisc, new); + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); qdisc_reset(*old); - sch->q.qlen = 0; sch_tree_unlock(sch); return 0;