From: Greg Kroah-Hartman
Date: Fri, 23 Feb 2024 09:24:33 +0000 (+0100)
Subject: 6.1-stable patches
X-Git-Tag: v4.19.308~121
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6fbae7a80942d09eaaceb6ec3a4acccf7f55aba2;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
	net-sched-retire-atm-qdisc.patch
	net-sched-retire-cbq-qdisc.patch
	net-sched-retire-dsmark-qdisc.patch
series
---
diff --git a/queue-6.1/net-sched-retire-atm-qdisc.patch b/queue-6.1/net-sched-retire-atm-qdisc.patch
new file mode 100644
index 00000000000..e2fd3062515
--- /dev/null
+++ b/queue-6.1/net-sched-retire-atm-qdisc.patch
@@ -0,0 +1,865 @@
+From fb38306ceb9e770adfb5ffa6e3c64047b55f7a07 Mon Sep 17 00:00:00 2001
+From: Jamal Hadi Salim
+Date: Tue, 14 Feb 2023 08:49:12 -0500
+Subject: net/sched: Retire ATM qdisc
+
+From: Jamal Hadi Salim
+
+commit fb38306ceb9e770adfb5ffa6e3c64047b55f7a07 upstream.
+
+The ATM qdisc has served us well over the years but has not been getting much
+TLC due to lack of known users. Most recently it has become a shooting target
+for syzkaller. For this reason, we are retiring it.
+
+Signed-off-by: Jamal Hadi Salim
+Acked-by: Jiri Pirko
+Signed-off-by: Paolo Abeni
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sched/Kconfig                                            |   14 
+ net/sched/Makefile                                           |    1 
+ net/sched/sch_atm.c                                          |  706 ------------
+ tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json  |   94 -
+ 4 files changed, 815 deletions(-)
+ delete mode 100644 net/sched/sch_atm.c
+ delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json
+
+--- a/net/sched/Kconfig
++++ b/net/sched/Kconfig
+@@ -68,20 +68,6 @@ config NET_SCH_HFSC
+ 	  To compile this code as a module, choose M here: the
+ 	  module will be called sch_hfsc.
+ 
+-config NET_SCH_ATM
+-	tristate "ATM Virtual Circuits (ATM)"
+-	depends on ATM
+-	help
+-	  Say Y here if you want to use the ATM pseudo-scheduler. This
+-	  provides a framework for invoking classifiers, which in turn
+-	  select classes of this queuing discipline. Each class maps
+-	  the flow(s) it is handling to a given virtual circuit.
+-
+-	  See the top of <file:net/sched/sch_atm.c> for more details.
+-
+-	  To compile this code as a module, choose M here: the
+-	  module will be called sch_atm.
+-
+ config NET_SCH_PRIO
+ 	tristate "Multi Band Priority Queueing (PRIO)"
+ 	help
+--- a/net/sched/Makefile
++++ b/net/sched/Makefile
+@@ -45,7 +45,6 @@ obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
+ obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
+ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
+ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
+-obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
+ obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
+ obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
+ obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
+--- a/net/sched/sch_atm.c
++++ /dev/null
+@@ -1,706 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-only
+-/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
+-
+-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
+-
+-#include <linux/module.h>
+-#include <linux/slab.h>
+-#include <linux/init.h>
+-#include <linux/interrupt.h>
+-#include <linux/string.h>
+-#include <linux/errno.h>
+-#include <linux/skbuff.h>
+-#include <linux/atmdev.h>
+-#include <linux/atmclip.h>
+-#include <linux/rtnetlink.h>
+-#include <linux/file.h>	/* for fput */
+-#include <net/netlink.h>
+-#include <net/pkt_sched.h>
+-#include <net/pkt_cls.h>
+-
+-/*
+- * The ATM queuing discipline provides a framework for invoking classifiers
+- * (aka "filters"), which in turn select classes of this queuing discipline.
+- * Each class maps the flow(s) it is handling to a given VC. Multiple classes
+- * may share the same VC.
+- *
+- * When creating a class, VCs are specified by passing the number of the open
+- * socket descriptor by which the calling process references the VC.
+- * The kernel
+- * keeps the VC open at least until all classes using it are removed.
+- *
+- * In this file, most functions are named atm_tc_* to avoid confusion with all
+- * the atm_* in net/atm. This naming convention differs from what's used in the
+- * rest of net/sched.
+- *
+- * Known bugs:
+- *  - sometimes messes up the IP stack
+- *  - any manipulations besides the few operations described in the README, are
+- *    untested and likely to crash the system
+- *  - should lock the flow while there is data in the queue (?)
+- */
+-
+-#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
+-
+-struct atm_flow_data {
+-	struct Qdisc_class_common common;
+-	struct Qdisc		*q;	/* FIFO, TBF, etc. */
+-	struct tcf_proto __rcu	*filter_list;
+-	struct tcf_block	*block;
+-	struct atm_vcc		*vcc;	/* VCC; NULL if VCC is closed */
+-	void			(*old_pop)(struct atm_vcc *vcc,
+-					   struct sk_buff *skb); /* chaining */
+-	struct atm_qdisc_data	*parent;	/* parent qdisc */
+-	struct socket		*sock;		/* for closing */
+-	int			ref;		/* reference count */
+-	struct gnet_stats_basic_sync	bstats;
+-	struct gnet_stats_queue	qstats;
+-	struct list_head	list;
+-	struct atm_flow_data	*excess;	/* flow for excess traffic;
+-						   NULL to set CLP instead */
+-	int			hdr_len;
+-	unsigned char		hdr[];		/* header data; MUST BE LAST */
+-};
+-
+-struct atm_qdisc_data {
+-	struct atm_flow_data	link;		/* unclassified skbs go here */
+-	struct list_head	flows;		/* NB: "link" is also on this
+-						   list */
+-	struct tasklet_struct	task;		/* dequeue tasklet */
+-};
+-
+-/* ------------------------- Class/flow operations ------------------------- */
+-
+-static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow;
+-
+-	list_for_each_entry(flow, &p->flows, list) {
+-		if (flow->common.classid == classid)
+-			return flow;
+-	}
+-	return NULL;
+-}
+-
+-static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
+-			struct Qdisc *new, struct Qdisc **old,
+-			struct netlink_ext_ack *extack)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+-
+-	pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
+-		sch, p, flow, new, old);
+-	if (list_empty(&flow->list))
+-		return -EINVAL;
+-	if (!new)
+-		new = &noop_qdisc;
+-	*old = flow->q;
+-	flow->q = new;
+-	if (*old)
+-		qdisc_reset(*old);
+-	return 0;
+-}
+-
+-static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
+-{
+-	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
+-
+-	pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
+-	return flow ? flow->q : NULL;
+-}
+-
+-static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
+-{
+-	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
+-	struct atm_flow_data *flow;
+-
+-	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
+-	flow = lookup_flow(sch, classid);
+-	pr_debug("%s: flow %p\n", __func__, flow);
+-	return (unsigned long)flow;
+-}
+-
+-static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
+-					unsigned long parent, u32 classid)
+-{
+-	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
+-	struct atm_flow_data *flow;
+-
+-	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
+-	flow = lookup_flow(sch, classid);
+-	if (flow)
+-		flow->ref++;
+-	pr_debug("%s: flow %p\n", __func__, flow);
+-	return (unsigned long)flow;
+-}
+-
+-/*
+- * atm_tc_put handles all destructions, including the ones that are explicitly
+- * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
+- * anything that still seems to be in use.
+- */
+-static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
+-
+-	pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+-	if (--flow->ref)
+-		return;
+-	pr_debug("atm_tc_put: destroying\n");
+-	list_del_init(&flow->list);
+-	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
+-	qdisc_put(flow->q);
+-	tcf_block_put(flow->block);
+-	if (flow->sock) {
+-		pr_debug("atm_tc_put: f_count %ld\n",
+-			file_count(flow->sock->file));
+-		flow->vcc->pop = flow->old_pop;
+-		sockfd_put(flow->sock);
+-	}
+-	if (flow->excess)
+-		atm_tc_put(sch, (unsigned long)flow->excess);
+-	if (flow != &p->link)
+-		kfree(flow);
+-	/*
+-	 * If flow == &p->link, the qdisc no longer works at this point and
+-	 * needs to be removed. (By the caller of atm_tc_put.)
+-	 */
+-}
+-
+-static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+-{
+-	struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
+-
+-	pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
+-	VCC2FLOW(vcc)->old_pop(vcc, skb);
+-	tasklet_schedule(&p->task);
+-}
+-
+-static const u8 llc_oui_ip[] = {
+-	0xaa,			/* DSAP: non-ISO */
+-	0xaa,			/* SSAP: non-ISO */
+-	0x03,			/* Ctrl: Unnumbered Information Command PDU */
+-	0x00,			/* OUI: EtherType */
+-	0x00, 0x00,
+-	0x08, 0x00
+-};				/* Ethertype IP (0800) */
+-
+-static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
+-	[TCA_ATM_FD]		= { .type = NLA_U32 },
+-	[TCA_ATM_EXCESS]	= { .type = NLA_U32 },
+-};
+-
+-static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
+-			 struct nlattr **tca, unsigned long *arg,
+-			 struct netlink_ext_ack *extack)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
+-	struct atm_flow_data *excess = NULL;
+-	struct nlattr *opt = tca[TCA_OPTIONS];
+-	struct nlattr *tb[TCA_ATM_MAX + 1];
+-	struct socket *sock;
+-	int fd, error, hdr_len;
+-	void *hdr;
+-
+-	pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
+-		"flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
+-	/*
+-	 * The concept of parents doesn't apply for this qdisc.
+-	 */
+-	if (parent && parent != TC_H_ROOT && parent != sch->handle)
+-		return -EINVAL;
+-	/*
+-	 * ATM classes cannot be changed. In order to change properties of the
+-	 * ATM connection, that socket needs to be modified directly (via the
+-	 * native ATM API.
+-	 * In order to send a flow to a different VC, the old
+-	 * class needs to be removed and a new one added. (This may be changed
+-	 * later.)
+-	 */
+-	if (flow)
+-		return -EBUSY;
+-	if (opt == NULL)
+-		return -EINVAL;
+-
+-	error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy,
+-					    NULL);
+-	if (error < 0)
+-		return error;
+-
+-	if (!tb[TCA_ATM_FD])
+-		return -EINVAL;
+-	fd = nla_get_u32(tb[TCA_ATM_FD]);
+-	pr_debug("atm_tc_change: fd %d\n", fd);
+-	if (tb[TCA_ATM_HDR]) {
+-		hdr_len = nla_len(tb[TCA_ATM_HDR]);
+-		hdr = nla_data(tb[TCA_ATM_HDR]);
+-	} else {
+-		hdr_len = RFC1483LLC_LEN;
+-		hdr = NULL;	/* default LLC/SNAP for IP */
+-	}
+-	if (!tb[TCA_ATM_EXCESS])
+-		excess = NULL;
+-	else {
+-		excess = (struct atm_flow_data *)
+-			atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
+-		if (!excess)
+-			return -ENOENT;
+-	}
+-	pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
+-		 opt->nla_type, nla_len(opt), hdr_len);
+-	sock = sockfd_lookup(fd, &error);
+-	if (!sock)
+-		return error;	/* f_count++ */
+-	pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
+-	if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
+-		error = -EPROTOTYPE;
+-		goto err_out;
+-	}
+-	/* @@@ should check if the socket is really operational or we'll crash
+-	   on vcc->send */
+-	if (classid) {
+-		if (TC_H_MAJ(classid ^ sch->handle)) {
+-			pr_debug("atm_tc_change: classid mismatch\n");
+-			error = -EINVAL;
+-			goto err_out;
+-		}
+-	} else {
+-		int i;
+-		unsigned long cl;
+-
+-		for (i = 1; i < 0x8000; i++) {
+-			classid = TC_H_MAKE(sch->handle, 0x8000 | i);
+-			cl = atm_tc_find(sch, classid);
+-			if (!cl)
+-				break;
+-		}
+-	}
+-	pr_debug("atm_tc_change: new id %x\n", classid);
+-	flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
+-	pr_debug("atm_tc_change: flow %p\n", flow);
+-	if (!flow) {
+-		error = -ENOBUFS;
+-		goto err_out;
+-	}
+-
+-	error = tcf_block_get(&flow->block, &flow->filter_list, sch,
+-			      extack);
+-	if (error) {
+-		kfree(flow);
+-		goto err_out;
+-	}
+-
+-	flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
+-				    extack);
+-	if (!flow->q)
+-		flow->q = &noop_qdisc;
+-	pr_debug("atm_tc_change: qdisc %p\n", flow->q);
+-	flow->sock = sock;
+-	flow->vcc = ATM_SD(sock);	/* speedup */
+-	flow->vcc->user_back = flow;
+-	pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
+-	flow->old_pop = flow->vcc->pop;
+-	flow->parent = p;
+-	flow->vcc->pop = sch_atm_pop;
+-	flow->common.classid = classid;
+-	flow->ref = 1;
+-	flow->excess = excess;
+-	list_add(&flow->list, &p->link.list);
+-	flow->hdr_len = hdr_len;
+-	if (hdr)
+-		memcpy(flow->hdr, hdr, hdr_len);
+-	else
+-		memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip));
+-	*arg = (unsigned long)flow;
+-	return 0;
+-err_out:
+-	sockfd_put(sock);
+-	return error;
+-}
+-
+-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
+-			 struct netlink_ext_ack *extack)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+-
+-	pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+-	if (list_empty(&flow->list))
+-		return -EINVAL;
+-	if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
+-		return -EBUSY;
+-	/*
+-	 * Reference count must be 2: one for "keepalive" (set at class
+-	 * creation), and one for the reference held when calling delete.
+-	 */
+-	if (flow->ref < 2) {
+-		pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
+-		return -EINVAL;
+-	}
+-	if (flow->ref > 2)
+-		return -EBUSY;	/* catch references via excess, etc. */
+-	atm_tc_put(sch, arg);
+-	return 0;
+-}
+-
+-static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow;
+-
+-	pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+-	if (walker->stop)
+-		return;
+-	list_for_each_entry(flow, &p->flows, list) {
+-		if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker))
+-			break;
+-	}
+-}
+-
+-static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
+-					  struct netlink_ext_ack *extack)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
+-
+-	pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+-	return flow ? flow->block : p->link.block;
+-}
+-
+-/* --------------------------- Qdisc operations ---------------------------- */
+-
+-static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+-			  struct sk_buff **to_free)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow;
+-	struct tcf_result res;
+-	int result;
+-	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+-
+-	pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+-	result = TC_ACT_OK;	/* be nice to gcc */
+-	flow = NULL;
+-	if (TC_H_MAJ(skb->priority) != sch->handle ||
+-	    !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
+-		struct tcf_proto *fl;
+-
+-		list_for_each_entry(flow, &p->flows, list) {
+-			fl = rcu_dereference_bh(flow->filter_list);
+-			if (fl) {
+-				result = tcf_classify(skb, NULL, fl, &res, true);
+-				if (result < 0)
+-					continue;
+-				if (result == TC_ACT_SHOT)
+-					goto done;
+-
+-				flow = (struct atm_flow_data *)res.class;
+-				if (!flow)
+-					flow = lookup_flow(sch, res.classid);
+-				goto drop;
+-			}
+-		}
+-		flow = NULL;
+-done:
+-		;
+-	}
+-	if (!flow) {
+-		flow = &p->link;
+-	} else {
+-		if (flow->vcc)
+-			ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
+-		/*@@@ looks good ... but it's not supposed to work :-) */
+-#ifdef CONFIG_NET_CLS_ACT
+-		switch (result) {
+-		case TC_ACT_QUEUED:
+-		case TC_ACT_STOLEN:
+-		case TC_ACT_TRAP:
+-			__qdisc_drop(skb, to_free);
+-			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+-		case TC_ACT_SHOT:
+-			__qdisc_drop(skb, to_free);
+-			goto drop;
+-		case TC_ACT_RECLASSIFY:
+-			if (flow->excess)
+-				flow = flow->excess;
+-			else
+-				ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP;
+-			break;
+-		}
+-#endif
+-	}
+-
+-	ret = qdisc_enqueue(skb, flow->q, to_free);
+-	if (ret != NET_XMIT_SUCCESS) {
+-drop: __maybe_unused
+-		if (net_xmit_drop_count(ret)) {
+-			qdisc_qstats_drop(sch);
+-			if (flow)
+-				flow->qstats.drops++;
+-		}
+-		return ret;
+-	}
+-	/*
+-	 * Okay, this may seem weird. We pretend we've dropped the packet if
+-	 * it goes via ATM. The reason for this is that the outer qdisc
+-	 * expects to be able to q->dequeue the packet later on if we return
+-	 * success at this place. Also, sch->q.qdisc needs to reflect whether
+-	 * there is a packet egligible for dequeuing or not. Note that the
+-	 * statistics of the outer qdisc are necessarily wrong because of all
+-	 * this. There's currently no correct solution for this.
+-	 */
+-	if (flow == &p->link) {
+-		sch->q.qlen++;
+-		return NET_XMIT_SUCCESS;
+-	}
+-	tasklet_schedule(&p->task);
+-	return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+-}
+-
+-/*
+- * Dequeue packets and send them over ATM.
+- * Note that we quite deliberately
+- * avoid checking net_device's flow control here, simply because sch_atm
+- * uses its own channels, which have nothing to do with any CLIP/LANE/or
+- * non-ATM interfaces.
+- */
+-
+-static void sch_atm_dequeue(struct tasklet_struct *t)
+-{
+-	struct atm_qdisc_data *p = from_tasklet(p, t, task);
+-	struct Qdisc *sch = qdisc_from_priv(p);
+-	struct atm_flow_data *flow;
+-	struct sk_buff *skb;
+-
+-	pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
+-	list_for_each_entry(flow, &p->flows, list) {
+-		if (flow == &p->link)
+-			continue;
+-		/*
+-		 * If traffic is properly shaped, this won't generate nasty
+-		 * little bursts. Otherwise, it may ... (but that's okay)
+-		 */
+-		while ((skb = flow->q->ops->peek(flow->q))) {
+-			if (!atm_may_send(flow->vcc, skb->truesize))
+-				break;
+-
+-			skb = qdisc_dequeue_peeked(flow->q);
+-			if (unlikely(!skb))
+-				break;
+-
+-			qdisc_bstats_update(sch, skb);
+-			bstats_update(&flow->bstats, skb);
+-			pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
+-			/* remove any LL header somebody else has attached */
+-			skb_pull(skb, skb_network_offset(skb));
+-			if (skb_headroom(skb) < flow->hdr_len) {
+-				struct sk_buff *new;
+-
+-				new = skb_realloc_headroom(skb, flow->hdr_len);
+-				dev_kfree_skb(skb);
+-				if (!new)
+-					continue;
+-				skb = new;
+-			}
+-			pr_debug("sch_atm_dequeue: ip %p, data %p\n",
+-				 skb_network_header(skb), skb->data);
+-			ATM_SKB(skb)->vcc = flow->vcc;
+-			memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
+-			       flow->hdr_len);
+-			refcount_add(skb->truesize,
+-				     &sk_atm(flow->vcc)->sk_wmem_alloc);
+-			/* atm.atm_options are already set by atm_tc_enqueue */
+-			flow->vcc->send(flow->vcc, skb);
+-		}
+-	}
+-}
+-
+-static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct sk_buff *skb;
+-
+-	pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
+-	tasklet_schedule(&p->task);
+-	skb = qdisc_dequeue_peeked(p->link.q);
+-	if (skb)
+-		sch->q.qlen--;
+-	return skb;
+-}
+-
+-static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-
+-	pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
+-
+-	return p->link.q->ops->peek(p->link.q);
+-}
+-
+-static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
+-		       struct netlink_ext_ack *extack)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	int err;
+-
+-	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+-	INIT_LIST_HEAD(&p->flows);
+-	INIT_LIST_HEAD(&p->link.list);
+-	gnet_stats_basic_sync_init(&p->link.bstats);
+-	list_add(&p->link.list, &p->flows);
+-	p->link.q = qdisc_create_dflt(sch->dev_queue,
+-				      &pfifo_qdisc_ops, sch->handle, extack);
+-	if (!p->link.q)
+-		p->link.q = &noop_qdisc;
+-	pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
+-	p->link.vcc = NULL;
+-	p->link.sock = NULL;
+-	p->link.common.classid = sch->handle;
+-	p->link.ref = 1;
+-
+-	err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
+-			    extack);
+-	if (err)
+-		return err;
+-
+-	tasklet_setup(&p->task, sch_atm_dequeue);
+-	return 0;
+-}
+-
+-static void atm_tc_reset(struct Qdisc *sch)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow;
+-
+-	pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
+-	list_for_each_entry(flow, &p->flows, list)
+-		qdisc_reset(flow->q);
+-}
+-
+-static void atm_tc_destroy(struct Qdisc *sch)
+-{
+-	struct atm_qdisc_data *p = qdisc_priv(sch);
+-	struct atm_flow_data *flow, *tmp;
+-
pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); +- list_for_each_entry(flow, &p->flows, list) { +- tcf_block_put(flow->block); +- flow->block = NULL; +- } +- +- list_for_each_entry_safe(flow, tmp, &p->flows, list) { +- if (flow->ref > 1) +- pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); +- atm_tc_put(sch, (unsigned long)flow); +- } +- tasklet_kill(&p->task); +-} +- +-static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, +- struct sk_buff *skb, struct tcmsg *tcm) +-{ +- struct atm_qdisc_data *p = qdisc_priv(sch); +- struct atm_flow_data *flow = (struct atm_flow_data *)cl; +- struct nlattr *nest; +- +- pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", +- sch, p, flow, skb, tcm); +- if (list_empty(&flow->list)) +- return -EINVAL; +- tcm->tcm_handle = flow->common.classid; +- tcm->tcm_info = flow->q->handle; +- +- nest = nla_nest_start_noflag(skb, TCA_OPTIONS); +- if (nest == NULL) +- goto nla_put_failure; +- +- if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr)) +- goto nla_put_failure; +- if (flow->vcc) { +- struct sockaddr_atmpvc pvc; +- int state; +- +- memset(&pvc, 0, sizeof(pvc)); +- pvc.sap_family = AF_ATMPVC; +- pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; +- pvc.sap_addr.vpi = flow->vcc->vpi; +- pvc.sap_addr.vci = flow->vcc->vci; +- if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc)) +- goto nla_put_failure; +- state = ATM_VF2VS(flow->vcc->flags); +- if (nla_put_u32(skb, TCA_ATM_STATE, state)) +- goto nla_put_failure; +- } +- if (flow->excess) { +- if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) +- goto nla_put_failure; +- } else { +- if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) +- goto nla_put_failure; +- } +- return nla_nest_end(skb, nest); +- +-nla_put_failure: +- nla_nest_cancel(skb, nest); +- return -1; +-} +-static int +-atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, +- struct gnet_dump *d) +-{ +- struct atm_flow_data *flow = (struct atm_flow_data *)arg; +- +- if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || +- gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) +- return -1; +- +- return 0; +-} +- +-static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) +-{ +- return 0; +-} +- +-static const struct Qdisc_class_ops atm_class_ops = { +- .graft = atm_tc_graft, +- .leaf = atm_tc_leaf, +- .find = atm_tc_find, +- .change = atm_tc_change, +- .delete = atm_tc_delete, +- .walk = atm_tc_walk, +- .tcf_block = atm_tc_tcf_block, +- .bind_tcf = atm_tc_bind_filter, +- .unbind_tcf = atm_tc_put, +- .dump = atm_tc_dump_class, +- .dump_stats = atm_tc_dump_class_stats, +-}; +- +-static struct Qdisc_ops atm_qdisc_ops __read_mostly = { +- .cl_ops = &atm_class_ops, +- .id = "atm", +- .priv_size = sizeof(struct atm_qdisc_data), +- .enqueue = atm_tc_enqueue, +- .dequeue = atm_tc_dequeue, +- .peek = atm_tc_peek, +- .init = atm_tc_init, +- .reset = atm_tc_reset, +- .destroy = atm_tc_destroy, +- .dump = atm_tc_dump, +- .owner = THIS_MODULE, +-}; +- +-static int __init atm_init(void) +-{ +- return register_qdisc(&atm_qdisc_ops); +-} +- +-static void __exit atm_exit(void) +-{ +- unregister_qdisc(&atm_qdisc_ops); +-} +- +-module_init(atm_init) +-module_exit(atm_exit) +-MODULE_LICENSE("GPL"); +--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json ++++ /dev/null +@@ -1,94 +0,0 @@ +-[ +- { +- "id": "7628", +- "name": "Create ATM with default setting", +- "category": [ +- "qdisc", +- "atm" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP 
+-            "$IP link add dev $DUMMY type dummy || /bin/true"
+-        ],
+-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
+-        "expExitCode": "0",
+-        "verifyCmd": "$TC qdisc show dev $DUMMY",
+-        "matchPattern": "qdisc atm 1: root refcnt",
+-        "matchCount": "1",
+-        "teardown": [
+-            "$TC qdisc del dev $DUMMY handle 1: root",
+-            "$IP link del dev $DUMMY type dummy"
+-        ]
+-    },
+-    {
+-        "id": "390a",
+-        "name": "Delete ATM with valid handle",
+-        "category": [
+-            "qdisc",
+-            "atm"
+-        ],
+-        "plugins": {
+-            "requires": "nsPlugin"
+-        },
+-        "setup": [
+-            "$IP link add dev $DUMMY type dummy || /bin/true",
+-            "$TC qdisc add dev $DUMMY handle 1: root atm"
+-        ],
+-        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+-        "expExitCode": "0",
+-        "verifyCmd": "$TC qdisc show dev $DUMMY",
+-        "matchPattern": "qdisc atm 1: root refcnt",
+-        "matchCount": "0",
+-        "teardown": [
+-            "$IP link del dev $DUMMY type dummy"
+-        ]
+-    },
+-    {
+-        "id": "32a0",
+-        "name": "Show ATM class",
+-        "category": [
+-            "qdisc",
+-            "atm"
+-        ],
+-        "plugins": {
+-            "requires": "nsPlugin"
+-        },
+-        "setup": [
+-            "$IP link add dev $DUMMY type dummy || /bin/true"
+-        ],
+-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
+-        "expExitCode": "0",
+-        "verifyCmd": "$TC class show dev $DUMMY",
+-        "matchPattern": "class atm 1: parent 1:",
+-        "matchCount": "1",
+-        "teardown": [
+-            "$TC qdisc del dev $DUMMY handle 1: root",
+-            "$IP link del dev $DUMMY type dummy"
+-        ]
+-    },
+-    {
+-        "id": "6310",
+-        "name": "Dump ATM stats",
+-        "category": [
+-            "qdisc",
+-            "atm"
+-        ],
+-        "plugins": {
+-            "requires": "nsPlugin"
+-        },
+-        "setup": [
+-            "$IP link add dev $DUMMY type dummy || /bin/true"
+-        ],
+-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
+-        "expExitCode": "0",
+-        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+-        "matchPattern": "qdisc atm 1: root refcnt",
+-        "matchCount": "1",
+-        "teardown": [
+-            "$TC qdisc del dev $DUMMY handle 1: root",
+-            "$IP link del dev $DUMMY type dummy"
+-        ]
+-    }
+-]
diff --git a/queue-6.1/net-sched-retire-cbq-qdisc.patch b/queue-6.1/net-sched-retire-cbq-qdisc.patch
new file mode 100644
index 00000000000..f5b06ca6032
--- /dev/null
+++ b/queue-6.1/net-sched-retire-cbq-qdisc.patch
@@ -0,0 +1,1980 @@
+From 051d442098421c28c7951625652f61b1e15c4bd5 Mon Sep 17 00:00:00 2001
+From: Jamal Hadi Salim
+Date: Tue, 14 Feb 2023 08:49:11 -0500
+Subject: net/sched: Retire CBQ qdisc
+
+From: Jamal Hadi Salim
+
+commit 051d442098421c28c7951625652f61b1e15c4bd5 upstream.
+
+While this amazing qdisc has served us well over the years it has not been
+getting any tender love and care and has bitrotted over time.
+It has become mostly a shooting target for syzkaller lately.
+For this reason, we are retiring it. Goodbye CBQ - we loved you.
+
+Signed-off-by: Jamal Hadi Salim
+Acked-by: Jiri Pirko
+Signed-off-by: Paolo Abeni
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sched/Kconfig                                            |   17 
+ net/sched/Makefile                                           |    1 
+ net/sched/sch_cbq.c                                          | 1727 ------------
+ tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json  |  184 -
+ 4 files changed, 1929 deletions(-)
+ delete mode 100644 net/sched/sch_cbq.c
+ delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json
+
+--- a/net/sched/Kconfig
++++ b/net/sched/Kconfig
+@@ -45,23 +45,6 @@ if NET_SCHED
+ 
+ comment "Queueing/Scheduling"
+ 
+-config NET_SCH_CBQ
+-	tristate "Class Based Queueing (CBQ)"
+-	help
+-	  Say Y here if you want to use the Class-Based Queueing (CBQ) packet
+-	  scheduling algorithm.
+-	  This algorithm classifies the waiting packets
+-	  into a tree-like hierarchy of classes; the leaves of this tree are
+-	  in turn scheduled by separate algorithms.
+-
+-	  See the top of <file:net/sched/sch_cbq.c> for more details.
+-
+-	  CBQ is a commonly used scheduler, so if you're unsure, you should
+-	  say Y here. Then say Y to all the queueing algorithms below that you
+-	  want to use as leaf disciplines.
+-
+-	  To compile this code as a module, choose M here: the
+-	  module will be called sch_cbq.
+-
+ config NET_SCH_HTB
+ 	tristate "Hierarchical Token Bucket (HTB)"
+ 	help
+--- a/net/sched/Makefile
++++ b/net/sched/Makefile
+@@ -33,7 +33,6 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_t
+ obj-$(CONFIG_NET_ACT_CT) += act_ct.o
+ obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
+ obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
+-obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
+ obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
+ obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
+ obj-$(CONFIG_NET_SCH_RED) += sch_red.o
+--- a/net/sched/sch_cbq.c
++++ /dev/null
+@@ -1,1727 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * net/sched/sch_cbq.c	Class-Based Queueing discipline.
+- *
+- * Authors:	Alexey Kuznetsov,
+- */
+-
+-#include <linux/module.h>
+-#include <linux/slab.h>
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-#include <linux/string.h>
+-#include <linux/errno.h>
+-#include <linux/skbuff.h>
+-#include <net/netlink.h>
+-#include <net/pkt_sched.h>
+-#include <net/pkt_cls.h>
+-
+-
+-/*	Class-Based Queueing (CBQ) algorithm.
+-	=======================================
+-
+-	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
+-		 Management Models for Packet Networks",
+-		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
+-
+-		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
+-
+-		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
+-		 Parameters", 1996
+-
+-		 [4] Sally Floyd and Michael Speer, "Experimental Results
+-		 for Class-Based Queueing", 1998, not published.
+-
+-	-----------------------------------------------------------------------
+-
+-	Algorithm skeleton was taken from NS simulator cbq.cc.
+-	If someone wants to check this code against the LBL version,
+-	he should take into account that ONLY the skeleton was borrowed,
+-	the implementation is different. Particularly:
+-
+-	--- The WRR algorithm is different. Our version looks more
+-	reasonable (I hope) and works when quanta are allowed to be
+-	less than MTU, which is always the case when real time classes
+-	have small rates. Note, that the statement of [3] is
+-	incomplete, delay may actually be estimated even if class
+-	per-round allotment is less than MTU. Namely, if per-round
+-	allotment is W*r_i, and r_1+...+r_k = r < 1
+-
+-	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
+-
+-	In the worst case we have IntServ estimate with D = W*r+k*MTU
+-	and C = MTU*r. The proof (if correct at all) is trivial.
+-
+-
+-	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
+-	interpret some places, which look like wrong translations
+-	from NS. Anyone is advised to find these differences
+-	and explain to me, why I am wrong 8).
+-
+-	--- Linux has no EOI event, so that we cannot estimate true class
+-	idle time. Workaround is to consider the next dequeue event
+-	as sign that previous packet is finished. This is wrong because of
+-	internal device queueing, but on a permanently loaded link it is true.
+-	Moreover, combined with clock integrator, this scheme looks
+-	very close to an ideal solution. */
+-
+-struct cbq_sched_data;
+-
+-
+-struct cbq_class {
+-	struct Qdisc_class_common common;
+-	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
+-
+-/* Parameters */
+-	unsigned char		priority;	/* class priority */
+-	unsigned char		priority2;	/* priority to be used after overlimit */
+-	unsigned char		ewma_log;	/* time constant for idle time calculation */
+-
+-	u32			defmap;
+-
+-	/* Link-sharing scheduler parameters */
+-	long			maxidle;	/* Class parameters: see below. */
+-	long			offtime;
+-	long			minidle;
+-	u32			avpkt;
+-	struct qdisc_rate_table	*R_tab;
+-
+-	/* General scheduler (WRR) parameters */
+-	long			allot;
+-	long			quantum;	/* Allotment per WRR round */
+-	long			weight;		/* Relative allotment: see below */
+-
+-	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
+-	struct cbq_class	*split;		/* Ptr to split node */
+-	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
+-	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
+-	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
+-						   parent otherwise */
+-	struct cbq_class	*sibling;	/* Sibling chain */
+-	struct cbq_class	*children;	/* Pointer to children chain */
+-
+-	struct Qdisc		*q;		/* Elementary queueing discipline */
+-
+-
+-/* Variables */
+-	unsigned char		cpriority;	/* Effective priority */
+-	unsigned char		delayed;
+-	unsigned char		level;		/* level of the class in hierarchy:
+-						   0 for leaf classes, and maximal
+-						   level of children + 1 for nodes.
+-						 */
+-
+-	psched_time_t		last;		/* Last end of service */
+-	psched_time_t		undertime;
+-	long			avgidle;
+-	long			deficit;	/* Saved deficit for WRR */
+-	psched_time_t		penalized;
+-	struct gnet_stats_basic_sync bstats;
+-	struct gnet_stats_queue qstats;
+-	struct net_rate_estimator __rcu *rate_est;
+-	struct tc_cbq_xstats	xstats;
+-
+-	struct tcf_proto __rcu	*filter_list;
+-	struct tcf_block	*block;
+-
+-	int			filters;
+-
+-	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
+-};
+-
+-struct cbq_sched_data {
+-	struct Qdisc_class_hash	clhash;		/* Hash table of all classes */
+-	int			nclasses[TC_CBQ_MAXPRIO + 1];
+-	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];
+-
+-	struct cbq_class	link;
+-
+-	unsigned int		activemask;
+-	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
+-								   with backlog */
+-
+-#ifdef CONFIG_NET_CLS_ACT
+-	struct cbq_class	*rx_class;
+-#endif
+-	struct cbq_class	*tx_class;
+-	struct cbq_class	*tx_borrowed;
+-	int			tx_len;
+-	psched_time_t		now;		/* Cached timestamp */
+-	unsigned int		pmask;
+-
+-	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
+-						   started when CBQ has
+-						   backlog, but cannot
+-						   transmit just now */
+-	psched_tdiff_t		wd_expires;
+-	int			toplevel;
+-	u32			hgenerator;
+-};
+-
+-
+-#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
+-
+-static inline struct cbq_class *
+-cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
+-{
+-	struct Qdisc_class_common *clc;
+-
+-	clc = qdisc_class_find(&q->clhash, classid);
+-	if (clc == NULL)
+-		return NULL;
+-	return container_of(clc, struct cbq_class, common);
+-}
+-
+-#ifdef CONFIG_NET_CLS_ACT
+-
+-static struct cbq_class *
+-cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
+-{
+-	struct cbq_class *cl;
+-
+-	for (cl = this->tparent; cl; cl = cl->tparent) {
+-		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
+-
+-		if (new != NULL && new != this)
+-			return new;
+-	}
+-	return NULL;
+-}
+-
+-#endif
+-
+-/* Classify packet.
+- * The procedure is pretty complicated, but
+- * it allows us to combine link sharing and priority scheduling
+- * transparently.
+- *
+- * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
+- * so that it resolves to split nodes. Then packets are classified
+- * by logical priority, or a more specific classifier may be attached
+- * to the split node.
+- */
+-
+-static struct cbq_class *
+-cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(sch);
+-	struct cbq_class *head = &q->link;
+-	struct cbq_class **defmap;
+-	struct cbq_class *cl = NULL;
+-	u32 prio = skb->priority;
+-	struct tcf_proto *fl;
+-	struct tcf_result res;
+-
+-	/*
+-	 *  Step 1. If skb->priority points to one of our classes, use it.
+-	 */
+-	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
+-	    (cl = cbq_class_lookup(q, prio)) != NULL)
+-		return cl;
+-
+-	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+-	for (;;) {
+-		int result = 0;
+-		defmap = head->defaults;
+-
+-		fl = rcu_dereference_bh(head->filter_list);
+-		/*
+-		 * Step 2+n. Apply classifier.
+-		 */
+-		result = tcf_classify(skb, NULL, fl, &res, true);
+-		if (!fl || result < 0)
+-			goto fallback;
+-		if (result == TC_ACT_SHOT)
+-			return NULL;
+-
+-		cl = (void *)res.class;
+-		if (!cl) {
+-			if (TC_H_MAJ(res.classid))
+-				cl = cbq_class_lookup(q, res.classid);
+-			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
+-				cl = defmap[TC_PRIO_BESTEFFORT];
+-
+-			if (cl == NULL)
+-				goto fallback;
+-		}
+-		if (cl->level >= head->level)
+-			goto fallback;
+-#ifdef CONFIG_NET_CLS_ACT
+-		switch (result) {
+-		case TC_ACT_QUEUED:
+-		case TC_ACT_STOLEN:
+-		case TC_ACT_TRAP:
+-			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+-			fallthrough;
+-		case TC_ACT_RECLASSIFY:
+-			return cbq_reclassify(skb, cl);
+-		}
+-#endif
+-		if (cl->level == 0)
+-			return cl;
+-
+-		/*
+-		 * Step 3+n. If classifier selected a link sharing class,
+-		 *	   apply agency specific classifier.
+-		 *	   Repeat this procedure until we hit a leaf node.
+-		 */
+-		head = cl;
+-	}
+-
+-fallback:
+-	cl = head;
+-
+-	/*
+-	 * Step 4. No success...
+-	 */
+-	if (TC_H_MAJ(prio) == 0 &&
+-	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
+-	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
+-		return head;
+-
+-	return cl;
+-}
+-
+-/*
+- * A packet has just been enqueued on the empty class.
+- * cbq_activate_class adds it to the tail of active class list
+- * of its priority band.
+- */
+-
+-static inline void cbq_activate_class(struct cbq_class *cl)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+-	int prio = cl->cpriority;
+-	struct cbq_class *cl_tail;
+-
+-	cl_tail = q->active[prio];
+-	q->active[prio] = cl;
+-
+-	if (cl_tail != NULL) {
+-		cl->next_alive = cl_tail->next_alive;
+-		cl_tail->next_alive = cl;
+-	} else {
+-		cl->next_alive = cl;
+-		q->activemask |= (1<<prio);
+-	}
+-}
+-
+-static void cbq_deactivate_class(struct cbq_class *this)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+-	int prio = this->cpriority;
+-	struct cbq_class *cl;
+-	struct cbq_class *cl_prev = q->active[prio];
+-
+-	do {
+-		cl = cl_prev->next_alive;
+-		if (cl == this) {
+-			cl_prev->next_alive = cl->next_alive;
+-			cl->next_alive = NULL;
+-
+-			if (cl == q->active[prio]) {
+-				q->active[prio] = cl_prev;
+-				if (cl == q->active[prio]) {
+-					q->active[prio] = NULL;
+-					q->activemask &= ~(1<<prio);
+-				}
+-			}
+-			return;
+-		}
+-	} while ((cl_prev = cl) != q->active[prio]);
+-}
+-
+-static void
+-cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
+-{
+-	int toplevel = q->toplevel;
+-
+-	if (toplevel > cl->level) {
+-		psched_time_t now = psched_get_time();
+-
+-		do {
+-			if (cl->undertime < now) {
+-				q->toplevel = cl->level;
+-				return;
+-			}
+-		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
+-	}
+-}
+-
+-static int
+-cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+-	    struct sk_buff **to_free)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(sch);
+-	int ret;
+-	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
+-
+-#ifdef CONFIG_NET_CLS_ACT
+-	q->rx_class = cl;
+-#endif
+-	if (cl == NULL) {
+-		if (ret & __NET_XMIT_BYPASS)
+-			qdisc_qstats_drop(sch);
+-		__qdisc_drop(skb, to_free);
+-		return ret;
+-	}
+-
+-	ret = qdisc_enqueue(skb, cl->q, to_free);
+-	if (ret == NET_XMIT_SUCCESS) {
+-		sch->q.qlen++;
+-		cbq_mark_toplevel(q, cl);
+-		if (!cl->next_alive)
+-			cbq_activate_class(cl);
+-		return ret;
+-	}
+-
+-	if (net_xmit_drop_count(ret)) {
+-		qdisc_qstats_drop(sch);
+-		cbq_mark_toplevel(q, cl);
+-		cl->qstats.drops++;
+-	}
+-	return ret;
+-}
+-
+-/* Overlimit action: penalize leaf class by adding offtime */
+-static void cbq_overlimit(struct cbq_class *cl)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+-	psched_tdiff_t delay = cl->undertime - q->now;
+-
+-	if (!cl->delayed) {
+-		delay += cl->offtime;
+-
+-		/*
+-		 * Class goes to sleep, so that it will have no
+-		 * chance to work avgidle. Let's forgive it 8)
+-		 *
+-		 * BTW cbq-2.0 has a crap in this
+-		 * place, apparently they forgot to shift it by cl->ewma_log.
+-		 */
+-		if (cl->avgidle < 0)
+-			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
+-		if (cl->avgidle < cl->minidle)
+-			cl->avgidle = cl->minidle;
+-		if (delay <= 0)
+-			delay = 1;
+-		cl->undertime = q->now + delay;
+-
+-		cl->xstats.overactions++;
+-		cl->delayed = 1;
+-	}
+-	if (q->wd_expires == 0 || q->wd_expires > delay)
+-		q->wd_expires = delay;
+-
+-	/* Dirty work! We must schedule wakeups based on
+-	 * real available rate, rather than leaf rate,
+-	 * which may be tiny (even zero).
+-	 */
+-	if (q->toplevel == TC_CBQ_MAXLEVEL) {
+-		struct cbq_class *b;
+-		psched_tdiff_t base_delay = q->wd_expires;
+-
+-		for (b = cl->borrow; b; b = b->borrow) {
+-			delay = b->undertime - q->now;
+-			if (delay < base_delay) {
+-				if (delay <= 0)
+-					delay = 1;
+-				base_delay = delay;
+-			}
+-		}
+-
+-		q->wd_expires = base_delay;
+-	}
+-}
+-
+-/*
+- * It is mission critical procedure.
+- *
+- * We "regenerate" toplevel cutoff, if transmitting class
+- * has backlog and it is not regulated. It is not part of
+- * original CBQ description, but looks more reasonable.
+- * Probably, it is wrong. This question needs further investigation.
+- */
+-
+-static inline void
+-cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
+-		    struct cbq_class *borrowed)
+-{
+-	if (cl && q->toplevel >= borrowed->level) {
+-		if (cl->q->q.qlen > 1) {
+-			do {
+-				if (borrowed->undertime == PSCHED_PASTPERFECT) {
+-					q->toplevel = borrowed->level;
+-					return;
+-				}
+-			} while ((borrowed = borrowed->borrow) != NULL);
+-		}
+-#if 0
+-	/* It is not necessary now. Uncommenting it
+-	   will save CPU cycles, but decrease fairness.
+-	 */
+-		q->toplevel = TC_CBQ_MAXLEVEL;
+-#endif
+-	}
+-}
+-
+-static void
+-cbq_update(struct cbq_sched_data *q)
+-{
+-	struct cbq_class *this = q->tx_class;
+-	struct cbq_class *cl = this;
+-	int len = q->tx_len;
+-	psched_time_t now;
+-
+-	q->tx_class = NULL;
+-	/* Time integrator. We calculate EOS time
+-	 * by adding expected packet transmission time.
+-	 */
+-	now = q->now + L2T(&q->link, len);
+-
+-	for ( ; cl; cl = cl->share) {
+-		long avgidle = cl->avgidle;
+-		long idle;
+-
+-		_bstats_update(&cl->bstats, len, 1);
+-
+-		/*
+-		 * (now - last) is total time between packet right edges.
+-		 * (last_pktlen/rate) is "virtual" busy time, so that
+-		 *
+-		 *	idle = (now - last) - last_pktlen/rate
+-		 */
+-
+-		idle = now - cl->last;
+-		if ((unsigned long)idle > 128*1024*1024) {
+-			avgidle = cl->maxidle;
+-		} else {
+-			idle -= L2T(cl, len);
+-
+-		/* true_avgidle := (1-W)*true_avgidle + W*idle,
+-		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
+-		 * cl->avgidle == true_avgidle/W,
+-		 * hence:
+-		 */
+-			avgidle += idle - (avgidle>>cl->ewma_log);
+-		}
+-
+-		if (avgidle <= 0) {
+-			/* Overlimit or at-limit */
+-
+-			if (avgidle < cl->minidle)
+-				avgidle = cl->minidle;
+-
+-			cl->avgidle = avgidle;
+-
+-			/* Calculate expected time, when this class
+-			 * will be allowed to send.
+-			 * It will occur, when:
+-			 * (1-W)*true_avgidle + W*delay = 0, i.e.
+-			 * idle = (1/W - 1)*(-true_avgidle)
+-			 * or
+-			 * idle = (1 - W)*(-cl->avgidle);
+-			 */
+-			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
+-
+-			/*
+-			 * That is not all.
+-			 * To maintain the rate allocated to the class,
+-			 * we add to undertime virtual clock,
+-			 * necessary to complete transmitted packet.
+-			 * (len/phys_bandwidth has been already passed
+-			 * to the moment of cbq_update)
+-			 */
+-
+-			idle -= L2T(&q->link, len);
+-			idle += L2T(cl, len);
+-
+-			cl->undertime = now + idle;
+-		} else {
+-			/* Underlimit */
+-
+-			cl->undertime = PSCHED_PASTPERFECT;
+-			if (avgidle > cl->maxidle)
+-				cl->avgidle = cl->maxidle;
+-			else
+-				cl->avgidle = avgidle;
+-		}
+-		if ((s64)(now - cl->last) > 0)
+-			cl->last = now;
+-	}
+-
+-	cbq_update_toplevel(q, this, q->tx_borrowed);
+-}
+-
+-static inline struct cbq_class *
+-cbq_under_limit(struct cbq_class *cl)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+-	struct cbq_class *this_cl = cl;
+-
+-	if (cl->tparent == NULL)
+-		return cl;
+-
+-	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
+-		cl->delayed = 0;
+-		return cl;
+-	}
+-
+-	do {
+-		/* It is very suspicious place. Now overlimit
+-		 * action is generated for not bounded classes
+-		 * only if link is completely congested.
+-		 * Though it is in agree with ancestor-only paradigm,
+-		 * it looks very stupid. Particularly,
+-		 * it means that this chunk of code will either
+-		 * never be called or result in strong amplification
+-		 * of burstiness. Dangerous, silly, and, however,
+-		 * no another solution exists.
+-		 */
+-		cl = cl->borrow;
+-		if (!cl) {
+-			this_cl->qstats.overlimits++;
+-			cbq_overlimit(this_cl);
+-			return NULL;
+-		}
+-		if (cl->level > q->toplevel)
+-			return NULL;
+-	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
+-
+-	cl->delayed = 0;
+-	return cl;
+-}
+-
+-static inline struct sk_buff *
+-cbq_dequeue_prio(struct Qdisc *sch, int prio)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(sch);
+-	struct cbq_class *cl_tail, *cl_prev, *cl;
+-	struct sk_buff *skb;
+-	int deficit;
+-
+-	cl_tail = cl_prev = q->active[prio];
+-	cl = cl_prev->next_alive;
+-
+-	do {
+-		deficit = 0;
+-
+-		/* Start round */
+-		do {
+-			struct cbq_class *borrow = cl;
+-
+-			if (cl->q->q.qlen &&
+-			    (borrow = cbq_under_limit(cl)) == NULL)
+-				goto skip_class;
+-
+-			if (cl->deficit <= 0) {
+-				/* Class exhausted its allotment per
+-				 * this round. Switch to the next one.
+-				 */
+-				deficit = 1;
+-				cl->deficit += cl->quantum;
+-				goto next_class;
+-			}
+-
+-			skb = cl->q->dequeue(cl->q);
+-
+-			/* Class did not give us any skb :-(
+-			 * It could occur even if cl->q->q.qlen != 0
+-			 * f.e. if cl->q == "tbf"
+-			 */
+-			if (skb == NULL)
+-				goto skip_class;
+-
+-			cl->deficit -= qdisc_pkt_len(skb);
+-			q->tx_class = cl;
+-			q->tx_borrowed = borrow;
+-			if (borrow != cl) {
+-#ifndef CBQ_XSTATS_BORROWS_BYTES
+-				borrow->xstats.borrows++;
+-				cl->xstats.borrows++;
+-#else
+-				borrow->xstats.borrows += qdisc_pkt_len(skb);
+-				cl->xstats.borrows += qdisc_pkt_len(skb);
+-#endif
+-			}
+-			q->tx_len = qdisc_pkt_len(skb);
+-
+-			if (cl->deficit <= 0) {
+-				q->active[prio] = cl;
+-				cl = cl->next_alive;
+-				cl->deficit += cl->quantum;
+-			}
+-			return skb;
+-
+-skip_class:
+-			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
+-				/* Class is empty or penalized.
+-				 * Unlink it from active chain.
+-				 */
+-				cl_prev->next_alive = cl->next_alive;
+-				cl->next_alive = NULL;
+-
+-				/* Did cl_tail point to it? */
+-				if (cl == cl_tail) {
+-					/* Repair it! */
+-					cl_tail = cl_prev;
+-
+-					/* Was it the last class in this band? */
+-					if (cl == cl_tail) {
+-						/* Kill the band! */
+-						q->active[prio] = NULL;
+-						q->activemask &= ~(1<<prio);
+-						if (cl->q->q.qlen)
+-							cbq_activate_class(cl);
+-						return NULL;
+-					}
+-
+-					q->active[prio] = cl_tail;
+-				}
+-				if (cl->q->q.qlen)
+-					cbq_activate_class(cl);
+-
+-				cl = cl_prev;
+-			}
+-
+-next_class:
+-			cl_prev = cl;
+-			cl = cl->next_alive;
+-		} while (cl_prev != cl_tail);
+-	} while (deficit);
+-
+-	q->active[prio] = cl_prev;
+-
+-	return NULL;
+-}
+-
+-static inline struct sk_buff *
+-cbq_dequeue_1(struct Qdisc *sch)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(sch);
+-	struct sk_buff *skb;
+-	unsigned int activemask;
+-
+-	activemask = q->activemask & 0xFF;
+-	while (activemask) {
+-		int prio = ffz(~activemask);
+-		activemask &= ~(1<<prio);
+-		skb = cbq_dequeue_prio(sch, prio);
+-		if (skb)
+-			return skb;
+-	}
+-	return NULL;
+-}
+-
+-static struct sk_buff *
+-cbq_dequeue(struct Qdisc *sch)
+-{
+-	struct sk_buff *skb;
+-	struct cbq_sched_data *q = qdisc_priv(sch);
+-	psched_time_t now;
+-
+-	now = psched_get_time();
+-
+-	if (q->tx_class)
+-		cbq_update(q);
+-
+-	q->now = now;
+-
+-	for (;;) {
+-		q->wd_expires = 0;
+-
+-		skb = cbq_dequeue_1(sch);
+-		if (skb) {
+-			qdisc_bstats_update(sch, skb);
+-			sch->q.qlen--;
+-			return skb;
+-		}
+-
+-		/* All the classes are overlimit.
+-		 *
+-		 * It is possible, if:
+-		 * 1. Scheduler is empty.
+-		 * 2. Toplevel cutoff inhibited borrowing.
+-		 * 3. Root class is overlimit.
+-		 *
+-		 * Reset 2d and 3d conditions and retry.
+-		 *
+-		 * Note, that NS and cbq-2.0 are buggy, peeking
+-		 * an arbitrary class is appropriate for ancestor-only
+-		 * sharing, but not for toplevel algorithm.
+-		 *
+-		 * Our version is better, but slower, because it requires
+-		 * two passes, but it is unavoidable with top-level sharing.
+-		 */
+-
+-		if (q->toplevel == TC_CBQ_MAXLEVEL &&
+-		    q->link.undertime == PSCHED_PASTPERFECT)
+-			break;
+-
+-		q->toplevel = TC_CBQ_MAXLEVEL;
+-		q->link.undertime = PSCHED_PASTPERFECT;
+-	}
+-
+-	/* No packets in scheduler or nobody wants to give them to us :-(
+-	 * Sigh... start watchdog timer in the last case.
+-	 */
+-
+-	if (sch->q.qlen) {
+-		qdisc_qstats_overlimit(sch);
+-		if (q->wd_expires)
+-			qdisc_watchdog_schedule(&q->watchdog,
+-						now + q->wd_expires);
+-	}
+-	return NULL;
+-}
+-
+-/* CBQ class maintenance routines */
+-
+-static void cbq_adjust_levels(struct cbq_class *this)
+-{
+-	if (this == NULL)
+-		return;
+-
+-	do {
+-		int level = 0;
+-		struct cbq_class *cl;
+-
+-		cl = this->children;
+-		if (cl) {
+-			do {
+-				if (cl->level > level)
+-					level = cl->level;
+-			} while ((cl = cl->sibling) != this->children);
+-		}
+-		this->level = level + 1;
+-	} while ((this = this->tparent) != NULL);
+-}
+-
+-static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
+-{
+-	struct cbq_class *cl;
+-	unsigned int h;
+-
+-	if (q->quanta[prio] == 0)
+-		return;
+-
+-	for (h = 0; h < q->clhash.hashsize; h++) {
+-		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
+-			/* BUGGGG... Beware! This expression suffer of
+-			 * arithmetic overflows!
+-			 */
+-			if (cl->priority == prio) {
+-				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
+-					q->quanta[prio];
+-			}
+-			if (cl->quantum <= 0 ||
+-			    cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
+-				pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+-					cl->common.classid, cl->quantum);
+-				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
+-			}
+-		}
+-	}
+-}
+-
+-static void cbq_sync_defmap(struct cbq_class *cl)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+-	struct cbq_class *split = cl->split;
+-	unsigned int h;
+-	int i;
+-
+-	if (split == NULL)
+-		return;
+-
+-	for (i = 0; i <= TC_PRIO_MAX; i++) {
+-		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
+-			split->defaults[i] = NULL;
+-	}
+-
+-	for (i = 0; i <= TC_PRIO_MAX; i++) {
+-		int level = split->level;
+-
+-		if (split->defaults[i])
+-			continue;
+-
+-		for (h = 0; h < q->clhash.hashsize; h++) {
+-			struct cbq_class *c;
+-
+-			hlist_for_each_entry(c, &q->clhash.hash[h],
+-					     common.hnode) {
+-				if (c->split == split && c->level < level &&
+-				    c->defmap & (1<<i)) {
+-					split->defaults[i] = c;
+-					level = c->level;
+-				}
+-			}
+-		}
+-	}
+-}
+-
+-static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
+-{
+-	struct cbq_class *split = NULL;
+-
+-	if (splitid == 0) {
+-		split = cl->split;
+-		if (!split)
+-			return;
+-		splitid = split->common.classid;
+-	}
+-
+-	if (split == NULL || split->common.classid != splitid) {
+-		for (split = cl->tparent; split; split = split->tparent)
+-			if (split->common.classid == splitid)
+-				break;
+-	}
+-
+-	if (split == NULL)
+-		return;
+-
+-	if (cl->split != split) {
+-		cl->defmap = 0;
+-		cbq_sync_defmap(cl);
+-		cl->split = split;
+-		cl->defmap = def & mask;
+-	} else
+-		cl->defmap = (cl->defmap & ~mask) | (def & mask);
+-
+-	cbq_sync_defmap(cl);
+-}
+-
+-static void cbq_unlink_class(struct cbq_class *this)
+-{
+-	struct cbq_class *cl, **clp;
+-	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+-
+-	qdisc_class_hash_remove(&q->clhash, &this->common);
+-
+-	if (this->tparent) {
+-		clp = &this->sibling;
+-		cl = *clp;
+-		do {
+-			if (cl == this) {
+-				*clp = cl->sibling;
+-				break;
+-			}
+-			clp = &cl->sibling;
+-		} while ((cl = *clp) != this->sibling);
+-
+-		if (this->tparent->children == this) {
+-			this->tparent->children = this->sibling;
+-			if (this->sibling == this)
+-				this->tparent->children = NULL;
+-		}
+-	} else {
+-		WARN_ON(this->sibling != this);
+-	}
+-}
+-
+-static void cbq_link_class(struct cbq_class *this)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+-	struct cbq_class *parent = this->tparent;
+-
+-	this->sibling = this;
+-	qdisc_class_hash_insert(&q->clhash, &this->common);
+-
+-	if (parent == NULL)
+-		return;
+-
+-	if (parent->children == NULL) {
+-		parent->children = this;
+-	} else {
+-		this->sibling = parent->children->sibling;
+-		parent->children->sibling = this;
+-	}
+-}
+-
+-static void
+-cbq_reset(struct Qdisc *sch)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(sch);
+-	struct cbq_class *cl;
+-	int prio;
+-	unsigned int h;
+-
+-	q->activemask = 0;
+-	q->pmask = 0;
+-	q->tx_class = NULL;
+-	q->tx_borrowed = NULL;
+-	qdisc_watchdog_cancel(&q->watchdog);
+-	q->toplevel = TC_CBQ_MAXLEVEL;
+-	q->now = psched_get_time();
+-
+-	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
+-		q->active[prio] = NULL;
+-
+-	for (h = 0; h < q->clhash.hashsize; h++) {
+-		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
+-			qdisc_reset(cl->q);
+-
+-			cl->next_alive = NULL;
+-			cl->undertime = PSCHED_PASTPERFECT;
+-			cl->avgidle = cl->maxidle;
+-			cl->deficit = cl->quantum;
+-			cl->cpriority = cl->priority;
+-		}
+-	}
+-}
+-
+-
+-static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
+-{
+-	if (lss->change & TCF_CBQ_LSS_FLAGS) {
+-		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+-		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+-	}
+-	if (lss->change & TCF_CBQ_LSS_EWMA)
+-		cl->ewma_log = lss->ewma_log;
+-	if (lss->change & TCF_CBQ_LSS_AVPKT)
+-		cl->avpkt = lss->avpkt;
+-	if (lss->change & TCF_CBQ_LSS_MINIDLE)
+-		cl->minidle = -(long)lss->minidle;
+-	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
+-		cl->maxidle = lss->maxidle;
+-		cl->avgidle = lss->maxidle;
+-	}
+-	if (lss->change & TCF_CBQ_LSS_OFFTIME)
+-		cl->offtime = lss->offtime;
+-}
+-
+-static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
+-{
+-	q->nclasses[cl->priority]--;
+-	q->quanta[cl->priority] -= cl->weight;
+-	cbq_normalize_quanta(q, cl->priority);
+-}
+-
+-static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
+-{
+-	q->nclasses[cl->priority]++;
+-	q->quanta[cl->priority] += cl->weight;
+-	cbq_normalize_quanta(q, cl->priority);
+-}
+-
+-static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+-
+-	if (wrr->allot)
+-		cl->allot = wrr->allot;
+-	if (wrr->weight)
+-		cl->weight = wrr->weight;
+-	if (wrr->priority) {
+-		cl->priority = wrr->priority - 1;
+-		cl->cpriority = cl->priority;
+-		if (cl->priority >= cl->priority2)
+-			cl->priority2 = TC_CBQ_MAXPRIO - 1;
+-	}
+-
+-	cbq_addprio(q, cl);
+-	return 0;
+-}
+-
+-static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
+-{
+-	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
+-	return 0;
+-}
+-
+-static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
+-	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
+-	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
+-	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
+-	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
+-	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
+-	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+-	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
+-};
+-
+-static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
+-			 struct nlattr *opt,
+-			 struct netlink_ext_ack *extack)
+-{
+-	int err;
+-
+-	if (!opt) {
+-		NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
+-		return -EINVAL;
+-	}
+-
+-	err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
+-					  cbq_policy, extack);
+-	if (err < 0)
+-		return err;
+-
+-	if (tb[TCA_CBQ_WRROPT]) {
+-		const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
+-
+-		if (wrr->priority > TC_CBQ_MAXPRIO) {
+-			NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
+-			err = -EINVAL;
+-		}
+-	}
+-	return err;
+-}
+-
+-static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
+-		    struct netlink_ext_ack *extack)
+-{
+-	struct cbq_sched_data *q = qdisc_priv(sch);
+-	struct nlattr *tb[TCA_CBQ_MAX + 1];
+-	struct tc_ratespec *r;
+-	int err;
+-
+-	qdisc_watchdog_init(&q->watchdog, sch);
+-
+-	err = cbq_opt_parse(tb, opt, extack);
+-	if (err < 0)
+-		return err;
+-
+-	if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
+-		NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
+-		return -EINVAL;
+-	}
+-
+-	r = nla_data(tb[TCA_CBQ_RATE]);
+-
+-	q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
+-	if (!q->link.R_tab)
+-		return -EINVAL;
+-
+-	err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
+-	if (err)
+-		goto put_rtab;
+-
+-	err = qdisc_class_hash_init(&q->clhash);
+-	if (err < 0)
+-		goto put_block;
+-
+-	q->link.sibling = &q->link;
+-	q->link.common.classid = sch->handle;
+-	q->link.qdisc = sch;
+-	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+-				      sch->handle, NULL);
+-	if (!q->link.q)
+-		q->link.q = &noop_qdisc;
+-	else
+-		qdisc_hash_add(q->link.q, true);
+-
+-	q->link.priority = TC_CBQ_MAXPRIO - 1;
+-	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
+-	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
+-	q->link.allot = psched_mtu(qdisc_dev(sch));
+-	q->link.quantum = q->link.allot;
+-	q->link.weight = q->link.R_tab->rate.rate;
+-
+-	q->link.ewma_log = TC_CBQ_DEF_EWMA;
+-	q->link.avpkt = q->link.allot/2;
+-	q->link.minidle = -0x7FFFFFFF;
+-
+-	q->toplevel = TC_CBQ_MAXLEVEL;
+-	q->now = psched_get_time();
+-
+-	cbq_link_class(&q->link);
+-
+-	if (tb[TCA_CBQ_LSSOPT])
+-		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
+-
+-	cbq_addprio(q, &q->link);
+-	return 0;
+-
+-put_block:
+-	tcf_block_put(q->link.block);
+-
+-put_rtab:
+-	qdisc_put_rtab(q->link.R_tab);
+-	return err;
+-}
+-
+-static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+-{
+-	unsigned char *b = skb_tail_pointer(skb);
+-
+-	if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
+-		goto nla_put_failure;
+-	return skb->len;
+-
+-nla_put_failure:
+-	nlmsg_trim(skb, b);
+-	return -1;
+-}
+-
+-static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+-{
+-	unsigned char *b = skb_tail_pointer(skb);
+-	struct tc_cbq_lssopt opt;
+-
+-	opt.flags = 0;
+-	if (cl->borrow == NULL)
+-		opt.flags |= TCF_CBQ_LSS_BOUNDED;
+-	if (cl->share == NULL)
+-		opt.flags |= TCF_CBQ_LSS_ISOLATED;
+-	opt.ewma_log = cl->ewma_log;
+-	opt.level = cl->level;
+-	opt.avpkt = cl->avpkt;
+-	opt.maxidle = cl->maxidle;
+-	opt.minidle = (u32)(-cl->minidle);
+-	opt.offtime = cl->offtime;
+-	opt.change = ~0;
+-	if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
+-		goto nla_put_failure;
+-	return skb->len;
+-
+-nla_put_failure:
+-	nlmsg_trim(skb, b);
+-	return -1;
+-}
+-
+-static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+-{
+-	unsigned char *b = skb_tail_pointer(skb);
+-	struct tc_cbq_wrropt opt;
+-
memset(&opt, 0, sizeof(opt)); +- opt.flags = 0; +- opt.allot = cl->allot; +- opt.priority = cl->priority + 1; +- opt.cpriority = cl->cpriority + 1; +- opt.weight = cl->weight; +- if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt)) +- goto nla_put_failure; +- return skb->len; +- +-nla_put_failure: +- nlmsg_trim(skb, b); +- return -1; +-} +- +-static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) +-{ +- unsigned char *b = skb_tail_pointer(skb); +- struct tc_cbq_fopt opt; +- +- if (cl->split || cl->defmap) { +- opt.split = cl->split ? cl->split->common.classid : 0; +- opt.defmap = cl->defmap; +- opt.defchange = ~0; +- if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt)) +- goto nla_put_failure; +- } +- return skb->len; +- +-nla_put_failure: +- nlmsg_trim(skb, b); +- return -1; +-} +- +-static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) +-{ +- if (cbq_dump_lss(skb, cl) < 0 || +- cbq_dump_rate(skb, cl) < 0 || +- cbq_dump_wrr(skb, cl) < 0 || +- cbq_dump_fopt(skb, cl) < 0) +- return -1; +- return 0; +-} +- +-static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct nlattr *nest; +- +- nest = nla_nest_start_noflag(skb, TCA_OPTIONS); +- if (nest == NULL) +- goto nla_put_failure; +- if (cbq_dump_attr(skb, &q->link) < 0) +- goto nla_put_failure; +- return nla_nest_end(skb, nest); +- +-nla_put_failure: +- nla_nest_cancel(skb, nest); +- return -1; +-} +- +-static int +-cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- +- q->link.xstats.avgidle = q->link.avgidle; +- return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats)); +-} +- +-static int +-cbq_dump_class(struct Qdisc *sch, unsigned long arg, +- struct sk_buff *skb, struct tcmsg *tcm) +-{ +- struct cbq_class *cl = (struct cbq_class *)arg; +- struct nlattr *nest; +- +- if (cl->tparent) +- tcm->tcm_parent = cl->tparent->common.classid; +- else +- tcm->tcm_parent = TC_H_ROOT; +- tcm->tcm_handle = cl->common.classid; +- tcm->tcm_info = cl->q->handle; +- +- nest = nla_nest_start_noflag(skb, TCA_OPTIONS); +- if (nest == NULL) +- goto nla_put_failure; +- if (cbq_dump_attr(skb, cl) < 0) +- goto nla_put_failure; +- return nla_nest_end(skb, nest); +- +-nla_put_failure: +- nla_nest_cancel(skb, nest); +- return -1; +-} +- +-static int +-cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, +- struct gnet_dump *d) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct cbq_class *cl = (struct cbq_class *)arg; +- __u32 qlen; +- +- cl->xstats.avgidle = cl->avgidle; +- cl->xstats.undertime = 0; +- qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); +- +- if (cl->undertime != PSCHED_PASTPERFECT) +- cl->xstats.undertime = cl->undertime - q->now; +- +- if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || +- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || +- gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) +- return -1; +- +- return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); +-} +- +-static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, +- struct Qdisc **old, struct netlink_ext_ack *extack) +-{ +- struct cbq_class *cl = (struct cbq_class *)arg; +- +- if (new == NULL) { +- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, +- cl->common.classid, extack); +- if (new == NULL) +- return -ENOBUFS; +- } +- +- *old = qdisc_replace(sch, new, &cl->q); +- return 0; +-} +- +-static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long 
arg) +-{ +- struct cbq_class *cl = (struct cbq_class *)arg; +- +- return cl->q; +-} +- +-static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) +-{ +- struct cbq_class *cl = (struct cbq_class *)arg; +- +- cbq_deactivate_class(cl); +-} +- +-static unsigned long cbq_find(struct Qdisc *sch, u32 classid) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- +- return (unsigned long)cbq_class_lookup(q, classid); +-} +- +-static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- +- WARN_ON(cl->filters); +- +- tcf_block_put(cl->block); +- qdisc_put(cl->q); +- qdisc_put_rtab(cl->R_tab); +- gen_kill_estimator(&cl->rate_est); +- if (cl != &q->link) +- kfree(cl); +-} +- +-static void cbq_destroy(struct Qdisc *sch) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct hlist_node *next; +- struct cbq_class *cl; +- unsigned int h; +- +-#ifdef CONFIG_NET_CLS_ACT +- q->rx_class = NULL; +-#endif +- /* +- * Filters must be destroyed first because we don't destroy the +- * classes from root to leafs which means that filters can still +- * be bound to classes which have been destroyed already. --TGR '04 +- */ +- for (h = 0; h < q->clhash.hashsize; h++) { +- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { +- tcf_block_put(cl->block); +- cl->block = NULL; +- } +- } +- for (h = 0; h < q->clhash.hashsize; h++) { +- hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], +- common.hnode) +- cbq_destroy_class(sch, cl); +- } +- qdisc_class_hash_destroy(&q->clhash); +-} +- +-static int +-cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, +- unsigned long *arg, struct netlink_ext_ack *extack) +-{ +- int err; +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct cbq_class *cl = (struct cbq_class *)*arg; +- struct nlattr *opt = tca[TCA_OPTIONS]; +- struct nlattr *tb[TCA_CBQ_MAX + 1]; +- struct cbq_class *parent; +- struct qdisc_rate_table *rtab = NULL; +- +- err = cbq_opt_parse(tb, opt, extack); +- if (err < 0) +- return err; +- +- if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) { +- NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params"); +- return -EOPNOTSUPP; +- } +- +- if (cl) { +- /* Check parent */ +- if (parentid) { +- if (cl->tparent && +- cl->tparent->common.classid != parentid) { +- NL_SET_ERR_MSG(extack, "Invalid parent id"); +- return -EINVAL; +- } +- if (!cl->tparent && parentid != TC_H_ROOT) { +- NL_SET_ERR_MSG(extack, "Parent must be root"); +- return -EINVAL; +- } +- } +- +- if (tb[TCA_CBQ_RATE]) { +- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), +- tb[TCA_CBQ_RTAB], extack); +- if (rtab == NULL) +- return -EINVAL; +- } +- +- if (tca[TCA_RATE]) { +- err = gen_replace_estimator(&cl->bstats, NULL, +- &cl->rate_est, +- NULL, +- true, +- tca[TCA_RATE]); +- if (err) { +- NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); +- qdisc_put_rtab(rtab); +- return err; +- } +- } +- +- /* Change class parameters */ +- sch_tree_lock(sch); +- +- if (cl->next_alive != NULL) +- cbq_deactivate_class(cl); +- +- if (rtab) { +- qdisc_put_rtab(cl->R_tab); +- cl->R_tab = rtab; +- } +- +- if (tb[TCA_CBQ_LSSOPT]) +- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); +- +- if (tb[TCA_CBQ_WRROPT]) { +- cbq_rmprio(q, cl); +- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); +- } +- +- if (tb[TCA_CBQ_FOPT]) +- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); +- +- if (cl->q->q.qlen) +- cbq_activate_class(cl); +- +- 
sch_tree_unlock(sch); +- +- return 0; +- } +- +- if (parentid == TC_H_ROOT) +- return -EINVAL; +- +- if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) { +- NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing"); +- return -EINVAL; +- } +- +- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB], +- extack); +- if (rtab == NULL) +- return -EINVAL; +- +- if (classid) { +- err = -EINVAL; +- if (TC_H_MAJ(classid ^ sch->handle) || +- cbq_class_lookup(q, classid)) { +- NL_SET_ERR_MSG(extack, "Specified class not found"); +- goto failure; +- } +- } else { +- int i; +- classid = TC_H_MAKE(sch->handle, 0x8000); +- +- for (i = 0; i < 0x8000; i++) { +- if (++q->hgenerator >= 0x8000) +- q->hgenerator = 1; +- if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) +- break; +- } +- err = -ENOSR; +- if (i >= 0x8000) { +- NL_SET_ERR_MSG(extack, "Unable to generate classid"); +- goto failure; +- } +- classid = classid|q->hgenerator; +- } +- +- parent = &q->link; +- if (parentid) { +- parent = cbq_class_lookup(q, parentid); +- err = -EINVAL; +- if (!parent) { +- NL_SET_ERR_MSG(extack, "Failed to find parentid"); +- goto failure; +- } +- } +- +- err = -ENOBUFS; +- cl = kzalloc(sizeof(*cl), GFP_KERNEL); +- if (cl == NULL) +- goto failure; +- +- gnet_stats_basic_sync_init(&cl->bstats); +- err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); +- if (err) { +- kfree(cl); +- goto failure; +- } +- +- if (tca[TCA_RATE]) { +- err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, +- NULL, true, tca[TCA_RATE]); +- if (err) { +- NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); +- tcf_block_put(cl->block); +- kfree(cl); +- goto failure; +- } +- } +- +- cl->R_tab = rtab; +- rtab = NULL; +- cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, +- NULL); +- if (!cl->q) +- cl->q = &noop_qdisc; +- else +- qdisc_hash_add(cl->q, true); +- +- cl->common.classid = classid; +- cl->tparent = parent; +- cl->qdisc = sch; +- cl->allot = parent->allot; +- cl->quantum = cl->allot; +- cl->weight = cl->R_tab->rate.rate; +- +- sch_tree_lock(sch); +- cbq_link_class(cl); +- cl->borrow = cl->tparent; +- if (cl->tparent != &q->link) +- cl->share = cl->tparent; +- cbq_adjust_levels(parent); +- cl->minidle = -0x7FFFFFFF; +- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); +- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); +- if (cl->ewma_log == 0) +- cl->ewma_log = q->link.ewma_log; +- if (cl->maxidle == 0) +- cl->maxidle = q->link.maxidle; +- if (cl->avpkt == 0) +- cl->avpkt = q->link.avpkt; +- if (tb[TCA_CBQ_FOPT]) +- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); +- sch_tree_unlock(sch); +- +- qdisc_class_hash_grow(sch, &q->clhash); +- +- *arg = (unsigned long)cl; +- return 0; +- +-failure: +- qdisc_put_rtab(rtab); +- return err; +-} +- +-static int cbq_delete(struct Qdisc *sch, unsigned long arg, +- struct netlink_ext_ack *extack) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct cbq_class *cl = (struct cbq_class *)arg; +- +- if (cl->filters || cl->children || cl == &q->link) +- return -EBUSY; +- +- sch_tree_lock(sch); +- +- qdisc_purge_queue(cl->q); +- +- if (cl->next_alive) +- cbq_deactivate_class(cl); +- +- if (q->tx_borrowed == cl) +- q->tx_borrowed = q->tx_class; +- if (q->tx_class == cl) { +- q->tx_class = NULL; +- q->tx_borrowed = NULL; +- } +-#ifdef CONFIG_NET_CLS_ACT +- if (q->rx_class == cl) +- q->rx_class = NULL; +-#endif +- +- cbq_unlink_class(cl); +- cbq_adjust_levels(cl->tparent); +- cl->defmap = 0; +- 
cbq_sync_defmap(cl); +- +- cbq_rmprio(q, cl); +- sch_tree_unlock(sch); +- +- cbq_destroy_class(sch, cl); +- return 0; +-} +- +-static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg, +- struct netlink_ext_ack *extack) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct cbq_class *cl = (struct cbq_class *)arg; +- +- if (cl == NULL) +- cl = &q->link; +- +- return cl->block; +-} +- +-static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, +- u32 classid) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct cbq_class *p = (struct cbq_class *)parent; +- struct cbq_class *cl = cbq_class_lookup(q, classid); +- +- if (cl) { +- if (p && p->level <= cl->level) +- return 0; +- cl->filters++; +- return (unsigned long)cl; +- } +- return 0; +-} +- +-static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) +-{ +- struct cbq_class *cl = (struct cbq_class *)arg; +- +- cl->filters--; +-} +- +-static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +-{ +- struct cbq_sched_data *q = qdisc_priv(sch); +- struct cbq_class *cl; +- unsigned int h; +- +- if (arg->stop) +- return; +- +- for (h = 0; h < q->clhash.hashsize; h++) { +- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { +- if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) +- return; +- } +- } +-} +- +-static const struct Qdisc_class_ops cbq_class_ops = { +- .graft = cbq_graft, +- .leaf = cbq_leaf, +- .qlen_notify = cbq_qlen_notify, +- .find = cbq_find, +- .change = cbq_change_class, +- .delete = cbq_delete, +- .walk = cbq_walk, +- .tcf_block = cbq_tcf_block, +- .bind_tcf = cbq_bind_filter, +- .unbind_tcf = cbq_unbind_filter, +- .dump = cbq_dump_class, +- .dump_stats = cbq_dump_class_stats, +-}; +- +-static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { +- .next = NULL, +- .cl_ops = &cbq_class_ops, +- .id = "cbq", +- .priv_size = sizeof(struct cbq_sched_data), +- .enqueue = cbq_enqueue, +- .dequeue = cbq_dequeue, +- .peek = qdisc_peek_dequeued, +- .init = cbq_init, +- .reset = cbq_reset, +- .destroy = cbq_destroy, +- .change = NULL, +- .dump = cbq_dump, +- .dump_stats = cbq_dump_stats, +- .owner = THIS_MODULE, +-}; +- +-static int __init cbq_module_init(void) +-{ +- return register_qdisc(&cbq_qdisc_ops); +-} +-static void __exit cbq_module_exit(void) +-{ +- unregister_qdisc(&cbq_qdisc_ops); +-} +-module_init(cbq_module_init) +-module_exit(cbq_module_exit) +-MODULE_LICENSE("GPL"); +--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json ++++ /dev/null +@@ -1,184 +0,0 @@ +-[ +- { +- "id": "3460", +- "name": "Create CBQ with default setting", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "0592", +- "name": "Create CBQ with mpu", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000", +- "expExitCode": "0", +- 
"verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "4684", +- "name": "Create CBQ with valid cell num", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "4345", +- "name": "Create CBQ with invalid cell num", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100", +- "expExitCode": "1", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "0", +- "teardown": [ +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "4525", +- "name": "Create CBQ with valid ewma", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "6784", +- "name": "Create CBQ with invalid ewma", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128", +- "expExitCode": "1", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "0", +- "teardown": [ +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "5468", +- "name": "Delete CBQ with handle", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true", +- "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000" +- ], +- "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "0", +- "teardown": [ +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "492a", +- "name": "Show CBQ class", +- "category": [ +- "qdisc", +- "cbq" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP 
link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000", +- "expExitCode": "0", +- "verifyCmd": "$TC class show dev $DUMMY", +- "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- } +-] diff --git a/queue-6.1/net-sched-retire-dsmark-qdisc.patch b/queue-6.1/net-sched-retire-dsmark-qdisc.patch new file mode 100644 index 00000000000..29e8e911df2 --- /dev/null +++ b/queue-6.1/net-sched-retire-dsmark-qdisc.patch @@ -0,0 +1,721 @@ +From bbe77c14ee6185a61ba6d5e435c1cbb489d2a9ed Mon Sep 17 00:00:00 2001 +From: Jamal Hadi Salim +Date: Tue, 14 Feb 2023 08:49:13 -0500 +Subject: net/sched: Retire dsmark qdisc + +From: Jamal Hadi Salim + +commit bbe77c14ee6185a61ba6d5e435c1cbb489d2a9ed upstream. + +The dsmark qdisc has served us well over the years for diffserv but has not +been getting much attention due to other more popular approaches to do diffserv +services. Most recently it has become a shooting target for syzkaller. For this +reason, we are retiring it. + +Signed-off-by: Jamal Hadi Salim +Acked-by: Jiri Pirko +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/sch_dsmark.c | 518 ---------- + tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json | 140 -- + 4 files changed, 670 deletions(-) + delete mode 100644 net/sched/sch_dsmark.c + delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json + +--- a/net/sched/Kconfig ++++ b/net/sched/Kconfig +@@ -186,17 +186,6 @@ config NET_SCH_GRED + To compile this code as a module, choose M here: the + module will be called sch_gred. + +-config NET_SCH_DSMARK +- tristate "Differentiated Services marker (DSMARK)" +- help +- Say Y if you want to schedule packets according to the +- Differentiated Services architecture proposed in RFC 2475. +- Technical information on this method, with pointers to associated +- RFCs, is available at . +- +- To compile this code as a module, choose M here: the +- module will be called sch_dsmark. +- + config NET_SCH_NETEM + tristate "Network emulator (NETEM)" + help +--- a/net/sched/Makefile ++++ b/net/sched/Makefile +@@ -38,7 +38,6 @@ obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o + obj-$(CONFIG_NET_SCH_RED) += sch_red.o + obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o + obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o +-obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o + obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o +--- a/net/sched/sch_dsmark.c ++++ /dev/null +@@ -1,518 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-only +-/* net/sched/sch_dsmark.c - Differentiated Services field marker */ +- +-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ +- +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * classid class marking +- * ------- ----- ------- +- * n/a 0 n/a +- * x:0 1 use entry [0] +- * ... ... ... +- * x:y y>0 y+1 use entry [y] +- * ... ... ... +- * x:indices-1 indices use entry [indices-1] +- * ... ... ... +- * x:y y+1 use entry [y & (indices-1)] +- * ... ... ... 
+- * 0xffff 0x10000 use entry [indices-1] +- */ +- +- +-#define NO_DEFAULT_INDEX (1 << 16) +- +-struct mask_value { +- u8 mask; +- u8 value; +-}; +- +-struct dsmark_qdisc_data { +- struct Qdisc *q; +- struct tcf_proto __rcu *filter_list; +- struct tcf_block *block; +- struct mask_value *mv; +- u16 indices; +- u8 set_tc_index; +- u32 default_index; /* index range is 0...0xffff */ +-#define DSMARK_EMBEDDED_SZ 16 +- struct mask_value embedded[DSMARK_EMBEDDED_SZ]; +-}; +- +-static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) +-{ +- return index <= p->indices && index > 0; +-} +- +-/* ------------------------- Class/flow operations ------------------------- */ +- +-static int dsmark_graft(struct Qdisc *sch, unsigned long arg, +- struct Qdisc *new, struct Qdisc **old, +- struct netlink_ext_ack *extack) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- +- pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", +- __func__, sch, p, new, old); +- +- if (new == NULL) { +- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, +- sch->handle, NULL); +- if (new == NULL) +- new = &noop_qdisc; +- } +- +- *old = qdisc_replace(sch, new, &p->q); +- return 0; +-} +- +-static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- return p->q; +-} +- +-static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) +-{ +- return TC_H_MIN(classid) + 1; +-} +- +-static unsigned long dsmark_bind_filter(struct Qdisc *sch, +- unsigned long parent, u32 classid) +-{ +- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", +- __func__, sch, qdisc_priv(sch), classid); +- +- return dsmark_find(sch, classid); +-} +- +-static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) +-{ +-} +- +-static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = { +- [TCA_DSMARK_INDICES] = { .type = NLA_U16 }, +- [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 }, +- [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG }, +- [TCA_DSMARK_MASK] = { .type = NLA_U8 }, +- [TCA_DSMARK_VALUE] = { .type = NLA_U8 }, +-}; +- +-static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, +- struct nlattr **tca, unsigned long *arg, +- struct netlink_ext_ack *extack) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- struct nlattr *opt = tca[TCA_OPTIONS]; +- struct nlattr *tb[TCA_DSMARK_MAX + 1]; +- int err = -EINVAL; +- +- pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", +- __func__, sch, p, classid, parent, *arg); +- +- if (!dsmark_valid_index(p, *arg)) { +- err = -ENOENT; +- goto errout; +- } +- +- if (!opt) +- goto errout; +- +- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, +- dsmark_policy, NULL); +- if (err < 0) +- goto errout; +- +- if (tb[TCA_DSMARK_VALUE]) +- p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); +- +- if (tb[TCA_DSMARK_MASK]) +- p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); +- +- err = 0; +- +-errout: +- return err; +-} +- +-static int dsmark_delete(struct Qdisc *sch, unsigned long arg, +- struct netlink_ext_ack *extack) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- +- if (!dsmark_valid_index(p, arg)) +- return -EINVAL; +- +- p->mv[arg - 1].mask = 0xff; +- p->mv[arg - 1].value = 0; +- +- return 0; +-} +- +-static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- int i; +- +- pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", +- __func__, sch, p, walker); +- +- if (walker->stop) +- return; 
+- +- for (i = 0; i < p->indices; i++) { +- if (p->mv[i].mask == 0xff && !p->mv[i].value) { +- walker->count++; +- continue; +- } +- if (!tc_qdisc_stats_dump(sch, i + 1, walker)) +- break; +- } +-} +- +-static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, +- struct netlink_ext_ack *extack) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- +- return p->block; +-} +- +-/* --------------------------- Qdisc operations ---------------------------- */ +- +-static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, +- struct sk_buff **to_free) +-{ +- unsigned int len = qdisc_pkt_len(skb); +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- int err; +- +- pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); +- +- if (p->set_tc_index) { +- int wlen = skb_network_offset(skb); +- +- switch (skb_protocol(skb, true)) { +- case htons(ETH_P_IP): +- wlen += sizeof(struct iphdr); +- if (!pskb_may_pull(skb, wlen) || +- skb_try_make_writable(skb, wlen)) +- goto drop; +- +- skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) +- & ~INET_ECN_MASK; +- break; +- +- case htons(ETH_P_IPV6): +- wlen += sizeof(struct ipv6hdr); +- if (!pskb_may_pull(skb, wlen) || +- skb_try_make_writable(skb, wlen)) +- goto drop; +- +- skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) +- & ~INET_ECN_MASK; +- break; +- default: +- skb->tc_index = 0; +- break; +- } +- } +- +- if (TC_H_MAJ(skb->priority) == sch->handle) +- skb->tc_index = TC_H_MIN(skb->priority); +- else { +- struct tcf_result res; +- struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); +- int result = tcf_classify(skb, NULL, fl, &res, false); +- +- pr_debug("result %d class 0x%04x\n", result, res.classid); +- +- switch (result) { +-#ifdef CONFIG_NET_CLS_ACT +- case TC_ACT_QUEUED: +- case TC_ACT_STOLEN: +- case TC_ACT_TRAP: +- __qdisc_drop(skb, to_free); +- return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; +- +- case TC_ACT_SHOT: +- goto drop; +-#endif +- case TC_ACT_OK: +- skb->tc_index = TC_H_MIN(res.classid); +- break; +- +- default: +- if (p->default_index != NO_DEFAULT_INDEX) +- skb->tc_index = p->default_index; +- break; +- } +- } +- +- err = qdisc_enqueue(skb, p->q, to_free); +- if (err != NET_XMIT_SUCCESS) { +- if (net_xmit_drop_count(err)) +- qdisc_qstats_drop(sch); +- return err; +- } +- +- sch->qstats.backlog += len; +- sch->q.qlen++; +- +- return NET_XMIT_SUCCESS; +- +-drop: +- qdisc_drop(skb, sch, to_free); +- return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; +-} +- +-static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- struct sk_buff *skb; +- u32 index; +- +- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); +- +- skb = qdisc_dequeue_peeked(p->q); +- if (skb == NULL) +- return NULL; +- +- qdisc_bstats_update(sch, skb); +- qdisc_qstats_backlog_dec(sch, skb); +- sch->q.qlen--; +- +- index = skb->tc_index & (p->indices - 1); +- pr_debug("index %d->%d\n", skb->tc_index, index); +- +- switch (skb_protocol(skb, true)) { +- case htons(ETH_P_IP): +- ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, +- p->mv[index].value); +- break; +- case htons(ETH_P_IPV6): +- ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, +- p->mv[index].value); +- break; +- default: +- /* +- * Only complain if a change was actually attempted. +- * This way, we can send non-IP traffic through dsmark +- * and don't need yet another qdisc as a bypass. 
+- */ +- if (p->mv[index].mask != 0xff || p->mv[index].value) +- pr_warn("%s: unsupported protocol %d\n", +- __func__, ntohs(skb_protocol(skb, true))); +- break; +- } +- +- return skb; +-} +- +-static struct sk_buff *dsmark_peek(struct Qdisc *sch) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- +- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); +- +- return p->q->ops->peek(p->q); +-} +- +-static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, +- struct netlink_ext_ack *extack) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- struct nlattr *tb[TCA_DSMARK_MAX + 1]; +- int err = -EINVAL; +- u32 default_index = NO_DEFAULT_INDEX; +- u16 indices; +- int i; +- +- pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); +- +- if (!opt) +- goto errout; +- +- err = tcf_block_get(&p->block, &p->filter_list, sch, extack); +- if (err) +- return err; +- +- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, +- dsmark_policy, NULL); +- if (err < 0) +- goto errout; +- +- err = -EINVAL; +- if (!tb[TCA_DSMARK_INDICES]) +- goto errout; +- indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); +- +- if (hweight32(indices) != 1) +- goto errout; +- +- if (tb[TCA_DSMARK_DEFAULT_INDEX]) +- default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); +- +- if (indices <= DSMARK_EMBEDDED_SZ) +- p->mv = p->embedded; +- else +- p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); +- if (!p->mv) { +- err = -ENOMEM; +- goto errout; +- } +- for (i = 0; i < indices; i++) { +- p->mv[i].mask = 0xff; +- p->mv[i].value = 0; +- } +- p->indices = indices; +- p->default_index = default_index; +- p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); +- +- p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, +- NULL); +- if (p->q == NULL) +- p->q = &noop_qdisc; +- else +- qdisc_hash_add(p->q, true); +- +- pr_debug("%s: qdisc %p\n", __func__, p->q); +- +- err = 0; +-errout: +- return err; +-} +- +-static void dsmark_reset(struct Qdisc *sch) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- +- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); +- if (p->q) +- qdisc_reset(p->q); +-} +- +-static void dsmark_destroy(struct Qdisc *sch) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- +- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); +- +- tcf_block_put(p->block); +- qdisc_put(p->q); +- if (p->mv != p->embedded) +- kfree(p->mv); +-} +- +-static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, +- struct sk_buff *skb, struct tcmsg *tcm) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- struct nlattr *opts = NULL; +- +- pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); +- +- if (!dsmark_valid_index(p, cl)) +- return -EINVAL; +- +- tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); +- tcm->tcm_info = p->q->handle; +- +- opts = nla_nest_start_noflag(skb, TCA_OPTIONS); +- if (opts == NULL) +- goto nla_put_failure; +- if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || +- nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) +- goto nla_put_failure; +- +- return nla_nest_end(skb, opts); +- +-nla_put_failure: +- nla_nest_cancel(skb, opts); +- return -EMSGSIZE; +-} +- +-static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) +-{ +- struct dsmark_qdisc_data *p = qdisc_priv(sch); +- struct nlattr *opts = NULL; +- +- opts = nla_nest_start_noflag(skb, TCA_OPTIONS); +- if (opts == NULL) +- goto nla_put_failure; +- if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) +- goto nla_put_failure; +- 
+- if (p->default_index != NO_DEFAULT_INDEX && +- nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index)) +- goto nla_put_failure; +- +- if (p->set_tc_index && +- nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX)) +- goto nla_put_failure; +- +- return nla_nest_end(skb, opts); +- +-nla_put_failure: +- nla_nest_cancel(skb, opts); +- return -EMSGSIZE; +-} +- +-static const struct Qdisc_class_ops dsmark_class_ops = { +- .graft = dsmark_graft, +- .leaf = dsmark_leaf, +- .find = dsmark_find, +- .change = dsmark_change, +- .delete = dsmark_delete, +- .walk = dsmark_walk, +- .tcf_block = dsmark_tcf_block, +- .bind_tcf = dsmark_bind_filter, +- .unbind_tcf = dsmark_unbind_filter, +- .dump = dsmark_dump_class, +-}; +- +-static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { +- .next = NULL, +- .cl_ops = &dsmark_class_ops, +- .id = "dsmark", +- .priv_size = sizeof(struct dsmark_qdisc_data), +- .enqueue = dsmark_enqueue, +- .dequeue = dsmark_dequeue, +- .peek = dsmark_peek, +- .init = dsmark_init, +- .reset = dsmark_reset, +- .destroy = dsmark_destroy, +- .change = NULL, +- .dump = dsmark_dump, +- .owner = THIS_MODULE, +-}; +- +-static int __init dsmark_module_init(void) +-{ +- return register_qdisc(&dsmark_qdisc_ops); +-} +- +-static void __exit dsmark_module_exit(void) +-{ +- unregister_qdisc(&dsmark_qdisc_ops); +-} +- +-module_init(dsmark_module_init) +-module_exit(dsmark_module_exit) +- +-MODULE_LICENSE("GPL"); +--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json ++++ /dev/null +@@ -1,140 +0,0 @@ +-[ +- { +- "id": "6345", +- "name": "Create DSMARK with default setting", +- "category": [ +- "qdisc", +- "dsmark" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "3462", +- "name": "Create DSMARK with default_index setting", +- "category": [ +- "qdisc", +- "dsmark" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "ca95", +- "name": "Create DSMARK with set_tc_index flag", +- "category": [ +- "qdisc", +- "dsmark" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "a950", +- "name": "Create DSMARK with multiple setting", +- "category": [ +- "qdisc", +- "dsmark" +- ], +- "plugins": { +- "requires": 
"nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index", +- "matchCount": "1", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "4092", +- "name": "Delete DSMARK with handle", +- "category": [ +- "qdisc", +- "dsmark" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true", +- "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024" +- ], +- "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root", +- "expExitCode": "0", +- "verifyCmd": "$TC qdisc show dev $DUMMY", +- "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400", +- "matchCount": "0", +- "teardown": [ +- "$IP link del dev $DUMMY type dummy" +- ] +- }, +- { +- "id": "5930", +- "name": "Show DSMARK class", +- "category": [ +- "qdisc", +- "dsmark" +- ], +- "plugins": { +- "requires": "nsPlugin" +- }, +- "setup": [ +- "$IP link add dev $DUMMY type dummy || /bin/true" +- ], +- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024", +- "expExitCode": "0", +- "verifyCmd": "$TC class show dev $DUMMY", +- "matchPattern": "class dsmark 1:", +- "matchCount": "0", +- "teardown": [ +- "$TC qdisc del dev $DUMMY handle 1: root", +- "$IP link del dev $DUMMY type dummy" +- ] +- } +-] diff --git a/queue-6.1/series b/queue-6.1/series new file mode 100644 index 00000000000..d97558d11df --- /dev/null +++ b/queue-6.1/series @@ -0,0 +1,3 @@ +net-sched-retire-cbq-qdisc.patch +net-sched-retire-atm-qdisc.patch +net-sched-retire-dsmark-qdisc.patch