Some qdiscs, such as cake, codel and fq_codel, might drop packets
in their dequeue() method.
This is currently problematic because dequeue() runs with
the qdisc spinlock held, and freeing skbs there can be extremely expensive.
Add a qdisc_dequeue_drop() helper and a new TCQ_F_DEQUEUE_DROPS flag
so that these qdiscs can opt in to deferring the skb frees
until after the qdisc spinlock is released.
TCQ_F_DEQUEUE_DROPS is an attempt to avoid penalizing other qdiscs
with an extra cache line miss.
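For illustration only, a qdisc could opt in roughly as sketched below
(hypothetical code, not part of this patch; example_init(),
example_dequeue() and example_should_drop() are made-up names, and
stats/backlog accounting is omitted):

	static int example_init(struct Qdisc *sch, struct nlattr *opt,
				struct netlink_ext_ack *extack)
	{
		/* Opt in: drops from dequeue() are chained on sch->to_free. */
		sch->flags |= TCQ_F_DEQUEUE_DROPS;
		return 0;
	}

	static struct sk_buff *example_dequeue(struct Qdisc *sch)
	{
		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

		if (skb && example_should_drop(skb)) {
			/* Defer the free: qdisc_run_end() returns sch->to_free
			 * to its caller, which frees it with
			 * tcf_kfree_skb_list() once the qdisc spinlock has
			 * been released.
			 */
			qdisc_dequeue_drop(sch, skb,
					   SKB_DROP_REASON_QDISC_CONGESTED);
			return NULL;
		}
		return skb;
	}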
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251121083256.674562-14-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
void __qdisc_run(struct Qdisc *q);
-static inline void qdisc_run(struct Qdisc *q)
+static inline struct sk_buff *qdisc_run(struct Qdisc *q)
{
if (qdisc_run_begin(q)) {
__qdisc_run(q);
- qdisc_run_end(q);
+ return qdisc_run_end(q);
}
+ return NULL;
}
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
#define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */
#define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */
+#define TCQ_F_DEQUEUE_DROPS 0x400 /* ->dequeue() can drop packets in q->to_free */
+
u32 limit;
const struct Qdisc_ops *ops;
struct qdisc_size_table __rcu *stab;
/* Note : we only change qstats.backlog in fast path. */
struct gnet_stats_queue qstats;
+
+ struct sk_buff *to_free;
__cacheline_group_end(Qdisc_write);
return true;
}
-static inline void qdisc_run_end(struct Qdisc *qdisc)
+static inline struct sk_buff *qdisc_run_end(struct Qdisc *qdisc)
{
+ struct sk_buff *to_free = NULL;
+
if (qdisc->flags & TCQ_F_NOLOCK) {
spin_unlock(&qdisc->seqlock);
if (unlikely(test_bit(__QDISC_STATE_MISSED,
&qdisc->state)))
__netif_schedule(qdisc);
- } else {
- WRITE_ONCE(qdisc->running, false);
+ return NULL;
+ }
+
+ if (qdisc->flags & TCQ_F_DEQUEUE_DROPS) {
+ to_free = qdisc->to_free;
+ if (to_free)
+ qdisc->to_free = NULL;
}
+ WRITE_ONCE(qdisc->running, false);
+ return to_free;
}
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
}
}
+static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
+ enum skb_drop_reason reason)
+{
+ DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS));
+ DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK);
+
+ tcf_set_drop_reason(skb, reason);
+ skb->next = q->to_free;
+ q->to_free = skb;
+}
+
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
struct net_device *dev,
struct netdev_queue *txq)
{
- struct sk_buff *next, *to_free = NULL;
+ struct sk_buff *next, *to_free = NULL, *to_free2 = NULL;
spinlock_t *root_lock = qdisc_lock(q);
struct llist_node *ll_list, *first_n;
unsigned long defer_count = 0;
if (unlikely(!nolock_qdisc_is_empty(q))) {
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
__qdisc_run(q);
- qdisc_run_end(q);
+ to_free2 = qdisc_run_end(q);
goto free_skbs;
}
!nolock_qdisc_is_empty(q))
__qdisc_run(q);
- qdisc_run_end(q);
- return NET_XMIT_SUCCESS;
+ to_free2 = qdisc_run_end(q);
+ rc = NET_XMIT_SUCCESS;
+ goto free_skbs;
}
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
- qdisc_run(q);
+ to_free2 = qdisc_run(q);
goto free_skbs;
}
qdisc_bstats_update(q, skb);
if (sch_direct_xmit(skb, q, dev, txq, root_lock, true))
__qdisc_run(q);
- qdisc_run_end(q);
+ to_free2 = qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
int count = 0;
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
count++;
}
- qdisc_run(q);
+ to_free2 = qdisc_run(q);
if (count != 1)
rc = NET_XMIT_SUCCESS;
}
free_skbs:
tcf_kfree_skb_list(to_free);
+ tcf_kfree_skb_list(to_free2);
return rc;
}
rcu_read_lock();
while (head) {
- struct Qdisc *q = head;
spinlock_t *root_lock = NULL;
+ struct sk_buff *to_free;
+ struct Qdisc *q = head;
head = head->next_sched;
}
clear_bit(__QDISC_STATE_SCHED, &q->state);
- qdisc_run(q);
+ to_free = qdisc_run(q);
if (root_lock)
spin_unlock(root_lock);
+ tcf_kfree_skb_list(to_free);
}
rcu_read_unlock();