]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
net: sch_fq: update flow delivery time on earlier EDT packet
author Willem de Bruijn <willemb@google.com>
Tue, 26 May 2026 13:40:37 +0000 (09:40 -0400)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 28 May 2026 01:41:19 +0000 (18:41 -0700)
When inserting an EDT packet with a time before flow->time_next_packet,
update the flow's, and possibly the queue's, next delivery time.

Reinsert the flow into the q->delayed rb-tree to position it correctly
and to have fq_check_throttled set the wake-up at the right next time.

Factor the RB tree insertion out of fq_flow_set_throttled to avoid open
coding it twice.

EDT packets do not take precedence over queue rate limit. Skip this
new step if a queue limit is set. EDT packets do take precedence over
per-socket rate limits, as can be seen from fq_dequeue reading
sk_pacing_rate if !skb->tstamp.

With this change the so_txtime selftest sends packets in the expected
order.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20260526134109.2624493-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/sched/sch_fq.c
tools/testing/selftests/drivers/net/so_txtime.py

index 796cb8046a902b94952a571b250813c5e557d600..33783c9f8e1665e096cf22adf61db3c199b87afb 100644 (file)
@@ -217,7 +217,7 @@ static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
        fq_flow_add_tail(q, f, OLD_FLOW);
 }
 
-static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
+static void fq_flow_rb_insert(struct fq_sched_data *q, struct fq_flow *f)
 {
        struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
 
@@ -233,14 +233,18 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
        }
        rb_link_node(&f->rate_node, parent, p);
        rb_insert_color(&f->rate_node, &q->delayed);
-       q->throttled_flows++;
-       q->stat_throttled++;
 
-       f->next = &throttled;
        if (q->time_next_delayed_flow > f->time_next_packet)
                q->time_next_delayed_flow = f->time_next_packet;
 }
 
+static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
+{
+       fq_flow_rb_insert(q, f);
+       q->throttled_flows++;
+       q->stat_throttled++;
+       f->next = &throttled;
+}
 
 static struct kmem_cache *fq_flow_cachep __read_mostly;
 
@@ -539,6 +543,24 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
        return unlikely((s64)skb->tstamp > (s64)(now + q->horizon));
 }
 
+static void fq_flow_adjust_timer(struct fq_sched_data *q, struct fq_flow *flow,
+                                u64 time_to_send, u64 now)
+{
+       if (time_to_send <= now) {
+               fq_flow_unset_throttled(q, flow);
+               if (q->time_next_delayed_flow == flow->time_next_packet) {
+                       struct rb_node *p = rb_first(&q->delayed);
+
+                       q->time_next_delayed_flow = p ? rb_entry(p, struct fq_flow, rate_node)->time_next_packet : ~0ULL;
+               }
+               flow->time_next_packet = time_to_send;
+       } else {
+               rb_erase(&flow->rate_node, &q->delayed);
+               flow->time_next_packet = time_to_send;
+               fq_flow_rb_insert(q, flow);
+       }
+}
+
 static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                      struct sk_buff **to_free)
 {
@@ -596,6 +618,10 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        /* Note: this overwrites f->age */
        flow_queue_add(f, skb);
 
+       if (fq_skb_cb(skb)->time_to_send < f->time_next_packet && skb->tstamp &&
+           fq_flow_is_throttled(f) && q->flow_max_rate == ~0UL)
+               fq_flow_adjust_timer(q, f, fq_skb_cb(skb)->time_to_send, now);
+
        qdisc_qstats_backlog_inc(sch, skb);
        qdisc_qlen_inc(sch);
 
index e7de8fe22c1e59026592de2e6ba5925576c74829..5d4388bfc6dd914845a0db49e223e4b0af00cd32 100755 (executable)
@@ -53,7 +53,7 @@ def _test_variants_mono():
             ["zero_delay", "a,0", "a,0"],
             ["one_pkt", "a,10", "a,10"],
             ["in_order", "a,10,b,20", "a,10,b,20"],
-            ["reverse_order", "a,20,b,10", "b,20,a,20"],
+            ["reverse_order", "a,20,b,10", "b,10,a,20"],
         ]:
             name = f"v{ipver}_{testcase[0]}"
             yield KsftNamedVariant(name, ipver, testcase[1], testcase[2])