]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
rxrpc: Store the DATA serial in the txqueue and use this in RTT calc
authorDavid Howells <dhowells@redhat.com>
Wed, 4 Dec 2024 07:46:52 +0000 (07:46 +0000)
committerJakub Kicinski <kuba@kernel.org>
Mon, 9 Dec 2024 21:48:29 +0000 (13:48 -0800)
Store the serial number set on a DATA packet at the point of transmission
in the rxrpc_txqueue struct and when an ACK is received, match the
reference number in the ACK by trawling the txqueue rather than sharing an
RTT table with ACK RTT.  This can be done as part of Tx queue rotation.

This means we have a lot more RTT samples available and is faster to search
with all the serial numbers packed together into a few cachelines rather
than being hung off different txbufs.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://patch.msgid.link/20241204074710.990092-25-dhowells@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/trace/events/rxrpc.h
net/rxrpc/ar-internal.h
net/rxrpc/call_event.c
net/rxrpc/input.c
net/rxrpc/output.c

index 609522a5bd0f7fac9782c8ef56f6e69a582f18d7..798bea0853c4cadbd34dc71718804af64ef9d5fe 100644 (file)
        E_(rxrpc_rtt_tx_ping,                   "PING")
 
 #define rxrpc_rtt_rx_traces \
-       EM(rxrpc_rtt_rx_other_ack,              "OACK") \
+       EM(rxrpc_rtt_rx_data_ack,               "DACK") \
        EM(rxrpc_rtt_rx_obsolete,               "OBSL") \
        EM(rxrpc_rtt_rx_lost,                   "LOST") \
-       EM(rxrpc_rtt_rx_ping_response,          "PONG") \
-       E_(rxrpc_rtt_rx_requested_ack,          "RACK")
+       E_(rxrpc_rtt_rx_ping_response,          "PONG")
 
 #define rxrpc_timer_traces \
        EM(rxrpc_timer_trace_delayed_ack,       "DelayAck ") \
@@ -1695,10 +1694,9 @@ TRACE_EVENT(rxrpc_retransmit,
            );
 
 TRACE_EVENT(rxrpc_congest,
-           TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
-                    rxrpc_serial_t ack_serial),
+           TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary),
 
-           TP_ARGS(call, summary, ack_serial),
+           TP_ARGS(call, summary),
 
            TP_STRUCT__entry(
                    __field(unsigned int,                       call)
@@ -1706,7 +1704,6 @@ TRACE_EVENT(rxrpc_congest,
                    __field(rxrpc_seq_t,                        hard_ack)
                    __field(rxrpc_seq_t,                        top)
                    __field(rxrpc_seq_t,                        lowest_nak)
-                   __field(rxrpc_serial_t,                     ack_serial)
                    __field(u16,                                nr_sacks)
                    __field(u16,                                nr_snacks)
                    __field(u16,                                cwnd)
@@ -1722,7 +1719,6 @@ TRACE_EVENT(rxrpc_congest,
                    __entry->hard_ack   = call->acks_hard_ack;
                    __entry->top        = call->tx_top;
                    __entry->lowest_nak = call->acks_lowest_nak;
-                   __entry->ack_serial = ack_serial;
                    __entry->nr_sacks   = call->acks_nr_sacks;
                    __entry->nr_snacks  = call->acks_nr_snacks;
                    __entry->cwnd       = call->cong_cwnd;
@@ -1734,7 +1730,7 @@ TRACE_EVENT(rxrpc_congest,
 
            TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
                      __entry->call,
-                     __entry->ack_serial,
+                     __entry->sum.acked_serial,
                      __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names),
                      __entry->hard_ack,
                      __print_symbolic(__entry->ca_state, rxrpc_ca_states),
index 9a70f0b86570ae96049eea6f6011a32602102063..297be421639cc3f6f3ae229ae8fd9bf7b74c18f2 100644 (file)
@@ -769,6 +769,7 @@ struct rxrpc_call {
  * Summary of a new ACK and the changes it made to the Tx buffer packet states.
  */
 struct rxrpc_ack_summary {
+       rxrpc_serial_t  acked_serial;           /* Serial number ACK'd */
        u16             in_flight;              /* Number of unreceived transmissions */
        u16             nr_new_hacks;           /* Number of rotated new ACKs */
        u16             nr_new_sacks;           /* Number of new soft ACKs in packet */
@@ -777,6 +778,7 @@ struct rxrpc_ack_summary {
        bool            new_low_snack:1;        /* T if new low soft NACK found */
        bool            retrans_timeo:1;        /* T if reTx due to timeout happened */
        bool            need_retransmit:1;      /* T if we need transmission */
+       bool            rtt_sample_avail:1;     /* T if RTT sample available */
        u8 /*enum rxrpc_congest_change*/ change;
 };
 
@@ -859,12 +861,14 @@ struct rxrpc_txqueue {
        unsigned long           segment_acked;  /* Bit-per-buf: Set if ACK'd */
        unsigned long           segment_lost;   /* Bit-per-buf: Set if declared lost */
        unsigned long           segment_retransmitted; /* Bit-per-buf: Set if retransmitted */
+       unsigned long           rtt_samples;    /* Bit-per-buf: Set if available for RTT */
 
        /* The arrays we want to pack into as few cache lines as possible. */
        struct {
 #define RXRPC_NR_TXQUEUE BITS_PER_LONG
 #define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1)
                struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE];
+               unsigned int    segment_serial[RXRPC_NR_TXQUEUE];
                unsigned int    segment_xmit_ts[RXRPC_NR_TXQUEUE];
        } ____cacheline_aligned;
 };
index e25921d39d4d7ff90d1651569f24cf7bb2ed97d5..f71773b18e2213117709df539d6ffa9fe9f5f5c0 100644 (file)
@@ -159,11 +159,11 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_
                        rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted);
 
                        _debug("unrep %x-%x", start, stop);
-                       for (rxrpc_seq_t seq = start; before(seq, stop); seq++) {
-                               struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];
+                       for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) {
+                               rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK];
 
                                if (ping_response &&
-                                   before(txb->serial, call->acks_highest_serial))
+                                   before(serial, call->acks_highest_serial))
                                        break; /* Wasn't accounted for by a more recent ping. */
                                req.tq  = tq;
                                req.seq = seq;
@@ -198,7 +198,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_
 
                _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now));
                call->resend_at = resend_at;
-               trace_rxrpc_timer_set(call, resend_at - req.now,
+               trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now),
                                      rxrpc_timer_trace_resend_reset);
        } else {
                call->resend_at = KTIME_MAX;
index 6e7ff133b5aac57a708b2874cf2b8ada2ff1c82f..41b4fb56f96cd945bb1ea86e4ca1111b79b59919 100644 (file)
@@ -30,9 +30,7 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
  * Do TCP-style congestion management [RFC 5681].
  */
 static void rxrpc_congestion_management(struct rxrpc_call *call,
-                                       struct sk_buff *skb,
-                                       struct rxrpc_ack_summary *summary,
-                                       rxrpc_serial_t acked_serial)
+                                       struct rxrpc_ack_summary *summary)
 {
        summary->change = rxrpc_cong_no_change;
        summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks;
@@ -44,7 +42,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
                if (call->cong_cwnd >= call->cong_ssthresh &&
                    call->cong_ca_state == RXRPC_CA_SLOW_START) {
                        call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
-                       call->cong_tstamp = skb->tstamp;
+                       call->cong_tstamp = call->acks_latest_ts;
                        call->cong_cumul_acks = 0;
                }
        }
@@ -62,7 +60,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
                        call->cong_cwnd += 1;
                if (call->cong_cwnd >= call->cong_ssthresh) {
                        call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE;
-                       call->cong_tstamp = skb->tstamp;
+                       call->cong_tstamp = call->acks_latest_ts;
                }
                goto out;
 
@@ -75,12 +73,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
                 */
                if (call->peer->rtt_count == 0)
                        goto out;
-               if (ktime_before(skb->tstamp,
+               if (ktime_before(call->acks_latest_ts,
                                 ktime_add_us(call->cong_tstamp,
                                              call->peer->srtt_us >> 3)))
                        goto out_no_clear_ca;
                summary->change = rxrpc_cong_rtt_window_end;
-               call->cong_tstamp = skb->tstamp;
+               call->cong_tstamp = call->acks_latest_ts;
                if (call->cong_cumul_acks >= call->cong_cwnd)
                        call->cong_cwnd++;
                goto out;
@@ -137,7 +135,7 @@ resume_normality:
        summary->change = rxrpc_cong_cleared_nacks;
        call->cong_dup_acks = 0;
        call->cong_extra = 0;
-       call->cong_tstamp = skb->tstamp;
+       call->cong_tstamp = call->acks_latest_ts;
        if (call->cong_cwnd < call->cong_ssthresh)
                call->cong_ca_state = RXRPC_CA_SLOW_START;
        else
@@ -147,7 +145,7 @@ out:
 out_no_clear_ca:
        if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW)
                call->cong_cwnd = RXRPC_TX_MAX_WINDOW;
-       trace_rxrpc_congest(call, summary, acked_serial);
+       trace_rxrpc_congest(call, summary);
        return;
 
 packet_loss_detected:
@@ -194,11 +192,29 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)
        call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND);
 }
 
+/*
+ * Add an RTT sample derived from an ACK'd DATA packet.
+ */
+static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call,
+                                     struct rxrpc_ack_summary *summary,
+                                     struct rxrpc_txqueue *tq,
+                                     int ix,
+                                     rxrpc_serial_t ack_serial)
+{
+       rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1,
+                          summary->acked_serial, ack_serial,
+                          ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]),
+                          call->acks_latest_ts);
+       summary->rtt_sample_avail = false;
+       __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */
+}
+
 /*
  * Apply a hard ACK by advancing the Tx window.
  */
 static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
-                                  struct rxrpc_ack_summary *summary)
+                                  struct rxrpc_ack_summary *summary,
+                                  rxrpc_serial_t ack_serial)
 {
        struct rxrpc_txqueue *tq = call->tx_queue;
        rxrpc_seq_t seq = call->tx_bottom + 1;
@@ -236,6 +252,11 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
                        rot_last = true;
                }
 
+               if (summary->rtt_sample_avail &&
+                   summary->acked_serial == tq->segment_serial[ix] &&
+                   test_bit(ix, &tq->rtt_samples))
+                       rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial);
+
                if (ix == tq->nr_reported_acks) {
                        /* Packet directly hard ACK'd. */
                        tq->nr_reported_acks++;
@@ -348,7 +369,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
        }
 
        if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
-               if (!rxrpc_rotate_tx_window(call, top, &summary)) {
+               if (!rxrpc_rotate_tx_window(call, top, &summary, 0)) {
                        rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply);
                        return false;
                }
@@ -800,6 +821,19 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb
                })
 #endif
 
+/*
+ * Deal with RTT samples from soft ACKs.
+ */
+static void rxrpc_input_soft_rtt(struct rxrpc_call *call,
+                                struct rxrpc_ack_summary *summary,
+                                struct rxrpc_txqueue *tq,
+                                rxrpc_serial_t ack_serial)
+{
+       for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++)
+               if (summary->acked_serial == tq->segment_serial[ix])
+                       return rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial);
+}
+
 /*
  * Process a batch of soft ACKs specific to a transmission queue segment.
  */
@@ -909,6 +943,8 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call,
 
                _debug("bound %16lx %u", extracted, nr);
 
+               if (summary->rtt_sample_avail)
+                       rxrpc_input_soft_rtt(call, summary, tq, sp->hdr.serial);
                rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE,
                                        seq - RXRPC_NR_TXQUEUE, &lowest_nak);
                extracted = ~0UL;
@@ -980,7 +1016,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        struct rxrpc_ack_summary summary = { 0 };
        struct rxrpc_acktrailer trailer;
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       rxrpc_serial_t ack_serial, acked_serial;
+       rxrpc_serial_t ack_serial;
        rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
        int nr_acks, offset, ioffset;
 
@@ -989,11 +1025,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
 
        ack_serial      = sp->hdr.serial;
-       acked_serial    = sp->ack.acked_serial;
        first_soft_ack  = sp->ack.first_ack;
        prev_pkt        = sp->ack.prev_ack;
        nr_acks         = sp->ack.nr_acks;
        hard_ack        = first_soft_ack - 1;
+       summary.acked_serial = sp->ack.acked_serial;
        summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ?
                              sp->ack.reason : RXRPC_ACK__INVALID);
 
@@ -1001,21 +1037,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]);
        prefetch(call->tx_queue);
 
-       if (acked_serial != 0) {
-               switch (summary.ack_reason) {
-               case RXRPC_ACK_PING_RESPONSE:
-                       rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                                rxrpc_rtt_rx_ping_response);
-                       break;
-               case RXRPC_ACK_REQUESTED:
-                       rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                                rxrpc_rtt_rx_requested_ack);
-                       break;
-               default:
-                       rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                                rxrpc_rtt_rx_other_ack);
-                       break;
-               }
+       if (summary.acked_serial != 0) {
+               if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE)
+                       rxrpc_complete_rtt_probe(call, skb->tstamp, summary.acked_serial,
+                                                ack_serial, rxrpc_rtt_rx_ping_response);
+               else
+                       summary.rtt_sample_avail = true;
        }
 
        /* If we get an EXCEEDS_WINDOW ACK from the server, it probably
@@ -1068,8 +1095,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        case RXRPC_ACK_PING:
                break;
        default:
-               if (acked_serial && after(acked_serial, call->acks_highest_serial))
-                       call->acks_highest_serial = acked_serial;
+               if (summary.acked_serial &&
+                   after(summary.acked_serial, call->acks_highest_serial))
+                       call->acks_highest_serial = summary.acked_serial;
                break;
        }
 
@@ -1098,7 +1126,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);
 
        if (after(hard_ack, call->tx_bottom)) {
-               if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
+               if (rxrpc_rotate_tx_window(call, hard_ack, &summary, ack_serial)) {
                        rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
                        goto send_response;
                }
@@ -1116,7 +1144,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                rxrpc_propose_ping(call, ack_serial,
                                   rxrpc_propose_ack_ping_for_lost_reply);
 
-       rxrpc_congestion_management(call, skb, &summary, acked_serial);
+       rxrpc_congestion_management(call, &summary);
        if (summary.need_retransmit)
                rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE);
 
@@ -1136,7 +1164,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
 {
        struct rxrpc_ack_summary summary = { 0 };
 
-       if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
+       if (rxrpc_rotate_tx_window(call, call->tx_top, &summary, 0))
                rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall);
 }
 
index 978c2dc6a7d459f19c1a61fe10d96fcee7db8eba..20bf45317264bc4c8c1fbc66c121bc223b0898f9 100644 (file)
@@ -436,7 +436,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
        trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
        if (why != rxrpc_reqack_no_srv_last) {
                flags |= RXRPC_REQUEST_ACK;
-               rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data);
+               trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial);
                call->peer->rtt_last_req = req->now;
        }
 dont_set_request_ack:
@@ -508,6 +508,10 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se
 
                _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);
                tq->segment_xmit_ts[ix] = xmit_ts;
+               tq->segment_serial[ix] = serial;
+               if (i + 1 == req->n)
+                       /* Only sample the last subpacket in a jumbo. */
+                       __set_bit(ix, &tq->rtt_samples);
                len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i);
                serial++;
                seq++;