EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \
EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
EM(rxrpc_propose_ack_ping_for_0_retrans, "0-Retrn") \
+ EM(rxrpc_propose_ack_ping_for_mtu_probe, "MTUProb") \
EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \
EM(rxrpc_propose_ack_ping_for_params, "Params ") \
EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \
EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \
E_(rxrpc_txbuf_see_unacked, "SEE UNACKED")
+#define rxrpc_pmtud_reduce_traces \
+ EM(rxrpc_pmtud_reduce_ack, "Ack ") \
+ EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \
+ E_(rxrpc_pmtud_reduce_route, "Route")
+
/*
* Generate enums for tracing information.
*/
enum rxrpc_conn_trace { rxrpc_conn_traces } __mode(byte);
enum rxrpc_local_trace { rxrpc_local_traces } __mode(byte);
enum rxrpc_peer_trace { rxrpc_peer_traces } __mode(byte);
+enum rxrpc_pmtud_reduce_trace { rxrpc_pmtud_reduce_traces } __mode(byte);
enum rxrpc_propose_ack_outcome { rxrpc_propose_ack_outcomes } __mode(byte);
enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte);
enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte);
rxrpc_congest_modes;
rxrpc_conn_traces;
rxrpc_local_traces;
+rxrpc_pmtud_reduce_traces;
rxrpc_propose_ack_traces;
rxrpc_receive_traces;
rxrpc_recvmsg_traces;
__entry->sack)
);
+TRACE_EVENT(rxrpc_pmtud_tx,
+ TP_PROTO(struct rxrpc_call *call),
+
+ TP_ARGS(call),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id)
+ __field(unsigned int, call_debug_id)
+ __field(rxrpc_serial_t, ping_serial)
+ __field(unsigned short, pmtud_trial)
+ __field(unsigned short, pmtud_good)
+ __field(unsigned short, pmtud_bad)
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = call->peer->debug_id;
+ __entry->call_debug_id = call->debug_id;
+ __entry->ping_serial = call->conn->pmtud_probe;
+ __entry->pmtud_trial = call->peer->pmtud_trial;
+ __entry->pmtud_good = call->peer->pmtud_good;
+ __entry->pmtud_bad = call->peer->pmtud_bad;
+ ),
+
+ TP_printk("P=%08x c=%08x pr=%08x %u-%u-%u",
+ __entry->peer_debug_id,
+ __entry->call_debug_id,
+ __entry->ping_serial,
+ __entry->pmtud_good,
+ __entry->pmtud_trial,
+ __entry->pmtud_bad)
+ );
+
+TRACE_EVENT(rxrpc_pmtud_rx,
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial),
+
+ TP_ARGS(conn, resp_serial),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id)
+ __field(unsigned int, call_debug_id)
+ __field(rxrpc_serial_t, ping_serial)
+ __field(rxrpc_serial_t, resp_serial)
+ __field(unsigned short, max_data)
+ __field(u8, jumbo_max)
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = conn->peer->debug_id;
+ __entry->call_debug_id = conn->pmtud_call;
+ __entry->ping_serial = conn->pmtud_probe;
+ __entry->resp_serial = resp_serial;
+ __entry->max_data = conn->peer->max_data;
+ __entry->jumbo_max = conn->peer->pmtud_jumbo;
+ ),
+
+ TP_printk("P=%08x c=%08x pr=%08x rr=%08x max=%u jm=%u",
+ __entry->peer_debug_id,
+ __entry->call_debug_id,
+ __entry->ping_serial,
+ __entry->resp_serial,
+ __entry->max_data,
+ __entry->jumbo_max)
+ );
+
+TRACE_EVENT(rxrpc_pmtud_lost,
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial),
+
+ TP_ARGS(conn, resp_serial),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id)
+ __field(unsigned int, call_debug_id)
+ __field(rxrpc_serial_t, ping_serial)
+ __field(rxrpc_serial_t, resp_serial)
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = conn->peer->debug_id;
+ __entry->call_debug_id = conn->pmtud_call;
+ __entry->ping_serial = conn->pmtud_probe;
+ __entry->resp_serial = resp_serial;
+ ),
+
+ TP_printk("P=%08x c=%08x pr=%08x rr=%08x",
+ __entry->peer_debug_id,
+ __entry->call_debug_id,
+ __entry->ping_serial,
+ __entry->resp_serial)
+ );
+
+TRACE_EVENT(rxrpc_pmtud_reduce,
+ TP_PROTO(struct rxrpc_peer *peer, rxrpc_serial_t serial,
+ unsigned int max_data, enum rxrpc_pmtud_reduce_trace reason),
+
+ TP_ARGS(peer, serial, max_data, reason),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, peer_debug_id)
+ __field(rxrpc_serial_t, serial)
+ __field(unsigned int, max_data)
+ __field(enum rxrpc_pmtud_reduce_trace, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->peer_debug_id = peer->debug_id;
+ __entry->serial = serial;
+ __entry->max_data = max_data;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("P=%08x %s r=%08x m=%u",
+ __entry->peer_debug_id,
+ __print_symbolic(__entry->reason, rxrpc_pmtud_reduce_traces),
+ __entry->serial, __entry->max_data)
+ );
+
#undef EM
#undef E_
time64_t last_tx_at; /* Last time packet sent here */
seqlock_t service_conn_lock;
spinlock_t lock; /* access lock */
- unsigned int if_mtu; /* interface MTU for this peer */
- unsigned int mtu; /* network MTU for this peer */
- unsigned int maxdata; /* data size (MTU - hdrsize) */
- unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
int debug_id; /* debug ID for printks */
struct sockaddr_rxrpc srx; /* remote address */
+ /* Path MTU discovery [RFC8899] */
+ unsigned int pmtud_trial; /* Current MTU probe size */
+ unsigned int pmtud_good; /* Largest working MTU probe we've tried */
+ unsigned int pmtud_bad; /* Smallest non-working MTU probe we've tried */
+ bool pmtud_lost; /* T if MTU probe was lost */
+ bool pmtud_probing; /* T if we have an active probe outstanding */
+ bool pmtud_pending; /* T if a call to this peer should send a probe */
+ u8 pmtud_jumbo; /* Max jumbo packets for the MTU */
+ bool ackr_adv_pmtud; /* T if the peer advertises path-MTU */
+ unsigned int ackr_max_data; /* Maximum data advertised by peer */
+ seqcount_t mtu_lock; /* Lockless MTU access management */
+ unsigned int if_mtu; /* Local interface MTU (- hdrsize) for this peer */
+ unsigned int max_data; /* Maximum packet data capacity for this peer */
+ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
+ unsigned short tx_seg_max; /* Maximum number of transmissable segments */
+
/* calculated RTT cache */
#define RXRPC_RTT_CACHE_SIZE 32
spinlock_t rtt_input_lock; /* RTT lock for input routine */
int debug_id; /* debug ID for printks */
rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
+ rxrpc_serial_t pmtud_probe; /* Serial of MTU probe (or 0) */
+ unsigned int pmtud_call; /* ID of call used for probe */
u32 service_id; /* Service ID, possibly upgraded */
u32 security_level; /* Security level selected */
u8 security_ix; /* security type */
*/
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why);
+void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call);
int rxrpc_send_abort_packet(struct rxrpc_call *);
void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
*/
void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *);
void rxrpc_peer_keepalive_worker(struct work_struct *);
+void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial,
+ bool sendmsg_fail);
/*
* peer_object.c
rxrpc_disconnect_call(call);
if (call->security)
call->security->free_call_crypto(call);
+ } else {
+ if (skb &&
+ call->peer->ackr_adv_pmtud &&
+ call->peer->pmtud_pending)
+ rxrpc_send_probe_for_pmtud(call);
}
if (call->acks_hard_ack != call->tx_bottom)
rxrpc_shrink_call_tx_buffer(call);
struct rxrpc_acktrailer trailer;
size_t len;
int ret, ioc;
- u32 serial, mtu, call_id, padding;
+ u32 serial, max_mtu, if_mtu, call_id, padding;
_enter("%d", conn->debug_id);
break;
case RXRPC_PACKET_TYPE_ACK:
- mtu = conn->peer->if_mtu;
- mtu -= conn->peer->hdrsize;
+ if_mtu = conn->peer->if_mtu - conn->peer->hdrsize;
+ if (conn->peer->ackr_adv_pmtud) {
+ max_mtu = umax(conn->peer->max_data, rxrpc_rx_mtu);
+ } else {
+ if_mtu = umin(1444, if_mtu);
+ max_mtu = if_mtu;
+ }
pkt.ack.bufferSpace = 0;
pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
pkt.ack.firstPacket = htonl(chan->last_seq + 1);
pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
- trailer.maxMTU = htonl(rxrpc_rx_mtu);
- trailer.ifMTU = htonl(mtu);
+ trailer.maxMTU = htonl(max_mtu);
+ trailer.ifMTU = htonl(if_mtu);
trailer.rwind = htonl(rxrpc_rx_window_size);
- trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ trailer.jumbo_max = 0;
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
padding = 0;
iov[0].iov_len += sizeof(pkt.ack);
list_del_init(&conn->proc_link);
write_unlock(&rxnet->conn_lock);
+ if (conn->pmtud_probe) {
+ trace_rxrpc_pmtud_lost(conn, 0);
+ conn->peer->pmtud_probing = false;
+ conn->peer->pmtud_pending = true;
+ }
+
rxrpc_purge_queue(&conn->rx_queue);
rxrpc_kill_client_conn(conn);
struct rxrpc_acktrailer *trailer)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_peer *peer;
- unsigned int mtu;
+ struct rxrpc_peer *peer = call->peer;
+ unsigned int max_data;
bool wake = false;
u32 rwind = ntohl(trailer->rwind);
call->tx_winsize = rwind;
}
- mtu = umin(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU));
+ if (trailer->jumbo_max == 0) {
+ /* The peer says it supports pmtu discovery */
+ peer->ackr_adv_pmtud = true;
+ } else {
+ peer->ackr_adv_pmtud = false;
+ }
+
+ max_data = ntohl(trailer->maxMTU);
+ peer->ackr_max_data = max_data;
- peer = call->peer;
- if (mtu < peer->maxdata) {
- spin_lock(&peer->lock);
- peer->maxdata = mtu;
- peer->mtu = mtu + peer->hdrsize;
- spin_unlock(&peer->lock);
+ if (max_data < peer->max_data) {
+ trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_data,
+ rxrpc_pmtud_reduce_ack);
+ write_seqcount_begin(&peer->mtu_lock);
+ peer->max_data = max_data;
+ write_seqcount_end(&peer->mtu_lock);
}
if (wake)
if (sp->hdr.callNumber == 0)
return rxrpc_input_conn_packet(conn, skb);
+ /* Deal with path MTU discovery probing. */
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK &&
+ conn->pmtud_probe &&
+ after_eq(sp->ack.acked_serial, conn->pmtud_probe))
+ rxrpc_input_probe_for_pmtud(conn, sp->ack.acked_serial, false);
+
/* Call-bound packets are routed by connection channel. */
channel = sp->hdr.cid & RXRPC_CHANNELMASK;
chan = &conn->channels[channel];
* Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
* made by gluing normal packets together that we're willing to handle.
*/
-unsigned int rxrpc_rx_mtu = 5692;
+unsigned int rxrpc_rx_mtu = RXRPC_JUMBO(46);
/*
* The maximum number of fragments in a received jumbo packet that we tell the
* sender that we're willing to handle.
*/
-unsigned int rxrpc_rx_jumbo_max = 4;
+unsigned int rxrpc_rx_jumbo_max = 46;
#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
/*
struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base;
struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3;
struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
- unsigned int qsize, sack, wrap, to;
+ unsigned int qsize, sack, wrap, to, max_mtu, if_mtu;
rxrpc_seq_t window, wtop;
int rsize;
- u32 mtu, jmax;
u8 *filler = txb->kvec[2].iov_base;
u8 *sackp = txb->kvec[1].iov_base;
ack->reason = RXRPC_ACK_IDLE;
}
- mtu = call->peer->if_mtu;
- mtu -= call->peer->hdrsize;
- jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
txb->ack_rwind = rsize;
- trailer->maxMTU = htonl(rxrpc_rx_mtu);
- trailer->ifMTU = htonl(mtu);
+
+ if_mtu = call->peer->if_mtu - call->peer->hdrsize;
+ if (call->peer->ackr_adv_pmtud) {
+ max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu);
+ } else {
+ if_mtu = umin(if_mtu, 1444);
+ max_mtu = if_mtu;
+ }
+
+ trailer->maxMTU = htonl(max_mtu);
+ trailer->ifMTU = htonl(if_mtu);
trailer->rwind = htonl(rsize);
- trailer->jumbo_max = htonl(jmax);
+ trailer->jumbo_max = 0; /* Advertise pmtu discovery */
}
/*
* Transmit an ACK packet.
*/
static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb,
- int nr_kv)
+ int nr_kv, enum rxrpc_propose_ack_trace why)
{
struct kvec *kv = call->local->kvec;
struct rxrpc_wire_header *whdr = kv[0].iov_base;
rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, txb->len);
- rxrpc_local_dont_fragment(conn->local, false);
+ rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe);
ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len);
call->peer->last_tx_at = ktime_get_seconds();
if (ret < 0) {
trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret,
rxrpc_tx_point_call_ack);
+ if (why == rxrpc_propose_ack_ping_for_mtu_probe &&
+ ret == -EMSGSIZE)
+ rxrpc_input_probe_for_pmtud(conn, txb->serial, true);
} else {
trace_rxrpc_tx_packet(call->debug_id, whdr,
rxrpc_tx_point_call_ack);
if (txb->flags & RXRPC_REQUEST_ACK)
call->peer->rtt_last_req = now;
rxrpc_set_keepalive(call, now);
+ if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
+ call->peer->pmtud_pending = false;
+ call->peer->pmtud_probing = true;
+ call->conn->pmtud_probe = txb->serial;
+ call->conn->pmtud_call = call->debug_id;
+ trace_rxrpc_pmtud_tx(call);
+ }
}
rxrpc_tx_backoff(call, ret);
}
rxrpc_fill_out_ack(call, txb, ack_reason, serial);
+ /* Extend a path MTU probe ACK. */
nr_kv = txb->nr_kvec;
kv[0] = txb->kvec[0];
kv[1] = txb->kvec[1];
kv[2] = txb->kvec[2];
- // TODO: Extend a path MTU probe ACK
+ if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
+ size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header);
+
+ if (txb->len > probe_mtu)
+ goto skip;
+ while (txb->len < probe_mtu) {
+ size_t part = umin(probe_mtu - txb->len, PAGE_SIZE);
+
+ kv[nr_kv].iov_base = page_address(ZERO_PAGE(0));
+ kv[nr_kv].iov_len = part;
+ txb->len += part;
+ nr_kv++;
+ }
+ }
call->ackr_nr_unacked = 0;
atomic_set(&call->ackr_nr_consumed, 0);
clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
trace_rxrpc_send_ack(call, why, ack_reason, serial);
- rxrpc_send_ack_packet(call, txb, nr_kv);
+ rxrpc_send_ack_packet(call, txb, nr_kv, why);
+skip:
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
}
+/*
+ * Send an ACK probe for path MTU discovery.
+ */
+void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call)
+{
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_mtu_probe);
+}
+
/*
* Send an ABORT call packet.
*/
/* send the packet with the don't fragment bit set if we currently
* think it's small enough */
- if (len >= sizeof(struct rxrpc_wire_header) + call->peer->maxdata) {
+ if (len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
rxrpc_local_dont_fragment(conn->local, false);
frag = rxrpc_tx_point_call_data_frag;
} else {
RX_USER_ABORT, ret);
}
- _leave(" = %d [%u]", ret, call->peer->maxdata);
+ _leave(" = %d [%u]", ret, call->peer->max_data);
return ret;
}
*/
static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
{
+ unsigned int max_data;
+
/* wind down the local interface MTU */
if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu)
peer->if_mtu = mtu;
}
}
- if (mtu < peer->mtu) {
- spin_lock(&peer->lock);
- peer->mtu = mtu;
- peer->maxdata = peer->mtu - peer->hdrsize;
- spin_unlock(&peer->lock);
+ max_data = max_t(int, mtu - peer->hdrsize, 500);
+ if (max_data < peer->max_data) {
+ if (peer->pmtud_good > max_data)
+ peer->pmtud_good = max_data;
+ if (peer->pmtud_bad > max_data + 1)
+ peer->pmtud_bad = max_data + 1;
+
+ trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp);
+ write_seqcount_begin(&peer->mtu_lock);
+ peer->max_data = max_data;
+ write_seqcount_end(&peer->mtu_lock);
}
}
_leave("");
}
+
+/*
+ * Do path MTU probing.
+ */
+void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial,
+ bool sendmsg_fail)
+{
+ struct rxrpc_peer *peer = conn->peer;
+ unsigned int max_data = peer->max_data;
+ int good, trial, bad, jumbo;
+
+ good = peer->pmtud_good;
+ trial = peer->pmtud_trial;
+ bad = peer->pmtud_bad;
+ if (good >= bad - 1) {
+ conn->pmtud_probe = 0;
+ peer->pmtud_lost = false;
+ return;
+ }
+
+ if (!peer->pmtud_probing)
+ goto send_probe;
+
+ if (sendmsg_fail || after(acked_serial, conn->pmtud_probe)) {
+ /* Retry a lost probe. */
+ if (!peer->pmtud_lost) {
+ trace_rxrpc_pmtud_lost(conn, acked_serial);
+ conn->pmtud_probe = 0;
+ peer->pmtud_lost = true;
+ goto send_probe;
+ }
+
+ /* The probed size didn't seem to get through. */
+ bad = trial;
+ peer->pmtud_bad = bad;
+ if (bad <= max_data)
+ max_data = bad - 1;
+ } else {
+ /* It did get through. */
+ good = trial;
+ peer->pmtud_good = good;
+ if (good > max_data)
+ max_data = good;
+ }
+
+ max_data = umin(max_data, peer->ackr_max_data);
+ if (max_data != peer->max_data) {
+ preempt_disable();
+ write_seqcount_begin(&peer->mtu_lock);
+ peer->max_data = max_data;
+ write_seqcount_end(&peer->mtu_lock);
+ preempt_enable();
+ }
+
+ jumbo = max_data + sizeof(struct rxrpc_jumbo_header);
+ jumbo /= RXRPC_JUMBO_SUBPKTLEN;
+ peer->pmtud_jumbo = jumbo;
+
+ trace_rxrpc_pmtud_rx(conn, acked_serial);
+ conn->pmtud_probe = 0;
+ peer->pmtud_lost = false;
+
+ if (good < RXRPC_JUMBO(2) && bad > RXRPC_JUMBO(2))
+ trial = RXRPC_JUMBO(2);
+ else if (good < RXRPC_JUMBO(4) && bad > RXRPC_JUMBO(4))
+ trial = RXRPC_JUMBO(4);
+ else if (good < RXRPC_JUMBO(3) && bad > RXRPC_JUMBO(3))
+ trial = RXRPC_JUMBO(3);
+ else if (good < RXRPC_JUMBO(6) && bad > RXRPC_JUMBO(6))
+ trial = RXRPC_JUMBO(6);
+ else if (good < RXRPC_JUMBO(5) && bad > RXRPC_JUMBO(5))
+ trial = RXRPC_JUMBO(5);
+ else if (good < RXRPC_JUMBO(8) && bad > RXRPC_JUMBO(8))
+ trial = RXRPC_JUMBO(8);
+ else if (good < RXRPC_JUMBO(7) && bad > RXRPC_JUMBO(7))
+ trial = RXRPC_JUMBO(7);
+ else
+ trial = (good + bad) / 2;
+ peer->pmtud_trial = trial;
+
+ if (good >= bad)
+ return;
+
+send_probe:
+ peer->pmtud_pending = true;
+}
#endif
peer->if_mtu = 1500;
+ if (peer->max_data < peer->if_mtu - peer->hdrsize) {
+ trace_rxrpc_pmtud_reduce(peer, 0, peer->if_mtu - peer->hdrsize,
+ rxrpc_pmtud_reduce_route);
+ peer->max_data = peer->if_mtu - peer->hdrsize;
+ }
memset(&fl, 0, sizeof(fl));
switch (peer->srx.transport.family) {
}
peer->if_mtu = dst_mtu(dst);
+ peer->hdrsize += dst->header_len + dst->trailer_len;
+ peer->tx_seg_max = dst->dev->gso_max_segs;
dst_release(dst);
+ peer->max_data = umin(RXRPC_JUMBO(1), peer->if_mtu - peer->hdrsize);
+ peer->pmtud_good = 500;
+ peer->pmtud_bad = peer->if_mtu - peer->hdrsize + 1;
+ peer->pmtud_trial = umin(peer->max_data, peer->pmtud_bad - 1);
+ peer->pmtud_pending = true;
+
_leave(" [if_mtu %u]", peer->if_mtu);
}
seqlock_init(&peer->service_conn_lock);
spin_lock_init(&peer->lock);
spin_lock_init(&peer->rtt_input_lock);
+ seqcount_init(&peer->mtu_lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
rxrpc_peer_init_rtt(peer);
unsigned long hash_key)
{
peer->hash_key = hash_key;
- rxrpc_assess_MTU_size(local, peer);
- peer->mtu = peer->if_mtu;
- peer->rtt_last_req = ktime_get_real();
+
switch (peer->srx.transport.family) {
case AF_INET:
}
peer->hdrsize += sizeof(struct rxrpc_wire_header);
- peer->maxdata = peer->mtu - peer->hdrsize;
+ peer->max_data = peer->if_mtu - peer->hdrsize;
+
+ rxrpc_assess_MTU_size(local, peer);
+
+ peer->rtt_last_req = ktime_get_real();
}
/*
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
- "Proto Local "
- " Remote "
- " Use SST MTU LastUse RTT RTO\n"
+ "Proto Local Remote Use SST Maxd LastUse RTT RTO\n"
);
return 0;
}
now = ktime_get_seconds();
seq_printf(seq,
- "UDP %-47.47s %-47.47s %3u"
- " %3u %5u %6llus %8u %8u\n",
+ "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8u %8u\n",
lbuff,
rbuff,
refcount_read(&peer->ref),
peer->cong_ssthresh,
- peer->mtu,
+ peer->max_data,
now - peer->last_tx_at,
peer->srtt_us >> 3,
peer->rto_us);
/*
* The maximum number of subpackets that can possibly fit in a UDP packet is:
*
- * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1
- * = ((65535 - 28 - 28) / 1416) + 1
- * = 46 non-terminal packets and 1 terminal packet.
+ * (max_UDP - wirehdr + jumbohdr) / (jumbohdr + 1412)
+ * = ((65535 - 28 + 4) / 1416)
+ * = 45 non-terminal packets and 1 terminal packet.
*/
-#define RXRPC_MAX_NR_JUMBO 47
+#define RXRPC_MAX_NR_JUMBO 46
+
+/* Size of a jumbo packet with N subpackets, excluding UDP+IP */
+#define RXRPC_JUMBO(N) ((int)sizeof(struct rxrpc_wire_header) + \
+ RXRPC_JUMBO_DATALEN + \
+ ((N) - 1) * RXRPC_JUMBO_SUBPKTLEN)
/*****************************************************************************/
/*
#include "ar-internal.h"
static struct ctl_table_header *rxrpc_sysctl_reg_table;
+static const unsigned int rxrpc_rx_mtu_min = 500;
+static const unsigned int rxrpc_jumbo_max = RXRPC_MAX_NR_JUMBO;
static const unsigned int four = 4;
static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = (void *)SYSCTL_ONE,
+ .extra1 = (void *)&rxrpc_rx_mtu_min,
.extra2 = (void *)&n_65535,
},
{
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)SYSCTL_ONE,
- .extra2 = (void *)&four,
+ .extra2 = (void *)&rxrpc_jumbo_max,
},
};
trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0,
rxrpc_txbuf_free);
for (i = 0; i < txb->nr_kvec; i++)
- if (txb->kvec[i].iov_base)
+ if (txb->kvec[i].iov_base &&
+ !is_zero_pfn(page_to_pfn(virt_to_page(txb->kvec[i].iov_base))))
page_frag_free(txb->kvec[i].iov_base);
kfree(txb);
atomic_dec(&rxrpc_nr_txbuf);