]> git.ipfire.org Git - thirdparty/linux.git/blame - net/rxrpc/output.c
Merge tag 'io_uring-6.16-20250630' of git://git.kernel.dk/linux
[thirdparty/linux.git] / net / rxrpc / output.c
CommitLineData
// SPDX-License-Identifier: GPL-2.0-or-later
/* RxRPC packet transmission
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
7
9b6d5398
JP
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
17926a79 10#include <linux/net.h>
5a0e3ad6 11#include <linux/gfp.h>
17926a79 12#include <linux/skbuff.h>
bc3b2d7f 13#include <linux/export.h>
17926a79
DH
14#include <net/sock.h>
15#include <net/af_rxrpc.h>
ed472b0c 16#include <net/udp.h>
17926a79
DH
17#include "ar-internal.h"
18
ed472b0c
DH
19extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
20
9d1d2b59 21ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len)
ed472b0c 22{
ed472b0c 23 struct sockaddr *sa = msg->msg_name;
6423ac2e 24 struct sock *sk = socket->sk;
ed472b0c 25
6423ac2e
DH
26 if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) {
27 if (sa->sa_family == AF_INET6) {
28 if (sk->sk_family != AF_INET6) {
29 pr_warn("AF_INET6 address on AF_INET socket\n");
30 return -ENOPROTOOPT;
31 }
32 return udpv6_sendmsg(sk, msg, len);
33 }
34 }
35 return udp_sendmsg(sk, msg, len);
ed472b0c
DH
36}
37
26cb02aa
DH
38struct rxrpc_abort_buffer {
39 struct rxrpc_wire_header whdr;
40 __be32 abort_code;
41};
42
ace45bec
DH
/* Payload appended to a VERSION keepalive packet: a single NUL byte. */
static const char rxrpc_keepalive_string[] = { '\0' };
44
c7e86acf
DH
45/*
46 * Increase Tx backoff on transmission failure and clear it on success.
47 */
48static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
49{
50 if (ret < 0) {
153f90a0
DH
51 if (call->tx_backoff < 1000)
52 call->tx_backoff += 100;
c7e86acf 53 } else {
153f90a0 54 call->tx_backoff = 0;
c7e86acf
DH
55 }
56}
57
415f44e4
DH
58/*
59 * Arrange for a keepalive ping a certain time after we last transmitted. This
60 * lets the far side know we're still interested in this call and helps keep
61 * the route through any intervening firewall open.
62 *
63 * Receiving a response to the ping will prevent the ->expect_rx_by timer from
64 * expiring.
65 */
4d267ad6 66static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now)
415f44e4 67{
153f90a0 68 ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo) / 6);
415f44e4 69
153f90a0
DH
70 call->keepalive_at = ktime_add(ktime_get_real(), delay);
71 trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive);
415f44e4
DH
72}
73
08d55d7c
DH
74/*
75 * Allocate transmission buffers for an ACK and attach them to local->kv[].
76 */
77static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size)
78{
79 struct rxrpc_wire_header *whdr;
80 struct rxrpc_acktrailer *trailer;
81 struct rxrpc_ackpacket *ack;
82 struct kvec *kv = call->local->kvec;
83 gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS;
84 void *buf, *buf2 = NULL;
85 u8 *filler;
86
87 buf = page_frag_alloc(&call->local->tx_alloc,
88 sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp);
89 if (!buf)
90 return -ENOMEM;
91
92 if (sack_size) {
93 buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp);
94 if (!buf2) {
95 page_frag_free(buf);
96 return -ENOMEM;
97 }
98 }
99
100 whdr = buf;
101 ack = buf + sizeof(*whdr);
102 filler = buf + sizeof(*whdr) + sizeof(*ack) + 1;
103 trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3;
104
105 kv[0].iov_base = whdr;
106 kv[0].iov_len = sizeof(*whdr) + sizeof(*ack);
107 kv[1].iov_base = buf2;
108 kv[1].iov_len = sack_size;
109 kv[2].iov_base = filler;
110 kv[2].iov_len = 3 + sizeof(*trailer);
111 return 3; /* Number of kvec[] used. */
112}
113
114static void rxrpc_free_ack(struct rxrpc_call *call)
115{
116 page_frag_free(call->local->kvec[0].iov_base);
117 if (call->local->kvec[1].iov_base)
118 page_frag_free(call->local->kvec[1].iov_base);
119}
120
121/*
122 * Record the beginning of an RTT probe.
123 */
124static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
125 ktime_t now, enum rxrpc_rtt_tx_trace why)
126{
127 unsigned long avail = call->rtt_avail;
128 int rtt_slot = 9;
129
130 if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK))
131 goto no_slot;
132
133 rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK);
134 if (!test_and_clear_bit(rtt_slot, &call->rtt_avail))
135 goto no_slot;
136
137 call->rtt_serial[rtt_slot] = serial;
138 call->rtt_sent_at[rtt_slot] = now;
139 smp_wmb(); /* Write data before avail bit */
140 set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
141
142 trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
143 return;
144
145no_slot:
146 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
147}
148
8d94aa38
DH
149/*
150 * Fill out an ACK packet.
151 */
08d55d7c
DH
152static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason,
153 rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial)
8d94aa38 154{
08d55d7c
DH
155 struct kvec *kv = call->local->kvec;
156 struct rxrpc_wire_header *whdr = kv[0].iov_base;
157 struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
8985f2b0 158 struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
eeaedc54 159 unsigned int qsize, sack, wrap, to, max_mtu, if_mtu;
f21e9348 160 rxrpc_seq_t window, wtop;
08d55d7c 161 ktime_t now = ktime_get_real();
5d7edbc9 162 int rsize;
08d55d7c
DH
163 u8 *filler = kv[2].iov_base;
164 u8 *sackp = kv[1].iov_base;
8d94aa38 165
f2a676d1 166 rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
9a3dedcf 167
5bbf9533
DH
168 window = call->ackr_window;
169 wtop = call->ackr_wtop;
f21e9348 170 sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
8985f2b0 171
08d55d7c
DH
172 *_ack_serial = rxrpc_get_next_serial(call->conn);
173
174 whdr->epoch = htonl(call->conn->proto.epoch);
175 whdr->cid = htonl(call->cid);
176 whdr->callNumber = htonl(call->call_id);
177 whdr->serial = htonl(*_ack_serial);
8985f2b0
DH
178 whdr->seq = 0;
179 whdr->type = RXRPC_PACKET_TYPE_ACK;
08d55d7c
DH
180 whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK;
181 whdr->userStatus = 0;
182 whdr->securityIndex = call->security_ix;
183 whdr->_rsvd = 0;
184 whdr->serviceId = htons(call->dest_srx.srx_service);
185
8985f2b0
DH
186 ack->bufferSpace = 0;
187 ack->maxSkew = 0;
188 ack->firstPacket = htonl(window);
189 ack->previousPacket = htonl(call->rx_highest_seq);
08d55d7c 190 ack->serial = htonl(serial_to_ack);
8985f2b0
DH
191 ack->reason = ack_reason;
192 ack->nAcks = wtop - window;
49489bb0
DH
193 filler[0] = 0;
194 filler[1] = 0;
195 filler[2] = 0;
196
197 if (ack_reason == RXRPC_ACK_PING)
08d55d7c 198 whdr->flags |= RXRPC_REQUEST_ACK;
5d7edbc9
DH
199
200 if (after(wtop, window)) {
08d55d7c 201 kv[1].iov_len = ack->nAcks;
49489bb0 202
f21e9348 203 wrap = RXRPC_SACK_SIZE - sack;
29e03ec7 204 to = umin(ack->nAcks, RXRPC_SACK_SIZE);
5d7edbc9 205
8985f2b0 206 if (sack + ack->nAcks <= RXRPC_SACK_SIZE) {
49489bb0 207 memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks);
5d7edbc9 208 } else {
49489bb0
DH
209 memcpy(sackp, call->ackr_sack_table + sack, wrap);
210 memcpy(sackp + wrap, call->ackr_sack_table, to - wrap);
5d7edbc9 211 }
5d7edbc9
DH
212 } else if (before(wtop, window)) {
213 pr_warn("ack window backward %x %x", window, wtop);
8985f2b0
DH
214 } else if (ack->reason == RXRPC_ACK_DELAY) {
215 ack->reason = RXRPC_ACK_IDLE;
248f219c 216 }
8d94aa38 217
5d7edbc9
DH
218 qsize = (window - 1) - call->rx_consumed;
219 rsize = max_t(int, call->rx_winsize - qsize, 0);
eeaedc54
DH
220
221 if_mtu = call->peer->if_mtu - call->peer->hdrsize;
222 if (call->peer->ackr_adv_pmtud) {
223 max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu);
224 } else {
225 if_mtu = umin(if_mtu, 1444);
226 max_mtu = if_mtu;
227 }
228
229 trailer->maxMTU = htonl(max_mtu);
230 trailer->ifMTU = htonl(if_mtu);
49489bb0 231 trailer->rwind = htonl(rsize);
eeaedc54 232 trailer->jumbo_max = 0; /* Advertise pmtu discovery */
8d94aa38 233
08d55d7c
DH
234 if (ack_reason == RXRPC_ACK_PING)
235 rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping);
236 if (whdr->flags & RXRPC_REQUEST_ACK)
b40ef2b8 237 call->rtt_last_req = now;
08d55d7c
DH
238 rxrpc_set_keepalive(call, now);
239 return nr_kv;
4700c4d8
DH
240}
241
8d94aa38 242/*
b0346843 243 * Transmit an ACK packet.
8d94aa38 244 */
08d55d7c
DH
245static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len,
246 rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
8d94aa38 247{
420f8af5
DH
248 struct kvec *kv = call->local->kvec;
249 struct rxrpc_wire_header *whdr = kv[0].iov_base;
08d55d7c 250 struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3;
5273a191 251 struct rxrpc_connection *conn;
8985f2b0 252 struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1);
8d94aa38 253 struct msghdr msg;
4d267ad6 254 int ret;
8d94aa38 255
5273a191 256 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
49489bb0 257 return;
8d94aa38 258
5273a191 259 conn = call->conn;
8d94aa38 260
8d94aa38
DH
261 msg.msg_name = &call->peer->srx.transport;
262 msg.msg_namelen = call->peer->srx.transport_len;
263 msg.msg_control = NULL;
264 msg.msg_controllen = 0;
49489bb0 265 msg.msg_flags = MSG_SPLICE_PAGES;
8d94aa38 266
08d55d7c 267 trace_rxrpc_tx_ack(call->debug_id, serial,
8985f2b0
DH
268 ntohl(ack->firstPacket),
269 ntohl(ack->serial), ack->reason, ack->nAcks,
b5099340 270 ntohl(trailer->rwind), why);
b86e218e 271
f2a676d1 272 rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
ed472b0c 273
08d55d7c 274 iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len);
eeaedc54 275 rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe);
420f8af5 276
08d55d7c 277 ret = do_udp_sendmsg(conn->local->socket, &msg, len);
ed472b0c 278 call->peer->last_tx_at = ktime_get_seconds();
84e28aa5 279 if (ret < 0) {
08d55d7c 280 trace_rxrpc_tx_fail(call->debug_id, serial, ret,
4764c0da 281 rxrpc_tx_point_call_ack);
eeaedc54
DH
282 if (why == rxrpc_propose_ack_ping_for_mtu_probe &&
283 ret == -EMSGSIZE)
08d55d7c 284 rxrpc_input_probe_for_pmtud(conn, serial, true);
84e28aa5 285 } else {
8985f2b0 286 trace_rxrpc_tx_packet(call->debug_id, whdr,
4764c0da 287 rxrpc_tx_point_call_ack);
eeaedc54
DH
288 if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
289 call->peer->pmtud_pending = false;
290 call->peer->pmtud_probing = true;
08d55d7c 291 call->conn->pmtud_probe = serial;
eeaedc54
DH
292 call->conn->pmtud_call = call->debug_id;
293 trace_rxrpc_pmtud_tx(call);
294 }
84e28aa5 295 }
c7e86acf 296 rxrpc_tx_backoff(call, ret);
8d94aa38
DH
297}
298
99afb28c
DH
299/*
300 * Queue an ACK for immediate transmission.
301 */
302void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
08d55d7c 303 rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why)
99afb28c 304{
420f8af5 305 struct kvec *kv = call->local->kvec;
08d55d7c
DH
306 rxrpc_serial_t ack_serial;
307 size_t len;
420f8af5 308 int nr_kv;
99afb28c
DH
309
310 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
311 return;
312
313 rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
314
08d55d7c
DH
315 nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window);
316 if (nr_kv < 0) {
99afb28c
DH
317 kleave(" = -ENOMEM");
318 return;
319 }
320
08d55d7c
DH
321 nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial);
322 len = kv[0].iov_len;
323 len += kv[1].iov_len;
324 len += kv[2].iov_len;
420f8af5 325
eeaedc54 326 /* Extend a path MTU probe ACK. */
eeaedc54
DH
327 if (why == rxrpc_propose_ack_ping_for_mtu_probe) {
328 size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header);
329
08d55d7c 330 if (len > probe_mtu)
eeaedc54 331 goto skip;
08d55d7c
DH
332 while (len < probe_mtu) {
333 size_t part = umin(probe_mtu - len, PAGE_SIZE);
eeaedc54
DH
334
335 kv[nr_kv].iov_base = page_address(ZERO_PAGE(0));
336 kv[nr_kv].iov_len = part;
08d55d7c 337 len += part;
eeaedc54
DH
338 nr_kv++;
339 }
340 }
420f8af5 341
49489bb0
DH
342 call->ackr_nr_unacked = 0;
343 atomic_set(&call->ackr_nr_consumed, 0);
344 clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
99afb28c 345
08d55d7c
DH
346 trace_rxrpc_send_ack(call, why, ack_reason, ack_serial);
347 rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why);
eeaedc54 348skip:
08d55d7c 349 rxrpc_free_ack(call);
99afb28c
DH
350}
351
eeaedc54
DH
352/*
353 * Send an ACK probe for path MTU discovery.
354 */
355void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call)
356{
357 rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
358 rxrpc_propose_ack_ping_for_mtu_probe);
359}
360
26cb02aa
DH
361/*
362 * Send an ABORT call packet.
363 */
364int rxrpc_send_abort_packet(struct rxrpc_call *call)
365{
5273a191 366 struct rxrpc_connection *conn;
26cb02aa
DH
367 struct rxrpc_abort_buffer pkt;
368 struct msghdr msg;
369 struct kvec iov[1];
370 rxrpc_serial_t serial;
371 int ret;
372
dcbefc30
DH
373 /* Don't bother sending aborts for a client call once the server has
374 * hard-ACK'd all of its request data. After that point, we're not
375 * going to stop the operation proceeding, and whilst we might limit
376 * the reply, it's not worth it if we can send a new call on the same
377 * channel instead, thereby closing off this call.
378 */
379 if (rxrpc_is_client_call(call) &&
a4ea4c47 380 test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
dcbefc30
DH
381 return 0;
382
5273a191 383 if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
26cb02aa
DH
384 return -ECONNRESET;
385
5273a191
DH
386 conn = call->conn;
387
26cb02aa
DH
388 msg.msg_name = &call->peer->srx.transport;
389 msg.msg_namelen = call->peer->srx.transport_len;
390 msg.msg_control = NULL;
391 msg.msg_controllen = 0;
392 msg.msg_flags = 0;
393
394 pkt.whdr.epoch = htonl(conn->proto.epoch);
395 pkt.whdr.cid = htonl(call->cid);
396 pkt.whdr.callNumber = htonl(call->call_id);
397 pkt.whdr.seq = 0;
398 pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT;
399 pkt.whdr.flags = conn->out_clientflag;
400 pkt.whdr.userStatus = 0;
401 pkt.whdr.securityIndex = call->security_ix;
402 pkt.whdr._rsvd = 0;
f3441d41 403 pkt.whdr.serviceId = htons(call->dest_srx.srx_service);
26cb02aa
DH
404 pkt.abort_code = htonl(call->abort_code);
405
406 iov[0].iov_base = &pkt;
407 iov[0].iov_len = sizeof(pkt);
408
f3104141 409 serial = rxrpc_get_next_serial(conn);
26cb02aa
DH
410 pkt.whdr.serial = htonl(serial);
411
ed472b0c 412 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
2cc80086
DH
413 ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt));
414 conn->peer->last_tx_at = ktime_get_seconds();
6b47fe1d
DH
415 if (ret < 0)
416 trace_rxrpc_tx_fail(call->debug_id, serial, ret,
4764c0da
DH
417 rxrpc_tx_point_call_abort);
418 else
419 trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
420 rxrpc_tx_point_call_abort);
c7e86acf 421 rxrpc_tx_backoff(call, ret);
26cb02aa
DH
422 return ret;
423}
424
17926a79 425/*
44125d5a 426 * Prepare a (sub)packet for transmission.
17926a79 427 */
b341a026
DH
428static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call,
429 struct rxrpc_send_data_req *req,
430 struct rxrpc_txbuf *txb,
06ea2c9c 431 struct rxrpc_wire_header *whdr,
b341a026 432 rxrpc_serial_t serial, int subpkt)
17926a79 433{
06ea2c9c 434 struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo);
1ac6a853 435 enum rxrpc_req_ack_trace why;
44125d5a 436 struct rxrpc_connection *conn = call->conn;
06ea2c9c 437 struct kvec *kv = &call->local->kvec[1 + subpkt];
3d2bdf73 438 size_t len = txb->pkt_len;
b341a026 439 bool last;
efa95c32 440 u8 flags;
17926a79 441
3d2bdf73 442 _enter("%x,%zd", txb->seq, len);
17926a79 443
44125d5a 444 txb->serial = serial;
5a924b89 445
4e255721 446 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
a4ea4c47 447 txb->seq == 1)
8985f2b0 448 whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
4e255721 449
efa95c32
DH
450 txb->flags &= ~RXRPC_REQUEST_ACK;
451 flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS;
452 last = txb->flags & RXRPC_LAST_PACKET;
453
b341a026 454 if (subpkt < req->n - 1) {
b7313009
DH
455 len = RXRPC_JUMBO_DATALEN;
456 goto dont_set_request_ack;
457 }
458
57494343
DH
459 /* If our RTT cache needs working on, request an ACK. Also request
460 * ACKs if a DATA packet appears to have been lost.
b604dd98
DH
461 *
462 * However, we mustn't request an ACK on the last reply packet of a
463 * service call, lest OpenAFS incorrectly send us an ACK with some
464 * soft-ACKs in it and then never follow up with a proper hard ACK.
57494343 465 */
efa95c32 466 if (last && rxrpc_sending_to_client(txb))
4d843be5
DH
467 why = rxrpc_reqack_no_srv_last;
468 else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
469 why = rxrpc_reqack_ack_lost;
12bdff73 470 else if (txb->flags & RXRPC_TXBUF_RESENT)
4d843be5 471 why = rxrpc_reqack_retrans;
4ee4c2f8 472 else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= RXRPC_MIN_CWND)
4d843be5
DH
473 why = rxrpc_reqack_slow_start;
474 else if (call->tx_winsize <= 2)
475 why = rxrpc_reqack_small_txwin;
b40ef2b8 476 else if (call->rtt_count < 3)
4d843be5 477 why = rxrpc_reqack_more_rtt;
b40ef2b8 478 else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real()))
4d843be5 479 why = rxrpc_reqack_old_rtt;
b341a026 480 else if (!last && !after(READ_ONCE(call->send_top), txb->seq))
8b5823ea 481 why = rxrpc_reqack_app_stall;
4d843be5
DH
482 else
483 goto dont_set_request_ack;
484
f7fa5242 485 rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
a4ea4c47 486 trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
efa95c32 487 if (why != rxrpc_reqack_no_srv_last) {
efa95c32 488 flags |= RXRPC_REQUEST_ACK;
dcdff0d8 489 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial);
b40ef2b8 490 call->rtt_last_req = req->now;
efa95c32 491 }
4d843be5 492dont_set_request_ack:
0d4b103c 493
06ea2c9c 494 /* There's a jumbo header prepended to the data if we need it. */
b341a026 495 if (subpkt < req->n - 1)
b7313009
DH
496 flags |= RXRPC_JUMBO_PACKET;
497 else
498 flags &= ~RXRPC_JUMBO_PACKET;
499 if (subpkt == 0) {
500 whdr->flags = flags;
b7313009 501 whdr->cksum = txb->cksum;
06ea2c9c 502 kv->iov_base = txb->data;
b7313009
DH
503 } else {
504 jumbo->flags = flags;
505 jumbo->pad = 0;
506 jumbo->cksum = txb->cksum;
507 kv->iov_base = jumbo;
508 len += sizeof(*jumbo);
509 }
44125d5a 510
372d12d1 511 trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace);
420f8af5
DH
512 kv->iov_len = len;
513 return len;
44125d5a
DH
514}
515
b341a026
DH
516/*
517 * Prepare a transmission queue object for initial transmission. Returns the
518 * number of microseconds since the transmission queue base timestamp.
519 */
520static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq,
521 struct rxrpc_send_data_req *req)
522{
523 if (!tq)
524 return 0;
525 if (tq->xmit_ts_base == KTIME_MIN) {
526 tq->xmit_ts_base = req->now;
527 return 0;
528 }
529 return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base));
530}
531
44125d5a 532/*
b7313009 533 * Prepare a (jumbo) packet for transmission.
44125d5a 534 */
06ea2c9c
DH
535static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call,
536 struct rxrpc_send_data_req *req,
537 struct rxrpc_wire_header *whdr)
44125d5a 538{
b341a026 539 struct rxrpc_txqueue *tq = req->tq;
44125d5a 540 rxrpc_serial_t serial;
b341a026
DH
541 unsigned int xmit_ts;
542 rxrpc_seq_t seq = req->seq;
420f8af5 543 size_t len = 0;
7c482665 544 bool start_tlp = false;
44125d5a 545
b341a026
DH
546 trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit);
547
44125d5a 548 /* Each transmission of a Tx packet needs a new serial number */
b341a026 549 serial = rxrpc_get_next_serials(call->conn, req->n);
44125d5a 550
06ea2c9c
DH
551 whdr->epoch = htonl(call->conn->proto.epoch);
552 whdr->cid = htonl(call->cid);
553 whdr->callNumber = htonl(call->call_id);
554 whdr->seq = htonl(seq);
555 whdr->serial = htonl(serial);
556 whdr->type = RXRPC_PACKET_TYPE_DATA;
557 whdr->flags = 0;
558 whdr->userStatus = 0;
559 whdr->securityIndex = call->security_ix;
560 whdr->_rsvd = 0;
561 whdr->serviceId = htons(call->conn->service_id);
562
7c482665 563 call->tx_last_serial = serial + req->n - 1;
b341a026
DH
564 call->tx_last_sent = req->now;
565 xmit_ts = rxrpc_prepare_txqueue(tq, req);
566 prefetch(tq->next);
44125d5a 567
b341a026
DH
568 for (int i = 0;;) {
569 int ix = seq & RXRPC_TXQ_MASK;
570 struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK];
4d267ad6 571
b341a026 572 _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq);
7c482665
DH
573
574 /* Record (re-)transmission for RACK [RFC8985 6.1]. */
575 if (__test_and_clear_bit(ix, &tq->segment_lost))
576 call->tx_nr_lost--;
577 if (req->retrans) {
578 __set_bit(ix, &tq->ever_retransmitted);
579 __set_bit(ix, &tq->segment_retransmitted);
580 call->tx_nr_resent++;
581 } else {
582 call->tx_nr_sent++;
583 start_tlp = true;
584 }
b341a026 585 tq->segment_xmit_ts[ix] = xmit_ts;
dcdff0d8
DH
586 tq->segment_serial[ix] = serial;
587 if (i + 1 == req->n)
588 /* Only sample the last subpacket in a jumbo. */
589 __set_bit(ix, &tq->rtt_samples);
06ea2c9c 590 len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i);
b341a026
DH
591 serial++;
592 seq++;
593 i++;
594 if (i >= req->n)
595 break;
596 if (!(seq & RXRPC_TXQ_MASK)) {
597 tq = tq->next;
598 trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance);
599 xmit_ts = rxrpc_prepare_txqueue(tq, req);
600 }
4d267ad6
DH
601 }
602
b341a026 603 /* Set timeouts */
7c482665
DH
604 if (req->tlp_probe) {
605 /* Sending TLP loss probe [RFC8985 7.3]. */
606 call->tlp_serial = serial - 1;
607 call->tlp_seq = seq - 1;
608 } else if (start_tlp) {
609 /* Schedule TLP loss probe [RFC8985 7.2]. */
610 ktime_t pto;
611
612 if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
613 /* The first packet may take longer to elicit a response. */
614 pto = NSEC_PER_SEC;
615 else
616 pto = rxrpc_tlp_calc_pto(call, req->now);
617
618 call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO;
619 call->rack_timo_at = ktime_add(req->now, pto);
620 trace_rxrpc_rack_timer(call, pto, false);
621 trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto);
9b052c6b
DH
622 }
623
4d267ad6
DH
624 if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) {
625 ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo));
626
b341a026 627 call->expect_rx_by = ktime_add(req->now, delay);
4d267ad6
DH
628 trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx);
629 }
630
b341a026 631 rxrpc_set_keepalive(call, req->now);
06ea2c9c 632 page_frag_free(whdr);
976b0ca5 633 return len;
44125d5a
DH
634}
635
636/*
b341a026 637 * Send one or more packets through the transport endpoint
44125d5a 638 */
b341a026 639void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req)
44125d5a 640{
06ea2c9c 641 struct rxrpc_wire_header *whdr;
44125d5a
DH
642 struct rxrpc_connection *conn = call->conn;
643 enum rxrpc_tx_point frag;
b341a026
DH
644 struct rxrpc_txqueue *tq = req->tq;
645 struct rxrpc_txbuf *txb;
44125d5a 646 struct msghdr msg;
b341a026 647 rxrpc_seq_t seq = req->seq;
06ea2c9c 648 size_t len = sizeof(*whdr);
976b0ca5 649 bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags);
f003e403 650 int ret, stat_ix;
44125d5a 651
b341a026 652 _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1);
44125d5a 653
06ea2c9c
DH
654 whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS);
655 if (!whdr)
656 return; /* Drop the packet if no memory. */
657
658 call->local->kvec[0].iov_base = whdr;
659 call->local->kvec[0].iov_len = sizeof(*whdr);
660
f003e403
DH
661 stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1;
662 atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]);
663
06ea2c9c 664 len += rxrpc_prepare_data_packet(call, req, whdr);
b341a026 665 txb = tq->bufs[seq & RXRPC_TXQ_MASK];
44125d5a 666
06ea2c9c 667 iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len);
44125d5a
DH
668
669 msg.msg_name = &call->peer->srx.transport;
670 msg.msg_namelen = call->peer->srx.transport_len;
671 msg.msg_control = NULL;
672 msg.msg_controllen = 0;
49489bb0 673 msg.msg_flags = MSG_SPLICE_PAGES;
cf37b598 674
81e7761b
DH
675 /* Send the packet with the don't fragment bit set unless we think it's
676 * too big or if this is a retransmission.
cf37b598 677 */
b341a026 678 if (seq == call->tx_transmitted + 1 &&
81e7761b 679 len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) {
1ac6a853
DH
680 rxrpc_local_dont_fragment(conn->local, false);
681 frag = rxrpc_tx_point_call_data_frag;
682 } else {
683 rxrpc_local_dont_fragment(conn->local, true);
684 frag = rxrpc_tx_point_call_data_nofrag;
685 }
5a924b89 686
81e7761b
DH
687 /* Track what we've attempted to transmit at least once so that the
688 * retransmission algorithm doesn't try to resend what we haven't sent
689 * yet.
690 */
b341a026
DH
691 if (seq == call->tx_transmitted + 1)
692 call->tx_transmitted = seq + req->n - 1;
81e7761b 693
cd69a07b
DH
694 if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
695 static int lose;
696
697 if ((lose++ & 7) == 7) {
698 ret = 0;
372d12d1
DH
699 trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags,
700 rxrpc_txdata_inject_loss);
9b052c6b 701 conn->peer->last_tx_at = ktime_get_seconds();
cd69a07b
DH
702 goto done;
703 }
704 }
705
5a924b89
DH
706 /* send the packet by UDP
707 * - returns -EMSGSIZE if UDP would have to fragment the packet
708 * to go out of the interface
709 * - in which case, we'll have processed the ICMP error
710 * message and update the peer record
711 */
b0154246 712 rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
2cc80086
DH
713 ret = do_udp_sendmsg(conn->local->socket, &msg, len);
714 conn->peer->last_tx_at = ktime_get_seconds();
5a924b89 715
81e7761b
DH
716 if (ret == -EMSGSIZE) {
717 rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize);
06ea2c9c 718 trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
81e7761b
DH
719 ret = 0;
720 } else if (ret < 0) {
32cf8edb 721 rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
44125d5a 722 trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag);
4700c4d8 723 } else {
06ea2c9c 724 trace_rxrpc_tx_packet(call->debug_id, whdr, frag);
4700c4d8
DH
725 }
726
c7e86acf 727 rxrpc_tx_backoff(call, ret);
5a924b89 728
976b0ca5 729 if (ret < 0) {
b341a026
DH
730 /* Cancel the call if the initial transmission fails or if we
731 * hit due to network routing issues that aren't going away
732 * anytime soon. The layer above can arrange the
733 * retransmission.
c7e86acf 734 */
b341a026
DH
735 if (new_call ||
736 ret == -ENETUNREACH ||
737 ret == -EHOSTUNREACH ||
738 ret == -ECONNREFUSED)
c7e86acf
DH
739 rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
740 RX_USER_ABORT, ret);
741 }
415f44e4 742
976b0ca5 743done:
eeaedc54 744 _leave(" = %d [%u]", ret, call->peer->max_data);
17926a79 745}
248f219c 746
a00ce28b
DH
747/*
748 * Transmit a connection-level abort.
749 */
750void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
751{
752 struct rxrpc_wire_header whdr;
753 struct msghdr msg;
754 struct kvec iov[2];
755 __be32 word;
756 size_t len;
757 u32 serial;
758 int ret;
759
760 msg.msg_name = &conn->peer->srx.transport;
761 msg.msg_namelen = conn->peer->srx.transport_len;
762 msg.msg_control = NULL;
763 msg.msg_controllen = 0;
764 msg.msg_flags = 0;
765
766 whdr.epoch = htonl(conn->proto.epoch);
767 whdr.cid = htonl(conn->proto.cid);
768 whdr.callNumber = 0;
769 whdr.seq = 0;
770 whdr.type = RXRPC_PACKET_TYPE_ABORT;
771 whdr.flags = conn->out_clientflag;
772 whdr.userStatus = 0;
773 whdr.securityIndex = conn->security_ix;
774 whdr._rsvd = 0;
775 whdr.serviceId = htons(conn->service_id);
776
777 word = htonl(conn->abort_code);
778
779 iov[0].iov_base = &whdr;
780 iov[0].iov_len = sizeof(whdr);
781 iov[1].iov_base = &word;
782 iov[1].iov_len = sizeof(word);
783
784 len = iov[0].iov_len + iov[1].iov_len;
785
f3104141 786 serial = rxrpc_get_next_serial(conn);
a00ce28b
DH
787 whdr.serial = htonl(serial);
788
789 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
790 ret = do_udp_sendmsg(conn->local->socket, &msg, len);
791 if (ret < 0) {
792 trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
793 rxrpc_tx_point_conn_abort);
794 _debug("sendmsg failed: %d", ret);
795 return;
796 }
797
798 trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
799
800 conn->peer->last_tx_at = ktime_get_seconds();
801}
802
248f219c 803/*
5e6ef4f1 804 * Reject a packet through the local endpoint.
248f219c 805 */
5e6ef4f1 806void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
248f219c 807{
248f219c 808 struct rxrpc_wire_header whdr;
5e6ef4f1
DH
809 struct sockaddr_rxrpc srx;
810 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
248f219c
DH
811 struct msghdr msg;
812 struct kvec iov[2];
813 size_t size;
814 __be32 code;
ece64fec 815 int ret, ioc;
248f219c 816
5e6ef4f1 817 rxrpc_see_skb(skb, rxrpc_skb_see_reject);
248f219c
DH
818
819 iov[0].iov_base = &whdr;
820 iov[0].iov_len = sizeof(whdr);
821 iov[1].iov_base = &code;
822 iov[1].iov_len = sizeof(code);
248f219c 823
1c2bc7b9 824 msg.msg_name = &srx.transport;
248f219c
DH
825 msg.msg_control = NULL;
826 msg.msg_controllen = 0;
827 msg.msg_flags = 0;
828
248f219c 829 memset(&whdr, 0, sizeof(whdr));
248f219c 830
5e6ef4f1
DH
831 switch (skb->mark) {
832 case RXRPC_SKB_MARK_REJECT_BUSY:
833 whdr.type = RXRPC_PACKET_TYPE_BUSY;
834 size = sizeof(whdr);
835 ioc = 1;
836 break;
837 case RXRPC_SKB_MARK_REJECT_ABORT:
838 whdr.type = RXRPC_PACKET_TYPE_ABORT;
839 code = htonl(skb->priority);
840 size = sizeof(whdr) + sizeof(code);
841 ioc = 2;
842 break;
843 default:
844 return;
845 }
1c2bc7b9 846
5e6ef4f1
DH
847 if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
848 msg.msg_namelen = srx.transport_len;
ece64fec 849
5e6ef4f1
DH
850 whdr.epoch = htonl(sp->hdr.epoch);
851 whdr.cid = htonl(sp->hdr.cid);
852 whdr.callNumber = htonl(sp->hdr.callNumber);
853 whdr.serviceId = htons(sp->hdr.serviceId);
854 whdr.flags = sp->hdr.flags;
855 whdr.flags ^= RXRPC_CLIENT_INITIATED;
856 whdr.flags &= RXRPC_CLIENT_INITIATED;
248f219c 857
5e6ef4f1
DH
858 iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
859 ret = do_udp_sendmsg(local->socket, &msg, size);
860 if (ret < 0)
861 trace_rxrpc_tx_fail(local->debug_id, 0, ret,
862 rxrpc_tx_point_reject);
863 else
864 trace_rxrpc_tx_packet(local->debug_id, &whdr,
865 rxrpc_tx_point_reject);
248f219c 866 }
248f219c 867}
ace45bec
DH
868
869/*
870 * Send a VERSION reply to a peer as a keepalive.
871 */
872void rxrpc_send_keepalive(struct rxrpc_peer *peer)
873{
874 struct rxrpc_wire_header whdr;
875 struct msghdr msg;
876 struct kvec iov[2];
877 size_t len;
878 int ret;
879
880 _enter("");
881
882 msg.msg_name = &peer->srx.transport;
883 msg.msg_namelen = peer->srx.transport_len;
884 msg.msg_control = NULL;
885 msg.msg_controllen = 0;
886 msg.msg_flags = 0;
887
888 whdr.epoch = htonl(peer->local->rxnet->epoch);
889 whdr.cid = 0;
890 whdr.callNumber = 0;
891 whdr.seq = 0;
892 whdr.serial = 0;
893 whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
894 whdr.flags = RXRPC_LAST_PACKET;
895 whdr.userStatus = 0;
896 whdr.securityIndex = 0;
897 whdr._rsvd = 0;
898 whdr.serviceId = 0;
899
900 iov[0].iov_base = &whdr;
901 iov[0].iov_len = sizeof(whdr);
902 iov[1].iov_base = (char *)rxrpc_keepalive_string;
903 iov[1].iov_len = sizeof(rxrpc_keepalive_string);
904
905 len = iov[0].iov_len + iov[1].iov_len;
906
ed472b0c
DH
907 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
908 ret = do_udp_sendmsg(peer->local->socket, &msg, len);
ace45bec 909 if (ret < 0)
6b47fe1d 910 trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
4764c0da
DH
911 rxrpc_tx_point_version_keepalive);
912 else
913 trace_rxrpc_tx_packet(peer->debug_id, &whdr,
914 rxrpc_tx_point_version_keepalive);
ace45bec 915
330bdcfa 916 peer->last_tx_at = ktime_get_seconds();
ace45bec
DH
917 _leave("");
918}
5800b1cf
DH
919
920/*
921 * Send a RESPONSE message.
922 */
923void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response)
924{
925 struct rxrpc_skb_priv *sp = rxrpc_skb(response);
926 struct scatterlist sg[16];
927 struct bio_vec bvec[16];
928 struct msghdr msg;
929 size_t len = sp->resp.len;
930 __be32 wserial;
931 u32 serial = 0;
932 int ret, nr_sg;
933
934 _enter("C=%x,%x", conn->debug_id, sp->resp.challenge_serial);
935
936 sg_init_table(sg, ARRAY_SIZE(sg));
937 ret = skb_to_sgvec(response, sg, 0, len);
938 if (ret < 0)
939 goto fail;
940 nr_sg = ret;
941
942 for (int i = 0; i < nr_sg; i++)
943 bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset);
944
945 iov_iter_bvec(&msg.msg_iter, WRITE, bvec, nr_sg, len);
946
947 msg.msg_name = &conn->peer->srx.transport;
948 msg.msg_namelen = conn->peer->srx.transport_len;
949 msg.msg_control = NULL;
950 msg.msg_controllen = 0;
951 msg.msg_flags = MSG_SPLICE_PAGES;
952
953 serial = rxrpc_get_next_serials(conn, 1);
954 wserial = htonl(serial);
955
fba69957
DH
956 trace_rxrpc_tx_response(conn, serial, sp);
957
5800b1cf
DH
958 ret = skb_store_bits(response, offsetof(struct rxrpc_wire_header, serial),
959 &wserial, sizeof(wserial));
960 if (ret < 0)
961 goto fail;
962
963 rxrpc_local_dont_fragment(conn->local, false);
964
965 ret = do_udp_sendmsg(conn->local->socket, &msg, len);
966 if (ret < 0)
967 goto fail;
968
969 conn->peer->last_tx_at = ktime_get_seconds();
970 return;
971
972fail:
973 trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
974 rxrpc_tx_point_response);
975 kleave(" = %d", ret);
976}