]>
Commit | Line | Data |
---|---|---|
c410bf01 DH |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* RTT/RTO calculation. | |
3 | * | |
4 | * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) | |
5 | * | |
6 | * https://tools.ietf.org/html/rfc6298 | |
7 | * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 | |
8 | * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf | |
9 | */ | |
10 | ||
11 | #include <linux/net.h> | |
12 | #include "ar-internal.h" | |
13 | ||
14 | #define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) | |
15 | #define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ | |
16 | #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ | |
c410bf01 DH |
17 | |
18 | static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) | |
19 | { | |
20 | return 200; | |
21 | } | |
22 | ||
23 | static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) | |
24 | { | |
acde891c | 25 | return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); |
c410bf01 DH |
26 | } |
27 | ||
28 | static u32 rxrpc_bound_rto(u32 rto) | |
29 | { | |
30 | return min(rto, RXRPC_RTO_MAX); | |
31 | } | |
32 | ||
33 | /* | |
34 | * Called to compute a smoothed rtt estimate. The data fed to this | |
35 | * routine either comes from timestamps, or from segments that were | |
36 | * known _not_ to have been retransmitted [see Karn/Partridge | |
37 | * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 | |
38 | * piece by Van Jacobson. | |
39 | * NOTE: the next three routines used to be one big routine. | |
40 | * To save cycles in the RFC 1323 implementation it was better to break | |
41 | * it up into three procedures. -- erics | |
42 | */ | |
43 | static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) | |
44 | { | |
45 | long m = sample_rtt_us; /* RTT */ | |
46 | u32 srtt = peer->srtt_us; | |
47 | ||
48 | /* The following amusing code comes from Jacobson's | |
49 | * article in SIGCOMM '88. Note that rtt and mdev | |
50 | * are scaled versions of rtt and mean deviation. | |
51 | * This is designed to be as fast as possible | |
52 | * m stands for "measurement". | |
53 | * | |
54 | * On a 1990 paper the rto value is changed to: | |
55 | * RTO = rtt + 4 * mdev | |
56 | * | |
57 | * Funny. This algorithm seems to be very broken. | |
58 | * These formulae increase RTO, when it should be decreased, increase | |
59 | * too slowly, when it should be increased quickly, decrease too quickly | |
60 | * etc. I guess in BSD RTO takes ONE value, so that it is absolutely | |
61 | * does not matter how to _calculate_ it. Seems, it was trap | |
62 | * that VJ failed to avoid. 8) | |
63 | */ | |
64 | if (srtt != 0) { | |
65 | m -= (srtt >> 3); /* m is now error in rtt est */ | |
66 | srtt += m; /* rtt = 7/8 rtt + 1/8 new */ | |
67 | if (m < 0) { | |
68 | m = -m; /* m is now abs(error) */ | |
69 | m -= (peer->mdev_us >> 2); /* similar update on mdev */ | |
70 | /* This is similar to one of Eifel findings. | |
71 | * Eifel blocks mdev updates when rtt decreases. | |
72 | * This solution is a bit different: we use finer gain | |
73 | * for mdev in this case (alpha*beta). | |
74 | * Like Eifel it also prevents growth of rto, | |
75 | * but also it limits too fast rto decreases, | |
76 | * happening in pure Eifel. | |
77 | */ | |
78 | if (m > 0) | |
79 | m >>= 3; | |
80 | } else { | |
81 | m -= (peer->mdev_us >> 2); /* similar update on mdev */ | |
82 | } | |
83 | ||
84 | peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ | |
85 | if (peer->mdev_us > peer->mdev_max_us) { | |
86 | peer->mdev_max_us = peer->mdev_us; | |
87 | if (peer->mdev_max_us > peer->rttvar_us) | |
88 | peer->rttvar_us = peer->mdev_max_us; | |
89 | } | |
90 | } else { | |
91 | /* no previous measure. */ | |
92 | srtt = m << 3; /* take the measured time to be rtt */ | |
93 | peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ | |
94 | peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); | |
95 | peer->mdev_max_us = peer->rttvar_us; | |
96 | } | |
97 | ||
98 | peer->srtt_us = max(1U, srtt); | |
99 | } | |
100 | ||
101 | /* | |
102 | * Calculate rto without backoff. This is the second half of Van Jacobson's | |
103 | * routine referred to above. | |
104 | */ | |
105 | static void rxrpc_set_rto(struct rxrpc_peer *peer) | |
106 | { | |
107 | u32 rto; | |
108 | ||
109 | /* 1. If rtt variance happened to be less 50msec, it is hallucination. | |
110 | * It cannot be less due to utterly erratic ACK generation made | |
111 | * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ | |
112 | * to do with delayed acks, because at cwnd>2 true delack timeout | |
113 | * is invisible. Actually, Linux-2.4 also generates erratic | |
114 | * ACKs in some circumstances. | |
115 | */ | |
116 | rto = __rxrpc_set_rto(peer); | |
117 | ||
118 | /* 2. Fixups made earlier cannot be right. | |
119 | * If we do not estimate RTO correctly without them, | |
120 | * all the algo is pure shit and should be replaced | |
121 | * with correct one. It is exactly, which we pretend to do. | |
122 | */ | |
123 | ||
124 | /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo | |
125 | * guarantees that rto is higher. | |
126 | */ | |
127 | peer->rto_j = rxrpc_bound_rto(rto); | |
128 | } | |
129 | ||
130 | static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) | |
131 | { | |
132 | if (rtt_us < 0) | |
133 | return; | |
134 | ||
135 | //rxrpc_update_rtt_min(peer, rtt_us); | |
136 | rxrpc_rtt_estimator(peer, rtt_us); | |
137 | rxrpc_set_rto(peer); | |
138 | ||
139 | /* RFC6298: only reset backoff on valid RTT measurement. */ | |
140 | peer->backoff = 0; | |
141 | } | |
142 | ||
143 | /* | |
144 | * Add RTT information to cache. This is called in softirq mode and has | |
145 | * exclusive access to the peer RTT data. | |
146 | */ | |
147 | void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, | |
4700c4d8 | 148 | int rtt_slot, |
c410bf01 DH |
149 | rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, |
150 | ktime_t send_time, ktime_t resp_time) | |
151 | { | |
152 | struct rxrpc_peer *peer = call->peer; | |
153 | s64 rtt_us; | |
154 | ||
155 | rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); | |
156 | if (rtt_us < 0) | |
157 | return; | |
158 | ||
159 | spin_lock(&peer->rtt_input_lock); | |
160 | rxrpc_ack_update_rtt(peer, rtt_us); | |
161 | if (peer->rtt_count < 3) | |
162 | peer->rtt_count++; | |
163 | spin_unlock(&peer->rtt_input_lock); | |
164 | ||
4700c4d8 | 165 | trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, |
c410bf01 DH |
166 | peer->srtt_us >> 3, peer->rto_j); |
167 | } | |
168 | ||
169 | /* | |
170 | * Get the retransmission timeout to set in jiffies, backing it off each time | |
171 | * we retransmit. | |
172 | */ | |
173 | unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) | |
174 | { | |
175 | u64 timo_j; | |
176 | u8 backoff = READ_ONCE(peer->backoff); | |
177 | ||
178 | timo_j = peer->rto_j; | |
179 | timo_j <<= backoff; | |
180 | if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) | |
181 | WRITE_ONCE(peer->backoff, backoff + 1); | |
182 | ||
183 | if (timo_j < 1) | |
184 | timo_j = 1; | |
185 | ||
186 | return timo_j; | |
187 | } | |
188 | ||
189 | void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) | |
190 | { | |
191 | peer->rto_j = RXRPC_TIMEOUT_INIT; | |
192 | peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); | |
193 | peer->backoff = 0; | |
194 | //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); | |
195 | } |