[people/ms/linux.git] / net / rxrpc / rtt.c

// SPDX-License-Identifier: GPL-2.0
/* RTT/RTO calculation.
 *
 * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
 *
 * https://tools.ietf.org/html/rfc6298
 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
 */

#include <linux/net.h>
#include "ar-internal.h"

#define RXRPC_RTO_MAX	((unsigned)(120 * HZ))
#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/
#define rxrpc_jiffies32 ((u32)jiffies)		/* As rxrpc_jiffies32 */

static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
{
	return 200;
}

static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
{
	return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
}

static u32 rxrpc_bound_rto(u32 rto)
{
	return min(rto, RXRPC_RTO_MAX);
}

/*
 * Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */
static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
{
	long m = sample_rtt_us; /* RTT */
	u32 srtt = peer->srtt_us;

	/*	The following amusing code comes from Jacobson's
	 *	article in SIGCOMM '88.  Note that rtt and mdev
	 *	are scaled versions of rtt and mean deviation.
	 *	This is designed to be as fast as possible
	 *	m stands for "measurement".
	 *
	 *	On a 1990 paper the rto value is changed to:
	 *	RTO = rtt + 4 * mdev
	 *
	 * Funny. This algorithm seems to be very broken.
	 * These formulae increase RTO, when it should be decreased, increase
	 * too slowly, when it should be increased quickly, decrease too quickly
	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
	 * does not matter how to _calculate_ it. Seems, it was trap
	 * that VJ failed to avoid. 8)
	 */
	if (srtt != 0) {
		m -= (srtt >> 3);	/* m is now error in rtt est */
		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (peer->mdev_us >> 2);   /* similar update on mdev */
			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
			 * for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of rto,
			 * but also it limits too fast rto decreases,
			 * happening in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (peer->mdev_us >> 2);   /* similar update on mdev */
		}

		peer->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
		if (peer->mdev_us > peer->mdev_max_us) {
			peer->mdev_max_us = peer->mdev_us;
			if (peer->mdev_max_us > peer->rttvar_us)
				peer->rttvar_us = peer->mdev_max_us;
		}
	} else {
		/* no previous measure. */
		srtt = m << 3;		/* take the measured time to be rtt */
		peer->mdev_us = m << 1;	/* make sure rto = 3*rtt */
		peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
		peer->mdev_max_us = peer->rttvar_us;
	}

	peer->srtt_us = max(1U, srtt);
}

/*
 * Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static void rxrpc_set_rto(struct rxrpc_peer *peer)
{
	u32 rto;

	/* 1. If rtt variance happened to be less 50msec, it is hallucination.
	 *    It cannot be less due to utterly erratic ACK generation made
	 *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
	 *    to do with delayed acks, because at cwnd>2 true delack timeout
	 *    is invisible. Actually, Linux-2.4 also generates erratic
	 *    ACKs in some circumstances.
	 */
	rto = __rxrpc_set_rto(peer);

	/* 2. Fixups made earlier cannot be right.
	 *    If we do not estimate RTO correctly without them,
	 *    all the algo is pure shit and should be replaced
	 *    with correct one. It is exactly, which we pretend to do.
	 */

	/* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
	 * guarantees that rto is higher.
	 */
	peer->rto_j = rxrpc_bound_rto(rto);
}

static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
{
	if (rtt_us < 0)
		return;

	//rxrpc_update_rtt_min(peer, rtt_us);
	rxrpc_rtt_estimator(peer, rtt_us);
	rxrpc_set_rto(peer);

	/* RFC6298: only reset backoff on valid RTT measurement. */
	peer->backoff = 0;
}

/*
 * Add RTT information to cache.  This is called in softirq mode and has
 * exclusive access to the peer RTT data.
 */
void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
			int rtt_slot,
			rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
			ktime_t send_time, ktime_t resp_time)
{
	struct rxrpc_peer *peer = call->peer;
	s64 rtt_us;

	rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
	if (rtt_us < 0)
		return;

	spin_lock(&peer->rtt_input_lock);
	rxrpc_ack_update_rtt(peer, rtt_us);
	if (peer->rtt_count < 3)
		peer->rtt_count++;
	spin_unlock(&peer->rtt_input_lock);

	trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
			   peer->srtt_us >> 3, peer->rto_j);
}

/*
 * Get the retransmission timeout to set in jiffies, backing it off each time
 * we retransmit.
 */
unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
{
	u64 timo_j;
	u8 backoff = READ_ONCE(peer->backoff);

	timo_j = peer->rto_j;
	timo_j <<= backoff;
	if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
		WRITE_ONCE(peer->backoff, backoff + 1);

	if (timo_j < 1)
		timo_j = 1;

	return timo_j;
}

void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
{
	peer->rto_j	= RXRPC_TIMEOUT_INIT;
	peer->mdev_us	= jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
	peer->backoff	= 0;
	//minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
}
Commit	Line	Data
c410bf01 DH	1	// SPDX-License-Identifier: GPL-2.0
	2	/* RTT/RTO calculation.
	3	*
	4	* Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
	5	*
	6	* https://tools.ietf.org/html/rfc6298
	7	* https://tools.ietf.org/html/rfc1122#section-4.2.3.1
	8	* http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
	9	*/
	10
	11	#include <linux/net.h>
	12	#include "ar-internal.h"
	13
	14	#define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
	15	#define RXRPC_TIMEOUT_INIT ((unsigned)(1HZ)) / RFC6298 2.1 initial RTO value */
	16	#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
c410bf01 DH	17
	18	static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
	19	{
	20	return 200;
	21	}
	22
	23	static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
	24	{
acde891c	25	return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
c410bf01 DH	26	}
	27
	28	static u32 rxrpc_bound_rto(u32 rto)
	29	{
	30	return min(rto, RXRPC_RTO_MAX);
	31	}
	32
	33	/*
	34	* Called to compute a smoothed rtt estimate. The data fed to this
	35	* routine either comes from timestamps, or from segments that were
	36	* known _not_ to have been retransmitted [see Karn/Partridge
	37	* Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
	38	* piece by Van Jacobson.
	39	* NOTE: the next three routines used to be one big routine.
	40	* To save cycles in the RFC 1323 implementation it was better to break
	41	* it up into three procedures. -- erics
	42	*/
	43	static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
	44	{
	45	long m = sample_rtt_us; /* RTT */
	46	u32 srtt = peer->srtt_us;
	47
	48	/* The following amusing code comes from Jacobson's
	49	* article in SIGCOMM '88. Note that rtt and mdev
	50	* are scaled versions of rtt and mean deviation.
	51	* This is designed to be as fast as possible
	52	* m stands for "measurement".
	53	*
	54	* On a 1990 paper the rto value is changed to:
	55	* RTO = rtt + 4 * mdev
	56	*
	57	* Funny. This algorithm seems to be very broken.
	58	* These formulae increase RTO, when it should be decreased, increase
	59	* too slowly, when it should be increased quickly, decrease too quickly
	60	* etc. I guess in BSD RTO takes ONE value, so that it is absolutely
	61	* does not matter how to _calculate_ it. Seems, it was trap
	62	* that VJ failed to avoid. 8)
	63	*/
	64	if (srtt != 0) {
	65	m -= (srtt >> 3); /* m is now error in rtt est */
	66	srtt += m; /* rtt = 7/8 rtt + 1/8 new */
	67	if (m < 0) {
	68	m = -m; /* m is now abs(error) */
	69	m -= (peer->mdev_us >> 2); /* similar update on mdev */
	70	/* This is similar to one of Eifel findings.
	71	* Eifel blocks mdev updates when rtt decreases.
	72	* This solution is a bit different: we use finer gain
	73	* for mdev in this case (alpha*beta).
	74	* Like Eifel it also prevents growth of rto,
	75	* but also it limits too fast rto decreases,
	76	* happening in pure Eifel.
	77	*/
	78	if (m > 0)
	79	m >>= 3;
	80	} else {
	81	m -= (peer->mdev_us >> 2); /* similar update on mdev */
	82	}
	83
	84	peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
	85	if (peer->mdev_us > peer->mdev_max_us) {
	86	peer->mdev_max_us = peer->mdev_us;
	87	if (peer->mdev_max_us > peer->rttvar_us)
	88	peer->rttvar_us = peer->mdev_max_us;
	89	}
90	} else {
91	/* no previous measure. */
92	srtt = m << 3; /* take the measured time to be rtt */
93	peer->mdev_us = m << 1; /* make sure rto = 3rtt /
94	peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
95	peer->mdev_max_us = peer->rttvar_us;
96	}
97
98	peer->srtt_us = max(1U, srtt);
99	}
100
101	/*
102	* Calculate rto without backoff. This is the second half of Van Jacobson's
103	* routine referred to above.
104	*/
105	static void rxrpc_set_rto(struct rxrpc_peer *peer)
106	{
107	u32 rto;
108
109	/* 1. If rtt variance happened to be less 50msec, it is hallucination.
110	* It cannot be less due to utterly erratic ACK generation made
111	* at least by solaris and freebsd. "Erratic ACKs" has _nothing_
112	* to do with delayed acks, because at cwnd>2 true delack timeout
113	* is invisible. Actually, Linux-2.4 also generates erratic
114	* ACKs in some circumstances.
115	*/
116	rto = __rxrpc_set_rto(peer);
117
118	/* 2. Fixups made earlier cannot be right.
119	* If we do not estimate RTO correctly without them,
120	* all the algo is pure shit and should be replaced
121	* with correct one. It is exactly, which we pretend to do.
122	*/
123
124	/* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
125	* guarantees that rto is higher.
126	*/
127	peer->rto_j = rxrpc_bound_rto(rto);
128	}
129
130	static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
131	{
132	if (rtt_us < 0)
133	return;
134
135	//rxrpc_update_rtt_min(peer, rtt_us);
136	rxrpc_rtt_estimator(peer, rtt_us);
137	rxrpc_set_rto(peer);
138
139	/* RFC6298: only reset backoff on valid RTT measurement. */
140	peer->backoff = 0;
141	}
142
143	/*
144	* Add RTT information to cache. This is called in softirq mode and has
145	* exclusive access to the peer RTT data.
146	*/
147	void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
4700c4d8	148	int rtt_slot,
c410bf01 DH	149	rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
	150	ktime_t send_time, ktime_t resp_time)
	151	{
	152	struct rxrpc_peer *peer = call->peer;
	153	s64 rtt_us;
	154
	155	rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
	156	if (rtt_us < 0)
	157	return;
	158
	159	spin_lock(&peer->rtt_input_lock);
	160	rxrpc_ack_update_rtt(peer, rtt_us);
	161	if (peer->rtt_count < 3)
	162	peer->rtt_count++;
	163	spin_unlock(&peer->rtt_input_lock);
	164
4700c4d8	165	trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
c410bf01 DH	166	peer->srtt_us >> 3, peer->rto_j);
	167	}
	168
	169	/*
	170	* Get the retransmission timeout to set in jiffies, backing it off each time
	171	* we retransmit.
	172	*/
	173	unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
	174	{
	175	u64 timo_j;
	176	u8 backoff = READ_ONCE(peer->backoff);
	177
	178	timo_j = peer->rto_j;
	179	timo_j <<= backoff;
	180	if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
	181	WRITE_ONCE(peer->backoff, backoff + 1);
	182
	183	if (timo_j < 1)
	184	timo_j = 1;
	185
	186	return timo_j;
	187	}
	188
	189	void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
	190	{
	191	peer->rto_j = RXRPC_TIMEOUT_INIT;
	192	peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
	193	peer->backoff = 0;
	194	//minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
	195	}