erofs-fix-up-erofs_lookup-tracepoint.patch
arm64-dts-marvell-armada-37xx-extend-pcie-mem-space.patch
pci-aardvark-fix-checking-for-pio-status.patch
+tcp-address-problems-caused-by-edt-misshaps.patch
+tcp-always-set-retrans_stamp-on-recovery.patch
+tcp-create-a-helper-to-model-exponential-backoff.patch
+tcp-adjust-rto_base-in-retransmits_timed_out.patch
--- /dev/null
+From 9efdda4e3abed13f0903b7b6e4d4c2102019440a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 24 Nov 2018 09:12:24 -0800
+Subject: tcp: address problems caused by EDT misshaps
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 9efdda4e3abed13f0903b7b6e4d4c2102019440a upstream.
+
+When a qdisc setup including pacing FQ is dismantled and recreated,
+some TCP packets are sent earlier than instructed by TCP stack.
+
+TCP can be fooled when ACK comes back, because the following
+operation can return a negative value.
+
+ tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+
+Some paths in TCP stack were not dealing properly with this,
+this patch addresses four of them.
+
+Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 16 ++++++++++------
+ net/ipv4/tcp_timer.c | 10 ++++++----
+ 2 files changed, 16 insertions(+), 10 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -581,10 +581,12 @@ static inline void tcp_rcv_rtt_measure_t
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+ u32 delta_us;
+
+- if (!delta)
+- delta = 1;
+- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+- tcp_rcv_rtt_update(tp, delta_us, 0);
++ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
++ if (!delta)
++ delta = 1;
++ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
++ tcp_rcv_rtt_update(tp, delta_us, 0);
++ }
+ }
+ }
+
+@@ -2931,9 +2933,11 @@ static bool tcp_ack_update_rtt(struct so
+ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+ flag & FLAG_ACKED) {
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+- seq_rtt_us = ca_rtt_us = delta_us;
++ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
++ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
++ ca_rtt_us = seq_rtt_us;
++ }
+ }
+ rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
+ if (seq_rtt_us < 0)
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 elapsed, start_ts;
++ s32 remaining;
+
+ start_ts = tcp_retransmit_stamp(sk);
+ if (!icsk->icsk_user_timeout || !start_ts)
+ return icsk->icsk_rto;
+ elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
+- if (elapsed >= icsk->icsk_user_timeout)
++ remaining = icsk->icsk_user_timeout - elapsed;
++ if (remaining <= 0)
+ return 1; /* user timeout has passed; fire ASAP */
+- else
+- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
++
++ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
+ }
+
+ /**
+@@ -210,7 +212,7 @@ static bool retransmits_timed_out(struct
+ (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+ timeout = jiffies_to_msecs(timeout);
+ }
+- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
++ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
+ }
+
+ /* A write timeout has occurred. Process the after effects. */
--- /dev/null
+From 3256a2d6ab1f71f9a1bd2d7f6f18eb8108c48d17 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 30 Sep 2019 15:44:44 -0700
+Subject: tcp: adjust rto_base in retransmits_timed_out()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 3256a2d6ab1f71f9a1bd2d7f6f18eb8108c48d17 upstream.
+
+The cited commit exposed an old retransmits_timed_out() bug
+which assumed it could call tcp_model_timeout() with
+TCP_RTO_MIN as rto_base for all states.
+
+But flows in SYN_SENT or SYN_RECV state use a different
+RTO base (1 sec instead of 200 ms, unless BPF chooses
+another value).
+
+This caused a reduction of SYN retransmits from 6 to 4 with
+the default /proc/sys/net/ipv4/tcp_syn_retries value.
+
+Fixes: a41e8a88b06e ("tcp: better handle TCP_USER_TIMEOUT in SYN_SENT state")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Marek Majkowski <marek@cloudflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_timer.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -197,8 +197,13 @@ static bool retransmits_timed_out(struct
+ return false;
+
+ start_ts = tcp_sk(sk)->retrans_stamp;
+- if (likely(timeout == 0))
+- timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN);
++ if (likely(timeout == 0)) {
++ unsigned int rto_base = TCP_RTO_MIN;
++
++ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
++ rto_base = tcp_timeout_init(sk);
++ timeout = tcp_model_timeout(sk, boundary, rto_base);
++ }
+
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
+ }
--- /dev/null
+From 7ae189759cc48cf8b54beebff566e9fd2d4e7d7c Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 16 Jan 2019 15:05:30 -0800
+Subject: tcp: always set retrans_stamp on recovery
+
+From: Yuchung Cheng <ycheng@google.com>
+
+commit 7ae189759cc48cf8b54beebff566e9fd2d4e7d7c upstream.
+
+Previously TCP socket's retrans_stamp is not set if the
+retransmission has failed to send. As a result if a socket is
+experiencing local issues to retransmit packets, determining when
+to abort a socket is complicated w/o knowing the starting time of
+the recovery since retrans_stamp may remain zero.
+
+This complication causes sub-optimal behavior that TCP may use the
+latest, instead of the first, retransmission time to compute the
+elapsed time of a stalling connection due to local issues. Then TCP
+may disregard TCP retries settings and keep retrying until it finally
+succeeds: not a good idea when the local host is already strained.
+
+The simple fix is to always timestamp the start of a recovery.
+It's worth noting that retrans_stamp is also used to compare echo
+timestamp values to detect spurious recovery. This patch does
+not break that because retrans_stamp is still later than when the
+original packet was sent.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 9 ++++-----
+ net/ipv4/tcp_timer.c | 23 +++--------------------
+ 2 files changed, 7 insertions(+), 25 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2958,13 +2958,12 @@ int tcp_retransmit_skb(struct sock *sk,
+ #endif
+ TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
+ tp->retrans_out += tcp_skb_pcount(skb);
+-
+- /* Save stamp of the first retransmit. */
+- if (!tp->retrans_stamp)
+- tp->retrans_stamp = tcp_skb_timestamp(skb);
+-
+ }
+
++ /* Save stamp of the first (attempted) retransmit. */
++ if (!tp->retrans_stamp)
++ tp->retrans_stamp = tcp_skb_timestamp(skb);
++
+ if (tp->undo_retrans < 0)
+ tp->undo_retrans = 0;
+ tp->undo_retrans += tcp_skb_pcount(skb);
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -22,28 +22,14 @@
+ #include <linux/gfp.h>
+ #include <net/tcp.h>
+
+-static u32 tcp_retransmit_stamp(const struct sock *sk)
+-{
+- u32 start_ts = tcp_sk(sk)->retrans_stamp;
+-
+- if (unlikely(!start_ts)) {
+- struct sk_buff *head = tcp_rtx_queue_head(sk);
+-
+- if (!head)
+- return 0;
+- start_ts = tcp_skb_timestamp(head);
+- }
+- return start_ts;
+-}
+-
+ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 elapsed, start_ts;
+ s32 remaining;
+
+- start_ts = tcp_retransmit_stamp(sk);
+- if (!icsk->icsk_user_timeout || !start_ts)
++ start_ts = tcp_sk(sk)->retrans_stamp;
++ if (!icsk->icsk_user_timeout)
+ return icsk->icsk_rto;
+ elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
+ remaining = icsk->icsk_user_timeout - elapsed;
+@@ -198,10 +184,7 @@ static bool retransmits_timed_out(struct
+ if (!inet_csk(sk)->icsk_retransmits)
+ return false;
+
+- start_ts = tcp_retransmit_stamp(sk);
+- if (!start_ts)
+- return false;
+-
++ start_ts = tcp_sk(sk)->retrans_stamp;
+ if (likely(timeout == 0)) {
+ linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
+
--- /dev/null
+From 01a523b071618abbc634d1958229fe3bd2dfa5fa Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 16 Jan 2019 15:05:32 -0800
+Subject: tcp: create a helper to model exponential backoff
+
+From: Yuchung Cheng <ycheng@google.com>
+
+commit 01a523b071618abbc634d1958229fe3bd2dfa5fa upstream.
+
+Create a helper to model TCP exponential backoff for the next patch.
+This is a pure refactor with no behavior change.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_timer.c | 27 ++++++++++++++++-----------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -160,7 +160,20 @@ static void tcp_mtu_probing(struct inet_
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ }
+
++static unsigned int tcp_model_timeout(struct sock *sk,
++ unsigned int boundary,
++ unsigned int rto_base)
++{
++ unsigned int linear_backoff_thresh, timeout;
+
++ linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
++ if (boundary <= linear_backoff_thresh)
++ timeout = ((2 << boundary) - 1) * rto_base;
++ else
++ timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
++ (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
++ return jiffies_to_msecs(timeout);
++}
+ /**
+ * retransmits_timed_out() - returns true if this connection has timed out
+ * @sk: The current socket
+@@ -178,23 +191,15 @@ static bool retransmits_timed_out(struct
+ unsigned int boundary,
+ unsigned int timeout)
+ {
+- const unsigned int rto_base = TCP_RTO_MIN;
+- unsigned int linear_backoff_thresh, start_ts;
++ unsigned int start_ts;
+
+ if (!inet_csk(sk)->icsk_retransmits)
+ return false;
+
+ start_ts = tcp_sk(sk)->retrans_stamp;
+- if (likely(timeout == 0)) {
+- linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
++ if (likely(timeout == 0))
++ timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN);
+
+- if (boundary <= linear_backoff_thresh)
+- timeout = ((2 << boundary) - 1) * rto_base;
+- else
+- timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
+- (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+- timeout = jiffies_to_msecs(timeout);
+- }
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
+ }
+