]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Sep 2021 12:23:31 +0000 (14:23 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Sep 2021 12:23:31 +0000 (14:23 +0200)
added patches:
tcp-address-problems-caused-by-edt-misshaps.patch
tcp-adjust-rto_base-in-retransmits_timed_out.patch
tcp-always-set-retrans_stamp-on-recovery.patch
tcp-create-a-helper-to-model-exponential-backoff.patch

queue-4.19/series
queue-4.19/tcp-address-problems-caused-by-edt-misshaps.patch [new file with mode: 0644]
queue-4.19/tcp-adjust-rto_base-in-retransmits_timed_out.patch [new file with mode: 0644]
queue-4.19/tcp-always-set-retrans_stamp-on-recovery.patch [new file with mode: 0644]
queue-4.19/tcp-create-a-helper-to-model-exponential-backoff.patch [new file with mode: 0644]

index 335ae8f941bf54670c52d5a76c41a8f77c145ddf..2b99d63c64d933fd1e67dcf978038d28e3352316 100644 (file)
@@ -47,3 +47,7 @@ spi-fix-tegra20-build-with-config_pm-n.patch
 erofs-fix-up-erofs_lookup-tracepoint.patch
 arm64-dts-marvell-armada-37xx-extend-pcie-mem-space.patch
 pci-aardvark-fix-checking-for-pio-status.patch
+tcp-address-problems-caused-by-edt-misshaps.patch
+tcp-always-set-retrans_stamp-on-recovery.patch
+tcp-create-a-helper-to-model-exponential-backoff.patch
+tcp-adjust-rto_base-in-retransmits_timed_out.patch
diff --git a/queue-4.19/tcp-address-problems-caused-by-edt-misshaps.patch b/queue-4.19/tcp-address-problems-caused-by-edt-misshaps.patch
new file mode 100644 (file)
index 0000000..9fa4bb2
--- /dev/null
@@ -0,0 +1,95 @@
+From 9efdda4e3abed13f0903b7b6e4d4c2102019440a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 24 Nov 2018 09:12:24 -0800
+Subject: tcp: address problems caused by EDT misshaps
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 9efdda4e3abed13f0903b7b6e4d4c2102019440a upstream.
+
+When a qdisc setup including pacing FQ is dismantled and recreated,
+some TCP packets are sent earlier than instructed by TCP stack.
+
+TCP can be fooled when ACK comes back, because the following
+operation can return a negative value.
+
+    tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+
+Some paths in the TCP stack were not dealing properly with this;
+this patch addresses four of them.
+
+Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   16 ++++++++++------
+ net/ipv4/tcp_timer.c |   10 ++++++----
+ 2 files changed, 16 insertions(+), 10 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -581,10 +581,12 @@ static inline void tcp_rcv_rtt_measure_t
+               u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+               u32 delta_us;
+-              if (!delta)
+-                      delta = 1;
+-              delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+-              tcp_rcv_rtt_update(tp, delta_us, 0);
++              if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
++                      if (!delta)
++                              delta = 1;
++                      delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
++                      tcp_rcv_rtt_update(tp, delta_us, 0);
++              }
+       }
+ }
+@@ -2931,9 +2933,11 @@ static bool tcp_ack_update_rtt(struct so
+       if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+           flag & FLAG_ACKED) {
+               u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+-              u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+-              seq_rtt_us = ca_rtt_us = delta_us;
++              if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
++                      seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
++                      ca_rtt_us = seq_rtt_us;
++              }
+       }
+       rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
+       if (seq_rtt_us < 0)
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       u32 elapsed, start_ts;
++      s32 remaining;
+       start_ts = tcp_retransmit_stamp(sk);
+       if (!icsk->icsk_user_timeout || !start_ts)
+               return icsk->icsk_rto;
+       elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
+-      if (elapsed >= icsk->icsk_user_timeout)
++      remaining = icsk->icsk_user_timeout - elapsed;
++      if (remaining <= 0)
+               return 1; /* user timeout has passed; fire ASAP */
+-      else
+-              return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
++
++      return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
+ }
+ /**
+@@ -210,7 +212,7 @@ static bool retransmits_timed_out(struct
+                               (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+               timeout = jiffies_to_msecs(timeout);
+       }
+-      return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
++      return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
+ }
+ /* A write timeout has occurred. Process the after effects. */
diff --git a/queue-4.19/tcp-adjust-rto_base-in-retransmits_timed_out.patch b/queue-4.19/tcp-adjust-rto_base-in-retransmits_timed_out.patch
new file mode 100644 (file)
index 0000000..aac7415
--- /dev/null
@@ -0,0 +1,49 @@
+From 3256a2d6ab1f71f9a1bd2d7f6f18eb8108c48d17 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 30 Sep 2019 15:44:44 -0700
+Subject: tcp: adjust rto_base in retransmits_timed_out()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 3256a2d6ab1f71f9a1bd2d7f6f18eb8108c48d17 upstream.
+
+The cited commit exposed an old retransmits_timed_out() bug
+which assumed it could call tcp_model_timeout() with
+TCP_RTO_MIN as rto_base for all states.
+
+But flows in SYN_SENT or SYN_RECV state use a different
+RTO base (1 sec instead of 200 ms, unless BPF chooses
+another value).
+
+This caused a reduction of SYN retransmits from 6 to 4 with
+the default /proc/sys/net/ipv4/tcp_syn_retries value.
+
+Fixes: a41e8a88b06e ("tcp: better handle TCP_USER_TIMEOUT in SYN_SENT state")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Marek Majkowski <marek@cloudflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_timer.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -197,8 +197,13 @@ static bool retransmits_timed_out(struct
+               return false;
+       start_ts = tcp_sk(sk)->retrans_stamp;
+-      if (likely(timeout == 0))
+-              timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN);
++      if (likely(timeout == 0)) {
++              unsigned int rto_base = TCP_RTO_MIN;
++
++              if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
++                      rto_base = tcp_timeout_init(sk);
++              timeout = tcp_model_timeout(sk, boundary, rto_base);
++      }
+       return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
+ }
diff --git a/queue-4.19/tcp-always-set-retrans_stamp-on-recovery.patch b/queue-4.19/tcp-always-set-retrans_stamp-on-recovery.patch
new file mode 100644 (file)
index 0000000..299de01
--- /dev/null
@@ -0,0 +1,104 @@
+From 7ae189759cc48cf8b54beebff566e9fd2d4e7d7c Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 16 Jan 2019 15:05:30 -0800
+Subject: tcp: always set retrans_stamp on recovery
+
+From: Yuchung Cheng <ycheng@google.com>
+
+commit 7ae189759cc48cf8b54beebff566e9fd2d4e7d7c upstream.
+
+Previously, a TCP socket's retrans_stamp was not set if the
+retransmission failed to send. As a result, if a socket is
+experiencing local issues retransmitting packets, determining when
+to abort the socket is complicated without knowing the starting time
+of the recovery, since retrans_stamp may remain zero.
+
+This complication causes sub-optimal behavior: TCP may use the
+latest, instead of the first, retransmission time to compute the
+elapsed time of a connection stalling due to local issues. Then TCP
+may disregard TCP retries settings and keep retrying until it finally
+succeeds: not a good idea when the local host is already strained.
+
+The simple fix is to always timestamp the start of a recovery.
+It's worth noting that retrans_stamp is also used to compare echo
+timestamp values to detect spurious recovery. This patch does
+not break that because retrans_stamp is still later than when the
+original packet was sent.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    9 ++++-----
+ net/ipv4/tcp_timer.c  |   23 +++--------------------
+ 2 files changed, 7 insertions(+), 25 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2958,13 +2958,12 @@ int tcp_retransmit_skb(struct sock *sk,
+ #endif
+               TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
+               tp->retrans_out += tcp_skb_pcount(skb);
+-
+-              /* Save stamp of the first retransmit. */
+-              if (!tp->retrans_stamp)
+-                      tp->retrans_stamp = tcp_skb_timestamp(skb);
+-
+       }
++      /* Save stamp of the first (attempted) retransmit. */
++      if (!tp->retrans_stamp)
++              tp->retrans_stamp = tcp_skb_timestamp(skb);
++
+       if (tp->undo_retrans < 0)
+               tp->undo_retrans = 0;
+       tp->undo_retrans += tcp_skb_pcount(skb);
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -22,28 +22,14 @@
+ #include <linux/gfp.h>
+ #include <net/tcp.h>
+-static u32 tcp_retransmit_stamp(const struct sock *sk)
+-{
+-      u32 start_ts = tcp_sk(sk)->retrans_stamp;
+-
+-      if (unlikely(!start_ts)) {
+-              struct sk_buff *head = tcp_rtx_queue_head(sk);
+-
+-              if (!head)
+-                      return 0;
+-              start_ts = tcp_skb_timestamp(head);
+-      }
+-      return start_ts;
+-}
+-
+ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       u32 elapsed, start_ts;
+       s32 remaining;
+-      start_ts = tcp_retransmit_stamp(sk);
+-      if (!icsk->icsk_user_timeout || !start_ts)
++      start_ts = tcp_sk(sk)->retrans_stamp;
++      if (!icsk->icsk_user_timeout)
+               return icsk->icsk_rto;
+       elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
+       remaining = icsk->icsk_user_timeout - elapsed;
+@@ -198,10 +184,7 @@ static bool retransmits_timed_out(struct
+       if (!inet_csk(sk)->icsk_retransmits)
+               return false;
+-      start_ts = tcp_retransmit_stamp(sk);
+-      if (!start_ts)
+-              return false;
+-
++      start_ts = tcp_sk(sk)->retrans_stamp;
+       if (likely(timeout == 0)) {
+               linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
diff --git a/queue-4.19/tcp-create-a-helper-to-model-exponential-backoff.patch b/queue-4.19/tcp-create-a-helper-to-model-exponential-backoff.patch
new file mode 100644 (file)
index 0000000..88ea230
--- /dev/null
@@ -0,0 +1,73 @@
+From 01a523b071618abbc634d1958229fe3bd2dfa5fa Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 16 Jan 2019 15:05:32 -0800
+Subject: tcp: create a helper to model exponential backoff
+
+From: Yuchung Cheng <ycheng@google.com>
+
+commit 01a523b071618abbc634d1958229fe3bd2dfa5fa upstream.
+
+Create a helper to model TCP exponential backoff for the next patch.
+This is a pure refactor with no behavior change.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Qiumiao Zhang <zhangqiumiao1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_timer.c |   27 ++++++++++++++++-----------
+ 1 file changed, 16 insertions(+), 11 deletions(-)
+
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -160,7 +160,20 @@ static void tcp_mtu_probing(struct inet_
+       tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ }
++static unsigned int tcp_model_timeout(struct sock *sk,
++                                    unsigned int boundary,
++                                    unsigned int rto_base)
++{
++      unsigned int linear_backoff_thresh, timeout;
++      linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
++      if (boundary <= linear_backoff_thresh)
++              timeout = ((2 << boundary) - 1) * rto_base;
++      else
++              timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
++                      (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
++      return jiffies_to_msecs(timeout);
++}
+ /**
+  *  retransmits_timed_out() - returns true if this connection has timed out
+  *  @sk:       The current socket
+@@ -178,23 +191,15 @@ static bool retransmits_timed_out(struct
+                                 unsigned int boundary,
+                                 unsigned int timeout)
+ {
+-      const unsigned int rto_base = TCP_RTO_MIN;
+-      unsigned int linear_backoff_thresh, start_ts;
++      unsigned int start_ts;
+       if (!inet_csk(sk)->icsk_retransmits)
+               return false;
+       start_ts = tcp_sk(sk)->retrans_stamp;
+-      if (likely(timeout == 0)) {
+-              linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
++      if (likely(timeout == 0))
++              timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN);
+-              if (boundary <= linear_backoff_thresh)
+-                      timeout = ((2 << boundary) - 1) * rto_base;
+-              else
+-                      timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
+-                              (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+-              timeout = jiffies_to_msecs(timeout);
+-      }
+       return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
+ }