From: Greg Kroah-Hartman Date: Mon, 27 Sep 2021 12:23:31 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v5.4.150~12 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=088f5415bf519ea870f94195d7c9fe6f0969dbe3;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: tcp-address-problems-caused-by-edt-misshaps.patch tcp-adjust-rto_base-in-retransmits_timed_out.patch tcp-always-set-retrans_stamp-on-recovery.patch tcp-create-a-helper-to-model-exponential-backoff.patch --- diff --git a/queue-4.19/series b/queue-4.19/series index 335ae8f941b..2b99d63c64d 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -47,3 +47,7 @@ spi-fix-tegra20-build-with-config_pm-n.patch erofs-fix-up-erofs_lookup-tracepoint.patch arm64-dts-marvell-armada-37xx-extend-pcie-mem-space.patch pci-aardvark-fix-checking-for-pio-status.patch +tcp-address-problems-caused-by-edt-misshaps.patch +tcp-always-set-retrans_stamp-on-recovery.patch +tcp-create-a-helper-to-model-exponential-backoff.patch +tcp-adjust-rto_base-in-retransmits_timed_out.patch diff --git a/queue-4.19/tcp-address-problems-caused-by-edt-misshaps.patch b/queue-4.19/tcp-address-problems-caused-by-edt-misshaps.patch new file mode 100644 index 00000000000..9fa4bb24c24 --- /dev/null +++ b/queue-4.19/tcp-address-problems-caused-by-edt-misshaps.patch @@ -0,0 +1,95 @@ +From 9efdda4e3abed13f0903b7b6e4d4c2102019440a Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sat, 24 Nov 2018 09:12:24 -0800 +Subject: tcp: address problems caused by EDT misshaps + +From: Eric Dumazet + +commit 9efdda4e3abed13f0903b7b6e4d4c2102019440a upstream. + +When a qdisc setup including pacing FQ is dismantled and recreated, +some TCP packets are sent earlier than instructed by TCP stack. + +TCP can be fooled when ACK comes back, because the following +operation can return a negative value. + + tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + +Some paths in TCP stack were not dealing properly with this, +this patch addresses four of them. + +Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Qiumiao Zhang +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 16 ++++++++++------ + net/ipv4/tcp_timer.c | 10 ++++++---- + 2 files changed, 16 insertions(+), 10 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -581,10 +581,12 @@ static inline void tcp_rcv_rtt_measure_t + u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + u32 delta_us; + +- if (!delta) +- delta = 1; +- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); +- tcp_rcv_rtt_update(tp, delta_us, 0); ++ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { ++ if (!delta) ++ delta = 1; ++ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); ++ tcp_rcv_rtt_update(tp, delta_us, 0); ++ } + } + } + +@@ -2931,9 +2933,11 @@ static bool tcp_ack_update_rtt(struct so + if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + flag & FLAG_ACKED) { + u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; +- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + +- seq_rtt_us = ca_rtt_us = delta_us; ++ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { ++ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); ++ ca_rtt_us = seq_rtt_us; ++ } + } + rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + if (seq_rtt_us < 0) +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout + { + struct inet_connection_sock *icsk = inet_csk(sk); + u32 elapsed, start_ts; ++ s32 remaining; + + start_ts = tcp_retransmit_stamp(sk); + if (!icsk->icsk_user_timeout || !start_ts) + return icsk->icsk_rto; + elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; +- if (elapsed >= icsk->icsk_user_timeout) ++ remaining = icsk->icsk_user_timeout - elapsed; ++ if (remaining <= 0) + return 1; /* user timeout has passed; fire ASAP */ +- else +- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed)); ++ ++ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); + } + + /** +@@ -210,7 +212,7 @@ static bool retransmits_timed_out(struct + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + timeout = jiffies_to_msecs(timeout); + } +- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout; ++ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; + } + + /* A write timeout has occurred. Process the after effects. */ diff --git a/queue-4.19/tcp-adjust-rto_base-in-retransmits_timed_out.patch b/queue-4.19/tcp-adjust-rto_base-in-retransmits_timed_out.patch new file mode 100644 index 00000000000..aac74153bb3 --- /dev/null +++ b/queue-4.19/tcp-adjust-rto_base-in-retransmits_timed_out.patch @@ -0,0 +1,49 @@ +From 3256a2d6ab1f71f9a1bd2d7f6f18eb8108c48d17 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 30 Sep 2019 15:44:44 -0700 +Subject: tcp: adjust rto_base in retransmits_timed_out() + +From: Eric Dumazet + +commit 3256a2d6ab1f71f9a1bd2d7f6f18eb8108c48d17 upstream. + +The cited commit exposed an old retransmits_timed_out() bug +which assumed it could call tcp_model_timeout() with +TCP_RTO_MIN as rto_base for all states. + +But flows in SYN_SENT or SYN_RECV state uses a different +RTO base (1 sec instead of 200 ms, unless BPF choses +another value) + +This caused a reduction of SYN retransmits from 6 to 4 with +the default /proc/sys/net/ipv4/tcp_syn_retries value. + +Fixes: a41e8a88b06e ("tcp: better handle TCP_USER_TIMEOUT in SYN_SENT state") +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Cc: Marek Majkowski +Signed-off-by: David S. Miller +Signed-off-by: Qiumiao Zhang +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_timer.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -197,8 +197,13 @@ static bool retransmits_timed_out(struct + return false; + + start_ts = tcp_sk(sk)->retrans_stamp; +- if (likely(timeout == 0)) +- timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN); ++ if (likely(timeout == 0)) { ++ unsigned int rto_base = TCP_RTO_MIN; ++ ++ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) ++ rto_base = tcp_timeout_init(sk); ++ timeout = tcp_model_timeout(sk, boundary, rto_base); ++ } + + return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; + } diff --git a/queue-4.19/tcp-always-set-retrans_stamp-on-recovery.patch b/queue-4.19/tcp-always-set-retrans_stamp-on-recovery.patch new file mode 100644 index 00000000000..299de0143e8 --- /dev/null +++ b/queue-4.19/tcp-always-set-retrans_stamp-on-recovery.patch @@ -0,0 +1,104 @@ +From 7ae189759cc48cf8b54beebff566e9fd2d4e7d7c Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Wed, 16 Jan 2019 15:05:30 -0800 +Subject: tcp: always set retrans_stamp on recovery + +From: Yuchung Cheng + +commit 7ae189759cc48cf8b54beebff566e9fd2d4e7d7c upstream. + +Previously TCP socket's retrans_stamp is not set if the +retransmission has failed to send. As a result if a socket is +experiencing local issues to retransmit packets, determining when +to abort a socket is complicated w/o knowning the starting time of +the recovery since retrans_stamp may remain zero. + +This complication causes sub-optimal behavior that TCP may use the +latest, instead of the first, retransmission time to compute the +elapsed time of a stalling connection due to local issues. Then TCP +may disrecard TCP retries settings and keep retrying until it finally +succeed: not a good idea when the local host is already strained. + +The simple fix is to always timestamp the start of a recovery. +It's worth noting that retrans_stamp is also used to compare echo +timestamp values to detect spurious recovery. This patch does +not break that because retrans_stamp is still later than when the +original packet was sent. + +Signed-off-by: Yuchung Cheng +Signed-off-by: Eric Dumazet +Reviewed-by: Neal Cardwell +Reviewed-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Qiumiao Zhang +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 9 ++++----- + net/ipv4/tcp_timer.c | 23 +++-------------------- + 2 files changed, 7 insertions(+), 25 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2958,13 +2958,12 @@ int tcp_retransmit_skb(struct sock *sk, + #endif + TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; + tp->retrans_out += tcp_skb_pcount(skb); +- +- /* Save stamp of the first retransmit. */ +- if (!tp->retrans_stamp) +- tp->retrans_stamp = tcp_skb_timestamp(skb); +- + } + ++ /* Save stamp of the first (attempted) retransmit. */ ++ if (!tp->retrans_stamp) ++ tp->retrans_stamp = tcp_skb_timestamp(skb); ++ + if (tp->undo_retrans < 0) + tp->undo_retrans = 0; + tp->undo_retrans += tcp_skb_pcount(skb); +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -22,28 +22,14 @@ + #include + #include + +-static u32 tcp_retransmit_stamp(const struct sock *sk) +-{ +- u32 start_ts = tcp_sk(sk)->retrans_stamp; +- +- if (unlikely(!start_ts)) { +- struct sk_buff *head = tcp_rtx_queue_head(sk); +- +- if (!head) +- return 0; +- start_ts = tcp_skb_timestamp(head); +- } +- return start_ts; +-} +- + static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) + { + struct inet_connection_sock *icsk = inet_csk(sk); + u32 elapsed, start_ts; + s32 remaining; + +- start_ts = tcp_retransmit_stamp(sk); +- if (!icsk->icsk_user_timeout || !start_ts) ++ start_ts = tcp_sk(sk)->retrans_stamp; ++ if (!icsk->icsk_user_timeout) + return icsk->icsk_rto; + elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; + remaining = icsk->icsk_user_timeout - elapsed; +@@ -198,10 +184,7 @@ static bool retransmits_timed_out(struct + if (!inet_csk(sk)->icsk_retransmits) + return false; + +- start_ts = tcp_retransmit_stamp(sk); +- if (!start_ts) +- return false; +- ++ start_ts = tcp_sk(sk)->retrans_stamp; + if (likely(timeout == 0)) { + linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); + diff --git a/queue-4.19/tcp-create-a-helper-to-model-exponential-backoff.patch b/queue-4.19/tcp-create-a-helper-to-model-exponential-backoff.patch new file mode 100644 index 00000000000..88ea230e1cf --- /dev/null +++ b/queue-4.19/tcp-create-a-helper-to-model-exponential-backoff.patch @@ -0,0 +1,73 @@ +From 01a523b071618abbc634d1958229fe3bd2dfa5fa Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Wed, 16 Jan 2019 15:05:32 -0800 +Subject: tcp: create a helper to model exponential backoff + +From: Yuchung Cheng + +commit 01a523b071618abbc634d1958229fe3bd2dfa5fa upstream. + +Create a helper to model TCP exponential backoff for the next patch. +This is pure refactor w no behavior change. + +Signed-off-by: Yuchung Cheng +Signed-off-by: Eric Dumazet +Reviewed-by: Neal Cardwell +Reviewed-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Qiumiao Zhang +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_timer.c | 27 ++++++++++++++++----------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -160,7 +160,20 @@ static void tcp_mtu_probing(struct inet_ + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); + } + ++static unsigned int tcp_model_timeout(struct sock *sk, ++ unsigned int boundary, ++ unsigned int rto_base) ++{ ++ unsigned int linear_backoff_thresh, timeout; + ++ linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base); ++ if (boundary <= linear_backoff_thresh) ++ timeout = ((2 << boundary) - 1) * rto_base; ++ else ++ timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + ++ (boundary - linear_backoff_thresh) * TCP_RTO_MAX; ++ return jiffies_to_msecs(timeout); ++} + /** + * retransmits_timed_out() - returns true if this connection has timed out + * @sk: The current socket +@@ -178,23 +191,15 @@ static bool retransmits_timed_out(struct + unsigned int boundary, + unsigned int timeout) + { +- const unsigned int rto_base = TCP_RTO_MIN; +- unsigned int linear_backoff_thresh, start_ts; ++ unsigned int start_ts; + + if (!inet_csk(sk)->icsk_retransmits) + return false; + + start_ts = tcp_sk(sk)->retrans_stamp; +- if (likely(timeout == 0)) { +- linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); ++ if (likely(timeout == 0)) ++ timeout = tcp_model_timeout(sk, boundary, TCP_RTO_MIN); + +- if (boundary <= linear_backoff_thresh) +- timeout = ((2 << boundary) - 1) * rto_base; +- else +- timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + +- (boundary - linear_backoff_thresh) * TCP_RTO_MAX; +- timeout = jiffies_to_msecs(timeout); +- } + return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; + } +