]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
tcp: fix TCP_USER_TIMEOUT with zero window
authorEnke Chen <enchen@paloaltonetworks.com>
Fri, 15 Jan 2021 22:30:58 +0000 (14:30 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 27 Jan 2021 10:47:55 +0000 (11:47 +0100)
commit 9d9b1ee0b2d1c9e02b2338c4a4b0a062d2d3edac upstream.

The TCP session does not terminate with TCP_USER_TIMEOUT when data
remain untransmitted due to zero window.

The number of unanswered zero-window probes (tcp_probes_out) is
reset to zero with incoming acks irrespective of the window size,
as described in tcp_probe_timer():

    RFC 1122 4.2.2.17 requires the sender to stay open indefinitely
    as long as the receiver continues to respond probes. We support
    this by default and reset icsk_probes_out with incoming ACKs.

This counter, however, is the wrong one to be used in calculating the
duration that the window remains closed and data remain untransmitted.
Thanks to Jonathan Maxwell <jmaxwell37@gmail.com> for diagnosing the
actual issue.

In this patch a new timestamp is introduced for the socket in order to
track the elapsed time for the zero-window probes that have not been
answered with any non-zero window ack.

Fixes: 9721e709fa68 ("tcp: simplify window probe aborting on USER_TIMEOUT")
Reported-by: William McCall <william.mccall@gmail.com>
Co-developed-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Enke Chen <enchen@paloaltonetworks.com>
Reviewed-by: Yuchung Cheng <ycheng@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20210115223058.GA39267@localhost.localdomain
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/net/inet_connection_sock.h
net/ipv4/inet_connection_sock.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c

index c71eb294da954c14e5ac9944c9464906a763a18c..6c8f8e5e33c3dddbbf50837a4174e31ea3d6832c 100644 (file)
@@ -83,6 +83,8 @@ struct inet_connection_sock_af_ops {
  * @icsk_ext_hdr_len:     Network protocol overhead (IP/IPv6 options)
  * @icsk_ack:             Delayed ACK control data
  * @icsk_mtup;            MTU probing control data
+ * @icsk_probes_tstamp:    Probe timestamp (cleared by non-zero window ack)
+ * @icsk_user_timeout:    TCP_USER_TIMEOUT value
  */
 struct inet_connection_sock {
        /* inet_sock has to be the first member! */
@@ -133,6 +135,7 @@ struct inet_connection_sock {
 
                u32               probe_timestamp;
        } icsk_mtup;
+       u32                       icsk_probes_tstamp;
        u32                       icsk_user_timeout;
 
        u64                       icsk_ca_priv[104 / sizeof(u64)];
index 9745c52f49ca409276ae17a8889e5a059ec5be26..ac5c4f6cdefee6eeb1858d4dd721200bfaf69c71 100644 (file)
@@ -840,6 +840,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
                newicsk->icsk_retransmits = 0;
                newicsk->icsk_backoff     = 0;
                newicsk->icsk_probes_out  = 0;
+               newicsk->icsk_probes_tstamp = 0;
 
                /* Deinitialize accept_queue to trap illegal accesses. */
                memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
index d3ced6a39bcf1f48a12a3f3941fe53644f13f93f..6ddec8a23942beb5b2b065a328cfc85c0c7672d4 100644 (file)
@@ -2627,6 +2627,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        icsk->icsk_backoff = 0;
        tp->snd_cwnd = 2;
        icsk->icsk_probes_out = 0;
+       icsk->icsk_probes_tstamp = 0;
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        tp->snd_cwnd = TCP_INIT_CWND;
index adace90f49facb1c5331460520373182153e1e6a..7411a43134629ed4530342cb30c74b4dcc2cdb5f 100644 (file)
@@ -3286,6 +3286,7 @@ static void tcp_ack_probe(struct sock *sk)
                return;
        if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
                icsk->icsk_backoff = 0;
+               icsk->icsk_probes_tstamp = 0;
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
                /* Socket must be waked up by subsequent tcp_data_snd_check().
                 * This function is not for random using!
index 5e311e6a31d516a55e74d4f44599da7d2d9593a7..5da6ffce390c2346b0bdafa4a9e35bce05630600 100644 (file)
@@ -3835,6 +3835,7 @@ void tcp_send_probe0(struct sock *sk)
                /* Cancel probe timer, if it is not required. */
                icsk->icsk_probes_out = 0;
                icsk->icsk_backoff = 0;
+               icsk->icsk_probes_tstamp = 0;
                return;
        }
 
index dd5a6317a8018a45ad609f832ced6df2937ad453..7fcd116fbd378cb997eede31b2f0dc3b8c9481e4 100644 (file)
@@ -344,6 +344,7 @@ static void tcp_probe_timer(struct sock *sk)
 
        if (tp->packets_out || !skb) {
                icsk->icsk_probes_out = 0;
+               icsk->icsk_probes_tstamp = 0;
                return;
        }
 
@@ -355,13 +356,12 @@ static void tcp_probe_timer(struct sock *sk)
         * corresponding system limit. We also implement similar policy when
         * we use RTO to probe window in tcp_retransmit_timer().
         */
-       if (icsk->icsk_user_timeout) {
-               u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
-                                               tcp_probe0_base(sk));
-
-               if (elapsed >= icsk->icsk_user_timeout)
-                       goto abort;
-       }
+       if (!icsk->icsk_probes_tstamp)
+               icsk->icsk_probes_tstamp = tcp_jiffies32;
+       else if (icsk->icsk_user_timeout &&
+                (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+                msecs_to_jiffies(icsk->icsk_user_timeout))
+               goto abort;
 
        max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
        if (sock_flag(sk, SOCK_DEAD)) {