Merge tag 'net-next-6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev...
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e0cef75f85fb92c173700b5b417b7e6c91e39716..30ef0c8f5e92d301c31ea1a05f662c1fc4cf37af 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -70,6 +70,7 @@
 #include <net/xfrm.h>
 #include <net/secure_seq.h>
 #include <net/busy_poll.h>
+#include <net/rstreason.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
@@ -729,7 +730,8 @@ out:
  *     Exception: precedence violation. We do not implement it in any case.
  */
 
-static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
+                             enum sk_rst_reason reason)
 {
        const struct tcphdr *th = tcp_hdr(skb);
        struct {
@@ -872,11 +874,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
         * routing might fail in this case. No choice here, if we choose to force
         * input interface, we will misroute in case of asymmetric route.
         */
-       if (sk) {
+       if (sk)
                arg.bound_dev_if = sk->sk_bound_dev_if;
-               if (sk_fullsock(sk))
-                       trace_tcp_send_reset(sk, skb);
-       }
+
+       trace_tcp_send_reset(sk, skb, reason);
 
        BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
                     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
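
Every reset now carries an explicit sk_rst_reason, and the tracepoint fires for listener and timewait sockets too, not only full sockets. Callers below derive the reason from the skb drop reason via sk_rst_convert_drop_reason(). A minimal sketch of that mapping idea, assuming the SK_RST_REASON_* values introduced by this series in include/net/rstreason.h (the real helper enumerates every convertible drop reason):

	static enum sk_rst_reason
	sketch_convert_drop_reason(enum skb_drop_reason reason)
	{
		switch (reason) {
		case SKB_DROP_REASON_NO_SOCKET:
			return SK_RST_REASON_NO_SOCKET;
		case SKB_DROP_REASON_TCP_RFC7323_PAWS:
			return SK_RST_REASON_TCP_RFC7323_PAWS;
		default:
			/* No dedicated counterpart: fall back. */
			return SK_RST_REASON_NOT_SPECIFIED;
		}
	}
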
@@ -1673,7 +1674,8 @@ static void tcp_v4_init_req(struct request_sock *req,
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
-                                         struct request_sock *req)
+                                         struct request_sock *req,
+                                         u32 tw_isn)
 {
        tcp_v4_init_req(req, sk, skb);
 
@@ -1940,7 +1942,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
        return 0;
 
 reset:
-       tcp_v4_send_reset(rsk, skb);
+       tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
 discard:
        kfree_skb_reason(skb, reason);
        /* Be careful here. If this function gets more complicated and
@@ -2001,7 +2003,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
                     enum skb_drop_reason *reason)
 {
-       u32 limit, tail_gso_size, tail_gso_segs;
+       u32 tail_gso_size, tail_gso_segs;
        struct skb_shared_info *shinfo;
        const struct tcphdr *th;
        struct tcphdr *thtail;
@@ -2010,6 +2012,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
        bool fragstolen;
        u32 gso_segs;
        u32 gso_size;
+       u64 limit;
        int delta;
 
        /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -2051,10 +2054,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
              TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
            ((TCP_SKB_CB(tail)->tcp_flags ^
              TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
-#ifdef CONFIG_TLS_DEVICE
-           tail->decrypted != skb->decrypted ||
-#endif
            !mptcp_skb_can_collapse(tail, skb) ||
+           skb_cmp_decrypted(tail, skb) ||
            thtail->doff != th->doff ||
            memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
                goto no_coalesce;
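
Replacing the open-coded #ifdef CONFIG_TLS_DEVICE test with skb_cmp_decrypted() keeps preprocessor conditionals out of the call site. A sketch of the helper's shape, assuming skb->decrypted only exists when device TLS support is compiled in:

	static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
					    const struct sk_buff *skb2)
	{
	#ifdef CONFIG_TLS_DEVICE
		/* Non-zero when the two skbs disagree on decrypted status. */
		return skb2->decrypted - skb1->decrypted;
	#else
		return 0;	/* field not compiled in: never differs */
	#endif
	}
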
@@ -2107,7 +2108,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
        __skb_push(skb, hdrlen);
 
 no_coalesce:
-       limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+       /* sk->sk_backlog.len is reset only at the end of __release_sock().
+        * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
+        * sk_rcvbuf in normal conditions.
+        */
+       limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1;
+
+       limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1;
 
        /* Only socket owner can try to collapse/prune rx queues
         * to reduce memory overhead, so add a little headroom here.
@@ -2115,6 +2122,8 @@ no_coalesce:
         */
        limit += 64 * 1024;
 
+       limit = min_t(u64, limit, UINT_MAX);
+
        if (unlikely(sk_add_backlog(sk, skb, limit))) {
                bh_unlock_sock(sk);
                *reason = SKB_DROP_REASON_SOCKET_BACKLOG;
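
The widening to u64 matters because the limit is now roughly 2 * sk_rcvbuf + sk_sndbuf / 2 + 64 KB; with large autotuned buffers that sum can exceed UINT_MAX, so it is computed in 64 bits and clamped before reaching sk_add_backlog(), which still works in u32. A minimal sketch of the same arithmetic, outside the kernel proper:

	/* Illustrative only: mirrors the overflow-safe shape above. */
	static u32 backlog_limit(u32 rcvbuf, u32 sndbuf)
	{
		u64 limit = (u64)rcvbuf << 1;	/* rmem_alloc and backlog.len
						 * may each reach sk_rcvbuf */
		limit += sndbuf >> 1;
		limit += 64 * 1024;		/* headroom; only the owner
						 * collapses/prunes rx queues */
		return min_t(u64, limit, UINT_MAX);
	}
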
@@ -2154,7 +2163,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
                                    skb->len - th->doff * 4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-       TCP_SKB_CB(skb)->tcp_tw_isn = 0;
        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
        TCP_SKB_CB(skb)->sacked  = 0;
        TCP_SKB_CB(skb)->has_rxtstamp =
@@ -2176,6 +2184,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
        bool refcounted;
        struct sock *sk;
        int ret;
+       u32 isn;
 
        drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
        if (skb->pkt_type != PACKET_HOST)
@@ -2213,7 +2222,6 @@ lookup:
        if (!sk)
                goto no_tcp_socket;
 
-process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;
 
@@ -2285,7 +2293,10 @@ process:
                } else {
                        drop_reason = tcp_child_process(sk, nsk, skb);
                        if (drop_reason) {
-                               tcp_v4_send_reset(nsk, skb);
+                               enum sk_rst_reason rst_reason;
+
+                               rst_reason = sk_rst_convert_drop_reason(drop_reason);
+                               tcp_v4_send_reset(nsk, skb, rst_reason);
                                goto discard_and_relse;
                        }
                        sock_put(sk);
@@ -2293,6 +2304,7 @@ process:
                }
        }
 
+process:
        if (static_branch_unlikely(&ip4_min_ttl)) {
                /* min_ttl can be changed concurrently from do_ip_setsockopt() */
                if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
@@ -2363,7 +2375,7 @@ csum_error:
 bad_packet:
                __TCP_INC_STATS(net, TCP_MIB_INERRS);
        } else {
-               tcp_v4_send_reset(NULL, skb);
+               tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
        }
 
 discard_it:
@@ -2391,7 +2403,7 @@ do_time_wait:
                inet_twsk_put(inet_twsk(sk));
                goto csum_error;
        }
-       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
        case TCP_TW_SYN: {
                struct sock *sk2 = inet_lookup_listener(net,
                                                        net->ipv4.tcp_death_row.hashinfo,
@@ -2405,6 +2417,7 @@ do_time_wait:
                        sk = sk2;
                        tcp_v4_restore_cb(skb);
                        refcounted = false;
+                       __this_cpu_write(tcp_tw_isn, isn);
                        goto process;
                }
        }
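
tcp_timewait_state_process() now reports a recycled ISN through &isn instead of stashing it in TCP_SKB_CB(skb)->tcp_tw_isn (whose initialisation was removed from tcp_v4_fill_cb() above). Parking it in a per-cpu variable is safe because the write and the later read both happen in the same softirq, with no chance of migration in between. A sketch of the consumer side, assuming it follows the shape of tcp_conn_request() and the DECLARE_PER_CPU(u32, tcp_tw_isn) added elsewhere in this series:

	static u32 sketch_take_tw_isn(void)
	{
		u32 isn = __this_cpu_read(tcp_tw_isn);

		if (isn)	/* one-shot value: clear it after use */
			__this_cpu_write(tcp_tw_isn, 0);
		return isn;
	}
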
@@ -2414,7 +2427,7 @@ do_time_wait:
                tcp_v4_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
-               tcp_v4_send_reset(sk, skb);
+               tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
                inet_twsk_deschedule_put(inet_twsk(sk));
                goto discard_it;
        case TCP_TW_SUCCESS:;
@@ -2424,7 +2437,6 @@ do_time_wait:
 
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
-       .twsk_unique    = tcp_twsk_unique,
        .twsk_destructor= tcp_twsk_destructor,
 };
 
@@ -3507,7 +3519,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
-       tcp_twsk_purge(net_exit_list, AF_INET);
+       tcp_twsk_purge(net_exit_list);
 
        list_for_each_entry(net, net_exit_list, exit_list) {
                inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);