Merge tag 'net-next-6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev...
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e0cef75f85fb92c173700b5b417b7e6c91e39716..30ef0c8f5e92d301c31ea1a05f662c1fc4cf37af 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -70,6 +70,7 @@
 #include <net/xfrm.h>
 #include <net/secure_seq.h>
 #include <net/busy_poll.h>
+#include <net/rstreason.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
@@ -729,7 +730,8 @@ out:
  *     Exception: precedence violation. We do not implement it in any case.
  */
 
-static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
+                             enum sk_rst_reason reason)
 {
        const struct tcphdr *th = tcp_hdr(skb);
        struct {
@@ -872,11 +874,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
         * routing might fail in this case. No choice here, if we choose to force
         * input interface, we will misroute in case of asymmetric route.
         */
-       if (sk) {
+       if (sk)
                arg.bound_dev_if = sk->sk_bound_dev_if;
-               if (sk_fullsock(sk))
-                       trace_tcp_send_reset(sk, skb);
-       }
+
+       trace_tcp_send_reset(sk, skb, reason);
 
        BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
                     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
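
Every reset now carries an explicit sk_rst_reason, and the tracepoint fires for listener and timewait sockets too, not only full sockets. Callers below derive the reason from the skb drop reason via sk_rst_convert_drop_reason(). A minimal sketch of that mapping idea, assuming the SK_RST_REASON_* values introduced by this series in include/net/rstreason.h (the real helper enumerates every convertible drop reason):

	static enum sk_rst_reason
	sketch_convert_drop_reason(enum skb_drop_reason reason)
	{
		switch (reason) {
		case SKB_DROP_REASON_NO_SOCKET:
			return SK_RST_REASON_NO_SOCKET;
		case SKB_DROP_REASON_TCP_RFC7323_PAWS:
			return SK_RST_REASON_TCP_RFC7323_PAWS;
		default:
			/* No dedicated counterpart: fall back. */
			return SK_RST_REASON_NOT_SPECIFIED;
		}
	}
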
@@ -1673,7 +1674,8 @@ static void tcp_v4_init_req(struct request_sock *req,
 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
-                                         struct request_sock *req)
+                                         struct request_sock *req,
+                                         u32 tw_isn)
 {
        tcp_v4_init_req(req, sk, skb);
 
@@ -1940,7 +1942,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
        return 0;
 
 reset:
-       tcp_v4_send_reset(rsk, skb);
+       tcp_v4_send_reset(rsk, skb, sk_rst_convert_drop_reason(reason));
 discard:
        kfree_skb_reason(skb, reason);
        /* Be careful here. If this function gets more complicated and
@@ -2001,7 +2003,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
                     enum skb_drop_reason *reason)
 {
-       u32 limit, tail_gso_size, tail_gso_segs;
+       u32 tail_gso_size, tail_gso_segs;
        struct skb_shared_info *shinfo;
        const struct tcphdr *th;
        struct tcphdr *thtail;
@@ -2010,6 +2012,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
        bool fragstolen;
        u32 gso_segs;
        u32 gso_size;
+       u64 limit;
        int delta;
 
        /* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -2051,10 +2054,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
              TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
            ((TCP_SKB_CB(tail)->tcp_flags ^
              TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
-#ifdef CONFIG_TLS_DEVICE
-           tail->decrypted != skb->decrypted ||
-#endif
            !mptcp_skb_can_collapse(tail, skb) ||
+           skb_cmp_decrypted(tail, skb) ||
            thtail->doff != th->doff ||
            memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
                goto no_coalesce;
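
Replacing the open-coded #ifdef CONFIG_TLS_DEVICE test with skb_cmp_decrypted() keeps preprocessor conditionals out of the call site. A sketch of the helper's shape, assuming skb->decrypted only exists when device TLS support is compiled in:

	static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
					    const struct sk_buff *skb2)
	{
	#ifdef CONFIG_TLS_DEVICE
		/* Non-zero when the two skbs disagree on decrypted status. */
		return skb2->decrypted - skb1->decrypted;
	#else
		return 0;	/* field not compiled in: never differs */
	#endif
	}
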
@@ -2107,7 +2108,13 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
        __skb_push(skb, hdrlen);
 
 no_coalesce:
-       limit = (u32)READ_ONCE(sk->sk_rcvbuf) + (u32)(READ_ONCE(sk->sk_sndbuf) >> 1);
+       /* sk->sk_backlog.len is reset only at the end of __release_sock().
+        * Both sk->sk_backlog.len and sk->sk_rmem_alloc could reach
+        * sk_rcvbuf in normal conditions.
+        */
+       limit = ((u64)READ_ONCE(sk->sk_rcvbuf)) << 1;
+
+       limit += ((u32)READ_ONCE(sk->sk_sndbuf)) >> 1;
 
        /* Only socket owner can try to collapse/prune rx queues
         * to reduce memory overhead, so add a little headroom here.
@@ -2115,6 +2122,8 @@ no_coalesce:
         */
        limit += 64 * 1024;
 
+       limit = min_t(u64, limit, UINT_MAX);
+
        if (unlikely(sk_add_backlog(sk, skb, limit))) {
                bh_unlock_sock(sk);
                *reason = SKB_DROP_REASON_SOCKET_BACKLOG;
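
The widening to u64 matters because the limit is now roughly 2 * sk_rcvbuf + sk_sndbuf / 2 + 64 KB; with large autotuned buffers that sum can exceed UINT_MAX, so it is computed in 64 bits and clamped before reaching sk_add_backlog(), which still works in u32. A minimal sketch of the same arithmetic, outside the kernel proper:

	/* Illustrative only: mirrors the overflow-safe shape above. */
	static u32 backlog_limit(u32 rcvbuf, u32 sndbuf)
	{
		u64 limit = (u64)rcvbuf << 1;	/* rmem_alloc and backlog.len
						 * may each reach sk_rcvbuf */
		limit += sndbuf >> 1;
		limit += 64 * 1024;		/* headroom; only the owner
						 * collapses/prunes rx queues */
		return min_t(u64, limit, UINT_MAX);
	}
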
@@ -2154,7 +2163,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
                                    skb->len - th->doff * 4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-       TCP_SKB_CB(skb)->tcp_tw_isn = 0;
        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
        TCP_SKB_CB(skb)->sacked  = 0;
        TCP_SKB_CB(skb)->has_rxtstamp =
@@ -2176,6 +2184,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
        bool refcounted;
        struct sock *sk;
        int ret;
+       u32 isn;
 
        drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
        if (skb->pkt_type != PACKET_HOST)
@@ -2213,7 +2222,6 @@ lookup:
        if (!sk)
                goto no_tcp_socket;
 
-process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;
 
@@ -2285,7 +2293,10 @@ process:
                } else {
                        drop_reason = tcp_child_process(sk, nsk, skb);
                        if (drop_reason) {
-                               tcp_v4_send_reset(nsk, skb);
+                               enum sk_rst_reason rst_reason;
+
+                               rst_reason = sk_rst_convert_drop_reason(drop_reason);
+                               tcp_v4_send_reset(nsk, skb, rst_reason);
                                goto discard_and_relse;
                        }
                        sock_put(sk);
@@ -2293,6 +2304,7 @@ process:
                }
        }
 
+process:
        if (static_branch_unlikely(&ip4_min_ttl)) {
                /* min_ttl can be changed concurrently from do_ip_setsockopt() */
                if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
@@ -2363,7 +2375,7 @@ csum_error:
 bad_packet:
                __TCP_INC_STATS(net, TCP_MIB_INERRS);
        } else {
-               tcp_v4_send_reset(NULL, skb);
+               tcp_v4_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
        }
 
 discard_it:
@@ -2391,7 +2403,7 @@ do_time_wait:
                inet_twsk_put(inet_twsk(sk));
                goto csum_error;
        }
-       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
        case TCP_TW_SYN: {
                struct sock *sk2 = inet_lookup_listener(net,
                                                        net->ipv4.tcp_death_row.hashinfo,
@@ -2405,6 +2417,7 @@ do_time_wait:
                        sk = sk2;
                        tcp_v4_restore_cb(skb);
                        refcounted = false;
+                       __this_cpu_write(tcp_tw_isn, isn);
                        goto process;
                }
        }
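
tcp_timewait_state_process() now reports a recycled ISN through &isn instead of stashing it in TCP_SKB_CB(skb)->tcp_tw_isn (whose initialisation was removed from tcp_v4_fill_cb() above). Parking it in a per-cpu variable is safe because the write and the later read both happen in the same softirq, with no chance of migration in between. A sketch of the consumer side, assuming it follows the shape of tcp_conn_request() and the DECLARE_PER_CPU(u32, tcp_tw_isn) added elsewhere in this series:

	static u32 sketch_take_tw_isn(void)
	{
		u32 isn = __this_cpu_read(tcp_tw_isn);

		if (isn)	/* one-shot value: clear it after use */
			__this_cpu_write(tcp_tw_isn, 0);
		return isn;
	}
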
@@ -2414,7 +2427,7 @@ do_time_wait:
                tcp_v4_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
-               tcp_v4_send_reset(sk, skb);
+               tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
                inet_twsk_deschedule_put(inet_twsk(sk));
                goto discard_it;
        case TCP_TW_SUCCESS:;
@@ -2424,7 +2437,6 @@ do_time_wait:
 
 static struct timewait_sock_ops tcp_timewait_sock_ops = {
        .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
-       .twsk_unique    = tcp_twsk_unique,
        .twsk_destructor= tcp_twsk_destructor,
 };
 
@@ -3507,7 +3519,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
 {
        struct net *net;
 
-       tcp_twsk_purge(net_exit_list, AF_INET);
+       tcp_twsk_purge(net_exit_list);
 
        list_for_each_entry(net, net_exit_list, exit_list) {
                inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);