]>
Commit | Line | Data |
---|---|---|
ffc20820 GKH |
1 | From foo@baz Fri 31 May 2019 03:16:39 PM PDT |
2 | From: Willem de Bruijn <willemb@google.com> | |
3 | Date: Thu, 30 May 2019 18:01:21 -0400 | |
4 | Subject: net: correct zerocopy refcnt with udp MSG_MORE | |
5 | ||
6 | From: Willem de Bruijn <willemb@google.com> | |
7 | ||
8 | [ Upstream commit 100f6d8e09905c59be45b6316f8f369c0be1b2d8 ] | |
9 | ||
10 | TCP zerocopy takes a uarg reference for every skb, plus one for the | |
11 | tcp_sendmsg_locked datapath temporarily, to avoid reaching refcnt zero | |
12 | as it builds, sends and frees skbs inside its inner loop. | |
13 | ||
14 | UDP and RAW zerocopy do not send inside the inner loop so do not need | |
15 | the extra sock_zerocopy_get + sock_zerocopy_put pair. Commit | |
16 | 52900d22288ed ("udp: elide zerocopy operation in hot path") introduced | |
17 | extra_uref to pass the initial reference taken in sock_zerocopy_alloc | |
18 | to the first generated skb. | |
19 | ||
20 | But, sock_zerocopy_realloc takes this extra reference at the start of | |
21 | every call. With MSG_MORE, no new skb may be generated to attach the | |
22 | extra_uref to, so refcnt is incorrectly 2 with only one skb. | |
23 | ||
24 | Do not take the extra ref if uarg && !tcp, which implies MSG_MORE. | |
25 | Update extra_uref accordingly. | |
26 | ||
27 | This conditional assignment triggers a false positive may be used | |
28 | uninitialized warning, so have to initialize extra_uref at define. | |
29 | ||
30 | Changes v1->v2: fix typo in Fixes SHA1 | |
31 | ||
32 | Fixes: 52900d22288e7 ("udp: elide zerocopy operation in hot path") | |
33 | Reported-by: syzbot <syzkaller@googlegroups.com> | |
34 | Diagnosed-by: Eric Dumazet <edumazet@google.com> | |
35 | Signed-off-by: Willem de Bruijn <willemb@google.com> | |
36 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
37 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
38 | --- | |
39 | net/core/skbuff.c | 6 +++++- | |
40 | net/ipv4/ip_output.c | 4 ++-- | |
41 | net/ipv6/ip6_output.c | 4 ++-- | |
42 | 3 files changed, 9 insertions(+), 5 deletions(-) | |
43 | ||
44 | --- a/net/core/skbuff.c | |
45 | +++ b/net/core/skbuff.c | |
46 | @@ -1001,7 +1001,11 @@ struct ubuf_info *sock_zerocopy_realloc( | |
47 | uarg->len++; | |
48 | uarg->bytelen = bytelen; | |
49 | atomic_set(&sk->sk_zckey, ++next); | |
50 | - sock_zerocopy_get(uarg); | |
51 | + | |
52 | + /* no extra ref when appending to datagram (MSG_MORE) */ | |
53 | + if (sk->sk_type == SOCK_STREAM) | |
54 | + sock_zerocopy_get(uarg); | |
55 | + | |
56 | return uarg; | |
57 | } | |
58 | } | |
59 | --- a/net/ipv4/ip_output.c | |
60 | +++ b/net/ipv4/ip_output.c | |
61 | @@ -883,7 +883,7 @@ static int __ip_append_data(struct sock | |
62 | int csummode = CHECKSUM_NONE; | |
63 | struct rtable *rt = (struct rtable *)cork->dst; | |
64 | unsigned int wmem_alloc_delta = 0; | |
65 | - bool paged, extra_uref; | |
66 | + bool paged, extra_uref = false; | |
67 | u32 tskey = 0; | |
68 | ||
69 | skb = skb_peek_tail(queue); | |
70 | @@ -923,7 +923,7 @@ static int __ip_append_data(struct sock | |
71 | uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); | |
72 | if (!uarg) | |
73 | return -ENOBUFS; | |
74 | - extra_uref = true; | |
75 | + extra_uref = !skb; /* only extra ref if !MSG_MORE */ | |
76 | if (rt->dst.dev->features & NETIF_F_SG && | |
77 | csummode == CHECKSUM_PARTIAL) { | |
78 | paged = true; | |
79 | --- a/net/ipv6/ip6_output.c | |
80 | +++ b/net/ipv6/ip6_output.c | |
81 | @@ -1275,7 +1275,7 @@ static int __ip6_append_data(struct sock | |
82 | int csummode = CHECKSUM_NONE; | |
83 | unsigned int maxnonfragsize, headersize; | |
84 | unsigned int wmem_alloc_delta = 0; | |
85 | - bool paged, extra_uref; | |
86 | + bool paged, extra_uref = false; | |
87 | ||
88 | skb = skb_peek_tail(queue); | |
89 | if (!skb) { | |
90 | @@ -1344,7 +1344,7 @@ emsgsize: | |
91 | uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb)); | |
92 | if (!uarg) | |
93 | return -ENOBUFS; | |
94 | - extra_uref = true; | |
95 | + extra_uref = !skb; /* only extra ref if !MSG_MORE */ | |
96 | if (rt->dst.dev->features & NETIF_F_SG && | |
97 | csummode == CHECKSUM_PARTIAL) { | |
98 | paged = true; |