Commit fa55523a (GKH)
From foo@baz Thu Dec 14 11:45:40 CET 2017
From: Wei Xu <wexu@redhat.com>
Date: Fri, 1 Dec 2017 05:10:36 -0500
Subject: vhost: fix skb leak in handle_rx()

From: Wei Xu <wexu@redhat.com>


[ Upstream commit 6e474083f3daf3a3546737f5d7d502ad12eb257c ]

Matthew found a roughly 40% tcp throughput regression with commit
c67df11f(vhost_net: try batch dequing from skb array) as discussed
in the following thread:
https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html

Eventually we figured out that it was a skb leak in handle_rx()
when sending packets to the VM. This usually happens when a guest
can not drain out vq as fast as vhost fills in, afterwards it sets
off the traffic jam and leaks skb(s) which occurs as no headcount
to send on the vq from vhost side.

This can be avoided by making sure we have got enough headcount
before actually consuming a skb from the batched rx array while
transmitting, which is simply done by moving checking the zero
headcount a bit ahead.

Signed-off-by: Wei Xu <wexu@redhat.com>
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vhost/net.c |   20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -782,16 +782,6 @@ static void handle_rx(struct vhost_net *
 		/* On error, stop handling until the next kick. */
 		if (unlikely(headcount < 0))
 			goto out;
-		if (nvq->rx_array)
-			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
-		/* On overrun, truncate and discard */
-		if (unlikely(headcount > UIO_MAXIOV)) {
-			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
-			err = sock->ops->recvmsg(sock, &msg,
-						 1, MSG_DONTWAIT | MSG_TRUNC);
-			pr_debug("Discarded rx packet: len %zd\n", sock_len);
-			continue;
-		}
 		/* OK, now we need to know about added descriptors. */
 		if (!headcount) {
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -804,6 +794,16 @@ static void handle_rx(struct vhost_net *
 			 * they refilled. */
 			goto out;
 		}
+		if (nvq->rx_array)
+			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+		/* On overrun, truncate and discard */
+		if (unlikely(headcount > UIO_MAXIOV)) {
+			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+			err = sock->ops->recvmsg(sock, &msg,
+						 1, MSG_DONTWAIT | MSG_TRUNC);
+			pr_debug("Discarded rx packet: len %zd\n", sock_len);
+			continue;
+		}
 		/* We don't need to be notified again. */
 		iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
 		fixup = msg.msg_iter;