git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 26 Jan 2019 10:07:06 +0000 (11:07 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 26 Jan 2019 10:07:06 +0000 (11:07 +0100)
added patches:
net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch
net-fix-usage-of-pskb_trim_rcsum.patch
net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch
net_sched-refetch-skb-protocol-for-each-filter.patch
openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch
vhost-log-dirty-page-correctly.patch

queue-4.9/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch [new file with mode: 0644]
queue-4.9/net-fix-usage-of-pskb_trim_rcsum.patch [new file with mode: 0644]
queue-4.9/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch [new file with mode: 0644]
queue-4.9/net_sched-refetch-skb-protocol-for-each-filter.patch [new file with mode: 0644]
queue-4.9/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch [new file with mode: 0644]
queue-4.9/series [new file with mode: 0644]
queue-4.9/vhost-log-dirty-page-correctly.patch [new file with mode: 0644]

diff --git a/queue-4.9/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch b/queue-4.9/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch
new file mode 100644 (file)
index 0000000..e287e4a
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Sat Jan 26 10:53:10 CET 2019
+From: Yunjian Wang <wangyunjian@huawei.com>
+Date: Thu, 17 Jan 2019 09:46:41 +0800
+Subject: net: bridge: Fix ethernet header pointer before check skb forwardable
+
+From: Yunjian Wang <wangyunjian@huawei.com>
+
+[ Upstream commit 28c1382fa28f2e2d9d0d6f25ae879b5af2ecbd03 ]
+
+The skb header should be set to ethernet header before using
+is_skb_forwardable. Because the ethernet header length has been
+considered in is_skb_forwardable(including dev->hard_header_len
+length).
+
+To reproduce the issue:
+1, add 2 ports on linux bridge br using following commands:
+$ brctl addbr br
+$ brctl addif br eth0
+$ brctl addif br eth1
+2, the MTU of eth0 and eth1 is 1500
+3, send a packet(Data 1480, UDP 8, IP 20, Ethernet 14, VLAN 4)
+from eth0 to eth1
+
+So the expect result is packet larger than 1500 cannot pass through
+eth0 and eth1. But currently, the packet passes through success, it
+means eth1's MTU limit doesn't take effect.
+
+Fixes: f6367b4660dd ("bridge: use is_skb_forwardable in forward path")
+Cc: bridge@lists.linux-foundation.org
+Cc: Nkolay Aleksandrov <nikolay@cumulusnetworks.com>
+Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
+Cc: Stephen Hemminger <stephen@networkplumber.org>
+Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_forward.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/net/bridge/br_forward.c
++++ b/net/bridge/br_forward.c
+@@ -35,10 +35,10 @@ static inline int should_deliver(const s
+ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
++      skb_push(skb, ETH_HLEN);
+       if (!is_skb_forwardable(skb->dev, skb))
+               goto drop;
+-      skb_push(skb, ETH_HLEN);
+       br_drop_fake_rtable(skb);
+       if (skb->ip_summed == CHECKSUM_PARTIAL &&
+@@ -96,12 +96,11 @@ static void __br_forward(const struct ne
+               net = dev_net(indev);
+       } else {
+               if (unlikely(netpoll_tx_running(to->br->dev))) {
+-                      if (!is_skb_forwardable(skb->dev, skb)) {
++                      skb_push(skb, ETH_HLEN);
++                      if (!is_skb_forwardable(skb->dev, skb))
+                               kfree_skb(skb);
+-                      } else {
+-                              skb_push(skb, ETH_HLEN);
++                      else
+                               br_netpoll_send_skb(to, skb);
+-                      }
+                       return;
+               }
+               br_hook = NF_BR_LOCAL_OUT;
diff --git a/queue-4.9/net-fix-usage-of-pskb_trim_rcsum.patch b/queue-4.9/net-fix-usage-of-pskb_trim_rcsum.patch
new file mode 100644 (file)
index 0000000..ddcd674
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Sat Jan 26 10:53:10 CET 2019
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+Date: Thu, 17 Jan 2019 15:34:38 +0000
+Subject: net: Fix usage of pskb_trim_rcsum
+
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+
+[ Upstream commit 6c57f0458022298e4da1729c67bd33ce41c14e7a ]
+
+In certain cases, pskb_trim_rcsum() may change skb pointers.
+Reinitialize header pointers afterwards to avoid potential
+use-after-frees. Add a note in the documentation of
+pskb_trim_rcsum(). Found by KASAN.
+
+Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/pppoe.c                  |    1 +
+ include/linux/skbuff.h                   |    1 +
+ net/bridge/br_netfilter_ipv6.c           |    1 +
+ net/bridge/netfilter/nft_reject_bridge.c |    1 +
+ net/ipv4/ip_input.c                      |    1 +
+ 5 files changed, 5 insertions(+)
+
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -442,6 +442,7 @@ static int pppoe_rcv(struct sk_buff *skb
+       if (pskb_trim_rcsum(skb, len))
+               goto drop;
++      ph = pppoe_hdr(skb);
+       pn = pppoe_pernet(dev_net(dev));
+       /* Note that get_item does a sock_hold(), so sk_pppox(po)
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -2962,6 +2962,7 @@ int pskb_trim_rcsum_slow(struct sk_buff
+  *
+  *    This is exactly the same as pskb_trim except that it ensures the
+  *    checksum of received packets are still valid after the operation.
++ *    It can change skb pointers.
+  */
+ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+--- a/net/bridge/br_netfilter_ipv6.c
++++ b/net/bridge/br_netfilter_ipv6.c
+@@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, st
+                                       IPSTATS_MIB_INDISCARDS);
+                       goto drop;
+               }
++              hdr = ipv6_hdr(skb);
+       }
+       if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb))
+               goto drop;
+--- a/net/bridge/netfilter/nft_reject_bridge.c
++++ b/net/bridge/netfilter/nft_reject_bridge.c
+@@ -236,6 +236,7 @@ static bool reject6_br_csum_ok(struct sk
+           pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
+               return false;
++      ip6h = ipv6_hdr(skb);
+       thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+       if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+               return false;
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -475,6 +475,7 @@ int ip_rcv(struct sk_buff *skb, struct n
+               goto drop;
+       }
++      iph = ip_hdr(skb);
+       skb->transport_header = skb->network_header + iph->ihl*4;
+       /* Remove any debris in the socket control block */
diff --git a/queue-4.9/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch b/queue-4.9/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch
new file mode 100644 (file)
index 0000000..52160a4
--- /dev/null
@@ -0,0 +1,147 @@
+From foo@baz Sat Jan 26 10:53:10 CET 2019
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 9 Jan 2019 09:57:39 +0000
+Subject: net: ipv4: Fix memory leak in network namespace dismantle
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit f97f4dd8b3bb9d0993d2491e0f22024c68109184 ]
+
+IPv4 routing tables are flushed in two cases:
+
+1. In response to events in the netdev and inetaddr notification chains
+2. When a network namespace is being dismantled
+
+In both cases only routes associated with a dead nexthop group are
+flushed. However, a nexthop group will only be marked as dead in case it
+is populated with actual nexthops using a nexthop device. This is not
+the case when the route in question is an error route (e.g.,
+'blackhole', 'unreachable').
+
+Therefore, when a network namespace is being dismantled such routes are
+not flushed and leaked [1].
+
+To reproduce:
+# ip netns add blue
+# ip -n blue route add unreachable 192.0.2.0/24
+# ip netns del blue
+
+Fix this by not skipping error routes that are not marked with
+RTNH_F_DEAD when flushing the routing tables.
+
+To prevent the flushing of such routes in case #1, add a parameter to
+fib_table_flush() that indicates if the table is flushed as part of
+namespace dismantle or not.
+
+Note that this problem does not exist in IPv6 since error routes are
+associated with the loopback device.
+
+[1]
+unreferenced object 0xffff888066650338 (size 56):
+  comm "ip", pid 1206, jiffies 4294786063 (age 26.235s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff  ..........ba....
+    e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00  ...d............
+  backtrace:
+    [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220
+    [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20
+    [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380
+    [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690
+    [<0000000014f62875>] netlink_sendmsg+0x929/0xe10
+    [<00000000bac9d967>] sock_sendmsg+0xc8/0x110
+    [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0
+    [<000000002e94f880>] __sys_sendmsg+0xf7/0x250
+    [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610
+    [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+    [<000000003a8b605b>] 0xffffffffffffffff
+unreferenced object 0xffff888061621c88 (size 48):
+  comm "ip", pid 1206, jiffies 4294786063 (age 26.235s)
+  hex dump (first 32 bytes):
+    6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff  kkkkkkkk..&_....
+  backtrace:
+    [<00000000733609e3>] fib_table_insert+0x978/0x1500
+    [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220
+    [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20
+    [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380
+    [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690
+    [<0000000014f62875>] netlink_sendmsg+0x929/0xe10
+    [<00000000bac9d967>] sock_sendmsg+0xc8/0x110
+    [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0
+    [<000000002e94f880>] __sys_sendmsg+0xf7/0x250
+    [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610
+    [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
+    [<000000003a8b605b>] 0xffffffffffffffff
+
+Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip_fib.h    |    2 +-
+ net/ipv4/fib_frontend.c |    4 ++--
+ net/ipv4/fib_trie.c     |   14 ++++++++++++--
+ 3 files changed, 15 insertions(+), 5 deletions(-)
+
+--- a/include/net/ip_fib.h
++++ b/include/net/ip_fib.h
+@@ -242,7 +242,7 @@ int fib_table_insert(struct net *, struc
+ int fib_table_delete(struct net *, struct fib_table *, struct fib_config *);
+ int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
+                  struct netlink_callback *cb);
+-int fib_table_flush(struct net *net, struct fib_table *table);
++int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all);
+ struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
+ void fib_table_flush_external(struct fib_table *table);
+ void fib_free_table(struct fib_table *tb);
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -193,7 +193,7 @@ static void fib_flush(struct net *net)
+               struct fib_table *tb;
+               hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
+-                      flushed += fib_table_flush(net, tb);
++                      flushed += fib_table_flush(net, tb, false);
+       }
+       if (flushed)
+@@ -1277,7 +1277,7 @@ static void ip_fib_net_exit(struct net *
+               hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
+                       hlist_del(&tb->tb_hlist);
+-                      fib_table_flush(net, tb);
++                      fib_table_flush(net, tb, true);
+                       fib_free_table(tb);
+               }
+       }
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1826,7 +1826,7 @@ void fib_table_flush_external(struct fib
+ }
+ /* Caller must hold RTNL. */
+-int fib_table_flush(struct net *net, struct fib_table *tb)
++int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
+ {
+       struct trie *t = (struct trie *)tb->tb_data;
+       struct key_vector *pn = t->kv;
+@@ -1874,7 +1874,17 @@ int fib_table_flush(struct net *net, str
+               hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
+                       struct fib_info *fi = fa->fa_info;
+-                      if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) {
++                      if (!fi ||
++                          (!(fi->fib_flags & RTNH_F_DEAD) &&
++                           !fib_props[fa->fa_type].error)) {
++                              slen = fa->fa_slen;
++                              continue;
++                      }
++
++                      /* Do not flush error routes if network namespace is
++                       * not being dismantled
++                       */
++                      if (!flush_all && fib_props[fa->fa_type].error) {
+                               slen = fa->fa_slen;
+                               continue;
+                       }
diff --git a/queue-4.9/net_sched-refetch-skb-protocol-for-each-filter.patch b/queue-4.9/net_sched-refetch-skb-protocol-for-each-filter.patch
new file mode 100644 (file)
index 0000000..53fe89a
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Sat Jan 26 10:53:10 CET 2019
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Fri, 11 Jan 2019 18:55:42 -0800
+Subject: net_sched: refetch skb protocol for each filter
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit cd0c4e70fc0ccfa705cdf55efb27519ce9337a26 ]
+
+Martin reported a set of filters don't work after changing
+from reclassify to continue. Looking into the code, it
+looks like skb protocol is not always fetched for each
+iteration of the filters. But, as demonstrated by Martin,
+TC actions could modify skb->protocol, for example act_vlan,
+this means we have to refetch skb protocol in each iteration,
+rather than using the one we fetch in the beginning of the loop.
+
+This bug is _not_ introduced by commit 3b3ae880266d
+("net: sched: consolidate tc_classify{,_compat}"), technically,
+if act_vlan is the only action that modifies skb protocol, then
+it is commit c7e2b9689ef8 ("sched: introduce vlan action") which
+introduced this bug.
+
+Reported-by: Martin Olsson <martin.olsson+netdev@sentorsecurity.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Cc: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_api.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -1850,7 +1850,6 @@ done:
+ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+               struct tcf_result *res, bool compat_mode)
+ {
+-      __be16 protocol = tc_skb_protocol(skb);
+ #ifdef CONFIG_NET_CLS_ACT
+       const struct tcf_proto *old_tp = tp;
+       int limit = 0;
+@@ -1858,6 +1857,7 @@ int tc_classify(struct sk_buff *skb, con
+ reclassify:
+ #endif
+       for (; tp; tp = rcu_dereference_bh(tp->next)) {
++              __be16 protocol = tc_skb_protocol(skb);
+               int err;
+               if (tp->protocol != protocol &&
+@@ -1884,7 +1884,6 @@ reset:
+       }
+       tp = old_tp;
+-      protocol = tc_skb_protocol(skb);
+       goto reclassify;
+ #endif
+ }
diff --git a/queue-4.9/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch b/queue-4.9/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch
new file mode 100644 (file)
index 0000000..523a48b
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Sat Jan 26 10:53:10 CET 2019
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+Date: Mon, 14 Jan 2019 09:16:56 +0000
+Subject: openvswitch: Avoid OOB read when parsing flow nlattrs
+
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+
+[ Upstream commit 04a4af334b971814eedf4e4a413343ad3287d9a9 ]
+
+For nested and variable attributes, the expected length of an attribute
+is not known and marked by a negative number.  This results in an OOB
+read when the expected length is later used to check if the attribute is
+all zeros. Fix this by using the actual length of the attribute rather
+than the expected length.
+
+Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_netlink.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -409,7 +409,7 @@ static int __parse_flow_nlattrs(const st
+                       return -EINVAL;
+               }
+-              if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
++              if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
+                       attrs |= 1 << type;
+                       a[type] = nla;
+               }
diff --git a/queue-4.9/series b/queue-4.9/series
new file mode 100644 (file)
index 0000000..ce5814f
--- /dev/null
@@ -0,0 +1,6 @@
+net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch
+net-fix-usage-of-pskb_trim_rcsum.patch
+openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch
+vhost-log-dirty-page-correctly.patch
+net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch
+net_sched-refetch-skb-protocol-for-each-filter.patch
diff --git a/queue-4.9/vhost-log-dirty-page-correctly.patch b/queue-4.9/vhost-log-dirty-page-correctly.patch
new file mode 100644 (file)
index 0000000..49c537a
--- /dev/null
@@ -0,0 +1,202 @@
+From foo@baz Sat Jan 26 10:53:10 CET 2019
+From: Jason Wang <jasowang@redhat.com>
+Date: Wed, 16 Jan 2019 16:54:42 +0800
+Subject: vhost: log dirty page correctly
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ]
+
+Vhost dirty page logging API is designed to sync through GPA. But we
+try to log GIOVA when device IOTLB is enabled. This is wrong and may
+lead to missing data after migration.
+
+To solve this issue, when logging with device IOTLB enabled, we will:
+
+1) reuse the device IOTLB translation result of GIOVA->HVA mapping to
+   get HVA, for writable descriptor, get HVA through iovec. For used
+   ring update, translate its GIOVA to HVA
+2) traverse the GPA->HVA mapping to get the possible GPA and log
+   through GPA. Pay attention this reverse mapping is not guaranteed
+   to be unique, so we should log each possible GPA in this case.
+
+This fix the failure of scp to guest during migration. In -next, we
+will probably support passing GIOVA->GPA instead of GIOVA->HVA.
+
+Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
+Reported-by: Jintack Lim <jintack@cs.columbia.edu>
+Cc: Jintack Lim <jintack@cs.columbia.edu>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c   |    3 +
+ drivers/vhost/vhost.c |   97 ++++++++++++++++++++++++++++++++++++++++++--------
+ drivers/vhost/vhost.h |    3 +
+ 3 files changed, 87 insertions(+), 16 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -751,7 +751,8 @@ static void handle_rx(struct vhost_net *
+               vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
+                                           headcount);
+               if (unlikely(vq_log))
+-                      vhost_log_write(vq, vq_log, log, vhost_len);
++                      vhost_log_write(vq, vq_log, log, vhost_len,
++                                      vq->iov, in);
+               total_len += vhost_len;
+               if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+                       vhost_poll_queue(&vq->poll);
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -1646,13 +1646,87 @@ static int log_write(void __user *log_ba
+       return r;
+ }
++static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
++{
++      struct vhost_umem *umem = vq->umem;
++      struct vhost_umem_node *u;
++      u64 start, end, l, min;
++      int r;
++      bool hit = false;
++
++      while (len) {
++              min = len;
++              /* More than one GPAs can be mapped into a single HVA. So
++               * iterate all possible umems here to be safe.
++               */
++              list_for_each_entry(u, &umem->umem_list, link) {
++                      if (u->userspace_addr > hva - 1 + len ||
++                          u->userspace_addr - 1 + u->size < hva)
++                              continue;
++                      start = max(u->userspace_addr, hva);
++                      end = min(u->userspace_addr - 1 + u->size,
++                                hva - 1 + len);
++                      l = end - start + 1;
++                      r = log_write(vq->log_base,
++                                    u->start + start - u->userspace_addr,
++                                    l);
++                      if (r < 0)
++                              return r;
++                      hit = true;
++                      min = min(l, min);
++              }
++
++              if (!hit)
++                      return -EFAULT;
++
++              len -= min;
++              hva += min;
++      }
++
++      return 0;
++}
++
++static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
++{
++      struct iovec iov[64];
++      int i, ret;
++
++      if (!vq->iotlb)
++              return log_write(vq->log_base, vq->log_addr + used_offset, len);
++
++      ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
++                           len, iov, 64, VHOST_ACCESS_WO);
++      if (ret)
++              return ret;
++
++      for (i = 0; i < ret; i++) {
++              ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
++                                  iov[i].iov_len);
++              if (ret)
++                      return ret;
++      }
++
++      return 0;
++}
++
+ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+-                  unsigned int log_num, u64 len)
++                  unsigned int log_num, u64 len, struct iovec *iov, int count)
+ {
+       int i, r;
+       /* Make sure data written is seen before log. */
+       smp_wmb();
++
++      if (vq->iotlb) {
++              for (i = 0; i < count; i++) {
++                      r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
++                                        iov[i].iov_len);
++                      if (r < 0)
++                              return r;
++              }
++              return 0;
++      }
++
+       for (i = 0; i < log_num; ++i) {
+               u64 l = min(log[i].len, len);
+               r = log_write(vq->log_base, log[i].addr, l);
+@@ -1682,9 +1756,8 @@ static int vhost_update_used_flags(struc
+               smp_wmb();
+               /* Log used flag write. */
+               used = &vq->used->flags;
+-              log_write(vq->log_base, vq->log_addr +
+-                        (used - (void __user *)vq->used),
+-                        sizeof vq->used->flags);
++              log_used(vq, (used - (void __user *)vq->used),
++                       sizeof vq->used->flags);
+               if (vq->log_ctx)
+                       eventfd_signal(vq->log_ctx, 1);
+       }
+@@ -1702,9 +1775,8 @@ static int vhost_update_avail_event(stru
+               smp_wmb();
+               /* Log avail event write */
+               used = vhost_avail_event(vq);
+-              log_write(vq->log_base, vq->log_addr +
+-                        (used - (void __user *)vq->used),
+-                        sizeof *vhost_avail_event(vq));
++              log_used(vq, (used - (void __user *)vq->used),
++                       sizeof *vhost_avail_event(vq));
+               if (vq->log_ctx)
+                       eventfd_signal(vq->log_ctx, 1);
+       }
+@@ -2103,10 +2175,8 @@ static int __vhost_add_used_n(struct vho
+               /* Make sure data is seen before log. */
+               smp_wmb();
+               /* Log used ring entry write. */
+-              log_write(vq->log_base,
+-                        vq->log_addr +
+-                         ((void __user *)used - (void __user *)vq->used),
+-                        count * sizeof *used);
++              log_used(vq, ((void __user *)used - (void __user *)vq->used),
++                       count * sizeof *used);
+       }
+       old = vq->last_used_idx;
+       new = (vq->last_used_idx += count);
+@@ -2148,9 +2218,8 @@ int vhost_add_used_n(struct vhost_virtqu
+               /* Make sure used idx is seen before log. */
+               smp_wmb();
+               /* Log used index update. */
+-              log_write(vq->log_base,
+-                        vq->log_addr + offsetof(struct vring_used, idx),
+-                        sizeof vq->used->idx);
++              log_used(vq, offsetof(struct vring_used, idx),
++                       sizeof vq->used->idx);
+               if (vq->log_ctx)
+                       eventfd_signal(vq->log_ctx, 1);
+       }
+--- a/drivers/vhost/vhost.h
++++ b/drivers/vhost/vhost.h
+@@ -199,7 +199,8 @@ bool vhost_vq_avail_empty(struct vhost_d
+ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
+ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+-                  unsigned int log_num, u64 len);
++                  unsigned int log_num, u64 len,
++                  struct iovec *iov, int count);
+ int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
+ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);