From: Greg Kroah-Hartman Date: Sat, 26 Jan 2019 10:07:06 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.9.154~70 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d0bf63bf44247afa779ab87d06e2079f53137bc6;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch net-fix-usage-of-pskb_trim_rcsum.patch net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch net_sched-refetch-skb-protocol-for-each-filter.patch openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch vhost-log-dirty-page-correctly.patch --- diff --git a/queue-4.9/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch b/queue-4.9/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch new file mode 100644 index 00000000000..e287e4af670 --- /dev/null +++ b/queue-4.9/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch @@ -0,0 +1,69 @@ +From foo@baz Sat Jan 26 10:53:10 CET 2019 +From: Yunjian Wang +Date: Thu, 17 Jan 2019 09:46:41 +0800 +Subject: net: bridge: Fix ethernet header pointer before check skb forwardable + +From: Yunjian Wang + +[ Upstream commit 28c1382fa28f2e2d9d0d6f25ae879b5af2ecbd03 ] + +The skb header should be set to ethernet header before using +is_skb_forwardable. Because the ethernet header length has been +considered in is_skb_forwardable(including dev->hard_header_len +length). + +To reproduce the issue: +1, add 2 ports on linux bridge br using following commands: +$ brctl addbr br +$ brctl addif br eth0 +$ brctl addif br eth1 +2, the MTU of eth0 and eth1 is 1500 +3, send a packet(Data 1480, UDP 8, IP 20, Ethernet 14, VLAN 4) +from eth0 to eth1 + +So the expect result is packet larger than 1500 cannot pass through +eth0 and eth1. But currently, the packet passes through success, it +means eth1's MTU limit doesn't take effect. + +Fixes: f6367b4660dd ("bridge: use is_skb_forwardable in forward path") +Cc: bridge@lists.linux-foundation.org +Cc: Nkolay Aleksandrov +Cc: Roopa Prabhu +Cc: Stephen Hemminger +Signed-off-by: Yunjian Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_forward.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/net/bridge/br_forward.c ++++ b/net/bridge/br_forward.c +@@ -35,10 +35,10 @@ static inline int should_deliver(const s + + int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) + { ++ skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + +- skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && +@@ -96,12 +96,11 @@ static void __br_forward(const struct ne + net = dev_net(indev); + } else { + if (unlikely(netpoll_tx_running(to->br->dev))) { +- if (!is_skb_forwardable(skb->dev, skb)) { ++ skb_push(skb, ETH_HLEN); ++ if (!is_skb_forwardable(skb->dev, skb)) + kfree_skb(skb); +- } else { +- skb_push(skb, ETH_HLEN); ++ else + br_netpoll_send_skb(to, skb); +- } + return; + } + br_hook = NF_BR_LOCAL_OUT; diff --git a/queue-4.9/net-fix-usage-of-pskb_trim_rcsum.patch b/queue-4.9/net-fix-usage-of-pskb_trim_rcsum.patch new file mode 100644 index 00000000000..ddcd674922f --- /dev/null +++ b/queue-4.9/net-fix-usage-of-pskb_trim_rcsum.patch @@ -0,0 +1,75 @@ +From foo@baz Sat Jan 26 10:53:10 CET 2019 +From: Ross Lagerwall +Date: Thu, 17 Jan 2019 15:34:38 +0000 +Subject: net: Fix usage of pskb_trim_rcsum + +From: Ross Lagerwall + +[ Upstream commit 6c57f0458022298e4da1729c67bd33ce41c14e7a ] + +In certain cases, pskb_trim_rcsum() may change skb pointers. +Reinitialize header pointers afterwards to avoid potential +use-after-frees. Add a note in the documentation of +pskb_trim_rcsum(). Found by KASAN. + +Signed-off-by: Ross Lagerwall +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 1 + + include/linux/skbuff.h | 1 + + net/bridge/br_netfilter_ipv6.c | 1 + + net/bridge/netfilter/nft_reject_bridge.c | 1 + + net/ipv4/ip_input.c | 1 + + 5 files changed, 5 insertions(+) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -442,6 +442,7 @@ static int pppoe_rcv(struct sk_buff *skb + if (pskb_trim_rcsum(skb, len)) + goto drop; + ++ ph = pppoe_hdr(skb); + pn = pppoe_pernet(dev_net(dev)); + + /* Note that get_item does a sock_hold(), so sk_pppox(po) +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2962,6 +2962,7 @@ int pskb_trim_rcsum_slow(struct sk_buff + * + * This is exactly the same as pskb_trim except that it ensures the + * checksum of received packets are still valid after the operation. ++ * It can change skb pointers. 
+ */ + + static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) +--- a/net/bridge/br_netfilter_ipv6.c ++++ b/net/bridge/br_netfilter_ipv6.c +@@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, st + IPSTATS_MIB_INDISCARDS); + goto drop; + } ++ hdr = ipv6_hdr(skb); + } + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + goto drop; +--- a/net/bridge/netfilter/nft_reject_bridge.c ++++ b/net/bridge/netfilter/nft_reject_bridge.c +@@ -236,6 +236,7 @@ static bool reject6_br_csum_ok(struct sk + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + ++ ip6h = ipv6_hdr(skb); + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -475,6 +475,7 @@ int ip_rcv(struct sk_buff *skb, struct n + goto drop; + } + ++ iph = ip_hdr(skb); + skb->transport_header = skb->network_header + iph->ihl*4; + + /* Remove any debris in the socket control block */ diff --git a/queue-4.9/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch b/queue-4.9/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch new file mode 100644 index 00000000000..52160a488f8 --- /dev/null +++ b/queue-4.9/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch @@ -0,0 +1,147 @@ +From foo@baz Sat Jan 26 10:53:10 CET 2019 +From: Ido Schimmel +Date: Wed, 9 Jan 2019 09:57:39 +0000 +Subject: net: ipv4: Fix memory leak in network namespace dismantle + +From: Ido Schimmel + +[ Upstream commit f97f4dd8b3bb9d0993d2491e0f22024c68109184 ] + +IPv4 routing tables are flushed in two cases: + +1. In response to events in the netdev and inetaddr notification chains +2. When a network namespace is being dismantled + +In both cases only routes associated with a dead nexthop group are +flushed. However, a nexthop group will only be marked as dead in case it +is populated with actual nexthops using a nexthop device. This is not +the case when the route in question is an error route (e.g., +'blackhole', 'unreachable'). + +Therefore, when a network namespace is being dismantled such routes are +not flushed and leaked [1]. + +To reproduce: +# ip netns add blue +# ip -n blue route add unreachable 192.0.2.0/24 +# ip netns del blue + +Fix this by not skipping error routes that are not marked with +RTNH_F_DEAD when flushing the routing tables. + +To prevent the flushing of such routes in case #1, add a parameter to +fib_table_flush() that indicates if the table is flushed as part of +namespace dismantle or not. + +Note that this problem does not exist in IPv6 since error routes are +associated with the loopback device. + +[1] +unreferenced object 0xffff888066650338 (size 56): + comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff ..........ba.... + e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00 ...d............ 
+ backtrace: + [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 + [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 + [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 + [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 + [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 + [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 + [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 + [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 + [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 + [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [<000000003a8b605b>] 0xffffffffffffffff +unreferenced object 0xffff888061621c88 (size 48): + comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) + hex dump (first 32 bytes): + 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk + 6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff kkkkkkkk..&_.... + backtrace: + [<00000000733609e3>] fib_table_insert+0x978/0x1500 + [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 + [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 + [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 + [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 + [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 + [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 + [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 + [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 + [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 + [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [<000000003a8b605b>] 0xffffffffffffffff + +Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.") +Signed-off-by: Ido Schimmel +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip_fib.h | 2 +- + net/ipv4/fib_frontend.c | 4 ++-- + net/ipv4/fib_trie.c | 14 ++++++++++++-- + 3 files changed, 15 insertions(+), 5 deletions(-) + +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -242,7 +242,7 @@ int fib_table_insert(struct net *, struc + int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); + int fib_table_dump(struct fib_table *table, struct sk_buff *skb, + struct netlink_callback *cb); +-int fib_table_flush(struct net *net, struct fib_table *table); ++int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); + struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); + void fib_table_flush_external(struct fib_table *table); + void fib_free_table(struct fib_table *tb); +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -193,7 +193,7 @@ static void fib_flush(struct net *net) + struct fib_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) +- flushed += fib_table_flush(net, tb); ++ flushed += fib_table_flush(net, tb, false); + } + + if (flushed) +@@ -1277,7 +1277,7 @@ static void ip_fib_net_exit(struct net * + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { + hlist_del(&tb->tb_hlist); +- fib_table_flush(net, tb); ++ fib_table_flush(net, tb, true); + fib_free_table(tb); + } + } +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1826,7 +1826,7 @@ void fib_table_flush_external(struct fib + } + + /* Caller must hold RTNL. 
*/ +-int fib_table_flush(struct net *net, struct fib_table *tb) ++int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) + { + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; +@@ -1874,7 +1874,17 @@ int fib_table_flush(struct net *net, str + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + +- if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) { ++ if (!fi || ++ (!(fi->fib_flags & RTNH_F_DEAD) && ++ !fib_props[fa->fa_type].error)) { ++ slen = fa->fa_slen; ++ continue; ++ } ++ ++ /* Do not flush error routes if network namespace is ++ * not being dismantled ++ */ ++ if (!flush_all && fib_props[fa->fa_type].error) { + slen = fa->fa_slen; + continue; + } diff --git a/queue-4.9/net_sched-refetch-skb-protocol-for-each-filter.patch b/queue-4.9/net_sched-refetch-skb-protocol-for-each-filter.patch new file mode 100644 index 00000000000..53fe89a74c2 --- /dev/null +++ b/queue-4.9/net_sched-refetch-skb-protocol-for-each-filter.patch @@ -0,0 +1,60 @@ +From foo@baz Sat Jan 26 10:53:10 CET 2019 +From: Cong Wang +Date: Fri, 11 Jan 2019 18:55:42 -0800 +Subject: net_sched: refetch skb protocol for each filter + +From: Cong Wang + +[ Upstream commit cd0c4e70fc0ccfa705cdf55efb27519ce9337a26 ] + +Martin reported a set of filters don't work after changing +from reclassify to continue. Looking into the code, it +looks like skb protocol is not always fetched for each +iteration of the filters. But, as demonstrated by Martin, +TC actions could modify skb->protocol, for example act_vlan, +this means we have to refetch skb protocol in each iteration, +rather than using the one we fetch in the beginning of the loop. + +This bug is _not_ introduced by commit 3b3ae880266d +("net: sched: consolidate tc_classify{,_compat}"), technically, +if act_vlan is the only action that modifies skb protocol, then +it is commit c7e2b9689ef8 ("sched: introduce vlan action") which +introduced this bug. + +Reported-by: Martin Olsson +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -1850,7 +1850,6 @@ done: + int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) + { +- __be16 protocol = tc_skb_protocol(skb); + #ifdef CONFIG_NET_CLS_ACT + const struct tcf_proto *old_tp = tp; + int limit = 0; +@@ -1858,6 +1857,7 @@ int tc_classify(struct sk_buff *skb, con + reclassify: + #endif + for (; tp; tp = rcu_dereference_bh(tp->next)) { ++ __be16 protocol = tc_skb_protocol(skb); + int err; + + if (tp->protocol != protocol && +@@ -1884,7 +1884,6 @@ reset: + } + + tp = old_tp; +- protocol = tc_skb_protocol(skb); + goto reclassify; + #endif + } diff --git a/queue-4.9/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch b/queue-4.9/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch new file mode 100644 index 00000000000..523a48bad0c --- /dev/null +++ b/queue-4.9/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Jan 26 10:53:10 CET 2019 +From: Ross Lagerwall +Date: Mon, 14 Jan 2019 09:16:56 +0000 +Subject: openvswitch: Avoid OOB read when parsing flow nlattrs + +From: Ross Lagerwall + +[ Upstream commit 04a4af334b971814eedf4e4a413343ad3287d9a9 ] + +For nested and variable attributes, the expected length of an attribute +is not known and marked by a negative number. This results in an OOB +read when the expected length is later used to check if the attribute is +all zeros. Fix this by using the actual length of the attribute rather +than the expected length. + +Signed-off-by: Ross Lagerwall +Acked-by: Pravin B Shelar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -409,7 +409,7 @@ static int __parse_flow_nlattrs(const st + return -EINVAL; + } + +- if (!nz || !is_all_zero(nla_data(nla), expected_len)) { ++ if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { + attrs |= 1 << type; + a[type] = nla; + } diff --git a/queue-4.9/series b/queue-4.9/series new file mode 100644 index 00000000000..ce5814f07af --- /dev/null +++ b/queue-4.9/series @@ -0,0 +1,6 @@ +net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch +net-fix-usage-of-pskb_trim_rcsum.patch +openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch +vhost-log-dirty-page-correctly.patch +net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch +net_sched-refetch-skb-protocol-for-each-filter.patch diff --git a/queue-4.9/vhost-log-dirty-page-correctly.patch b/queue-4.9/vhost-log-dirty-page-correctly.patch new file mode 100644 index 00000000000..49c537af4d0 --- /dev/null +++ b/queue-4.9/vhost-log-dirty-page-correctly.patch @@ -0,0 +1,202 @@ +From foo@baz Sat Jan 26 10:53:10 CET 2019 +From: Jason Wang +Date: Wed, 16 Jan 2019 16:54:42 +0800 +Subject: vhost: log dirty page correctly + +From: Jason Wang + +[ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ] + +Vhost dirty page logging API is designed to sync through GPA. But we +try to log GIOVA when device IOTLB is enabled. This is wrong and may +lead to missing data after migration. 
+ +To solve this issue, when logging with device IOTLB enabled, we will: + +1) reuse the device IOTLB translation result of GIOVA->HVA mapping to + get HVA, for writable descriptor, get HVA through iovec. For used + ring update, translate its GIOVA to HVA +2) traverse the GPA->HVA mapping to get the possible GPA and log + through GPA. Pay attention this reverse mapping is not guaranteed + to be unique, so we should log each possible GPA in this case. + +This fix the failure of scp to guest during migration. In -next, we +will probably support passing GIOVA->GPA instead of GIOVA->HVA. + +Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") +Reported-by: Jintack Lim +Cc: Jintack Lim +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/net.c | 3 + + drivers/vhost/vhost.c | 97 ++++++++++++++++++++++++++++++++++++++++++-------- + drivers/vhost/vhost.h | 3 + + 3 files changed, 87 insertions(+), 16 deletions(-) + +--- a/drivers/vhost/net.c ++++ b/drivers/vhost/net.c +@@ -751,7 +751,8 @@ static void handle_rx(struct vhost_net * + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, + headcount); + if (unlikely(vq_log)) +- vhost_log_write(vq, vq_log, log, vhost_len); ++ vhost_log_write(vq, vq_log, log, vhost_len, ++ vq->iov, in); + total_len += vhost_len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -1646,13 +1646,87 @@ static int log_write(void __user *log_ba + return r; + } + ++static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) ++{ ++ struct vhost_umem *umem = vq->umem; ++ struct vhost_umem_node *u; ++ u64 start, end, l, min; ++ int r; ++ bool hit = false; ++ ++ while (len) { ++ min = len; ++ /* More than one GPAs can be mapped into a single HVA. So ++ * iterate all possible umems here to be safe. ++ */ ++ list_for_each_entry(u, &umem->umem_list, link) { ++ if (u->userspace_addr > hva - 1 + len || ++ u->userspace_addr - 1 + u->size < hva) ++ continue; ++ start = max(u->userspace_addr, hva); ++ end = min(u->userspace_addr - 1 + u->size, ++ hva - 1 + len); ++ l = end - start + 1; ++ r = log_write(vq->log_base, ++ u->start + start - u->userspace_addr, ++ l); ++ if (r < 0) ++ return r; ++ hit = true; ++ min = min(l, min); ++ } ++ ++ if (!hit) ++ return -EFAULT; ++ ++ len -= min; ++ hva += min; ++ } ++ ++ return 0; ++} ++ ++static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) ++{ ++ struct iovec iov[64]; ++ int i, ret; ++ ++ if (!vq->iotlb) ++ return log_write(vq->log_base, vq->log_addr + used_offset, len); ++ ++ ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, ++ len, iov, 64, VHOST_ACCESS_WO); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i < ret; i++) { ++ ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, ++ iov[i].iov_len); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, +- unsigned int log_num, u64 len) ++ unsigned int log_num, u64 len, struct iovec *iov, int count) + { + int i, r; + + /* Make sure data written is seen before log. 
*/ + smp_wmb(); ++ ++ if (vq->iotlb) { ++ for (i = 0; i < count; i++) { ++ r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, ++ iov[i].iov_len); ++ if (r < 0) ++ return r; ++ } ++ return 0; ++ } ++ + for (i = 0; i < log_num; ++i) { + u64 l = min(log[i].len, len); + r = log_write(vq->log_base, log[i].addr, l); +@@ -1682,9 +1756,8 @@ static int vhost_update_used_flags(struc + smp_wmb(); + /* Log used flag write. */ + used = &vq->used->flags; +- log_write(vq->log_base, vq->log_addr + +- (used - (void __user *)vq->used), +- sizeof vq->used->flags); ++ log_used(vq, (used - (void __user *)vq->used), ++ sizeof vq->used->flags); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } +@@ -1702,9 +1775,8 @@ static int vhost_update_avail_event(stru + smp_wmb(); + /* Log avail event write */ + used = vhost_avail_event(vq); +- log_write(vq->log_base, vq->log_addr + +- (used - (void __user *)vq->used), +- sizeof *vhost_avail_event(vq)); ++ log_used(vq, (used - (void __user *)vq->used), ++ sizeof *vhost_avail_event(vq)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } +@@ -2103,10 +2175,8 @@ static int __vhost_add_used_n(struct vho + /* Make sure data is seen before log. */ + smp_wmb(); + /* Log used ring entry write. */ +- log_write(vq->log_base, +- vq->log_addr + +- ((void __user *)used - (void __user *)vq->used), +- count * sizeof *used); ++ log_used(vq, ((void __user *)used - (void __user *)vq->used), ++ count * sizeof *used); + } + old = vq->last_used_idx; + new = (vq->last_used_idx += count); +@@ -2148,9 +2218,8 @@ int vhost_add_used_n(struct vhost_virtqu + /* Make sure used idx is seen before log. */ + smp_wmb(); + /* Log used index update. */ +- log_write(vq->log_base, +- vq->log_addr + offsetof(struct vring_used, idx), +- sizeof vq->used->idx); ++ log_used(vq, offsetof(struct vring_used, idx), ++ sizeof vq->used->idx); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } +--- a/drivers/vhost/vhost.h ++++ b/drivers/vhost/vhost.h +@@ -199,7 +199,8 @@ bool vhost_vq_avail_empty(struct vhost_d + bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); + + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, +- unsigned int log_num, u64 len); ++ unsigned int log_num, u64 len, ++ struct iovec *iov, int count); + int vq_iotlb_prefetch(struct vhost_virtqueue *vq); + + struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
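
The key step in the vhost change above is log_write_hva(): with device IOTLB enabled, dirty logging must go through GPA, and since more than one GPA can be mapped to the same HVA, every GPA->HVA region that overlaps the written host range has to be logged. Below is a minimal, self-contained sketch of that interval walk, under stated assumptions: struct region, log_gpa_range() and log_write_hva_sketch() are hypothetical stand-ins (the kernel walks struct vhost_umem_node on vq->umem and calls log_write()), and it uses plain exclusive-end arithmetic instead of the overflow-safe inclusive bounds used in the patch.

/* Simplified illustration of the reverse HVA->GPA walk in the vhost
 * patch above. Names are hypothetical; not the kernel API.
 */
#include <stdint.h>
#include <stdio.h>

struct region {			/* one GPA->HVA mapping */
	uint64_t gpa_start;	/* guest physical start */
	uint64_t hva_start;	/* host virtual start */
	uint64_t size;		/* length in bytes */
};

/* Stand-in for the real dirty-bitmap update (log_write() in the kernel). */
static void log_gpa_range(uint64_t gpa, uint64_t len)
{
	printf("log GPA 0x%llx len 0x%llx\n",
	       (unsigned long long)gpa, (unsigned long long)len);
}

/* Log every GPA range whose mapping overlaps [hva, hva + len).
 * Returns 0 on success, -1 if part of the range has no backing GPA,
 * mirroring the -EFAULT case in log_write_hva().
 */
static int log_write_hva_sketch(const struct region *r, int nr,
				uint64_t hva, uint64_t len)
{
	while (len) {
		uint64_t min = len;
		int hit = 0;
		int i;

		/* More than one GPA can map the same HVA, so check and
		 * log every overlapping region, not just the first hit.
		 */
		for (i = 0; i < nr; i++) {
			uint64_t r_end = r[i].hva_start + r[i].size; /* exclusive */
			uint64_t start, end, l;

			if (r[i].hva_start >= hva + len || r_end <= hva)
				continue;	/* no overlap with this region */

			start = hva > r[i].hva_start ? hva : r[i].hva_start;
			end = hva + len < r_end ? hva + len : r_end;
			l = end - start;

			log_gpa_range(r[i].gpa_start + (start - r[i].hva_start), l);
			hit = 1;
			if (l < min)
				min = l;
		}

		if (!hit)
			return -1;	/* hole in the GPA->HVA map */

		/* Advance past the shortest overlap and retry the rest. */
		len -= min;
		hva += min;
	}

	return 0;
}

int main(void)
{
	/* Two guest regions backed by adjacent host pages (made-up values). */
	const struct region map[] = {
		{ 0x00000000ull, 0x7f0000000000ull, 0x1000 },
		{ 0x00100000ull, 0x7f0000001000ull, 0x1000 },
	};

	/* A 4 KiB write straddling both regions gets logged under both GPAs. */
	return log_write_hva_sketch(map, 2, 0x7f0000000800ull, 0x1000);
}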