From 20718833425451d74b3057173f53af64feeb63a5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Apr 2014 22:02:11 -0700 Subject: [PATCH] 3.14-stable patches added patches: call-efx_set_channels-before-efx-type-dimension_resources.patch futex-avoid-race-between-requeue-and-wake.patch ipv6-some-ipv6-statistic-counters-failed-to-disable-bh.patch isdnloop-several-buffer-overflows.patch isdnloop-validate-nul-terminated-strings-from-user.patch net-at91_ether-avoid-null-pointer-dereference.patch net-vxlan-fix-crash-when-interface-is-created-with-no-group.patch netlink-don-t-compare-the-nul-termination-in-nla_strcmp.patch rds-prevent-dereference-of-a-null-device-in-rds_iw_laddr_check.patch xen-netback-bug_on-in-xenvif_rx_action-not-catching-overflow.patch xen-netback-disable-rogue-vif-in-kthread-context.patch xen-netback-remove-pointless-clause-from-if-statement.patch xen-netback-worse-case-estimate-in-xenvif_rx_action-is-underestimating.patch --- ...-before-efx-type-dimension_resources.patch | 67 ++++++++ ...-avoid-race-between-requeue-and-wake.patch | 92 +++++++++++ ...tistic-counters-failed-to-disable-bh.patch | 94 +++++++++++ .../isdnloop-several-buffer-overflows.patch | 95 +++++++++++ ...ate-nul-terminated-strings-from-user.patch | 34 ++++ ...ether-avoid-null-pointer-dereference.patch | 36 ++++ ...n-interface-is-created-with-no-group.patch | 154 ++++++++++++++++++ ...re-the-nul-termination-in-nla_strcmp.patch | 55 +++++++ ...-a-null-device-in-rds_iw_laddr_check.patch | 31 ++++ queue-3.14/series | 13 ++ ...nvif_rx_action-not-catching-overflow.patch | 50 ++++++ ...disable-rogue-vif-in-kthread-context.patch | 128 +++++++++++++++ ...e-pointless-clause-from-if-statement.patch | 39 +++++ ...-xenvif_rx_action-is-underestimating.patch | 62 +++++++ 14 files changed, 950 insertions(+) create mode 100644 queue-3.14/call-efx_set_channels-before-efx-type-dimension_resources.patch create mode 100644 queue-3.14/futex-avoid-race-between-requeue-and-wake.patch create 
mode 100644 queue-3.14/ipv6-some-ipv6-statistic-counters-failed-to-disable-bh.patch create mode 100644 queue-3.14/isdnloop-several-buffer-overflows.patch create mode 100644 queue-3.14/isdnloop-validate-nul-terminated-strings-from-user.patch create mode 100644 queue-3.14/net-at91_ether-avoid-null-pointer-dereference.patch create mode 100644 queue-3.14/net-vxlan-fix-crash-when-interface-is-created-with-no-group.patch create mode 100644 queue-3.14/netlink-don-t-compare-the-nul-termination-in-nla_strcmp.patch create mode 100644 queue-3.14/rds-prevent-dereference-of-a-null-device-in-rds_iw_laddr_check.patch create mode 100644 queue-3.14/xen-netback-bug_on-in-xenvif_rx_action-not-catching-overflow.patch create mode 100644 queue-3.14/xen-netback-disable-rogue-vif-in-kthread-context.patch create mode 100644 queue-3.14/xen-netback-remove-pointless-clause-from-if-statement.patch create mode 100644 queue-3.14/xen-netback-worse-case-estimate-in-xenvif_rx_action-is-underestimating.patch diff --git a/queue-3.14/call-efx_set_channels-before-efx-type-dimension_resources.patch b/queue-3.14/call-efx_set_channels-before-efx-type-dimension_resources.patch new file mode 100644 index 00000000000..7e349d2693c --- /dev/null +++ b/queue-3.14/call-efx_set_channels-before-efx-type-dimension_resources.patch @@ -0,0 +1,67 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Daniel Pieczko +Date: Tue, 1 Apr 2014 13:10:34 +0100 +Subject: Call efx_set_channels() before efx->type->dimension_resources() + +From: Daniel Pieczko + +[ Upstream commit 52ad762b85ed7947ec9eff6b036eb985352f6874 ] + +When using the "separate_tx_channels=1" module parameter, the TX queues are +initially numbered starting from the first TX-only channel number (after all the +RX-only channels). efx_set_channels() renumbers the queues so that they are +indexed from zero. 
+ +On EF10, the TX queues need to be relabelled in this way before calling the +dimension_resources NIC type operation, otherwise the TX queue PIO buffers can be +linked to the wrong VIs when using "separate_tx_channels=1". + +Added comments to explain UC/WC mappings for PIO buffers + +Signed-off-by: Shradha Shah +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/sfc/ef10.c | 7 +++++++ + drivers/net/ethernet/sfc/efx.c | 3 ++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/sfc/ef10.c ++++ b/drivers/net/ethernet/sfc/ef10.c +@@ -565,10 +565,17 @@ static int efx_ef10_dimension_resources( + * several of each (in fact that's the only option if host + * page size is >4K). So we may allocate some extra VIs just + * for writing PIO buffers through. ++ * ++ * The UC mapping contains (min_vis - 1) complete VIs and the ++ * first half of the next VI. Then the WC mapping begins with ++ * the second half of this last VI. + */ + uc_mem_map_size = PAGE_ALIGN((min_vis - 1) * EFX_VI_PAGE_SIZE + + ER_DZ_TX_PIOBUF); + if (nic_data->n_piobufs) { ++ /* pio_write_vi_base rounds down to give the number of complete ++ * VIs inside the UC mapping. 
++ */ + pio_write_vi_base = uc_mem_map_size / EFX_VI_PAGE_SIZE; + wc_mem_map_size = (PAGE_ALIGN((pio_write_vi_base + + nic_data->n_piobufs) * +--- a/drivers/net/ethernet/sfc/efx.c ++++ b/drivers/net/ethernet/sfc/efx.c +@@ -1603,6 +1603,8 @@ static int efx_probe_nic(struct efx_nic + if (rc) + goto fail1; + ++ efx_set_channels(efx); ++ + rc = efx->type->dimension_resources(efx); + if (rc) + goto fail2; +@@ -1613,7 +1615,6 @@ static int efx_probe_nic(struct efx_nic + efx->rx_indir_table[i] = + ethtool_rxfh_indir_default(i, efx->rss_spread); + +- efx_set_channels(efx); + netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); + netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); + diff --git a/queue-3.14/futex-avoid-race-between-requeue-and-wake.patch b/queue-3.14/futex-avoid-race-between-requeue-and-wake.patch new file mode 100644 index 00000000000..b62c31a12ff --- /dev/null +++ b/queue-3.14/futex-avoid-race-between-requeue-and-wake.patch @@ -0,0 +1,92 @@ +From 69cd9eba38867a493a043bb13eb9b33cad5f1a9a Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 8 Apr 2014 15:30:07 -0700 +Subject: futex: avoid race between requeue and wake + +From: Linus Torvalds + +commit 69cd9eba38867a493a043bb13eb9b33cad5f1a9a upstream. + +Jan Stancek reported: + "pthread_cond_broadcast/4-1.c testcase from openposix testsuite (LTP) + occasionally fails, because some threads fail to wake up. + + Testcase creates 5 threads, which are all waiting on same condition. + Main thread then calls pthread_cond_broadcast() without holding mutex, + which calls: + + futex(uaddr1, FUTEX_CMP_REQUEUE_PRIVATE, 1, 2147483647, uaddr2, ..) 
+ + This immediately wakes up single thread A, which unlocks mutex and + tries to wake up another thread: + + futex(uaddr2, FUTEX_WAKE_PRIVATE, 1) + + If thread A manages to call futex_wake() before any waiters are + requeued for uaddr2, no other thread is woken up" + +The ordering constraints for the hash bucket waiter counting are that +the waiter counts have to be incremented _before_ getting the spinlock +(because the spinlock acts as part of the memory barrier), but the +"requeue" operation didn't honor those rules, and nobody had even +thought about that case. + +This fairly simple patch just increments the waiter count for the target +hash bucket (hb2) when requeing a futex before taking the locks. It +then decrements them again after releasing the lock - the code that +actually moves the futex(es) between hash buckets will do the additional +required waiter count housekeeping. + +Reported-and-tested-by: Jan Stancek +Acked-by: Davidlohr Bueso +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/futex.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -1450,6 +1450,7 @@ retry: + hb2 = hash_futex(&key2); + + retry_private: ++ hb_waiters_inc(hb2); + double_lock_hb(hb1, hb2); + + if (likely(cmpval != NULL)) { +@@ -1459,6 +1460,7 @@ retry_private: + + if (unlikely(ret)) { + double_unlock_hb(hb1, hb2); ++ hb_waiters_dec(hb2); + + ret = get_user(curval, uaddr1); + if (ret) +@@ -1508,6 +1510,7 @@ retry_private: + break; + case -EFAULT: + double_unlock_hb(hb1, hb2); ++ hb_waiters_dec(hb2); + put_futex_key(&key2); + put_futex_key(&key1); + ret = fault_in_user_writeable(uaddr2); +@@ -1517,6 +1520,7 @@ retry_private: + case -EAGAIN: + /* The owner was exiting, try again. 
*/ + double_unlock_hb(hb1, hb2); ++ hb_waiters_dec(hb2); + put_futex_key(&key2); + put_futex_key(&key1); + cond_resched(); +@@ -1592,6 +1596,7 @@ retry_private: + + out_unlock: + double_unlock_hb(hb1, hb2); ++ hb_waiters_dec(hb2); + + /* + * drop_futex_key_refs() must be called outside the spinlocks. During diff --git a/queue-3.14/ipv6-some-ipv6-statistic-counters-failed-to-disable-bh.patch b/queue-3.14/ipv6-some-ipv6-statistic-counters-failed-to-disable-bh.patch new file mode 100644 index 00000000000..ba57fb7ae4c --- /dev/null +++ b/queue-3.14/ipv6-some-ipv6-statistic-counters-failed-to-disable-bh.patch @@ -0,0 +1,94 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Hannes Frederic Sowa +Date: Mon, 31 Mar 2014 20:14:10 +0200 +Subject: ipv6: some ipv6 statistic counters failed to disable bh + +From: Hannes Frederic Sowa + +[ Upstream commit 43a43b6040165f7b40b5b489fe61a4cb7f8c4980 ] + +After commit c15b1ccadb323ea ("ipv6: move DAD and addrconf_verify +processing to workqueue") some counters are now updated in process context +and thus need to disable bh before doing so, otherwise deadlocks can +happen on 32-bit archs. Fabio Estevam noticed this while mounting +an NFS volume on an ARM board. + +As a compensation for missing this I looked after the other *_STATS_BH +and found three other calls which need updating: + +1) icmp6_send: ip6_fragment -> icmpv6_send -> icmp6_send (error handling) +2) ip6_push_pending_frames: rawv6_sendmsg -> rawv6_push_pending_frames -> ... + (only in case of icmp protocol with raw sockets in error handling) +3) ping6_v6_sendmsg (error handling) + +Fixes: c15b1ccadb323ea ("ipv6: move DAD and addrconf_verify processing to workqueue") +Reported-by: Fabio Estevam +Tested-by: Fabio Estevam +Cc: Eric Dumazet +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/icmp.c | 2 +- + net/ipv6/ip6_output.c | 4 ++-- + net/ipv6/mcast.c | 11 ++++++----- + net/ipv6/ping.c | 4 ++-- + 4 files changed, 11 insertions(+), 10 deletions(-) + +--- a/net/ipv6/icmp.c ++++ b/net/ipv6/icmp.c +@@ -520,7 +520,7 @@ static void icmp6_send(struct sk_buff *s + np->tclass, NULL, &fl6, (struct rt6_info *)dst, + MSG_DONTWAIT, np->dontfrag); + if (err) { +- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ++ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1566,8 +1566,8 @@ int ip6_push_pending_frames(struct sock + if (proto == IPPROTO_ICMPV6) { + struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + +- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); +- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); ++ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); ++ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); + } + + err = ip6_local_out(skb); +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -1620,11 +1620,12 @@ static void mld_sendpack(struct sk_buff + dst_output); + out: + if (!err) { +- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); +- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); +- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); +- } else +- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); ++ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); ++ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); ++ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); ++ } else { ++ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); ++ } + + rcu_read_unlock(); + return; +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -182,8 +182,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, + MSG_DONTWAIT, np->dontfrag); + + if (err) { +- ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, +- 
ICMP6_MIB_OUTERRORS); ++ ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, ++ ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, diff --git a/queue-3.14/isdnloop-several-buffer-overflows.patch b/queue-3.14/isdnloop-several-buffer-overflows.patch new file mode 100644 index 00000000000..ba373064f2f --- /dev/null +++ b/queue-3.14/isdnloop-several-buffer-overflows.patch @@ -0,0 +1,95 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Dan Carpenter +Date: Tue, 8 Apr 2014 12:23:09 +0300 +Subject: isdnloop: several buffer overflows + +From: Dan Carpenter + +[ Upstream commit 7563487cbf865284dcd35e9ef5a95380da046737 ] + +There are three buffer overflows addressed in this patch. + +1) In isdnloop_fake_err() we add an 'E' to a 60 character string and +then copy it into a 60 character buffer. I have made the destination +buffer 64 characters and I've changed the sprintf() to a snprintf(). + +2) In isdnloop_parse_cmd(), p points 6 characters into a 60 +character buffer so we have 54 characters. The ->eazlist[] is 11 +characters long. I have modified the code to return if the source +buffer is too long. + +3) In isdnloop_command() the cbuf[] array was 60 characters long but the +max length of the string then can be up to 79 characters. I made the +cbuf array 80 characters long and changed the sprintf() to snprintf(). +I also removed the temporary "dial" buffer and changed it to use "p" +directly. + +Unfortunately, we pass the "cbuf" string from isdnloop_command() to +isdnloop_writecmd() which truncates anything over 60 characters to make +it fit in card->omsg[]. (It can accept values up to 255 characters so +long as there is a '\n' character every 60 characters). For now I have +just fixed the memory corruption bug and left the other problems in this +driver alone. + +Signed-off-by: Dan Carpenter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/isdnloop/isdnloop.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +--- a/drivers/isdn/isdnloop/isdnloop.c ++++ b/drivers/isdn/isdnloop/isdnloop.c +@@ -518,9 +518,9 @@ static isdnloop_stat isdnloop_cmd_table[ + static void + isdnloop_fake_err(isdnloop_card *card) + { +- char buf[60]; ++ char buf[64]; + +- sprintf(buf, "E%s", card->omsg); ++ snprintf(buf, sizeof(buf), "E%s", card->omsg); + isdnloop_fake(card, buf, -1); + isdnloop_fake(card, "NAK", -1); + } +@@ -903,6 +903,8 @@ isdnloop_parse_cmd(isdnloop_card *card) + case 7: + /* 0x;EAZ */ + p += 3; ++ if (strlen(p) >= sizeof(card->eazlist[0])) ++ break; + strcpy(card->eazlist[ch - 1], p); + break; + case 8: +@@ -1133,7 +1135,7 @@ isdnloop_command(isdn_ctrl *c, isdnloop_ + { + ulong a; + int i; +- char cbuf[60]; ++ char cbuf[80]; + isdn_ctrl cmd; + isdnloop_cdef cdef; + +@@ -1198,7 +1200,6 @@ isdnloop_command(isdn_ctrl *c, isdnloop_ + break; + if ((c->arg & 255) < ISDNLOOP_BCH) { + char *p; +- char dial[50]; + char dcode[4]; + + a = c->arg; +@@ -1210,10 +1211,10 @@ isdnloop_command(isdn_ctrl *c, isdnloop_ + } else + /* Normal Dial */ + strcpy(dcode, "CAL"); +- strcpy(dial, p); +- sprintf(cbuf, "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1), +- dcode, dial, c->parm.setup.si1, +- c->parm.setup.si2, c->parm.setup.eazmsn); ++ snprintf(cbuf, sizeof(cbuf), ++ "%02d;D%s_R%s,%02d,%02d,%s\n", (int) (a + 1), ++ dcode, p, c->parm.setup.si1, ++ c->parm.setup.si2, c->parm.setup.eazmsn); + i = isdnloop_writecmd(cbuf, strlen(cbuf), 0, card); + } + break; diff --git a/queue-3.14/isdnloop-validate-nul-terminated-strings-from-user.patch b/queue-3.14/isdnloop-validate-nul-terminated-strings-from-user.patch new file mode 100644 index 00000000000..b77c2a43b6b --- /dev/null +++ b/queue-3.14/isdnloop-validate-nul-terminated-strings-from-user.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: YOSHIFUJI Hideaki +Date: Wed, 2 Apr 
2014 12:48:42 +0900 +Subject: isdnloop: Validate NUL-terminated strings from user. + +From: YOSHIFUJI Hideaki + +[ Upstream commit 77bc6bed7121936bb2e019a8c336075f4c8eef62 ] + +Return -EINVAL unless all of user-given strings are correctly +NUL-terminated. + +Signed-off-by: YOSHIFUJI Hideaki +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/isdnloop/isdnloop.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/isdn/isdnloop/isdnloop.c ++++ b/drivers/isdn/isdnloop/isdnloop.c +@@ -1070,6 +1070,12 @@ isdnloop_start(isdnloop_card *card, isdn + return -EBUSY; + if (copy_from_user((char *) &sdef, (char *) sdefp, sizeof(sdef))) + return -EFAULT; ++ ++ for (i = 0; i < 3; i++) { ++ if (!memchr(sdef.num[i], 0, sizeof(sdef.num[i]))) ++ return -EINVAL; ++ } ++ + spin_lock_irqsave(&card->isdnloop_lock, flags); + switch (sdef.ptype) { + case ISDN_PTYPE_EURO: diff --git a/queue-3.14/net-at91_ether-avoid-null-pointer-dereference.patch b/queue-3.14/net-at91_ether-avoid-null-pointer-dereference.patch new file mode 100644 index 00000000000..801c6604480 --- /dev/null +++ b/queue-3.14/net-at91_ether-avoid-null-pointer-dereference.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Apr 10 20:31:47 PDT 2014 +From: Gilles Chanteperdrix +Date: Sun, 6 Apr 2014 20:37:44 +0200 +Subject: net/at91_ether: avoid NULL pointer dereference + +From: Gilles Chanteperdrix + +[ Upstream commit c293fb785bdda64d88f197e6758a3c16ae83e569 ] + +The at91_ether driver calls macb_mii_init passing a 'struct macb' +structure whose tx_clk member is initialized to 0. However, +macb_handle_link_change() expects tx_clk to be the result of +a call to clk_get, and so IS_ERR(tx_clk) to be true if the clock +is invalid. This causes an oops when booting Linux 3.14 on the +csb637 board. The following changes avoids this. + +Signed-off-by: Gilles Chanteperdrix +Acked-by: Nicolas Ferre +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/at91_ether.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/cadence/at91_ether.c ++++ b/drivers/net/ethernet/cadence/at91_ether.c +@@ -342,6 +342,9 @@ static int __init at91ether_probe(struct + } + clk_enable(lp->pclk); + ++ lp->hclk = ERR_PTR(-ENOENT); ++ lp->tx_clk = ERR_PTR(-ENOENT); ++ + /* Install the interrupt handler */ + dev->irq = platform_get_irq(pdev, 0); + res = devm_request_irq(&pdev->dev, dev->irq, at91ether_interrupt, 0, dev->name, dev); diff --git a/queue-3.14/net-vxlan-fix-crash-when-interface-is-created-with-no-group.patch b/queue-3.14/net-vxlan-fix-crash-when-interface-is-created-with-no-group.patch new file mode 100644 index 00000000000..3a467bfa6a2 --- /dev/null +++ b/queue-3.14/net-vxlan-fix-crash-when-interface-is-created-with-no-group.patch @@ -0,0 +1,154 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Mike Rapoport +Date: Tue, 1 Apr 2014 09:23:01 +0300 +Subject: net: vxlan: fix crash when interface is created with no group + +From: Mike Rapoport + +[ Upstream commit 5933a7bbb5de66482ea8aa874a7ebaf8e67603c4 ] + +If the vxlan interface is created without explicit group definition, +there are corner cases which may cause kernel panic. 
+ +For instance, in the following scenario: + +node A: +$ ip link add dev vxlan42 address 2c:c2:60:00:10:20 type vxlan id 42 +$ ip addr add dev vxlan42 10.0.0.1/24 +$ ip link set up dev vxlan42 +$ arp -i vxlan42 -s 10.0.0.2 2c:c2:60:00:01:02 +$ bridge fdb add dev vxlan42 to 2c:c2:60:00:01:02 dst +$ ping 10.0.0.2 + +node B: +$ ip link add dev vxlan42 address 2c:c2:60:00:01:02 type vxlan id 42 +$ ip addr add dev vxlan42 10.0.0.2/24 +$ ip link set up dev vxlan42 +$ arp -i vxlan42 -s 10.0.0.1 2c:c2:60:00:10:20 + +node B crashes: + + vxlan42: 2c:c2:60:00:10:20 migrated from 4011:eca4:c0a8:6466:c0a8:6415:8e09:2118 to (invalid address) + vxlan42: 2c:c2:60:00:10:20 migrated from 4011:eca4:c0a8:6466:c0a8:6415:8e09:2118 to (invalid address) + BUG: unable to handle kernel NULL pointer dereference at 0000000000000046 + IP: [] ip6_route_output+0x58/0x82 + PGD 7bd89067 PUD 7bd4e067 PMD 0 + Oops: 0000 [#1] SMP + Modules linked in: + CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.14.0-rc8-hvx-xen-00019-g97a5221-dirty #154 + Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 + task: ffff88007c774f50 ti: ffff88007c79c000 task.ti: ffff88007c79c000 + RIP: 0010:[] [] ip6_route_output+0x58/0x82 + RSP: 0018:ffff88007fd03668 EFLAGS: 00010282 + RAX: 0000000000000000 RBX: ffffffff8186a000 RCX: 0000000000000040 + RDX: 0000000000000000 RSI: ffff88007b0e4a80 RDI: ffff88007fd03754 + RBP: ffff88007fd03688 R08: ffff88007b0e4a80 R09: 0000000000000000 + R10: 0200000a0100000a R11: 0001002200000000 R12: ffff88007fd03740 + R13: ffff88007b0e4a80 R14: ffff88007b0e4a80 R15: ffff88007bba0c50 + FS: 0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b + CR2: 0000000000000046 CR3: 000000007bb60000 CR4: 00000000000006e0 + Stack: + 0000000000000000 ffff88007fd037a0 ffffffff8186a000 ffff88007fd03740 + ffff88007fd036c8 ffffffff814320bb 0000000000006e49 ffff88007b8b7360 + ffff88007bdbf200 ffff88007bcbc000 ffff88007b8b7000 ffff88007b8b7360 + Call 
Trace: + + [] ip6_dst_lookup_tail+0x2d/0xa4 + [] ip6_dst_lookup+0x10/0x12 + [] vxlan_xmit_one+0x32a/0x68c + [] ? _raw_spin_unlock_irqrestore+0x12/0x14 + [] ? lock_timer_base.isra.23+0x26/0x4b + [] vxlan_xmit+0x66a/0x6a8 + [] ? ipt_do_table+0x35f/0x37e + [] ? selinux_ip_postroute+0x41/0x26e + [] dev_hard_start_xmit+0x2ce/0x3ce + [] __dev_queue_xmit+0x2d0/0x392 + [] ? eth_header+0x28/0xb5 + [] dev_queue_xmit+0xb/0xd + [] neigh_resolve_output+0x134/0x152 + [] ip_finish_output2+0x236/0x299 + [] ip_finish_output+0x98/0x9d + [] ip_output+0x62/0x67 + [] dst_output+0xf/0x11 + [] ip_local_out+0x1b/0x1f + [] ip_send_skb+0x11/0x37 + [] ip_push_pending_frames+0x2f/0x33 + [] icmp_push_reply+0x106/0x115 + [] icmp_reply+0x142/0x164 + [] icmp_echo.part.16+0x46/0x48 + [] ? nf_iterate+0x43/0x80 + [] ? xfrm4_policy_check.constprop.11+0x52/0x52 + [] icmp_echo+0x25/0x27 + [] icmp_rcv+0x1d2/0x20a + [] ? xfrm4_policy_check.constprop.11+0x52/0x52 + [] ip_local_deliver_finish+0xd6/0x14f + [] ? xfrm4_policy_check.constprop.11+0x52/0x52 + [] NF_HOOK.constprop.10+0x4c/0x53 + [] ip_local_deliver+0x4a/0x4f + [] ip_rcv_finish+0x253/0x26a + [] ? inet_add_protocol+0x3e/0x3e + [] NF_HOOK.constprop.10+0x4c/0x53 + [] ip_rcv+0x2a6/0x2ec + [] __netif_receive_skb_core+0x43e/0x478 + [] ? virtqueue_poll+0x16/0x27 + [] __netif_receive_skb+0x55/0x5a + [] process_backlog+0x76/0x12f + [] net_rx_action+0xa2/0x1ab + [] __do_softirq+0xca/0x1d1 + [] irq_exit+0x3e/0x85 + [] do_IRQ+0xa9/0xc4 + [] common_interrupt+0x6d/0x6d + + [] ? 
native_safe_halt+0x6/0x8 + [] default_idle+0x9/0xd + [] arch_cpu_idle+0x13/0x1c + [] cpu_startup_entry+0xbc/0x137 + [] start_secondary+0x1a0/0x1a5 + Code: 24 14 e8 f1 e5 01 00 31 d2 a8 32 0f 95 c2 49 8b 44 24 2c 49 0b 44 24 24 74 05 83 ca 04 eb 1c 4d 85 ed 74 17 49 8b 85 a8 02 00 00 <66> 8b 40 46 66 c1 e8 07 83 e0 07 c1 e0 03 09 c2 4c 89 e6 48 89 + RIP [] ip6_route_output+0x58/0x82 + RSP + CR2: 0000000000000046 + ---[ end trace 4612329caab37efd ]--- + +When vxlan interface is created without explicit group definition, the +default_dst protocol family is initialized to AF_UNSPEC and the driver +assumes IPv4 configuration. On the other side, the default_dst protocol +family is used to differentiate between IPv4 and IPv6 cases and, since +AF_UNSPEC != AF_INET, the processing takes the IPv6 path. + +Making the IPv4 assumption explicit by setting default_dst protocol +family to AF_INET and preventing mixing of IPv4 and IPv6 addresses in +snooped fdb entries fixes the corner case crashes. + +Signed-off-by: Mike Rapoport +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -871,6 +871,9 @@ static int vxlan_fdb_add(struct ndmsg *n + if (err) + return err; + ++ if (vxlan->default_dst.remote_ip.sa.sa_family != ip.sa.sa_family) ++ return -EAFNOSUPPORT; ++ + spin_lock_bh(&vxlan->hash_lock); + err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, + port, vni, ifindex, ndm->ndm_flags); +@@ -2612,9 +2615,10 @@ static int vxlan_newlink(struct net *net + vni = nla_get_u32(data[IFLA_VXLAN_ID]); + dst->remote_vni = vni; + ++ /* Unless IPv6 is explicitly requested, assume IPv4 */ ++ dst->remote_ip.sa.sa_family = AF_INET; + if (data[IFLA_VXLAN_GROUP]) { + dst->remote_ip.sin.sin_addr.s_addr = nla_get_be32(data[IFLA_VXLAN_GROUP]); +- dst->remote_ip.sa.sa_family = AF_INET; + } else if (data[IFLA_VXLAN_GROUP6]) { + if (!IS_ENABLED(CONFIG_IPV6)) + return -EPFNOSUPPORT; diff --git a/queue-3.14/netlink-don-t-compare-the-nul-termination-in-nla_strcmp.patch b/queue-3.14/netlink-don-t-compare-the-nul-termination-in-nla_strcmp.patch new file mode 100644 index 00000000000..2d17777725c --- /dev/null +++ b/queue-3.14/netlink-don-t-compare-the-nul-termination-in-nla_strcmp.patch @@ -0,0 +1,55 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Pablo Neira +Date: Tue, 1 Apr 2014 19:38:44 +0200 +Subject: netlink: don't compare the nul-termination in nla_strcmp + +From: Pablo Neira + +[ Upstream commit 8b7b932434f5eee495b91a2804f5b64ebb2bc835 ] + +nla_strcmp compares the string length plus one, so it's implicitly +including the nul-termination in the comparison. + + int nla_strcmp(const struct nlattr *nla, const char *str) + { + int len = strlen(str) + 1; + ... + d = memcmp(nla_data(nla), str, len); + +However, if NLA_STRING is used, userspace can send us a string without +the nul-termination. 
This is a problem since the string +comparison will not match as the last byte may be not the +nul-termination. + +Fix this by skipping the comparison of the nul-termination if the +attribute data is nul-terminated. Suggested by Thomas Graf. + +Cc: Florian Westphal +Cc: Thomas Graf +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + lib/nlattr.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/lib/nlattr.c ++++ b/lib/nlattr.c +@@ -303,9 +303,15 @@ int nla_memcmp(const struct nlattr *nla, + */ + int nla_strcmp(const struct nlattr *nla, const char *str) + { +- int len = strlen(str) + 1; +- int d = nla_len(nla) - len; ++ int len = strlen(str); ++ char *buf = nla_data(nla); ++ int attrlen = nla_len(nla); ++ int d; + ++ if (attrlen > 0 && buf[attrlen - 1] == '\0') ++ attrlen--; ++ ++ d = attrlen - len; + if (d == 0) + d = memcmp(nla_data(nla), str, len); + diff --git a/queue-3.14/rds-prevent-dereference-of-a-null-device-in-rds_iw_laddr_check.patch b/queue-3.14/rds-prevent-dereference-of-a-null-device-in-rds_iw_laddr_check.patch new file mode 100644 index 00000000000..1103e21ae37 --- /dev/null +++ b/queue-3.14/rds-prevent-dereference-of-a-null-device-in-rds_iw_laddr_check.patch @@ -0,0 +1,31 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Sasha Levin +Date: Sat, 29 Mar 2014 20:39:35 -0400 +Subject: rds: prevent dereference of a NULL device in rds_iw_laddr_check + +From: Sasha Levin + +[ Upstream commit bf39b4247b8799935ea91d90db250ab608a58e50 ] + +Binding might result in a NULL device which is later dereferenced +without checking. + +Signed-off-by: Sasha Levin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/iw.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/rds/iw.c ++++ b/net/rds/iw.c +@@ -239,7 +239,8 @@ static int rds_iw_laddr_check(__be32 add + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); + /* due to this, we will claim to support IB devices unless we + check node_type. */ +- if (ret || cm_id->device->node_type != RDMA_NODE_RNIC) ++ if (ret || !cm_id->device || ++ cm_id->device->node_type != RDMA_NODE_RNIC) + ret = -EADDRNOTAVAIL; + + rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/queue-3.14/series b/queue-3.14/series index e6345e1b159..96eeb1db957 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -2,3 +2,16 @@ revert-alsa-hda-increment-default-stream-numbers-for-amd-hdmi-controllers.patch selinux-correctly-label-proc-inodes-in-use-before-the-policy-is-loaded.patch x86-pageattr-export-page-unmapping-interface.patch x86-efi-make-efi-virtual-runtime-map-passing-more-robust.patch +futex-avoid-race-between-requeue-and-wake.patch +xen-netback-remove-pointless-clause-from-if-statement.patch +xen-netback-worse-case-estimate-in-xenvif_rx_action-is-underestimating.patch +xen-netback-bug_on-in-xenvif_rx_action-not-catching-overflow.patch +ipv6-some-ipv6-statistic-counters-failed-to-disable-bh.patch +netlink-don-t-compare-the-nul-termination-in-nla_strcmp.patch +xen-netback-disable-rogue-vif-in-kthread-context.patch +call-efx_set_channels-before-efx-type-dimension_resources.patch +net-vxlan-fix-crash-when-interface-is-created-with-no-group.patch +isdnloop-validate-nul-terminated-strings-from-user.patch +isdnloop-several-buffer-overflows.patch +rds-prevent-dereference-of-a-null-device-in-rds_iw_laddr_check.patch +net-at91_ether-avoid-null-pointer-dereference.patch diff --git a/queue-3.14/xen-netback-bug_on-in-xenvif_rx_action-not-catching-overflow.patch b/queue-3.14/xen-netback-bug_on-in-xenvif_rx_action-not-catching-overflow.patch new file mode 100644 index 
00000000000..b9219a532f7 --- /dev/null +++ b/queue-3.14/xen-netback-bug_on-in-xenvif_rx_action-not-catching-overflow.patch @@ -0,0 +1,50 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Paul Durrant +Date: Fri, 28 Mar 2014 11:39:07 +0000 +Subject: xen-netback: BUG_ON in xenvif_rx_action() not catching overflow + +From: Paul Durrant + +[ Upstream commit 1425c7a4e8d3d2eebf308bcbdc3fa3c1247686b4 ] + +The BUG_ON to catch ring overflow in xenvif_rx_action() makes the assumption +that meta_slots_used == ring slots used. This is not necessarily the case +for GSO packets, because the non-prefix GSO protocol consumes one more ring +slot than meta-slot for the 'extra_info'. This patch changes the test to +actually check ring slots. + +Signed-off-by: Paul Durrant +Cc: Ian Campbell +Cc: Wei Liu +Cc: Sander Eikelenboom +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/netback.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -482,6 +482,8 @@ static void xenvif_rx_action(struct xenv + + while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { + RING_IDX max_slots_needed; ++ RING_IDX old_req_cons; ++ RING_IDX ring_slots_used; + int i; + + /* We need a cheap worse case estimate for the number of +@@ -530,8 +532,12 @@ static void xenvif_rx_action(struct xenv + vif->rx_last_skb_slots = 0; + + sco = (struct skb_cb_overlay *)skb->cb; ++ ++ old_req_cons = vif->rx.req_cons; + sco->meta_slots_used = xenvif_gop_skb(skb, &npo); +- BUG_ON(sco->meta_slots_used > max_slots_needed); ++ ring_slots_used = vif->rx.req_cons - old_req_cons; ++ ++ BUG_ON(ring_slots_used > max_slots_needed); + + __skb_queue_tail(&rxq, skb); + } diff --git a/queue-3.14/xen-netback-disable-rogue-vif-in-kthread-context.patch b/queue-3.14/xen-netback-disable-rogue-vif-in-kthread-context.patch new file mode 100644 index 00000000000..e93cfd17303 --- /dev/null +++ 
b/queue-3.14/xen-netback-disable-rogue-vif-in-kthread-context.patch @@ -0,0 +1,128 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Wei Liu +Date: Tue, 1 Apr 2014 12:46:12 +0100 +Subject: xen-netback: disable rogue vif in kthread context +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Wei Liu + +[ Upstream commit e9d8b2c2968499c1f96563e6522c56958d5a1d0d ] + +When netback discovers a frontend is sending malformed packets it will +disable the interface which serves that frontend. + +However disabling a network interface involves taking a mutex which +cannot be done in softirq context, so we need to defer this process to +kthread context. + +This patch does the following: +1. introduce a flag to indicate the interface is disabled. +2. check that flag in TX path, don't do any work if it's true. +3. check that flag in RX path, turn off that interface if it's true. + +The reason to disable it in RX path is because RX uses kthread. After +this change the behavior of netback is still consistent -- it won't do +any TX work for a rogue frontend, and the interface will be eventually +turned off. + +Also change a "continue" to "break" after xenvif_fatal_tx_err, as it +doesn't make sense to continue processing packets if frontend is rogue. + +This is a fix for XSA-90. + +Reported-by: Török Edwin +Signed-off-by: Wei Liu +Cc: Ian Campbell +Reviewed-by: David Vrabel +Acked-by: Ian Campbell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/common.h | 5 +++++ + drivers/net/xen-netback/interface.c | 11 +++++++++++ + drivers/net/xen-netback/netback.c | 16 ++++++++++++++-- + 3 files changed, 30 insertions(+), 2 deletions(-) + +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -113,6 +113,11 @@ struct xenvif { + domid_t domid; + unsigned int handle; + ++ /* Is this interface disabled? True when backend discovers ++ * frontend is rogue. 
++ */ ++ bool disabled; ++ + /* Use NAPI for guest TX */ + struct napi_struct napi; + /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -62,6 +62,15 @@ static int xenvif_poll(struct napi_struc + struct xenvif *vif = container_of(napi, struct xenvif, napi); + int work_done; + ++ /* This vif is rogue, we pretend we've there is nothing to do ++ * for this vif to deschedule it from NAPI. But this interface ++ * will be turned off in thread context later. ++ */ ++ if (unlikely(vif->disabled)) { ++ napi_complete(napi); ++ return 0; ++ } ++ + work_done = xenvif_tx_action(vif, budget); + + if (work_done < budget) { +@@ -321,6 +330,8 @@ struct xenvif *xenvif_alloc(struct devic + vif->ip_csum = 1; + vif->dev = dev; + ++ vif->disabled = false; ++ + vif->credit_bytes = vif->remaining_credit = ~0UL; + vif->credit_usec = 0UL; + init_timer(&vif->credit_timeout); +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -680,7 +680,8 @@ static void xenvif_tx_err(struct xenvif + static void xenvif_fatal_tx_err(struct xenvif *vif) + { + netdev_err(vif->dev, "fatal error; disabling device\n"); +- xenvif_carrier_off(vif); ++ vif->disabled = true; ++ xenvif_kick_thread(vif); + } + + static int xenvif_count_requests(struct xenvif *vif, +@@ -1151,7 +1152,7 @@ static unsigned xenvif_tx_build_gops(str + vif->tx.sring->req_prod, vif->tx.req_cons, + XEN_NETIF_TX_RING_SIZE); + xenvif_fatal_tx_err(vif); +- continue; ++ break; + } + + work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); +@@ -1573,7 +1574,18 @@ int xenvif_kthread(void *data) + while (!kthread_should_stop()) { + wait_event_interruptible(vif->wq, + rx_work_todo(vif) || ++ vif->disabled || + kthread_should_stop()); ++ ++ /* This frontend is found to be rogue, disable it in ++ * kthread context. 
Currently this is only set when ++ * netback finds out frontend sends malformed packet, ++ * but we cannot disable the interface in softirq ++ * context so we defer it here. ++ */ ++ if (unlikely(vif->disabled && netif_carrier_ok(vif->dev))) ++ xenvif_carrier_off(vif); ++ + if (kthread_should_stop()) + break; + diff --git a/queue-3.14/xen-netback-remove-pointless-clause-from-if-statement.patch b/queue-3.14/xen-netback-remove-pointless-clause-from-if-statement.patch new file mode 100644 index 00000000000..303f4900d68 --- /dev/null +++ b/queue-3.14/xen-netback-remove-pointless-clause-from-if-statement.patch @@ -0,0 +1,39 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Paul Durrant +Date: Fri, 28 Mar 2014 11:39:05 +0000 +Subject: xen-netback: remove pointless clause from if statement + +From: Paul Durrant + +[ Upstream commit 0576eddf24df716d8570ef8ca11452a9f98eaab2 ] + +This patch removes a test in start_new_rx_buffer() that checks whether +a copy operation is less than MAX_BUFFER_OFFSET in length, since +MAX_BUFFER_OFFSET is defined to be PAGE_SIZE and the only caller of +start_new_rx_buffer() already limits copy operations to PAGE_SIZE or less. + +Signed-off-by: Paul Durrant +Cc: Ian Campbell +Cc: Wei Liu +Cc: Sander Eikelenboom +Reported-By: Sander Eikelenboom +Tested-By: Sander Eikelenboom +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/netback.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -192,8 +192,8 @@ static bool start_new_rx_buffer(int offs + * into multiple copies tend to give large frags their + * own buffers as before. 
+ */ +- if ((offset + size > MAX_BUFFER_OFFSET) && +- (size <= MAX_BUFFER_OFFSET) && offset && !head) ++ BUG_ON(size > MAX_BUFFER_OFFSET); ++ if ((offset + size > MAX_BUFFER_OFFSET) && offset && !head) + return true; + + return false; diff --git a/queue-3.14/xen-netback-worse-case-estimate-in-xenvif_rx_action-is-underestimating.patch b/queue-3.14/xen-netback-worse-case-estimate-in-xenvif_rx_action-is-underestimating.patch new file mode 100644 index 00000000000..e90a80c9924 --- /dev/null +++ b/queue-3.14/xen-netback-worse-case-estimate-in-xenvif_rx_action-is-underestimating.patch @@ -0,0 +1,62 @@ +From foo@baz Thu Apr 10 20:31:46 PDT 2014 +From: Paul Durrant +Date: Fri, 28 Mar 2014 11:39:06 +0000 +Subject: xen-netback: worse-case estimate in xenvif_rx_action is underestimating + +From: Paul Durrant + +[ Upstream commit a02eb4732cf975d7fc71b6d1a71c058c9988b949 ] + +The worse-case estimate for skb ring slot usage in xenvif_rx_action() +fails to take fragment page_offset into account. The page_offset does, +however, affect the number of times the fragmentation code calls +start_new_rx_buffer() (i.e. consume another slot) and the worse-case +should assume that will always return true. This patch adds the page_offset +into the DIV_ROUND_UP for each frag. + +Unfortunately some frontends aggressively limit the number of requests +they post into the shared ring so to avoid an estimate that is 'too' +pessimal it is capped at MAX_SKB_FRAGS. + +Signed-off-by: Paul Durrant +Cc: Ian Campbell +Cc: Wei Liu +Cc: Sander Eikelenboom +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/netback.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -493,9 +493,28 @@ static void xenvif_rx_action(struct xenv + PAGE_SIZE); + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + unsigned int size; ++ unsigned int offset; ++ + size = skb_frag_size(&skb_shinfo(skb)->frags[i]); +- max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE); ++ offset = skb_shinfo(skb)->frags[i].page_offset; ++ ++ /* For a worse-case estimate we need to factor in ++ * the fragment page offset as this will affect the ++ * number of times xenvif_gop_frag_copy() will ++ * call start_new_rx_buffer(). ++ */ ++ max_slots_needed += DIV_ROUND_UP(offset + size, ++ PAGE_SIZE); + } ++ ++ /* To avoid the estimate becoming too pessimal for some ++ * frontends that limit posted rx requests, cap the estimate ++ * at MAX_SKB_FRAGS. ++ */ ++ if (max_slots_needed > MAX_SKB_FRAGS) ++ max_slots_needed = MAX_SKB_FRAGS; ++ ++ /* We may need one more slot for GSO metadata */ + if (skb_is_gso(skb) && + (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) -- 2.47.3