From: Greg Kroah-Hartman Date: Thu, 19 Jul 2018 06:33:52 +0000 (+0200) Subject: 4.17-stable patches X-Git-Tag: v4.4.142~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2e7dcb5c1891105a5aac4096507cd8e8e952645e;p=thirdparty%2Fkernel%2Fstable-queue.git 4.17-stable patches added patches: alx-take-rtnl-before-calling-__alx_open-from-resume.patch atm-preserve-value-of-skb-truesize-when-accounting-to-vcc.patch atm-zatm-fix-potential-spectre-v1.patch enic-do-not-overwrite-error-code.patch hinic-reset-irq-affinity-before-freeing-irq.patch hv_netvsc-split-sub-channel-setup-into-async-and-sync.patch i40e-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch ib-mlx5-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch ipv6-sr-fix-passing-wrong-flags-to-crypto_alloc_shash.patch ipvlan-fix-ifla_mtu-ignored-on-newlink.patch ixgbe-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch net-dccp-avoid-crash-in-ccid3_hc_rx_send_feedback.patch net-dccp-switch-rx_tstamp_last_feedback-to-monotonic-clock.patch net-fix-use-after-free-in-gro-with-esp.patch net-macb-fix-ptp-time-adjustment-for-large-negative-delta.patch net-macb-initialize-bp-queues.bp-for-at91rm9200.patch net-mlx5-e-switch-avoid-setup-attempt-if-not-being-e-switch-manager.patch net-mlx5-fix-command-interface-race-in-polling-mode.patch net-mlx5-fix-incorrect-raw-command-length-parsing.patch net-mlx5-fix-required-capability-for-manipulating-mpfs.patch net-mlx5-fix-wrong-size-allocation-for-qos-etc-tc-regitster.patch net-mlx5e-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch net-mlx5e-don-t-attempt-to-dereference-the-ppriv-struct-if-not-being-eswitch-manager.patch net-mvneta-fix-the-rx-desc-dma-address-in-the-rx-path.patch net-packet-fix-use-after-free.patch net-sched-act_ife-fix-recursive-lock-and-idr-leak.patch net-sched-act_ife-preserve-the-action-control-in-case-of-error.patch net-sungem-fix-rx-checksum-support.patch 
net-tcp-fix-socket-lookups-with-so_bindtodevice.patch net-use-dev_change_tx_queue_len-for-siocsiftxqlen.patch net_sched-blackhole-tell-upper-qdisc-about-dropped-packets.patch nfp-flower-fix-mpls-ether-type-detection.patch nfp-reject-binding-to-shared-blocks.patch qed-fix-setting-of-incorrect-eswitch-mode.patch qed-fix-use-of-incorrect-size-in-memcpy-call.patch qed-limit-msix-vectors-in-kdump-kernel-to-the-minimum-required-count.patch qede-adverstise-software-timestamp-caps-when-phc-is-not-available.patch qmi_wwan-add-support-for-the-dell-wireless-5821e-module.patch r8152-napi-hangup-fix-after-disconnect.patch revert-s390-qeth-use-read-device-to-query-hypervisor-for-mac.patch s390-qeth-avoid-using-is_multicast_ether_addr_64bits-on-u8.patch s390-qeth-don-t-clobber-buffer-on-async-tx-completion.patch s390-qeth-fix-race-when-setting-mac-address.patch sfc-correctly-initialise-filter-rwsem-for-farch.patch stmmac-fix-dma-channel-hang-in-half-duplex-mode.patch strparser-remove-early-eaten-to-fix-full-tcp-receive-buffer-stall.patch tcp-fix-fast-open-key-endianness.patch tcp-prevent-bogus-frto-undos-with-non-sack-flows.patch vhost_net-validate-sock-before-trying-to-put-its-fd.patch virtio_net-split-xdp_tx-kick-and-xdp_redirect-map-flushing.patch vsock-fix-loopback-on-big-endian-systems.patch xen-netfront-fix-mismatched-rtnl_unlock.patch xen-netfront-update-features-after-registering-netdev.patch --- diff --git a/queue-4.17/alx-take-rtnl-before-calling-__alx_open-from-resume.patch b/queue-4.17/alx-take-rtnl-before-calling-__alx_open-from-resume.patch new file mode 100644 index 00000000000..78d4c1eff23 --- /dev/null +++ b/queue-4.17/alx-take-rtnl-before-calling-__alx_open-from-resume.patch @@ -0,0 +1,48 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Sabrina Dubroca +Date: Fri, 29 Jun 2018 17:51:26 +0200 +Subject: alx: take rtnl before calling __alx_open from resume + +From: Sabrina Dubroca + +[ Upstream commit bc800e8b39bad60ccdb83be828da63af71ab87b3 ] + +The __alx_open 
function can be called from ndo_open, which is called +under RTNL, or from alx_resume, which isn't. Since commit d768319cd427, +we're calling the netif_set_real_num_{tx,rx}_queues functions, which +need to be called under RTNL. + +This is similar to commit 0c2cc02e571a ("igb: Move the calls to set the +Tx and Rx queues into igb_open"). + +Fixes: d768319cd427 ("alx: enable multiple tx queues") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/atheros/alx/main.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/atheros/alx/main.c ++++ b/drivers/net/ethernet/atheros/alx/main.c +@@ -1897,13 +1897,19 @@ static int alx_resume(struct device *dev + struct pci_dev *pdev = to_pci_dev(dev); + struct alx_priv *alx = pci_get_drvdata(pdev); + struct alx_hw *hw = &alx->hw; ++ int err; + + alx_reset_phy(hw); + + if (!netif_running(alx->dev)) + return 0; + netif_device_attach(alx->dev); +- return __alx_open(alx, true); ++ ++ rtnl_lock(); ++ err = __alx_open(alx, true); ++ rtnl_unlock(); ++ ++ return err; + } + + static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); diff --git a/queue-4.17/atm-preserve-value-of-skb-truesize-when-accounting-to-vcc.patch b/queue-4.17/atm-preserve-value-of-skb-truesize-when-accounting-to-vcc.patch new file mode 100644 index 00000000000..b465c3029ee --- /dev/null +++ b/queue-4.17/atm-preserve-value-of-skb-truesize-when-accounting-to-vcc.patch @@ -0,0 +1,174 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: David Woodhouse +Date: Sat, 16 Jun 2018 11:55:44 +0100 +Subject: atm: Preserve value of skb->truesize when accounting to vcc + +From: David Woodhouse + +[ Upstream commit 9bbe60a67be5a1c6f79b3c9be5003481a50529ff ] + +ATM accounts for in-flight TX packets in sk_wmem_alloc of the VCC on +which they are to be sent. But it doesn't take ownership of those +packets from the sock (if any) which originally owned them. 
They should +remain owned by their actual sender until they've left the box. + +There's a hack in pskb_expand_head() to avoid adjusting skb->truesize +for certain skbs, precisely to avoid messing up sk_wmem_alloc +accounting. Ideally that hack would cover the ATM use case too, but it +doesn't — skbs which aren't owned by any sock, for example PPP control +frames, still get their truesize adjusted when the low-level ATM driver +adds headroom. + +This has always been an issue, it seems. The truesize of a packet +increases, and sk_wmem_alloc on the VCC goes negative. But this wasn't +for normal traffic, only for control frames. So I think we just got away +with it, and we probably needed to send 2GiB of LCP echo frames before +the misaccounting would ever have caused a problem and caused +atm_may_send() to start refusing packets. + +Commit 14afee4b609 ("net: convert sock.sk_wmem_alloc from atomic_t to +refcount_t") did exactly what it was intended to do, and turned this +mostly-theoretical problem into a real one, causing PPPoATM to fail +immediately as sk_wmem_alloc underflows and atm_may_send() *immediately* +starts refusing to allow new packets. + +The least intrusive solution to this problem is to stash the value of +skb->truesize that was accounted to the VCC, in a new member of the +ATM_SKB(skb) structure. Then in atm_pop_raw() subtract precisely that +value instead of the then-current value of skb->truesize. + +Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") +Signed-off-by: David Woodhouse +Tested-by: Kevin Darbyshire-Bryant +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/atmdev.h | 15 +++++++++++++++ + net/atm/br2684.c | 3 +-- + net/atm/clip.c | 3 +-- + net/atm/common.c | 3 +-- + net/atm/lec.c | 3 +-- + net/atm/mpc.c | 3 +-- + net/atm/pppoatm.c | 3 +-- + net/atm/raw.c | 4 ++-- + 8 files changed, 23 insertions(+), 14 deletions(-) + +--- a/include/linux/atmdev.h ++++ b/include/linux/atmdev.h +@@ -214,6 +214,7 @@ struct atmphy_ops { + struct atm_skb_data { + struct atm_vcc *vcc; /* ATM VCC */ + unsigned long atm_options; /* ATM layer options */ ++ unsigned int acct_truesize; /* truesize accounted to vcc */ + }; + + #define VCC_HTABLE_SIZE 32 +@@ -241,6 +242,20 @@ void vcc_insert_socket(struct sock *sk); + + void atm_dev_release_vccs(struct atm_dev *dev); + ++static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ++{ ++ /* ++ * Because ATM skbs may not belong to a sock (and we don't ++ * necessarily want to), skb->truesize may be adjusted, ++ * escaping the hack in pskb_expand_head() which avoids ++ * doing so for some cases. So stash the value of truesize ++ * at the time we accounted it, and atm_pop_raw() can use ++ * that value later, in case it changes. 
++ */ ++ refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); ++ ATM_SKB(skb)->acct_truesize = skb->truesize; ++ ATM_SKB(skb)->atm_options = vcc->atm_options; ++} + + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) + { +--- a/net/atm/br2684.c ++++ b/net/atm/br2684.c +@@ -252,8 +252,7 @@ static int br2684_xmit_vcc(struct sk_buf + + ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); +- refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); +- ATM_SKB(skb)->atm_options = atmvcc->atm_options; ++ atm_account_tx(atmvcc, skb); + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + +--- a/net/atm/clip.c ++++ b/net/atm/clip.c +@@ -381,8 +381,7 @@ static netdev_tx_t clip_start_xmit(struc + memcpy(here, llc_oui, sizeof(llc_oui)); + ((__be16 *) here)[3] = skb->protocol; + } +- refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); +- ATM_SKB(skb)->atm_options = vcc->atm_options; ++ atm_account_tx(vcc, skb); + entry->vccs->last_use = jiffies; + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); + old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... 
*/ +--- a/net/atm/common.c ++++ b/net/atm/common.c +@@ -630,10 +630,9 @@ int vcc_sendmsg(struct socket *sock, str + goto out; + } + pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); +- refcount_add(skb->truesize, &sk->sk_wmem_alloc); ++ atm_account_tx(vcc, skb); + + skb->dev = NULL; /* for paths shared with net_device interfaces */ +- ATM_SKB(skb)->atm_options = vcc->atm_options; + if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { + kfree_skb(skb); + error = -EFAULT; +--- a/net/atm/lec.c ++++ b/net/atm/lec.c +@@ -182,9 +182,8 @@ lec_send(struct atm_vcc *vcc, struct sk_ + struct net_device *dev = skb->dev; + + ATM_SKB(skb)->vcc = vcc; +- ATM_SKB(skb)->atm_options = vcc->atm_options; ++ atm_account_tx(vcc, skb); + +- refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + if (vcc->send(vcc, skb) < 0) { + dev->stats.tx_dropped++; + return; +--- a/net/atm/mpc.c ++++ b/net/atm/mpc.c +@@ -555,8 +555,7 @@ static int send_via_shortcut(struct sk_b + sizeof(struct llc_snap_hdr)); + } + +- refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); +- ATM_SKB(skb)->atm_options = entry->shortcut->atm_options; ++ atm_account_tx(entry->shortcut, skb); + entry->shortcut->send(entry->shortcut, skb); + entry->packets_fwded++; + mpc->in_ops->put(entry); +--- a/net/atm/pppoatm.c ++++ b/net/atm/pppoatm.c +@@ -350,8 +350,7 @@ static int pppoatm_send(struct ppp_chann + return 1; + } + +- refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); +- ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; ++ atm_account_tx(vcc, skb); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", + skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); + ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) +--- a/net/atm/raw.c ++++ b/net/atm/raw.c +@@ -35,8 +35,8 @@ static void atm_pop_raw(struct atm_vcc * + struct sock *sk = sk_atm(vcc); + + pr_debug("(%d) %d -= %d\n", +- vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); +- 
WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); ++ vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); ++ WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); + dev_kfree_skb_any(skb); + sk->sk_write_space(sk); + } diff --git a/queue-4.17/atm-zatm-fix-potential-spectre-v1.patch b/queue-4.17/atm-zatm-fix-potential-spectre-v1.patch new file mode 100644 index 00000000000..62defdb12c0 --- /dev/null +++ b/queue-4.17/atm-zatm-fix-potential-spectre-v1.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: "Gustavo A. R. Silva" +Date: Fri, 29 Jun 2018 13:28:07 -0500 +Subject: atm: zatm: Fix potential Spectre v1 + +From: "Gustavo A. R. Silva" + +[ Upstream commit ced9e191501e52b95e1b57b8e0db00943869eed0 ] + +pool can be indirectly controlled by user-space, hence leading to +a potential exploitation of the Spectre variant 1 vulnerability. + +This issue was detected with the help of Smatch: + +drivers/atm/zatm.c:1491 zatm_ioctl() warn: potential spectre issue +'zatm_dev->pool_info' (local cap) + +Fix this by sanitizing pool before using it to index +zatm_dev->pool_info + +Notice that given that speculation windows are large, the policy is +to kill the speculation on the first load and not worry if it can be +completed with a dependent load/store [1]. + +[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 + +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/atm/zatm.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/atm/zatm.c ++++ b/drivers/atm/zatm.c +@@ -1483,6 +1483,8 @@ static int zatm_ioctl(struct atm_dev *de + return -EFAULT; + if (pool < 0 || pool > ZATM_LAST_POOL) + return -EINVAL; ++ pool = array_index_nospec(pool, ++ ZATM_LAST_POOL + 1); + if (copy_from_user(&info, + &((struct zatm_pool_req __user *) arg)->info, + sizeof(info))) return -EFAULT; diff --git a/queue-4.17/enic-do-not-overwrite-error-code.patch b/queue-4.17/enic-do-not-overwrite-error-code.patch new file mode 100644 index 00000000000..aaa2af257c3 --- /dev/null +++ b/queue-4.17/enic-do-not-overwrite-error-code.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Govindarajulu Varadarajan +Date: Mon, 18 Jun 2018 10:01:05 -0700 +Subject: enic: do not overwrite error code + +From: Govindarajulu Varadarajan + +[ Upstream commit 56f772279a762984f6e9ebbf24a7c829faba5712 ] + +In failure path, we overwrite err to what vnic_rq_disable() returns. In +case it returns 0, enic_open() returns success in case of error. + +Reported-by: Ben Hutchings +Fixes: e8588e268509 ("enic: enable rq before updating rq descriptors") +Signed-off-by: Govindarajulu Varadarajan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cisco/enic/enic_main.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/cisco/enic/enic_main.c ++++ b/drivers/net/ethernet/cisco/enic/enic_main.c +@@ -1920,7 +1920,7 @@ static int enic_open(struct net_device * + { + struct enic *enic = netdev_priv(netdev); + unsigned int i; +- int err; ++ int err, ret; + + err = enic_request_intr(enic); + if (err) { +@@ -1977,10 +1977,9 @@ static int enic_open(struct net_device * + + err_out_free_rq: + for (i = 0; i < enic->rq_count; i++) { +- err = vnic_rq_disable(&enic->rq[i]); +- if (err) +- return err; +- vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); ++ ret = vnic_rq_disable(&enic->rq[i]); ++ if (!ret) ++ vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); + } + enic_dev_notify_unset(enic); + err_out_free_intr: diff --git a/queue-4.17/hinic-reset-irq-affinity-before-freeing-irq.patch b/queue-4.17/hinic-reset-irq-affinity-before-freeing-irq.patch new file mode 100644 index 00000000000..b33e963b4f8 --- /dev/null +++ b/queue-4.17/hinic-reset-irq-affinity-before-freeing-irq.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Wei Yongjun +Date: Wed, 27 Jun 2018 17:47:37 +0800 +Subject: hinic: reset irq affinity before freeing irq + +From: Wei Yongjun + +[ Upstream commit 82be2ab159a3a0ae4024b946a31f12b221f6c8ff ] + +Following warning is seen when rmmod hinic. This is because affinity +value is not reset before calling free_irq(). This patch fixes it. + +[ 55.181232] WARNING: CPU: 38 PID: 19589 at kernel/irq/manage.c:1608 +__free_irq+0x2aa/0x2c0 + +Fixes: 352f58b0d9f2 ("net-next/hinic: Set Rxq irq to specific cpu for NUMA") +Signed-off-by: Wei Yongjun +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/huawei/hinic/hinic_rx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c +@@ -439,6 +439,7 @@ static void rx_free_irq(struct hinic_rxq + { + struct hinic_rq *rq = rxq->rq; + ++ irq_set_affinity_hint(rq->irq, NULL); + free_irq(rq->irq, rxq); + rx_del_napi(rxq); + } diff --git a/queue-4.17/hv_netvsc-split-sub-channel-setup-into-async-and-sync.patch b/queue-4.17/hv_netvsc-split-sub-channel-setup-into-async-and-sync.patch new file mode 100644 index 00000000000..47b3ebc1a40 --- /dev/null +++ b/queue-4.17/hv_netvsc-split-sub-channel-setup-into-async-and-sync.patch @@ -0,0 +1,230 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Stephen Hemminger +Date: Fri, 29 Jun 2018 14:07:16 -0700 +Subject: hv_netvsc: split sub-channel setup into async and sync + +From: Stephen Hemminger + +[ Upstream commit 3ffe64f1a641b80a82d9ef4efa7a05ce69049871 ] + +When doing device hotplug the sub channel must be async to avoid +deadlock issues because device is discovered in softirq context. + +When doing changes to MTU and number of channels, the setup +must be synchronous to avoid races such as when MTU and device +settings are done in a single ip command. + +Reported-by: Thomas Walker +Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") +Fixes: 732e49850c5e ("netvsc: fix race on sub channel creation") +Signed-off-by: Stephen Hemminger +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/hyperv_net.h | 2 - + drivers/net/hyperv/netvsc.c | 37 ++++++++++++++++++++++- + drivers/net/hyperv/netvsc_drv.c | 17 +++++++++- + drivers/net/hyperv/rndis_filter.c | 61 +++++++------------------------------- + 4 files changed, 65 insertions(+), 52 deletions(-) + +--- a/drivers/net/hyperv/hyperv_net.h ++++ b/drivers/net/hyperv/hyperv_net.h +@@ -211,7 +211,7 @@ int netvsc_recv_callback(struct net_devi + void netvsc_channel_cb(void *context); + int netvsc_poll(struct napi_struct *napi, int budget); + +-void rndis_set_subchannel(struct work_struct *w); ++int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev); + int rndis_filter_open(struct netvsc_device *nvdev); + int rndis_filter_close(struct netvsc_device *nvdev); + struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, +--- a/drivers/net/hyperv/netvsc.c ++++ b/drivers/net/hyperv/netvsc.c +@@ -66,6 +66,41 @@ void netvsc_switch_datapath(struct net_d + VM_PKT_DATA_INBAND, 0); + } + ++/* Worker to setup sub channels on initial setup ++ * Initial hotplug event occurs in softirq context ++ * and can't wait for channels. 
++ */ ++static void netvsc_subchan_work(struct work_struct *w) ++{ ++ struct netvsc_device *nvdev = ++ container_of(w, struct netvsc_device, subchan_work); ++ struct rndis_device *rdev; ++ int i, ret; ++ ++ /* Avoid deadlock with device removal already under RTNL */ ++ if (!rtnl_trylock()) { ++ schedule_work(w); ++ return; ++ } ++ ++ rdev = nvdev->extension; ++ if (rdev) { ++ ret = rndis_set_subchannel(rdev->ndev, nvdev); ++ if (ret == 0) { ++ netif_device_attach(rdev->ndev); ++ } else { ++ /* fallback to only primary channel */ ++ for (i = 1; i < nvdev->num_chn; i++) ++ netif_napi_del(&nvdev->chan_table[i].napi); ++ ++ nvdev->max_chn = 1; ++ nvdev->num_chn = 1; ++ } ++ } ++ ++ rtnl_unlock(); ++} ++ + static struct netvsc_device *alloc_net_device(void) + { + struct netvsc_device *net_device; +@@ -82,7 +117,7 @@ static struct netvsc_device *alloc_net_d + + init_completion(&net_device->channel_init_wait); + init_waitqueue_head(&net_device->subchan_open); +- INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); ++ INIT_WORK(&net_device->subchan_work, netvsc_subchan_work); + + return net_device; + } +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -905,8 +905,20 @@ static int netvsc_attach(struct net_devi + if (IS_ERR(nvdev)) + return PTR_ERR(nvdev); + +- /* Note: enable and attach happen when sub-channels setup */ ++ if (nvdev->num_chn > 1) { ++ ret = rndis_set_subchannel(ndev, nvdev); ++ ++ /* if unavailable, just proceed with one queue */ ++ if (ret) { ++ nvdev->max_chn = 1; ++ nvdev->num_chn = 1; ++ } ++ } + ++ /* In any case device is now ready */ ++ netif_device_attach(ndev); ++ ++ /* Note: enable and attach happen when sub-channels setup */ + netif_carrier_off(ndev); + + if (netif_running(ndev)) { +@@ -2064,6 +2076,9 @@ static int netvsc_probe(struct hv_device + + memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + ++ if (nvdev->num_chn > 1) ++ schedule_work(&nvdev->subchan_work); ++ + /* hw_features computed in 
rndis_netdev_set_hwcaps() */ + net->features = net->hw_features | + NETIF_F_HIGHDMA | NETIF_F_SG | +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1061,29 +1061,15 @@ static void netvsc_sc_open(struct vmbus_ + * This breaks overlap of processing the host message for the + * new primary channel with the initialization of sub-channels. + */ +-void rndis_set_subchannel(struct work_struct *w) ++int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev) + { +- struct netvsc_device *nvdev +- = container_of(w, struct netvsc_device, subchan_work); + struct nvsp_message *init_packet = &nvdev->channel_init_pkt; +- struct net_device_context *ndev_ctx; +- struct rndis_device *rdev; +- struct net_device *ndev; +- struct hv_device *hv_dev; ++ struct net_device_context *ndev_ctx = netdev_priv(ndev); ++ struct hv_device *hv_dev = ndev_ctx->device_ctx; ++ struct rndis_device *rdev = nvdev->extension; + int i, ret; + +- if (!rtnl_trylock()) { +- schedule_work(w); +- return; +- } +- +- rdev = nvdev->extension; +- if (!rdev) +- goto unlock; /* device was removed */ +- +- ndev = rdev->ndev; +- ndev_ctx = netdev_priv(ndev); +- hv_dev = ndev_ctx->device_ctx; ++ ASSERT_RTNL(); + + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; +@@ -1099,13 +1085,13 @@ void rndis_set_subchannel(struct work_st + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) { + netdev_err(ndev, "sub channel allocate send failed: %d\n", ret); +- goto failed; ++ return ret; + } + + wait_for_completion(&nvdev->channel_init_wait); + if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { + netdev_err(ndev, "sub channel request failed\n"); +- goto failed; ++ return -EIO; + } + + nvdev->num_chn = 1 + +@@ -1124,21 +1110,7 @@ void rndis_set_subchannel(struct work_st + for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) + ndev_ctx->tx_table[i] = i % nvdev->num_chn; + +- netif_device_attach(ndev); 
+- rtnl_unlock(); +- return; +- +-failed: +- /* fallback to only primary channel */ +- for (i = 1; i < nvdev->num_chn; i++) +- netif_napi_del(&nvdev->chan_table[i].napi); +- +- nvdev->max_chn = 1; +- nvdev->num_chn = 1; +- +- netif_device_attach(ndev); +-unlock: +- rtnl_unlock(); ++ return 0; + } + + static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, +@@ -1329,21 +1301,12 @@ struct netvsc_device *rndis_filter_devic + netif_napi_add(net, &net_device->chan_table[i].napi, + netvsc_poll, NAPI_POLL_WEIGHT); + +- if (net_device->num_chn > 1) +- schedule_work(&net_device->subchan_work); ++ return net_device; + + out: +- /* if unavailable, just proceed with one queue */ +- if (ret) { +- net_device->max_chn = 1; +- net_device->num_chn = 1; +- } +- +- /* No sub channels, device is ready */ +- if (net_device->num_chn == 1) +- netif_device_attach(net); +- +- return net_device; ++ /* setting up multiple channels failed */ ++ net_device->max_chn = 1; ++ net_device->num_chn = 1; + + err_dev_remv: + rndis_filter_device_remove(dev, net_device); diff --git a/queue-4.17/i40e-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch b/queue-4.17/i40e-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch new file mode 100644 index 00000000000..382f2bcfcbb --- /dev/null +++ b/queue-4.17/i40e-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch @@ -0,0 +1,89 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Jesper Dangaard Brouer +Date: Tue, 26 Jun 2018 17:39:53 +0200 +Subject: i40e: split XDP_TX tail and XDP_REDIRECT map flushing + +From: Jesper Dangaard Brouer + +[ Upstream commit 2e6893123830d04edc366e0ce59d46e622e140c1 ] + +The driver was combining the XDP_TX tail flush and XDP_REDIRECT +map flushing (xdp_do_flush_map). This is suboptimal, these two +flush operations should be kept separate. + +It looks like the mistake was copy-pasted from ixgbe. 
+ +Fixes: d9314c474d4f ("i40e: add support for XDP_REDIRECT") +Signed-off-by: Jesper Dangaard Brouer +Acked-by: Björn Töpel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++++--------- + 1 file changed, 15 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +@@ -2199,9 +2199,10 @@ static bool i40e_is_non_eop(struct i40e_ + return true; + } + +-#define I40E_XDP_PASS 0 +-#define I40E_XDP_CONSUMED 1 +-#define I40E_XDP_TX 2 ++#define I40E_XDP_PASS 0 ++#define I40E_XDP_CONSUMED BIT(0) ++#define I40E_XDP_TX BIT(1) ++#define I40E_XDP_REDIR BIT(2) + + static int i40e_xmit_xdp_ring(struct xdp_buff *xdp, + struct i40e_ring *xdp_ring); +@@ -2235,7 +2236,7 @@ static struct sk_buff *i40e_run_xdp(stru + break; + case XDP_REDIRECT: + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); +- result = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED; ++ result = !err ? 
I40E_XDP_REDIR : I40E_XDP_CONSUMED; + break; + default: + bpf_warn_invalid_xdp_action(act); +@@ -2298,7 +2299,8 @@ static int i40e_clean_rx_irq(struct i40e + unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; + u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); +- bool failure = false, xdp_xmit = false; ++ unsigned int xdp_xmit = 0; ++ bool failure = false; + struct xdp_buff xdp; + + xdp.rxq = &rx_ring->xdp_rxq; +@@ -2359,8 +2361,10 @@ static int i40e_clean_rx_irq(struct i40e + } + + if (IS_ERR(skb)) { +- if (PTR_ERR(skb) == -I40E_XDP_TX) { +- xdp_xmit = true; ++ unsigned int xdp_res = -PTR_ERR(skb); ++ ++ if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { ++ xdp_xmit |= xdp_res; + i40e_rx_buffer_flip(rx_ring, rx_buffer, size); + } else { + rx_buffer->pagecnt_bias++; +@@ -2414,12 +2418,14 @@ static int i40e_clean_rx_irq(struct i40e + total_rx_packets++; + } + +- if (xdp_xmit) { ++ if (xdp_xmit & I40E_XDP_REDIR) ++ xdp_do_flush_map(); ++ ++ if (xdp_xmit & I40E_XDP_TX) { + struct i40e_ring *xdp_ring = + rx_ring->vsi->xdp_rings[rx_ring->queue_index]; + + i40e_xdp_ring_update_tail(xdp_ring); +- xdp_do_flush_map(); + } + + rx_ring->skb = skb; diff --git a/queue-4.17/ib-mlx5-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch b/queue-4.17/ib-mlx5-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch new file mode 100644 index 00000000000..738a6ac85ab --- /dev/null +++ b/queue-4.17/ib-mlx5-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Or Gerlitz +Date: Thu, 31 May 2018 11:40:17 +0300 +Subject: IB/mlx5: Avoid dealing with vport representors if not being e-switch manager + +From: Or Gerlitz + +[ Upstream commit aff2252a2ad3844ca47bf2f18af071101baace40 ] + +In smartnic env, the host (PF) driver might not be an e-switch +manager, hence the switchdev mode representors are running on +the embedded cpu (EC) 
and not at the host. + +As such, we should avoid dealing with vport representors if +not being esw manager. + +Fixes: b5ca15ad7e61 ('IB/mlx5: Add proper representors support') +Signed-off-by: Or Gerlitz +Reviewed-by: Eli Cohen +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx5/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -5736,7 +5736,7 @@ static void *mlx5_ib_add(struct mlx5_cor + dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), + MLX5_CAP_GEN(mdev, num_vhca_ports)); + +- if (MLX5_VPORT_MANAGER(mdev) && ++ if (MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); + diff --git a/queue-4.17/ipv6-sr-fix-passing-wrong-flags-to-crypto_alloc_shash.patch b/queue-4.17/ipv6-sr-fix-passing-wrong-flags-to-crypto_alloc_shash.patch new file mode 100644 index 00000000000..40a5a2baedc --- /dev/null +++ b/queue-4.17/ipv6-sr-fix-passing-wrong-flags-to-crypto_alloc_shash.patch @@ -0,0 +1,31 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Eric Biggers +Date: Sat, 30 Jun 2018 15:26:56 -0700 +Subject: ipv6: sr: fix passing wrong flags to crypto_alloc_shash() + +From: Eric Biggers + +[ Upstream commit fc9c2029e37c3ae9efc28bf47045e0b87e09660c ] + +The 'mask' argument to crypto_alloc_shash() uses the CRYPTO_ALG_* flags, +not 'gfp_t'. So don't pass GFP_KERNEL to it. + +Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") +Signed-off-by: Eric Biggers +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6_hmac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/seg6_hmac.c ++++ b/net/ipv6/seg6_hmac.c +@@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void) + return -ENOMEM; + + for_each_possible_cpu(cpu) { +- tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); ++ tfm = crypto_alloc_shash(algo->name, 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + p_tfm = per_cpu_ptr(algo->tfms, cpu); diff --git a/queue-4.17/ipvlan-fix-ifla_mtu-ignored-on-newlink.patch b/queue-4.17/ipvlan-fix-ifla_mtu-ignored-on-newlink.patch new file mode 100644 index 00000000000..f87af7115d7 --- /dev/null +++ b/queue-4.17/ipvlan-fix-ifla_mtu-ignored-on-newlink.patch @@ -0,0 +1,38 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Xin Long +Date: Thu, 21 Jun 2018 12:56:04 +0800 +Subject: ipvlan: fix IFLA_MTU ignored on NEWLINK + +From: Xin Long + +[ Upstream commit 30877961b1cdd6fdca783c2e8c4f0f47e95dc58c ] + +Commit 296d48568042 ("ipvlan: inherit MTU from master device") adjusted +the mtu from the master device when creating a ipvlan device, but it +would also override the mtu value set in rtnl_create_link. It causes +IFLA_MTU param not to take effect. + +So this patch is to not adjust the mtu if IFLA_MTU param is set when +creating a ipvlan device. + +Fixes: 296d48568042 ("ipvlan: inherit MTU from master device") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipvlan/ipvlan_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ipvlan/ipvlan_main.c ++++ b/drivers/net/ipvlan/ipvlan_main.c +@@ -594,7 +594,8 @@ int ipvlan_link_new(struct net *src_net, + ipvlan->phy_dev = phy_dev; + ipvlan->dev = dev; + ipvlan->sfeatures = IPVLAN_FEATURES; +- ipvlan_adjust_mtu(ipvlan, phy_dev); ++ if (!tb[IFLA_MTU]) ++ ipvlan_adjust_mtu(ipvlan, phy_dev); + INIT_LIST_HEAD(&ipvlan->addrs); + spin_lock_init(&ipvlan->addrs_lock); + diff --git a/queue-4.17/ixgbe-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch b/queue-4.17/ixgbe-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch new file mode 100644 index 00000000000..1833b2e3926 --- /dev/null +++ b/queue-4.17/ixgbe-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch @@ -0,0 +1,89 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Jesper Dangaard Brouer +Date: Tue, 26 Jun 2018 17:39:48 +0200 +Subject: ixgbe: split XDP_TX tail and XDP_REDIRECT map flushing + +From: Jesper Dangaard Brouer + +[ Upstream commit ad088ec480768850db019a5cc543685e868a513d ] + +The driver was combining the XDP_TX tail flush and XDP_REDIRECT +map flushing (xdp_do_flush_map). This is suboptimal, these two +flush operations should be kept separate. + +Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") +Signed-off-by: Jesper Dangaard Brouer +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -2257,9 +2257,10 @@ static struct sk_buff *ixgbe_build_skb(s + return skb; + } + +-#define IXGBE_XDP_PASS 0 +-#define IXGBE_XDP_CONSUMED 1 +-#define IXGBE_XDP_TX 2 ++#define IXGBE_XDP_PASS 0 ++#define IXGBE_XDP_CONSUMED BIT(0) ++#define IXGBE_XDP_TX BIT(1) ++#define IXGBE_XDP_REDIR BIT(2) + + static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, + struct xdp_buff *xdp); +@@ -2288,7 +2289,7 @@ static struct sk_buff *ixgbe_run_xdp(str + case XDP_REDIRECT: + err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); + if (!err) +- result = IXGBE_XDP_TX; ++ result = IXGBE_XDP_REDIR; + else + result = IXGBE_XDP_CONSUMED; + break; +@@ -2348,7 +2349,7 @@ static int ixgbe_clean_rx_irq(struct ixg + unsigned int mss = 0; + #endif /* IXGBE_FCOE */ + u16 cleaned_count = ixgbe_desc_unused(rx_ring); +- bool xdp_xmit = false; ++ unsigned int xdp_xmit = 0; + struct xdp_buff xdp; + + xdp.rxq = &rx_ring->xdp_rxq; +@@ -2391,8 +2392,10 @@ static int ixgbe_clean_rx_irq(struct ixg + } + + if (IS_ERR(skb)) { +- if (PTR_ERR(skb) == -IXGBE_XDP_TX) { +- xdp_xmit = true; ++ unsigned int xdp_res = -PTR_ERR(skb); ++ ++ if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) { ++ xdp_xmit |= xdp_res; + ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size); + } else { + rx_buffer->pagecnt_bias++; +@@ -2464,7 +2467,10 @@ static int ixgbe_clean_rx_irq(struct ixg + total_rx_packets++; + } + +- if (xdp_xmit) { ++ if (xdp_xmit & IXGBE_XDP_REDIR) ++ xdp_do_flush_map(); ++ ++ if (xdp_xmit & IXGBE_XDP_TX) { + struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; + + /* Force memory writes to complete before letting h/w +@@ -2472,8 +2478,6 @@ static int ixgbe_clean_rx_irq(struct ixg + */ + wmb(); + 
writel(ring->next_to_use, ring->tail); +- +- xdp_do_flush_map(); + } + + u64_stats_update_begin(&rx_ring->syncp); diff --git a/queue-4.17/net-dccp-avoid-crash-in-ccid3_hc_rx_send_feedback.patch b/queue-4.17/net-dccp-avoid-crash-in-ccid3_hc_rx_send_feedback.patch new file mode 100644 index 00000000000..adaa0a195dd --- /dev/null +++ b/queue-4.17/net-dccp-avoid-crash-in-ccid3_hc_rx_send_feedback.patch @@ -0,0 +1,71 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Eric Dumazet +Date: Fri, 22 Jun 2018 06:44:14 -0700 +Subject: net: dccp: avoid crash in ccid3_hc_rx_send_feedback() + +From: Eric Dumazet + +[ Upstream commit 74174fe5634ffbf645a7ca5a261571f700b2f332 ] + +On fast hosts or malicious bots, we trigger a DCCP_BUG() which +seems excessive. + +syzbot reported : + +BUG: delta (-6195) <= 0 at net/dccp/ccids/ccid3.c:628/ccid3_hc_rx_send_feedback() +CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.18.0-rc1+ #112 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + ccid3_hc_rx_send_feedback net/dccp/ccids/ccid3.c:628 [inline] + ccid3_hc_rx_packet_recv.cold.16+0x38/0x71 net/dccp/ccids/ccid3.c:793 + ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline] + dccp_deliver_input_to_ccids+0xf0/0x280 net/dccp/input.c:180 + dccp_rcv_established+0x87/0xb0 net/dccp/input.c:378 + dccp_v4_do_rcv+0x153/0x180 net/dccp/ipv4.c:654 + sk_backlog_rcv include/net/sock.h:914 [inline] + __sk_receive_skb+0x3ba/0xd80 net/core/sock.c:517 + dccp_v4_rcv+0x10f9/0x1f58 net/dccp/ipv4.c:875 + ip_local_deliver_finish+0x2eb/0xda0 net/ipv4/ip_input.c:215 + NF_HOOK include/linux/netfilter.h:287 [inline] + ip_local_deliver+0x1e9/0x750 net/ipv4/ip_input.c:256 + dst_input include/net/dst.h:450 [inline] + ip_rcv_finish+0x823/0x2220 net/ipv4/ip_input.c:396 + NF_HOOK include/linux/netfilter.h:287 [inline] + ip_rcv+0xa18/0x1284 net/ipv4/ip_input.c:492 + 
__netif_receive_skb_core+0x2488/0x3680 net/core/dev.c:4628 + __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 + process_backlog+0x219/0x760 net/core/dev.c:5373 + napi_poll net/core/dev.c:5771 [inline] + net_rx_action+0x7da/0x1980 net/core/dev.c:5837 + __do_softirq+0x2e8/0xb17 kernel/softirq.c:284 + run_ksoftirqd+0x86/0x100 kernel/softirq.c:645 + smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 + kthread+0x345/0x410 kernel/kthread.c:240 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 + +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Gerrit Renker +Cc: dccp@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid3.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/dccp/ccids/ccid3.c ++++ b/net/dccp/ccids/ccid3.c +@@ -625,9 +625,8 @@ static void ccid3_hc_rx_send_feedback(st + case CCID3_FBACK_PERIODIC: + delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback); + if (delta <= 0) +- DCCP_BUG("delta (%ld) <= 0", (long)delta); +- else +- hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); ++ delta = 1; ++ hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); + break; + default: + return; diff --git a/queue-4.17/net-dccp-switch-rx_tstamp_last_feedback-to-monotonic-clock.patch b/queue-4.17/net-dccp-switch-rx_tstamp_last_feedback-to-monotonic-clock.patch new file mode 100644 index 00000000000..80961b36be4 --- /dev/null +++ b/queue-4.17/net-dccp-switch-rx_tstamp_last_feedback-to-monotonic-clock.patch @@ -0,0 +1,65 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Eric Dumazet +Date: Fri, 22 Jun 2018 06:44:15 -0700 +Subject: net: dccp: switch rx_tstamp_last_feedback to monotonic clock + +From: Eric Dumazet + +[ Upstream commit 0ce4e70ff00662ad7490e545ba0cd8c1fa179fca ] + +To compute delays, better not use time of the day which can +be changed by admins or malicious programs. + +Also change ccid3_first_li() to use s64 type for delta variable +to avoid potential overflows. 
+ +Signed-off-by: Eric Dumazet +Cc: Gerrit Renker +Cc: dccp@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid3.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/dccp/ccids/ccid3.c ++++ b/net/dccp/ccids/ccid3.c +@@ -600,7 +600,7 @@ static void ccid3_hc_rx_send_feedback(st + { + struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); +- ktime_t now = ktime_get_real(); ++ ktime_t now = ktime_get(); + s64 delta = 0; + + switch (fbtype) { +@@ -632,7 +632,7 @@ static void ccid3_hc_rx_send_feedback(st + return; + } + +- ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, ++ ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta, + hc->rx_x_recv, hc->rx_pinv); + + hc->rx_tstamp_last_feedback = now; +@@ -679,7 +679,8 @@ static int ccid3_hc_rx_insert_options(st + static u32 ccid3_first_li(struct sock *sk) + { + struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); +- u32 x_recv, p, delta; ++ u32 x_recv, p; ++ s64 delta; + u64 fval; + + if (hc->rx_rtt == 0) { +@@ -687,7 +688,9 @@ static u32 ccid3_first_li(struct sock *s + hc->rx_rtt = DCCP_FALLBACK_RTT; + } + +- delta = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback)); ++ delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback); ++ if (delta <= 0) ++ delta = 1; + x_recv = scaled_div32(hc->rx_bytes_recv, delta); + if (x_recv == 0) { /* would also trigger divide-by-zero */ + DCCP_WARN("X_recv==0\n"); diff --git a/queue-4.17/net-fix-use-after-free-in-gro-with-esp.patch b/queue-4.17/net-fix-use-after-free-in-gro-with-esp.patch new file mode 100644 index 00000000000..eb1fb62b409 --- /dev/null +++ b/queue-4.17/net-fix-use-after-free-in-gro-with-esp.patch @@ -0,0 +1,141 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Sabrina Dubroca +Date: Sat, 30 Jun 2018 17:38:55 +0200 +Subject: net: fix use-after-free in GRO with ESP + +From: Sabrina Dubroca + +[ Upstream commit 
603d4cf8fe095b1ee78f423d514427be507fb513 ] + +Since the addition of GRO for ESP, gro_receive can consume the skb and +return -EINPROGRESS. In that case, the lower layer GRO handler cannot +touch the skb anymore. + +Commit 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") converted +some of the gro_receive handlers that can lead to ESP's gro_receive so +that they wouldn't access the skb when -EINPROGRESS is returned, but +missed other spots, mainly in tunneling protocols. + +This patch finishes the conversion to using skb_gro_flush_final(), and +adds a new helper, skb_gro_flush_final_remcsum(), used in VXLAN and +GUE. + +Fixes: 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Stefano Brivio +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 2 +- + drivers/net/vxlan.c | 4 +--- + include/linux/netdevice.h | 20 ++++++++++++++++++++ + net/8021q/vlan.c | 2 +- + net/ipv4/fou.c | 4 +--- + net/ipv4/gre_offload.c | 2 +- + net/ipv4/udp_offload.c | 2 +- + 7 files changed, 26 insertions(+), 10 deletions(-) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -474,7 +474,7 @@ static struct sk_buff **geneve_gro_recei + out_unlock: + rcu_read_unlock(); + out: +- NAPI_GRO_CB(skb)->flush |= flush; ++ skb_gro_flush_final(skb, pp, flush); + + return pp; + } +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -623,9 +623,7 @@ static struct sk_buff **vxlan_gro_receiv + flush = 0; + + out: +- skb_gro_remcsum_cleanup(skb, &grc); +- skb->remcsum_offload = 0; +- NAPI_GRO_CB(skb)->flush |= flush; ++ skb_gro_flush_final_remcsum(skb, pp, flush, &grc); + + return pp; + } +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2735,11 +2735,31 @@ static inline void skb_gro_flush_final(s + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; + } ++static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, ++ struct sk_buff **pp, ++ int 
flush, ++ struct gro_remcsum *grc) ++{ ++ if (PTR_ERR(pp) != -EINPROGRESS) { ++ NAPI_GRO_CB(skb)->flush |= flush; ++ skb_gro_remcsum_cleanup(skb, grc); ++ skb->remcsum_offload = 0; ++ } ++} + #else + static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) + { + NAPI_GRO_CB(skb)->flush |= flush; + } ++static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, ++ struct sk_buff **pp, ++ int flush, ++ struct gro_remcsum *grc) ++{ ++ NAPI_GRO_CB(skb)->flush |= flush; ++ skb_gro_remcsum_cleanup(skb, grc); ++ skb->remcsum_offload = 0; ++} + #endif + + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, +--- a/net/8021q/vlan.c ++++ b/net/8021q/vlan.c +@@ -688,7 +688,7 @@ static struct sk_buff **vlan_gro_receive + out_unlock: + rcu_read_unlock(); + out: +- NAPI_GRO_CB(skb)->flush |= flush; ++ skb_gro_flush_final(skb, pp, flush); + + return pp; + } +--- a/net/ipv4/fou.c ++++ b/net/ipv4/fou.c +@@ -448,9 +448,7 @@ next_proto: + out_unlock: + rcu_read_unlock(); + out: +- NAPI_GRO_CB(skb)->flush |= flush; +- skb_gro_remcsum_cleanup(skb, &grc); +- skb->remcsum_offload = 0; ++ skb_gro_flush_final_remcsum(skb, pp, flush, &grc); + + return pp; + } +--- a/net/ipv4/gre_offload.c ++++ b/net/ipv4/gre_offload.c +@@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive( + out_unlock: + rcu_read_unlock(); + out: +- NAPI_GRO_CB(skb)->flush |= flush; ++ skb_gro_flush_final(skb, pp, flush); + + return pp; + } +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -295,7 +295,7 @@ unflush: + out_unlock: + rcu_read_unlock(); + out: +- NAPI_GRO_CB(skb)->flush |= flush; ++ skb_gro_flush_final(skb, pp, flush); + return pp; + } + EXPORT_SYMBOL(udp_gro_receive); diff --git a/queue-4.17/net-macb-fix-ptp-time-adjustment-for-large-negative-delta.patch b/queue-4.17/net-macb-fix-ptp-time-adjustment-for-large-negative-delta.patch new file mode 100644 index 00000000000..9370be6f83e --- /dev/null +++ 
b/queue-4.17/net-macb-fix-ptp-time-adjustment-for-large-negative-delta.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Harini Katakam +Date: Wed, 20 Jun 2018 17:04:20 +0530 +Subject: net: macb: Fix ptp time adjustment for large negative delta + +From: Harini Katakam + +[ Upstream commit 64d7839af8c8f67daaf9bf387135052c55d85f90 ] + +When delta passed to gem_ptp_adjtime is negative, the sign is +maintained in the ns_to_timespec64 conversion. Hence timespec_add +should be used directly. timespec_sub will just subtract the negative +value thus increasing the time difference. + +Signed-off-by: Harini Katakam +Acked-by: Nicolas Ferre +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb_ptp.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/drivers/net/ethernet/cadence/macb_ptp.c ++++ b/drivers/net/ethernet/cadence/macb_ptp.c +@@ -170,10 +170,7 @@ static int gem_ptp_adjtime(struct ptp_cl + + if (delta > TSU_NSEC_MAX_VAL) { + gem_tsu_get_time(&bp->ptp_clock_info, &now); +- if (sign) +- now = timespec64_sub(now, then); +- else +- now = timespec64_add(now, then); ++ now = timespec64_add(now, then); + + gem_tsu_set_time(&bp->ptp_clock_info, + (const struct timespec64 *)&now); diff --git a/queue-4.17/net-macb-initialize-bp-queues.bp-for-at91rm9200.patch b/queue-4.17/net-macb-initialize-bp-queues.bp-for-at91rm9200.patch new file mode 100644 index 00000000000..fb42e1dbde5 --- /dev/null +++ b/queue-4.17/net-macb-initialize-bp-queues.bp-for-at91rm9200.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Alexandre Belloni +Date: Tue, 26 Jun 2018 10:44:01 +0200 +Subject: net: macb: initialize bp->queues[0].bp for at91rm9200 + +From: Alexandre Belloni + +[ Upstream commit fec9d3b1dc4c481f20f5d2f5aef3ad1cb7504186 ] + +The macb driver currently crashes on at91rm9200 with the following trace: + +Unable to handle kernel NULL pointer dereference at virtual address 
00000014 +[...] +[] (macb_rx_desc) from [] (at91ether_open+0x2e8/0x3f8) +[] (at91ether_open) from [] (__dev_open+0x120/0x13c) +[] (__dev_open) from [] (__dev_change_flags+0x17c/0x1a8) +[] (__dev_change_flags) from [] (dev_change_flags+0x18/0x4c) +[] (dev_change_flags) from [] (ip_auto_config+0x220/0x10b0) +[] (ip_auto_config) from [] (do_one_initcall+0x78/0x18c) +[] (do_one_initcall) from [] (kernel_init_freeable+0x184/0x1c4) +[] (kernel_init_freeable) from [] (kernel_init+0x8/0xe8) +[] (kernel_init) from [] (ret_from_fork+0x14/0x34) + +Solve that by initializing bp->queues[0].bp in at91ether_init (as is done +in macb_init). + +Fixes: ae1f2a56d273 ("net: macb: Added support for many RX queues") +Signed-off-by: Alexandre Belloni +Acked-by: Nicolas Ferre +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/cadence/macb_main.c ++++ b/drivers/net/ethernet/cadence/macb_main.c +@@ -3732,6 +3732,8 @@ static int at91ether_init(struct platfor + int err; + u32 reg; + ++ bp->queues[0].bp = bp; ++ + dev->netdev_ops = &at91ether_netdev_ops; + dev->ethtool_ops = &macb_ethtool_ops; + diff --git a/queue-4.17/net-mlx5-e-switch-avoid-setup-attempt-if-not-being-e-switch-manager.patch b/queue-4.17/net-mlx5-e-switch-avoid-setup-attempt-if-not-being-e-switch-manager.patch new file mode 100644 index 00000000000..117f5db141a --- /dev/null +++ b/queue-4.17/net-mlx5-e-switch-avoid-setup-attempt-if-not-being-e-switch-manager.patch @@ -0,0 +1,154 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Or Gerlitz +Date: Thu, 31 May 2018 11:16:18 +0300 +Subject: net/mlx5: E-Switch, Avoid setup attempt if not being e-switch manager + +From: Or Gerlitz + +[ Upstream commit 0efc8562491b7d36f6bbc4fbc8f3348cb6641e9c ] + +In smartnic env, the host (PF) driver might not be an e-switch +manager, hence the FW will err on driver attempts to deal with +setting/unsetting the 
eswitch and as a result the overall setup +of sriov will fail. + +Fix that by avoiding the operation if e-switch management is not +allowed for this driver instance. While here, move to use the +correct name for the esw manager capability name. + +Fixes: 81848731ff40 ('net/mlx5: E-Switch, Add SR-IOV (FDB) support') +Signed-off-by: Or Gerlitz +Reported-by: Guy Kushnir +Reviewed-by: Eli Cohen +Tested-by: Eli Cohen +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- + drivers/net/ethernet/mellanox/mlx5/core/fw.c | 5 +++-- + drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 7 ++++++- + include/linux/mlx5/eswitch.h | 2 ++ + include/linux/mlx5/mlx5_ifc.h | 2 +- + 7 files changed, 16 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +@@ -806,7 +806,7 @@ static bool mlx5e_is_vf_vport_rep(struct + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + +- if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table)) ++ if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + return false; + + rep = rpriv->rep; +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -1604,7 +1604,7 @@ int mlx5_eswitch_enable_sriov(struct mlx + if (!ESW_ALLOWED(esw)) + return 0; + +- if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || ++ if (!MLX5_ESWITCH_MANAGER(esw->dev) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -EOPNOTSUPP; +--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +@@ -32,6 +32,7 @@ + + #include + #include ++#include + + #include "mlx5_core.h" + #include "fs_core.h" +@@ -2631,7 
+2632,7 @@ int mlx5_init_fs(struct mlx5_core_dev *d + goto err; + } + +- if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { ++ if (MLX5_ESWITCH_MANAGER(dev)) { + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(steering); + if (err) +--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +@@ -32,6 +32,7 @@ + + #include + #include ++#include + #include + #include "mlx5_core.h" + #include "../../mlxfw/mlxfw.h" +@@ -159,13 +160,13 @@ int mlx5_query_hca_caps(struct mlx5_core + } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && +- MLX5_CAP_GEN(dev, eswitch_flow_table)) { ++ MLX5_ESWITCH_MANAGER(dev)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); + if (err) + return err; + } + +- if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { ++ if (MLX5_ESWITCH_MANAGER(dev)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); + if (err) + return err; +--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +@@ -88,6 +88,9 @@ static int mlx5_device_enable_sriov(stru + return -EBUSY; + } + ++ if (!MLX5_ESWITCH_MANAGER(dev)) ++ goto enable_vfs_hca; ++ + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + if (err) { + mlx5_core_warn(dev, +@@ -95,6 +98,7 @@ static int mlx5_device_enable_sriov(stru + return err; + } + ++enable_vfs_hca: + for (vf = 0; vf < num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf + 1); + if (err) { +@@ -140,7 +144,8 @@ static void mlx5_device_disable_sriov(st + } + + out: +- mlx5_eswitch_disable_sriov(dev->priv.eswitch); ++ if (MLX5_ESWITCH_MANAGER(dev)) ++ mlx5_eswitch_disable_sriov(dev->priv.eswitch); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); +--- a/include/linux/mlx5/eswitch.h ++++ b/include/linux/mlx5/eswitch.h +@@ -8,6 +8,8 @@ + + #include + ++#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) ++ + enum { + SRIOV_NONE, + 
SRIOV_LEGACY, +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -905,7 +905,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { + u8 vnic_env_queue_counters[0x1]; + u8 ets[0x1]; + u8 nic_flow_table[0x1]; +- u8 eswitch_flow_table[0x1]; ++ u8 eswitch_manager[0x1]; + u8 device_memory[0x1]; + u8 mcam_reg[0x1]; + u8 pcam_reg[0x1]; diff --git a/queue-4.17/net-mlx5-fix-command-interface-race-in-polling-mode.patch b/queue-4.17/net-mlx5-fix-command-interface-race-in-polling-mode.patch new file mode 100644 index 00000000000..6f78c01aab0 --- /dev/null +++ b/queue-4.17/net-mlx5-fix-command-interface-race-in-polling-mode.patch @@ -0,0 +1,79 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Alex Vesker +Date: Tue, 12 Jun 2018 16:14:31 +0300 +Subject: net/mlx5: Fix command interface race in polling mode + +From: Alex Vesker + +[ Upstream commit d412c31dae053bf30a1bc15582a9990df297a660 ] + +The command interface can work in two modes: Events and Polling. +In the general case, each time we invoke a command, a work is +queued to handle it. + +When working in events, the interrupt handler completes the +command execution. On the other hand, when working in polling +mode, the work itself completes it. + +Due to a bug in the work handler, a command could have been +completed by the interrupt handler, while the work handler +hasn't finished yet, causing it to complete once again +if the command interface mode was changed from Events to +polling after the interrupt handler was called. 
+ +mlx5_unload_one() + mlx5_stop_eqs() + // Destroy the EQ before cmd EQ + ...cmd_work_handler() + write_doorbell() + --> EVENT_TYPE_CMD + mlx5_cmd_comp_handler() // First free + free_ent(cmd, ent->idx) + complete(&ent->done) + + <-- mlx5_stop_eqs //cmd was complete + // move to polling before destroying the last cmd EQ + mlx5_cmd_use_polling() + cmd->mode = POLL; + + --> cmd_work_handler (continues) + if (cmd->mode == POLL) + mlx5_cmd_comp_handler() // Double free + +The solution is to store the cmd->mode before writing the doorbell. + +Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") +Signed-off-by: Alex Vesker +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -803,6 +803,7 @@ static void cmd_work_handler(struct work + unsigned long flags; + bool poll_cmd = ent->polling; + int alloc_ret; ++ int cmd_mode; + + sem = ent->page_queue ? 
&cmd->pages_sem : &cmd->sem; + down(sem); +@@ -849,6 +850,7 @@ static void cmd_work_handler(struct work + set_signature(ent, !cmd->checksum_disabled); + dump_command(dev, ent, 1); + ent->ts1 = ktime_get_ns(); ++ cmd_mode = cmd->mode; + + if (ent->callback) + schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); +@@ -873,7 +875,7 @@ static void cmd_work_handler(struct work + iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); + mmiowb(); + /* if not in polling don't use ent after this point */ +- if (cmd->mode == CMD_MODE_POLLING || poll_cmd) { ++ if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { + poll_timeout(ent); + /* make sure we read the descriptor after ownership is SW */ + rmb(); diff --git a/queue-4.17/net-mlx5-fix-incorrect-raw-command-length-parsing.patch b/queue-4.17/net-mlx5-fix-incorrect-raw-command-length-parsing.patch new file mode 100644 index 00000000000..27e9969466d --- /dev/null +++ b/queue-4.17/net-mlx5-fix-incorrect-raw-command-length-parsing.patch @@ -0,0 +1,42 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Alex Vesker +Date: Fri, 25 May 2018 20:25:59 +0300 +Subject: net/mlx5: Fix incorrect raw command length parsing + +From: Alex Vesker + +[ Upstream commit 603b7bcff824740500ddfa001d7a7168b0b38542 ] + +The NULL character was not set correctly for the string containing +the command length, this caused failures reading the output of the +command due to a random length. The fix is to initialize the output +length string. 
+ +Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") +Signed-off-by: Alex Vesker +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -1276,7 +1276,7 @@ static ssize_t outlen_write(struct file + { + struct mlx5_core_dev *dev = filp->private_data; + struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; +- char outlen_str[8]; ++ char outlen_str[8] = {0}; + int outlen; + void *ptr; + int err; +@@ -1291,8 +1291,6 @@ static ssize_t outlen_write(struct file + if (copy_from_user(outlen_str, buf, count)) + return -EFAULT; + +- outlen_str[7] = 0; +- + err = sscanf(outlen_str, "%d", &outlen); + if (err < 0) + return err; diff --git a/queue-4.17/net-mlx5-fix-required-capability-for-manipulating-mpfs.patch b/queue-4.17/net-mlx5-fix-required-capability-for-manipulating-mpfs.patch new file mode 100644 index 00000000000..46cb350dd0e --- /dev/null +++ b/queue-4.17/net-mlx5-fix-required-capability-for-manipulating-mpfs.patch @@ -0,0 +1,66 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Eli Cohen +Date: Wed, 13 Jun 2018 10:27:34 +0300 +Subject: net/mlx5: Fix required capability for manipulating MPFS + +From: Eli Cohen + +[ Upstream commit f811980444ec59ad62f9e041adbb576a821132c7 ] + +Manipulating of the MPFS requires eswitch manager capabilities. 
+ +Fixes: eeb66cdb6826 ('net/mlx5: Separate between E-Switch and MPFS') +Signed-off-by: Eli Cohen +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #include "mlx5_core.h" + #include "lib/mpfs.h" + +@@ -98,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev + int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + struct mlx5_mpfs *mpfs; + +- if (!MLX5_VPORT_MANAGER(dev)) ++ if (!MLX5_ESWITCH_MANAGER(dev)) + return 0; + + mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); +@@ -122,7 +123,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_ + { + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + +- if (!MLX5_VPORT_MANAGER(dev)) ++ if (!MLX5_ESWITCH_MANAGER(dev)) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); +@@ -137,7 +138,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_d + u32 index; + int err; + +- if (!MLX5_VPORT_MANAGER(dev)) ++ if (!MLX5_ESWITCH_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); +@@ -179,7 +180,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_d + int err = 0; + u32 index; + +- if (!MLX5_VPORT_MANAGER(dev)) ++ if (!MLX5_ESWITCH_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); diff --git a/queue-4.17/net-mlx5-fix-wrong-size-allocation-for-qos-etc-tc-regitster.patch b/queue-4.17/net-mlx5-fix-wrong-size-allocation-for-qos-etc-tc-regitster.patch new file mode 100644 index 00000000000..89814e7adf7 --- /dev/null +++ b/queue-4.17/net-mlx5-fix-wrong-size-allocation-for-qos-etc-tc-regitster.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Shay Agroskin +Date: Tue, 22 May 2018 14:14:02 +0300 +Subject: net/mlx5: Fix wrong size allocation for QoS ETC TC regitster + +From: Shay Agroskin + +[ Upstream commit 
d14fcb8d877caf1b8d6bd65d444bf62b21f2070c ] + +The driver allocates wrong size (due to wrong struct name) when issuing +a query/set request to NIC's register. + +Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate") +Signed-off-by: Shay Agroskin +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c +@@ -701,7 +701,7 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_prio_t + static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, + int inlen) + { +- u32 out[MLX5_ST_SZ_DW(qtct_reg)]; ++ u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; +@@ -713,7 +713,7 @@ static int mlx5_set_port_qetcr_reg(struc + static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, + int outlen) + { +- u32 in[MLX5_ST_SZ_DW(qtct_reg)]; ++ u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + + if (!MLX5_CAP_GEN(mdev, ets)) + return -EOPNOTSUPP; diff --git a/queue-4.17/net-mlx5e-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch b/queue-4.17/net-mlx5e-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch new file mode 100644 index 00000000000..84f924401c7 --- /dev/null +++ b/queue-4.17/net-mlx5e-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch @@ -0,0 +1,110 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Or Gerlitz +Date: Thu, 31 May 2018 11:32:56 +0300 +Subject: net/mlx5e: Avoid dealing with vport representors if not being e-switch manager + +From: Or Gerlitz + +[ Upstream commit 733d3e5497070d05971352ca5087bac83c197c3d ] + +In smartnic env, the host (PF) driver might not be an e-switch +manager, hence the switchdev mode representors are running on +the embedded cpu (EC) and not at the host. 
+ +As such, we should avoid dealing with vport representors if +not being esw manager. + +While here, make sure to disallow eswitch switchdev related +setups through devlink if we are not esw managers. + +Fixes: cb67b832921c ('net/mlx5e: Introduce SRIOV VF representors') +Signed-off-by: Or Gerlitz +Reviewed-by: Eli Cohen +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++------ + drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- + 3 files changed, 9 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -2612,7 +2612,7 @@ void mlx5e_activate_priv_channels(struct + mlx5e_activate_channels(&priv->channels); + netif_tx_start_all_queues(priv->netdev); + +- if (MLX5_VPORT_MANAGER(priv->mdev)) ++ if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_add_sqs_fwd_rules(priv); + + mlx5e_wait_channels_min_rx_wqes(&priv->channels); +@@ -2623,7 +2623,7 @@ void mlx5e_deactivate_priv_channels(stru + { + mlx5e_redirect_rqts_to_drop(priv); + +- if (MLX5_VPORT_MANAGER(priv->mdev)) ++ if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* FIXME: This is a W/A only for tx timeout watch dog false alarm when +@@ -4315,7 +4315,7 @@ static void mlx5e_build_nic_netdev(struc + mlx5e_set_netdev_dev_addr(netdev); + + #if IS_ENABLED(CONFIG_MLX5_ESWITCH) +- if (MLX5_VPORT_MANAGER(mdev)) ++ if (MLX5_ESWITCH_MANAGER(mdev)) + netdev->switchdev_ops = &mlx5e_switchdev_ops; + #endif + +@@ -4465,7 +4465,7 @@ static void mlx5e_nic_enable(struct mlx5 + + mlx5e_enable_async_events(priv); + +- if (MLX5_VPORT_MANAGER(priv->mdev)) ++ if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_register_vport_reps(priv); + + if (netdev->reg_state != NETREG_REGISTERED) +@@ -4500,7 +4500,7 @@ static void mlx5e_nic_disable(struct mlx + + queue_work(priv->wq, 
&priv->set_rx_mode_work); + +- if (MLX5_VPORT_MANAGER(priv->mdev)) ++ if (MLX5_ESWITCH_MANAGER(priv->mdev)) + mlx5e_unregister_vport_reps(priv); + + mlx5e_disable_async_events(priv); +@@ -4684,7 +4684,7 @@ static void *mlx5e_add(struct mlx5_core_ + return NULL; + + #ifdef CONFIG_MLX5_ESWITCH +- if (MLX5_VPORT_MANAGER(mdev)) { ++ if (MLX5_ESWITCH_MANAGER(mdev)) { + rpriv = mlx5e_alloc_nic_rep_priv(mdev); + if (!rpriv) { + mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +@@ -790,7 +790,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_pr + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; + +- if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) ++ if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + return false; + + rep = rpriv->rep; +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -983,8 +983,8 @@ static int mlx5_devlink_eswitch_check(st + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return -EOPNOTSUPP; + +- if (!MLX5_CAP_GEN(dev, vport_group_manager)) +- return -EOPNOTSUPP; ++ if(!MLX5_ESWITCH_MANAGER(dev)) ++ return -EPERM; + + if (dev->priv.eswitch->mode == SRIOV_NONE) + return -EOPNOTSUPP; diff --git a/queue-4.17/net-mlx5e-don-t-attempt-to-dereference-the-ppriv-struct-if-not-being-eswitch-manager.patch b/queue-4.17/net-mlx5e-don-t-attempt-to-dereference-the-ppriv-struct-if-not-being-eswitch-manager.patch new file mode 100644 index 00000000000..588207bacea --- /dev/null +++ b/queue-4.17/net-mlx5e-don-t-attempt-to-dereference-the-ppriv-struct-if-not-being-eswitch-manager.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Or Gerlitz +Date: Mon, 4 Jun 2018 19:46:53 +0300 +Subject: net/mlx5e: Don't attempt to dereference the ppriv struct if not being eswitch manager + +From: Or Gerlitz + +[ Upstream commit 
8ffd569aaa818f2624ca821d9a246342fa8b8c50 ] + +The check for cpu hit statistics was not returning immediate false for +any non vport rep netdev and hence we crashed (say on mlx5 probed VFs) if +user-space tool was calling into any possible netdev in the system. + +Fix that by doing a proper check before dereferencing. + +Fixes: 1d447a39142e ('net/mlx5e: Extendable vport representor netdev private data') +Signed-off-by: Or Gerlitz +Reported-by: Eli Cohen +Reviewed-by: Eli Cohen +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +@@ -804,8 +804,12 @@ bool mlx5e_is_uplink_rep(struct mlx5e_pr + static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) + { + struct mlx5e_rep_priv *rpriv = priv->ppriv; +- struct mlx5_eswitch_rep *rep = rpriv->rep; ++ struct mlx5_eswitch_rep *rep; + ++ if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table)) ++ return false; ++ ++ rep = rpriv->rep; + if (rep && rep->vport != FDB_UPLINK_VPORT) + return true; + diff --git a/queue-4.17/net-mvneta-fix-the-rx-desc-dma-address-in-the-rx-path.patch b/queue-4.17/net-mvneta-fix-the-rx-desc-dma-address-in-the-rx-path.patch new file mode 100644 index 00000000000..3fc14482be4 --- /dev/null +++ b/queue-4.17/net-mvneta-fix-the-rx-desc-dma-address-in-the-rx-path.patch @@ -0,0 +1,38 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Antoine Tenart +Date: Fri, 22 Jun 2018 10:15:39 +0200 +Subject: net: mvneta: fix the Rx desc DMA address in the Rx path + +From: Antoine Tenart + +[ Upstream commit 271f7ff5aa5a73488b7a9d8b84b5205fb5b2f7cc ] + +When using s/w buffer management, buffers are allocated and DMA mapped. +When doing so on an arm64 platform, an offset correction is applied on +the DMA address, before storing it in an Rx descriptor. 
The issue is +this DMA address is then used later in the Rx path without removing the +offset correction. Thus the DMA address is wrong, which can lead to +various issues. + +This patch fixes this by removing the offset correction from the DMA +address retrieved from the Rx descriptor before using it in the Rx path. + +Fixes: 8d5047cf9ca2 ("net: mvneta: Convert to be 64 bits compatible") +Signed-off-by: Antoine Tenart +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -1932,7 +1932,7 @@ static int mvneta_rx_swbm(struct mvneta_ + rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); + index = rx_desc - rxq->descs; + data = rxq->buf_virt_addr[index]; +- phys_addr = rx_desc->buf_phys_addr; ++ phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction; + + if (!mvneta_rxq_desc_is_first_last(rx_status) || + (rx_status & MVNETA_RXD_ERR_SUMMARY)) { diff --git a/queue-4.17/net-packet-fix-use-after-free.patch b/queue-4.17/net-packet-fix-use-after-free.patch new file mode 100644 index 00000000000..a99c1765995 --- /dev/null +++ b/queue-4.17/net-packet-fix-use-after-free.patch @@ -0,0 +1,176 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Eric Dumazet +Date: Thu, 21 Jun 2018 14:16:02 -0700 +Subject: net/packet: fix use-after-free + +From: Eric Dumazet + +[ Upstream commit 945d015ee0c3095d2290e845565a23dedfd8027c ] + +We should put copy_skb in receive_queue only after +a successful call to virtio_net_hdr_from_skb(). 
+ +syzbot report : + +BUG: KASAN: use-after-free in __skb_unlink include/linux/skbuff.h:1843 [inline] +BUG: KASAN: use-after-free in __skb_dequeue include/linux/skbuff.h:1863 [inline] +BUG: KASAN: use-after-free in skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815 +Read of size 8 at addr ffff8801b044ecc0 by task syz-executor217/4553 + +CPU: 0 PID: 4553 Comm: syz-executor217 Not tainted 4.18.0-rc1+ #111 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 + print_address_description+0x6c/0x20b mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + __skb_unlink include/linux/skbuff.h:1843 [inline] + __skb_dequeue include/linux/skbuff.h:1863 [inline] + skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815 + skb_queue_purge+0x26/0x40 net/core/skbuff.c:2852 + packet_set_ring+0x675/0x1da0 net/packet/af_packet.c:4331 + packet_release+0x630/0xd90 net/packet/af_packet.c:2991 + __sock_release+0xd7/0x260 net/socket.c:603 + sock_close+0x19/0x20 net/socket.c:1186 + __fput+0x35b/0x8b0 fs/file_table.c:209 + ____fput+0x15/0x20 fs/file_table.c:243 + task_work_run+0x1ec/0x2a0 kernel/task_work.c:113 + exit_task_work include/linux/task_work.h:22 [inline] + do_exit+0x1b08/0x2750 kernel/exit.c:865 + do_group_exit+0x177/0x440 kernel/exit.c:968 + __do_sys_exit_group kernel/exit.c:979 [inline] + __se_sys_exit_group kernel/exit.c:977 [inline] + __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x4448e9 +Code: Bad RIP value. 
+RSP: 002b:00007ffd5f777ca8 EFLAGS: 00000202 ORIG_RAX: 00000000000000e7 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004448e9 +RDX: 00000000004448e9 RSI: 000000000000fcfb RDI: 0000000000000001 +RBP: 00000000006cf018 R08: 00007ffd0000a45b R09: 0000000000000000 +R10: 00007ffd5f777e48 R11: 0000000000000202 R12: 00000000004021f0 +R13: 0000000000402280 R14: 0000000000000000 R15: 0000000000000000 + +Allocated by task 4553: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 + kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554 + skb_clone+0x1f5/0x500 net/core/skbuff.c:1282 + tpacket_rcv+0x28f7/0x3200 net/packet/af_packet.c:2221 + deliver_skb net/core/dev.c:1925 [inline] + deliver_ptype_list_skb net/core/dev.c:1940 [inline] + __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611 + __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 + netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767 + netif_receive_skb+0xbf/0x420 net/core/dev.c:4791 + tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571 + tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981 + tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009 + call_write_iter include/linux/fs.h:1795 [inline] + new_sync_write fs/read_write.c:474 [inline] + __vfs_write+0x6c6/0x9f0 fs/read_write.c:487 + vfs_write+0x1f8/0x560 fs/read_write.c:549 + ksys_write+0x101/0x260 fs/read_write.c:598 + __do_sys_write fs/read_write.c:610 [inline] + __se_sys_write fs/read_write.c:607 [inline] + __x64_sys_write+0x73/0xb0 fs/read_write.c:607 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 4553: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + 
kmem_cache_free+0x86/0x2d0 mm/slab.c:3756 + kfree_skbmem+0x154/0x230 net/core/skbuff.c:582 + __kfree_skb net/core/skbuff.c:642 [inline] + kfree_skb+0x1a5/0x580 net/core/skbuff.c:659 + tpacket_rcv+0x189e/0x3200 net/packet/af_packet.c:2385 + deliver_skb net/core/dev.c:1925 [inline] + deliver_ptype_list_skb net/core/dev.c:1940 [inline] + __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611 + __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 + netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767 + netif_receive_skb+0xbf/0x420 net/core/dev.c:4791 + tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571 + tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981 + tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009 + call_write_iter include/linux/fs.h:1795 [inline] + new_sync_write fs/read_write.c:474 [inline] + __vfs_write+0x6c6/0x9f0 fs/read_write.c:487 + vfs_write+0x1f8/0x560 fs/read_write.c:549 + ksys_write+0x101/0x260 fs/read_write.c:598 + __do_sys_write fs/read_write.c:610 [inline] + __se_sys_write fs/read_write.c:607 [inline] + __x64_sys_write+0x73/0xb0 fs/read_write.c:607 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +The buggy address belongs to the object at ffff8801b044ecc0 + which belongs to the cache skbuff_head_cache of size 232 +The buggy address is located 0 bytes inside of + 232-byte region [ffff8801b044ecc0, ffff8801b044eda8) +The buggy address belongs to the page: +page:ffffea0006c11380 count:1 mapcount:0 mapping:ffff8801d9be96c0 index:0x0 +flags: 0x2fffc0000000100(slab) +raw: 02fffc0000000100 ffffea0006c17988 ffff8801d9bec248 ffff8801d9be96c0 +raw: 0000000000000000 ffff8801b044e040 000000010000000c 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8801b044eb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff8801b044ec00: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc +>ffff8801b044ec80: fc fc fc fc fc fc fc fc fb fb 
fb fb fb fb fb fb + ^ + ffff8801b044ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8801b044ed80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc + +Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 16 +++++++--------- + 1 file changed, 7 insertions(+), 9 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2294,6 +2294,13 @@ static int tpacket_rcv(struct sk_buff *s + if (po->stats.stats1.tp_drops) + status |= TP_STATUS_LOSING; + } ++ ++ if (do_vnet && ++ virtio_net_hdr_from_skb(skb, h.raw + macoff - ++ sizeof(struct virtio_net_hdr), ++ vio_le(), true, 0)) ++ goto drop_n_account; ++ + po->stats.stats1.tp_packets++; + if (copy_skb) { + status |= TP_STATUS_COPY; +@@ -2301,15 +2308,6 @@ static int tpacket_rcv(struct sk_buff *s + } + spin_unlock(&sk->sk_receive_queue.lock); + +- if (do_vnet) { +- if (virtio_net_hdr_from_skb(skb, h.raw + macoff - +- sizeof(struct virtio_net_hdr), +- vio_le(), true, 0)) { +- spin_lock(&sk->sk_receive_queue.lock); +- goto drop_n_account; +- } +- } +- + skb_copy_bits(skb, 0, h.raw + macoff, snaplen); + + if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) diff --git a/queue-4.17/net-sched-act_ife-fix-recursive-lock-and-idr-leak.patch b/queue-4.17/net-sched-act_ife-fix-recursive-lock-and-idr-leak.patch new file mode 100644 index 00000000000..75153f03642 --- /dev/null +++ b/queue-4.17/net-sched-act_ife-fix-recursive-lock-and-idr-leak.patch @@ -0,0 +1,198 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Davide Caratti +Date: Tue, 19 Jun 2018 15:39:46 +0200 +Subject: net/sched: act_ife: fix recursive lock and idr leak + +From: Davide Caratti + +[ Upstream commit 0a889b9404c084c6fd145020c939a8f688b3e058 ] + +a recursive lock warning [1] can be observed with the following script, + + # $TC actions add 
action ife encode allow prio pass index 42 + IFE type 0xED3E + # $TC actions replace action ife encode allow tcindex pass index 42 + +in case the kernel was unable to run the last command (e.g. because of +the impossibility to load 'act_meta_skbtcindex'). For a similar reason, +the kernel can leak idr in the error path of tcf_ife_init(), because +tcf_idr_release() is not called after successful idr reservation: + + # $TC actions add action ife encode allow tcindex index 47 + IFE type 0xED3E + RTNETLINK answers: No such file or directory + We have an error talking to the kernel + # $TC actions add action ife encode allow tcindex index 47 + IFE type 0xED3E + RTNETLINK answers: No space left on device + We have an error talking to the kernel + # $TC actions add action ife encode use mark 7 type 0xfefe pass index 47 + IFE type 0xFEFE + RTNETLINK answers: No space left on device + We have an error talking to the kernel + +Since tcfa_lock is already taken when the action is being edited, a call +to tcf_idr_release() wrongly makes tcf_idr_cleanup() take the same lock +again. On the other hand, tcf_idr_release() needs to be called in the +error path of tcf_ife_init(), to undo the last tcf_idr_create() invocation. +Fix both problems in tcf_ife_init(). +Since the cleanup() routine can now be called when ife->params is NULL, +also add a NULL pointer check to avoid calling kfree_rcu(NULL, rcu). 
+ + [1] + ============================================ + WARNING: possible recursive locking detected + 4.17.0-rc4.kasan+ #417 Tainted: G E + -------------------------------------------- + tc/3932 is trying to acquire lock: + 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_cleanup+0x19/0x80 [act_ife] + + but task is already holding lock: + 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife] + + other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&(&p->tcfa_lock)->rlock); + lock(&(&p->tcfa_lock)->rlock); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + + 2 locks held by tc/3932: + #0: 000000007ca8e990 (rtnl_mutex){+.+.}, at: tcf_ife_init+0xf61/0x13c0 [act_ife] + #1: 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife] + + stack backtrace: + CPU: 3 PID: 3932 Comm: tc Tainted: G E 4.17.0-rc4.kasan+ #417 + Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 + Call Trace: + dump_stack+0x9a/0xeb + __lock_acquire+0xf43/0x34a0 + ? debug_check_no_locks_freed+0x2b0/0x2b0 + ? debug_check_no_locks_freed+0x2b0/0x2b0 + ? debug_check_no_locks_freed+0x2b0/0x2b0 + ? __mutex_lock+0x62f/0x1240 + ? kvm_sched_clock_read+0x1a/0x30 + ? sched_clock+0x5/0x10 + ? sched_clock_cpu+0x18/0x170 + ? find_held_lock+0x39/0x1d0 + ? lock_acquire+0x10b/0x330 + lock_acquire+0x10b/0x330 + ? tcf_ife_cleanup+0x19/0x80 [act_ife] + _raw_spin_lock_bh+0x38/0x70 + ? tcf_ife_cleanup+0x19/0x80 [act_ife] + tcf_ife_cleanup+0x19/0x80 [act_ife] + __tcf_idr_release+0xff/0x350 + tcf_ife_init+0xdde/0x13c0 [act_ife] + ? ife_exit_net+0x290/0x290 [act_ife] + ? __lock_is_held+0xb4/0x140 + tcf_action_init_1+0x67b/0xad0 + ? tcf_action_dump_old+0xa0/0xa0 + ? sched_clock+0x5/0x10 + ? sched_clock_cpu+0x18/0x170 + ? kvm_sched_clock_read+0x1a/0x30 + ? sched_clock+0x5/0x10 + ? sched_clock_cpu+0x18/0x170 + ? memset+0x1f/0x40 + tcf_action_init+0x30f/0x590 + ? 
tcf_action_init_1+0xad0/0xad0 + ? memset+0x1f/0x40 + tc_ctl_action+0x48e/0x5e0 + ? mutex_lock_io_nested+0x1160/0x1160 + ? tca_action_gd+0x990/0x990 + ? sched_clock+0x5/0x10 + ? find_held_lock+0x39/0x1d0 + rtnetlink_rcv_msg+0x4da/0x990 + ? validate_linkmsg+0x680/0x680 + ? sched_clock_cpu+0x18/0x170 + ? find_held_lock+0x39/0x1d0 + netlink_rcv_skb+0x127/0x350 + ? validate_linkmsg+0x680/0x680 + ? netlink_ack+0x970/0x970 + ? __kmalloc_node_track_caller+0x304/0x3a0 + netlink_unicast+0x40f/0x5d0 + ? netlink_attachskb+0x580/0x580 + ? _copy_from_iter_full+0x187/0x760 + ? import_iovec+0x90/0x390 + netlink_sendmsg+0x67f/0xb50 + ? netlink_unicast+0x5d0/0x5d0 + ? copy_msghdr_from_user+0x206/0x340 + ? netlink_unicast+0x5d0/0x5d0 + sock_sendmsg+0xb3/0xf0 + ___sys_sendmsg+0x60a/0x8b0 + ? copy_msghdr_from_user+0x340/0x340 + ? lock_downgrade+0x5e0/0x5e0 + ? tty_write_lock+0x18/0x50 + ? kvm_sched_clock_read+0x1a/0x30 + ? sched_clock+0x5/0x10 + ? sched_clock_cpu+0x18/0x170 + ? find_held_lock+0x39/0x1d0 + ? lock_downgrade+0x5e0/0x5e0 + ? lock_acquire+0x10b/0x330 + ? __audit_syscall_entry+0x316/0x690 + ? current_kernel_time64+0x6b/0xd0 + ? __fget_light+0x55/0x1f0 + ? __sys_sendmsg+0xd2/0x170 + __sys_sendmsg+0xd2/0x170 + ? __ia32_sys_shutdown+0x70/0x70 + ? syscall_trace_enter+0x57a/0xd60 + ? rcu_read_lock_sched_held+0xdc/0x110 + ? __bpf_trace_sys_enter+0x10/0x10 + ? 
do_syscall_64+0x22/0x480 + do_syscall_64+0xa5/0x480 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + RIP: 0033:0x7fd646988ba0 + RSP: 002b:00007fffc9fab3c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e + RAX: ffffffffffffffda RBX: 00007fffc9fab4f0 RCX: 00007fd646988ba0 + RDX: 0000000000000000 RSI: 00007fffc9fab440 RDI: 0000000000000003 + RBP: 000000005b28c8b3 R08: 0000000000000002 R09: 0000000000000000 + R10: 00007fffc9faae20 R11: 0000000000000246 R12: 0000000000000000 + R13: 00007fffc9fab504 R14: 0000000000000001 R15: 000000000066c100 + +Fixes: 4e8c86155010 ("net sched: net sched: ife action fix late binding") +Fixes: ef6980b6becb ("introduce IFE action") +Signed-off-by: Davide Caratti +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ife.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -415,7 +415,8 @@ static void tcf_ife_cleanup(struct tc_ac + spin_unlock_bh(&ife->tcf_lock); + + p = rcu_dereference_protected(ife->params, 1); +- kfree_rcu(p, rcu); ++ if (p) ++ kfree_rcu(p, rcu); + } + + /* under ife->tcf_lock for existing action */ +@@ -543,10 +544,8 @@ static int tcf_ife_init(struct net *net, + NULL, NULL); + if (err) { + metadata_parse_err: +- if (exists) +- tcf_idr_release(*a, bind); + if (ret == ACT_P_CREATED) +- _tcf_ife_cleanup(*a); ++ tcf_idr_release(*a, bind); + + if (exists) + spin_unlock_bh(&ife->tcf_lock); +@@ -567,7 +566,7 @@ metadata_parse_err: + err = use_all_metadata(ife); + if (err) { + if (ret == ACT_P_CREATED) +- _tcf_ife_cleanup(*a); ++ tcf_idr_release(*a, bind); + + if (exists) + spin_unlock_bh(&ife->tcf_lock); diff --git a/queue-4.17/net-sched-act_ife-preserve-the-action-control-in-case-of-error.patch b/queue-4.17/net-sched-act_ife-preserve-the-action-control-in-case-of-error.patch new file mode 100644 index 00000000000..09f7047024d --- /dev/null +++ 
b/queue-4.17/net-sched-act_ife-preserve-the-action-control-in-case-of-error.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Davide Caratti +Date: Tue, 19 Jun 2018 15:45:50 +0200 +Subject: net/sched: act_ife: preserve the action control in case of error + +From: Davide Caratti + +[ Upstream commit cbf56c29624fa056a0c1c3d177e67aa51a7fd8d6 ] + +in the following script + + # tc actions add action ife encode allow prio pass index 42 + # tc actions replace action ife encode allow tcindex drop index 42 + +the action control should remain equal to 'pass', if the kernel failed +to replace the TC action. Postpone the assignment of the action control, +to ensure it is not overwritten in the error path of tcf_ife_init(). + +Fixes: ef6980b6becb ("introduce IFE action") +Signed-off-by: Davide Caratti +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ife.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/sched/act_ife.c ++++ b/net/sched/act_ife.c +@@ -517,8 +517,6 @@ static int tcf_ife_init(struct net *net, + saddr = nla_data(tb[TCA_IFE_SMAC]); + } + +- ife->tcf_action = parm->action; +- + if (parm->flags & IFE_ENCODE) { + if (daddr) + ether_addr_copy(p->eth_dst, daddr); +@@ -575,6 +573,7 @@ metadata_parse_err: + } + } + ++ ife->tcf_action = parm->action; + if (exists) + spin_unlock_bh(&ife->tcf_lock); + diff --git a/queue-4.17/net-sungem-fix-rx-checksum-support.patch b/queue-4.17/net-sungem-fix-rx-checksum-support.patch new file mode 100644 index 00000000000..e7bb0da76ac --- /dev/null +++ b/queue-4.17/net-sungem-fix-rx-checksum-support.patch @@ -0,0 +1,109 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Eric Dumazet +Date: Tue, 19 Jun 2018 19:18:50 -0700 +Subject: net: sungem: fix rx checksum support + +From: Eric Dumazet + +[ Upstream commit 12b03558cef6d655d0d394f5e98a6fd07c1f6c0f ] + +After commit 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE +are 
friends"), sungem owners reported the infamous "eth0: hw csum failure" +message. + +CHECKSUM_COMPLETE has in fact never worked for this driver, but this +was masked by the fact that upper stacks had to strip the FCS, and +therefore skb->ip_summed was set back to CHECKSUM_NONE before +my recent change. + +Driver configures a number of bytes to skip when the chip computes +the checksum, and for some reason only half of the Ethernet header +was skipped. + +Then a second problem is that we should strip the FCS by default, +unless the driver is updated to eventually support NETIF_F_RXFCS in +the future. + +Finally, a driver should check if NETIF_F_RXCSUM feature is enabled +or not, so that the admin can turn off rx checksum if wanted. + +Many thanks to Andreas Schwab and Mathieu Malaterre for their +help in debugging this issue. + +Signed-off-by: Eric Dumazet +Reported-by: Meelis Roos +Reported-by: Mathieu Malaterre +Reported-by: Andreas Schwab +Tested-by: Andreas Schwab +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/sun/sungem.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/sun/sungem.c ++++ b/drivers/net/ethernet/sun/sungem.c +@@ -60,8 +60,7 @@ + #include + #include "sungem.h" + +-/* Stripping FCS is causing problems, disabled for now */ +-#undef STRIP_FCS ++#define STRIP_FCS + + #define DEFAULT_MSG (NETIF_MSG_DRV | \ + NETIF_MSG_PROBE | \ +@@ -435,7 +434,7 @@ static int gem_rxmac_reset(struct gem *g + writel(desc_dma & 0xffffffff, gp->regs + RXDMA_DBLOW); + writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK); + val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) | +- ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128); ++ (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128); + writel(val, gp->regs + RXDMA_CFG); + if (readl(gp->regs + GREG_BIFCFG) & GREG_BIFCFG_M66EN) + writel(((5 & RXDMA_BLANK_IPKTS) | +@@ -760,7 +759,6 @@ static int gem_rx(struct gem *gp, int wo + struct net_device *dev = gp->dev; + int entry, drops, work_done = 0; + u32 done; +- __sum16 csum; + + if (netif_msg_rx_status(gp)) + printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n", +@@ -855,9 +853,13 @@ static int gem_rx(struct gem *gp, int wo + skb = copy_skb; + } + +- csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff); +- skb->csum = csum_unfold(csum); +- skb->ip_summed = CHECKSUM_COMPLETE; ++ if (likely(dev->features & NETIF_F_RXCSUM)) { ++ __sum16 csum; ++ ++ csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff); ++ skb->csum = csum_unfold(csum); ++ skb->ip_summed = CHECKSUM_COMPLETE; ++ } + skb->protocol = eth_type_trans(skb, gp->dev); + + napi_gro_receive(&gp->napi, skb); +@@ -1761,7 +1763,7 @@ static void gem_init_dma(struct gem *gp) + writel(0, gp->regs + TXDMA_KICK); + + val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) | +- ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128); ++ (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128); + writel(val, gp->regs + RXDMA_CFG); + + writel(desc_dma >> 
32, gp->regs + RXDMA_DBHI); +@@ -2985,8 +2987,8 @@ static int gem_init_one(struct pci_dev * + pci_set_drvdata(pdev, dev); + + /* We can do scatter/gather and HW checksum */ +- dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM; +- dev->features |= dev->hw_features | NETIF_F_RXCSUM; ++ dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; ++ dev->features = dev->hw_features; + if (pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; + diff --git a/queue-4.17/net-tcp-fix-socket-lookups-with-so_bindtodevice.patch b/queue-4.17/net-tcp-fix-socket-lookups-with-so_bindtodevice.patch new file mode 100644 index 00000000000..24800e05f4b --- /dev/null +++ b/queue-4.17/net-tcp-fix-socket-lookups-with-so_bindtodevice.patch @@ -0,0 +1,56 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: David Ahern +Date: Mon, 18 Jun 2018 12:30:37 -0700 +Subject: net/tcp: Fix socket lookups with SO_BINDTODEVICE + +From: David Ahern + +[ Upstream commit 8c43bd1706885ba1acfa88da02bc60a2ec16f68c ] + +Similar to 69678bcd4d2d ("udp: fix SO_BINDTODEVICE"), TCP socket lookups +need to fail if dev_match is not true. Currently, a packet to a given port +can match a socket bound to device when it should not. In the VRF case, +this causes the lookup to hit a VRF socket and not a global socket +resulting in a response trying to go through the VRF when it should not. + +Fixes: 3fa6f616a7a4d ("net: ipv4: add second dif to inet socket lookups") +Fixes: 4297a0ef08572 ("net: ipv6: add second dif to inet6 socket lookups") +Reported-by: Lou Berger +Diagnosed-by: Renato Westphal +Tested-by: Renato Westphal +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_hashtables.c | 4 ++-- + net/ipv6/inet6_hashtables.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -243,9 +243,9 @@ static inline int compute_score(struct s + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + +- if (exact_dif && !dev_match) ++ if (!dev_match) + return -1; +- if (sk->sk_bound_dev_if && dev_match) ++ if (sk->sk_bound_dev_if) + score += 4; + } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) +--- a/net/ipv6/inet6_hashtables.c ++++ b/net/ipv6/inet6_hashtables.c +@@ -113,9 +113,9 @@ static inline int compute_score(struct s + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + +- if (exact_dif && !dev_match) ++ if (!dev_match) + return -1; +- if (sk->sk_bound_dev_if && dev_match) ++ if (sk->sk_bound_dev_if) + score++; + } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) diff --git a/queue-4.17/net-use-dev_change_tx_queue_len-for-siocsiftxqlen.patch b/queue-4.17/net-use-dev_change_tx_queue_len-for-siocsiftxqlen.patch new file mode 100644 index 00000000000..4e16355aa53 --- /dev/null +++ b/queue-4.17/net-use-dev_change_tx_queue_len-for-siocsiftxqlen.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Cong Wang +Date: Fri, 29 Jun 2018 13:42:48 -0700 +Subject: net: use dev_change_tx_queue_len() for SIOCSIFTXQLEN + +From: Cong Wang + +[ Upstream commit 3f76df198288ceec92fc9eddecad1e73c52769b0 ] + +As noticed by Eric, we need to switch to the helper +dev_change_tx_queue_len() for SIOCSIFTXQLEN call path too, +otherwise we still miss dev_qdisc_change_tx_queue_len(). + +Fixes: 6a643ddb5624 ("net: introduce helper dev_change_tx_queue_len()") +Reported-by: Eric Dumazet +Signed-off-by: Cong Wang +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev_ioctl.c | 11 ++--------- + 1 file changed, 2 insertions(+), 9 deletions(-) + +--- a/net/core/dev_ioctl.c ++++ b/net/core/dev_ioctl.c +@@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, s + if (ifr->ifr_qlen < 0) + return -EINVAL; + if (dev->tx_queue_len ^ ifr->ifr_qlen) { +- unsigned int orig_len = dev->tx_queue_len; +- +- dev->tx_queue_len = ifr->ifr_qlen; +- err = call_netdevice_notifiers( +- NETDEV_CHANGE_TX_QUEUE_LEN, dev); +- err = notifier_to_errno(err); +- if (err) { +- dev->tx_queue_len = orig_len; ++ err = dev_change_tx_queue_len(dev, ifr->ifr_qlen); ++ if (err) + return err; +- } + } + return 0; + diff --git a/queue-4.17/net_sched-blackhole-tell-upper-qdisc-about-dropped-packets.patch b/queue-4.17/net_sched-blackhole-tell-upper-qdisc-about-dropped-packets.patch new file mode 100644 index 00000000000..a6964c3b94e --- /dev/null +++ b/queue-4.17/net_sched-blackhole-tell-upper-qdisc-about-dropped-packets.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Konstantin Khlebnikov +Date: Fri, 15 Jun 2018 13:27:31 +0300 +Subject: net_sched: blackhole: tell upper qdisc about dropped packets + +From: Konstantin Khlebnikov + +[ Upstream commit 7e85dc8cb35abf16455f1511f0670b57c1a84608 ] + +When blackhole is used on top of classful qdisc like hfsc it breaks +qlen and backlog counters because packets disappear without notice. + +In HFSC non-zero qlen while all classes are inactive triggers warning: +WARNING: ... at net/sched/sch_hfsc.c:1393 hfsc_dequeue+0xba4/0xe90 [sch_hfsc] +and schedules watchdog work endlessly. + +This patch returns __NET_XMIT_BYPASS in addition to NET_XMIT_SUCCESS, +this flag tells upper layer: this packet is gone and isn't queued. + +Signed-off-by: Konstantin Khlebnikov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_blackhole.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/sch_blackhole.c ++++ b/net/sched/sch_blackhole.c +@@ -21,7 +21,7 @@ static int blackhole_enqueue(struct sk_b + struct sk_buff **to_free) + { + qdisc_drop(skb, sch, to_free); +- return NET_XMIT_SUCCESS; ++ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + } + + static struct sk_buff *blackhole_dequeue(struct Qdisc *sch) diff --git a/queue-4.17/nfp-flower-fix-mpls-ether-type-detection.patch b/queue-4.17/nfp-flower-fix-mpls-ether-type-detection.patch new file mode 100644 index 00000000000..e20d8432fe2 --- /dev/null +++ b/queue-4.17/nfp-flower-fix-mpls-ether-type-detection.patch @@ -0,0 +1,77 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Pieter Jansen van Vuuren +Date: Mon, 25 Jun 2018 20:36:27 -0700 +Subject: nfp: flower: fix mpls ether type detection + +From: Pieter Jansen van Vuuren + +[ Upstream commit a64119415ff248efa61301783bc26551df5dabf6 ] + +Previously it was not possible to distinguish between mpls ether types and +other ether types. This leads to incorrect classification of offloaded +filters that match on mpls ether type. For example the following two +filters overlap: + + # tc filter add dev eth0 parent ffff: \ + protocol 0x8847 flower \ + action mirred egress redirect dev eth1 + + # tc filter add dev eth0 parent ffff: \ + protocol 0x0800 flower \ + action mirred egress redirect dev eth2 + +The driver now correctly includes the mac_mpls layer where HW stores mpls +fields, when it detects an mpls ether type. It also sets the MPLS_Q bit to +indicate that the filter should match mpls packets. + +Fixes: bb055c198d9b ("nfp: add mpls match offloading support") +Signed-off-by: Pieter Jansen van Vuuren +Reviewed-by: Simon Horman +Reviewed-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/flower/match.c | 14 ++++++++++++++ + drivers/net/ethernet/netronome/nfp/flower/offload.c | 8 ++++++++ + 2 files changed, 22 insertions(+) + +--- a/drivers/net/ethernet/netronome/nfp/flower/match.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/match.c +@@ -123,6 +123,20 @@ nfp_flower_compile_mac(struct nfp_flower + NFP_FLOWER_MASK_MPLS_Q; + + frame->mpls_lse = cpu_to_be32(t_mpls); ++ } else if (dissector_uses_key(flow->dissector, ++ FLOW_DISSECTOR_KEY_BASIC)) { ++ /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q ++ * bit, which indicates an mpls ether type but without any ++ * mpls fields. ++ */ ++ struct flow_dissector_key_basic *key_basic; ++ ++ key_basic = skb_flow_dissector_target(flow->dissector, ++ FLOW_DISSECTOR_KEY_BASIC, ++ flow->key); ++ if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || ++ key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) ++ frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); + } + } + +--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c +@@ -264,6 +264,14 @@ nfp_flower_calculate_key_layers(struct n + case cpu_to_be16(ETH_P_ARP): + return -EOPNOTSUPP; + ++ case cpu_to_be16(ETH_P_MPLS_UC): ++ case cpu_to_be16(ETH_P_MPLS_MC): ++ if (!(key_layer & NFP_FLOWER_LAYER_MAC)) { ++ key_layer |= NFP_FLOWER_LAYER_MAC; ++ key_size += sizeof(struct nfp_flower_mac_mpls); ++ } ++ break; ++ + /* Will be included in layer 2. 
*/ + case cpu_to_be16(ETH_P_8021Q): + break; diff --git a/queue-4.17/nfp-reject-binding-to-shared-blocks.patch b/queue-4.17/nfp-reject-binding-to-shared-blocks.patch new file mode 100644 index 00000000000..431e39996c0 --- /dev/null +++ b/queue-4.17/nfp-reject-binding-to-shared-blocks.patch @@ -0,0 +1,72 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: John Hurley +Date: Mon, 25 Jun 2018 20:36:28 -0700 +Subject: nfp: reject binding to shared blocks + +From: John Hurley + +[ Upstream commit 951a8ee6def39e25d0e60b9394e5a249ba8b2390 ] + +TC shared blocks allow multiple qdiscs to be grouped together and filters +shared between them. Currently the chains of filters attached to a block +are only flushed when the block is removed. If a qdisc is removed from a +block but the block still exists, flow del messages are not passed to the +callback registered for that qdisc. For the NFP, this presents the +possibility of rules still existing in hw when they should be removed. + +Prevent binding to shared blocks until the kernel can send per qdisc del +messages when block unbinds occur. + +tcf_block_shared() was not used outside of the core until now, so also +add an empty implementation for builds with CONFIG_NET_CLS=n. + +Fixes: 4861738775d7 ("net: sched: introduce shared filter blocks infrastructure") +Signed-off-by: John Hurley +Signed-off-by: Jakub Kicinski +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/bpf/main.c | 3 +++ + drivers/net/ethernet/netronome/nfp/flower/offload.c | 3 +++ + include/net/pkt_cls.h | 5 +++++ + 3 files changed, 11 insertions(+) + +--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c ++++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c +@@ -194,6 +194,9 @@ static int nfp_bpf_setup_tc_block(struct + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + ++ if (tcf_block_shared(f->block)) ++ return -EOPNOTSUPP; ++ + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, +--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c +@@ -601,6 +601,9 @@ static int nfp_flower_setup_tc_block(str + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + ++ if (tcf_block_shared(f->block)) ++ return -EOPNOTSUPP; ++ + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, +--- a/include/net/pkt_cls.h ++++ b/include/net/pkt_cls.h +@@ -111,6 +111,11 @@ void tcf_block_put_ext(struct tcf_block + { + } + ++static inline bool tcf_block_shared(struct tcf_block *block) ++{ ++ return false; ++} ++ + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) + { + return NULL; diff --git a/queue-4.17/qed-fix-setting-of-incorrect-eswitch-mode.patch b/queue-4.17/qed-fix-setting-of-incorrect-eswitch-mode.patch new file mode 100644 index 00000000000..8461bacd43d --- /dev/null +++ b/queue-4.17/qed-fix-setting-of-incorrect-eswitch-mode.patch @@ -0,0 +1,77 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Sudarsana Reddy Kalluru +Date: Sun, 1 Jul 2018 20:03:06 -0700 +Subject: qed: Fix setting of incorrect eswitch mode. + +From: Sudarsana Reddy Kalluru + +[ Upstream commit 538f8d00ba8bb417c4d9e76c61dee59d812d8287 ] + +By default, driver sets the eswitch mode incorrectly as VEB (virtual +Ethernet bridging). 
+Need to set VEB eswitch mode only when sriov is enabled, and it should be +to set NONE by default. The patch incorporates this change. + +Fixes: 0fefbfbaa ("qed*: Management firmware - notifications and defaults") +Signed-off-by: Sudarsana Reddy Kalluru +Signed-off-by: Michal Kalderon +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_dev.c | 2 +- + drivers/net/ethernet/qlogic/qed/qed_sriov.c | 19 +++++++++++++++++-- + 2 files changed, 18 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c +@@ -1789,7 +1789,7 @@ int qed_hw_init(struct qed_dev *cdev, st + DP_INFO(p_hwfn, "Failed to update driver state\n"); + + rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt, +- QED_OV_ESWITCH_VEB); ++ QED_OV_ESWITCH_NONE); + if (rc) + DP_INFO(p_hwfn, "Failed to update eswitch mode\n"); + } +--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c +@@ -4400,6 +4400,8 @@ static void qed_sriov_enable_qid_config( + static int qed_sriov_enable(struct qed_dev *cdev, int num) + { + struct qed_iov_vf_init_params params; ++ struct qed_hwfn *hwfn; ++ struct qed_ptt *ptt; + int i, j, rc; + + if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) { +@@ -4412,8 +4414,8 @@ static int qed_sriov_enable(struct qed_d + + /* Initialize HW for VF access */ + for_each_hwfn(cdev, j) { +- struct qed_hwfn *hwfn = &cdev->hwfns[j]; +- struct qed_ptt *ptt = qed_ptt_acquire(hwfn); ++ hwfn = &cdev->hwfns[j]; ++ ptt = qed_ptt_acquire(hwfn); + + /* Make sure not to use more than 16 queues per VF */ + params.num_queues = min_t(int, +@@ -4449,6 +4451,19 @@ static int qed_sriov_enable(struct qed_d + goto err; + } + ++ hwfn = QED_LEADING_HWFN(cdev); ++ ptt = qed_ptt_acquire(hwfn); ++ if (!ptt) { ++ DP_ERR(hwfn, "Failed to acquire ptt\n"); ++ rc = -EBUSY; ++ goto err; ++ } ++ ++ rc = qed_mcp_ov_update_eswitch(hwfn, ptt, 
QED_OV_ESWITCH_VEB); ++ if (rc) ++ DP_INFO(cdev, "Failed to update eswitch mode\n"); ++ qed_ptt_release(hwfn, ptt); ++ + return num; + + err: diff --git a/queue-4.17/qed-fix-use-of-incorrect-size-in-memcpy-call.patch b/queue-4.17/qed-fix-use-of-incorrect-size-in-memcpy-call.patch new file mode 100644 index 00000000000..c565dbed297 --- /dev/null +++ b/queue-4.17/qed-fix-use-of-incorrect-size-in-memcpy-call.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Sudarsana Reddy Kalluru +Date: Sun, 1 Jul 2018 20:03:07 -0700 +Subject: qed: Fix use of incorrect size in memcpy call. + +From: Sudarsana Reddy Kalluru + +[ Upstream commit cc9b27cdf7bd3c86df73439758ac1564bc8f5bbe ] + +Use the correct size value while copying chassis/port id values. + +Fixes: 6ad8c632e ("qed: Add support for query/config dcbx.") +Signed-off-by: Sudarsana Reddy Kalluru +Signed-off-by: Michal Kalderon +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +@@ -700,9 +700,9 @@ qed_dcbx_get_local_lldp_params(struct qe + p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE]; + + memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id, +- ARRAY_SIZE(p_local->local_chassis_id)); ++ sizeof(p_local->local_chassis_id)); + memcpy(params->lldp_local.local_port_id, p_local->local_port_id, +- ARRAY_SIZE(p_local->local_port_id)); ++ sizeof(p_local->local_port_id)); + } + + static void +@@ -714,9 +714,9 @@ qed_dcbx_get_remote_lldp_params(struct q + p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE]; + + memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id, +- ARRAY_SIZE(p_remote->peer_chassis_id)); ++ sizeof(p_remote->peer_chassis_id)); + memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id, +- 
ARRAY_SIZE(p_remote->peer_port_id)); ++ sizeof(p_remote->peer_port_id)); + } + + static int diff --git a/queue-4.17/qed-limit-msix-vectors-in-kdump-kernel-to-the-minimum-required-count.patch b/queue-4.17/qed-limit-msix-vectors-in-kdump-kernel-to-the-minimum-required-count.patch new file mode 100644 index 00000000000..cb0df65a548 --- /dev/null +++ b/queue-4.17/qed-limit-msix-vectors-in-kdump-kernel-to-the-minimum-required-count.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Sudarsana Reddy Kalluru +Date: Sun, 1 Jul 2018 20:03:05 -0700 +Subject: qed: Limit msix vectors in kdump kernel to the minimum required count. + +From: Sudarsana Reddy Kalluru + +[ Upstream commit bb7858ba1102f82470a917e041fd23e6385c31be ] + +Memory size is limited in the kdump kernel environment. Allocation of more +msix-vectors (or queues) consumes few tens of MBs of memory, which might +lead to the kdump kernel failure. +This patch adds changes to limit the number of MSI-X vectors in kdump +kernel to minimum required value (i.e., 2 per engine). + +Fixes: fe56b9e6a ("qed: Add module with basic common support") +Signed-off-by: Sudarsana Reddy Kalluru +Signed-off-by: Michal Kalderon +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_main.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/net/ethernet/qlogic/qed/qed_main.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_main.c +@@ -780,6 +780,14 @@ static int qed_slowpath_setup_int(struct + /* We want a minimum of one slowpath and one fastpath vector per hwfn */ + cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2; + ++ if (is_kdump_kernel()) { ++ DP_INFO(cdev, ++ "Kdump kernel: Limit the max number of requested MSI-X vectors to %hd\n", ++ cdev->int_params.in.min_msix_cnt); ++ cdev->int_params.in.num_vectors = ++ cdev->int_params.in.min_msix_cnt; ++ } ++ + rc = qed_set_int_mode(cdev, false); + if (rc) { + DP_ERR(cdev, "qed_slowpath_setup_int ERR\n"); diff --git a/queue-4.17/qede-adverstise-software-timestamp-caps-when-phc-is-not-available.patch b/queue-4.17/qede-adverstise-software-timestamp-caps-when-phc-is-not-available.patch new file mode 100644 index 00000000000..15af296d73e --- /dev/null +++ b/queue-4.17/qede-adverstise-software-timestamp-caps-when-phc-is-not-available.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Sudarsana Reddy Kalluru +Date: Sun, 1 Jul 2018 20:03:08 -0700 +Subject: qede: Adverstise software timestamp caps when PHC is not available. + +From: Sudarsana Reddy Kalluru + +[ Upstream commit 82a4e71b1565dea8387f54503e806cf374e779ec ] + +When ptp clock is not available for a PF (e.g., higher PFs in NPAR mode), +get-tsinfo() callback should return the software timestamp capabilities +instead of returning the error. + +Fixes: 4c55215c ("qede: Add driver support for PTP") +Signed-off-by: Sudarsana Reddy Kalluru +Signed-off-by: Michal Kalderon +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qede/qede_ptp.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c ++++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c +@@ -337,8 +337,14 @@ int qede_ptp_get_ts_info(struct qede_dev + { + struct qede_ptp *ptp = edev->ptp; + +- if (!ptp) +- return -EIO; ++ if (!ptp) { ++ info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | ++ SOF_TIMESTAMPING_RX_SOFTWARE | ++ SOF_TIMESTAMPING_SOFTWARE; ++ info->phc_index = -1; ++ ++ return 0; ++ } + + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/queue-4.17/qmi_wwan-add-support-for-the-dell-wireless-5821e-module.patch b/queue-4.17/qmi_wwan-add-support-for-the-dell-wireless-5821e-module.patch new file mode 100644 index 00000000000..2e8a71a47c8 --- /dev/null +++ b/queue-4.17/qmi_wwan-add-support-for-the-dell-wireless-5821e-module.patch @@ -0,0 +1,34 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Aleksander Morgado +Date: Sat, 23 Jun 2018 23:22:52 +0200 +Subject: qmi_wwan: add support for the Dell Wireless 5821e module + +From: Aleksander Morgado + +[ Upstream commit e7e197edd09c25774b4f12cab19f9d5462f240f4 ] + +This module exposes two USB configurations: a QMI+AT capable setup on +USB config #1 and a MBIM capable setup on USB config #2. + +By default the kernel will choose the MBIM capable configuration as +long as the cdc_mbim driver is available. This patch adds support for +the QMI port in the secondary configuration. + +Signed-off-by: Aleksander Morgado +Acked-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -1246,6 +1246,7 @@ static const struct usb_device_id produc + {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ + {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ + {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ ++ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ + {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ + {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ diff --git a/queue-4.17/r8152-napi-hangup-fix-after-disconnect.patch b/queue-4.17/r8152-napi-hangup-fix-after-disconnect.patch new file mode 100644 index 00000000000..ff2213097ce --- /dev/null +++ b/queue-4.17/r8152-napi-hangup-fix-after-disconnect.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Jiri Slaby +Date: Mon, 25 Jun 2018 09:26:27 +0200 +Subject: r8152: napi hangup fix after disconnect + +From: Jiri Slaby + +[ Upstream commit 0ee1f4734967af8321ecebaf9c74221ace34f2d5 ] + +When unplugging an r8152 adapter while the interface is UP, the NIC +becomes unusable. usb->disconnect (aka rtl8152_disconnect) deletes +napi. Then, rtl8152_disconnect calls unregister_netdev and that invokes +netdev->ndo_stop (aka rtl8152_close). rtl8152_close tries to +napi_disable, but the napi is already deleted by disconnect above. So +the first while loop in napi_disable never finishes. This results in +complete deadlock of the network layer as there is rtnl_mutex held by +unregister_netdev. + +So avoid the call to napi_disable in rtl8152_close when the device is +already gone. + +The other calls to usb_kill_urb, cancel_delayed_work_sync, +netif_stop_queue etc. seem to be fine. 
The urb and netdev is not +destroyed yet. + +Signed-off-by: Jiri Slaby +Cc: linux-usb@vger.kernel.org +Cc: netdev@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/r8152.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -3962,7 +3962,8 @@ static int rtl8152_close(struct net_devi + #ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&tp->pm_notifier); + #endif +- napi_disable(&tp->napi); ++ if (!test_bit(RTL8152_UNPLUG, &tp->flags)) ++ napi_disable(&tp->napi); + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + cancel_delayed_work_sync(&tp->schedule); diff --git a/queue-4.17/revert-s390-qeth-use-read-device-to-query-hypervisor-for-mac.patch b/queue-4.17/revert-s390-qeth-use-read-device-to-query-hypervisor-for-mac.patch new file mode 100644 index 00000000000..03ef6bbbab1 --- /dev/null +++ b/queue-4.17/revert-s390-qeth-use-read-device-to-query-hypervisor-for-mac.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Julian Wiedmann +Date: Fri, 29 Jun 2018 19:45:50 +0200 +Subject: Revert "s390/qeth: use Read device to query hypervisor for MAC" + +From: Julian Wiedmann + +[ Upstream commit 4664610537d398d55be19432f9cd9c29c831e159 ] + +This reverts commit b7493e91c11a757cf0f8ab26989642ee4bb2c642. + +On its own, querying RDEV for a MAC address works fine. But when upgrading +from a qeth that previously queried DDEV on a z/VM NIC (ie. any kernel with +commit ec61bd2fd2a2), the RDEV query now returns a _different_ MAC address +than the DDEV query. + +If the NIC is configured with MACPROTECT, z/VM apparently requires us to +use the MAC that was initially returned (on DDEV) and registered. So after +upgrading to a kernel that uses RDEV, the SETVMAC registration cmd for the +new MAC address fails and we end up with a non-operable interface. 
+ +To avoid regressions on upgrade, switch back to using DDEV for the MAC +address query. The downgrade path (first RDEV, later DDEV) is fine, in this +case both queries return the same MAC address. + +Fixes: b7493e91c11a ("s390/qeth: use Read device to query hypervisor for MAC") +Reported-by: Michal Kubecek +Tested-by: Karsten Graul +Signed-off-by: Julian Wiedmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_core_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -4845,7 +4845,7 @@ int qeth_vm_request_mac(struct qeth_card + goto out; + } + +- ccw_device_get_id(CARD_RDEV(card), &id); ++ ccw_device_get_id(CARD_DDEV(card), &id); + request->resp_buf_len = sizeof(*response); + request->resp_version = DIAG26C_VERSION2; + request->op_code = DIAG26C_GET_MAC; diff --git a/queue-4.17/s390-qeth-avoid-using-is_multicast_ether_addr_64bits-on-u8.patch b/queue-4.17/s390-qeth-avoid-using-is_multicast_ether_addr_64bits-on-u8.patch new file mode 100644 index 00000000000..44838fd43e8 --- /dev/null +++ b/queue-4.17/s390-qeth-avoid-using-is_multicast_ether_addr_64bits-on-u8.patch @@ -0,0 +1,63 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Vasily Gorbik +Date: Fri, 29 Jun 2018 19:45:52 +0200 +Subject: s390/qeth: avoid using is_multicast_ether_addr_64bits on (u8 *)[6] + +From: Vasily Gorbik + +[ Upstream commit 9d0a58fb9747afd27d490c02a97889a1b59f6be4 ] + +*ether_addr*_64bits functions have been introduced to optimize +performance critical paths, which access 6-byte ethernet address as u64 +value to get "nice" assembly. A harmless hack works nicely on ethernet +addresses shoved into a structure or a larger buffer, until busted by +Kasan on smth like plain (u8 *)[6]. + +qeth_l2_set_mac_address calls qeth_l2_remove_mac passing +u8 old_addr[ETH_ALEN] as an argument. 
+ +Adding/removing macs for an ethernet adapter is not that performance +critical. Moreover is_multicast_ether_addr_64bits itself on s390 is not +faster than is_multicast_ether_addr: + +is_multicast_ether_addr(%r2) -> %r2 +llc %r2,0(%r2) +risbg %r2,%r2,63,191,0 + +is_multicast_ether_addr_64bits(%r2) -> %r2 +llgc %r2,0(%r2) +risbg %r2,%r2,63,191,0 + +So, let's just use is_multicast_ether_addr instead of +is_multicast_ether_addr_64bits. + +Fixes: bcacfcbc82b4 ("s390/qeth: fix MAC address update sequence") +Reviewed-by: Julian Wiedmann +Signed-off-by: Vasily Gorbik +Signed-off-by: Julian Wiedmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_l2_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/s390/net/qeth_l2_main.c ++++ b/drivers/s390/net/qeth_l2_main.c +@@ -141,7 +141,7 @@ static int qeth_l2_send_setmac(struct qe + + static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac) + { +- enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? ++ enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ? + IPA_CMD_SETGMAC : IPA_CMD_SETVMAC; + int rc; + +@@ -158,7 +158,7 @@ static int qeth_l2_write_mac(struct qeth + + static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac) + { +- enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? ++ enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ? 
+ IPA_CMD_DELGMAC : IPA_CMD_DELVMAC; + int rc; + diff --git a/queue-4.17/s390-qeth-don-t-clobber-buffer-on-async-tx-completion.patch b/queue-4.17/s390-qeth-don-t-clobber-buffer-on-async-tx-completion.patch new file mode 100644 index 00000000000..d2172905c47 --- /dev/null +++ b/queue-4.17/s390-qeth-don-t-clobber-buffer-on-async-tx-completion.patch @@ -0,0 +1,112 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Julian Wiedmann +Date: Fri, 29 Jun 2018 19:45:53 +0200 +Subject: s390/qeth: don't clobber buffer on async TX completion + +From: Julian Wiedmann + +[ Upstream commit ce28867fd20c23cd769e78b4d619c4755bf71a1c ] + +If qeth_qdio_output_handler() detects that a transmit requires async +completion, it replaces the pending buffer's metadata object +(qeth_qdio_out_buffer) so that this queue buffer can be re-used while +the data is pending completion. + +Later when the CQ indicates async completion of such a metadata object, +qeth_qdio_cq_handler() tries to free any data associated with this +object (since HW has now completed the transfer). By calling +qeth_clear_output_buffer(), it erroneously operates on the queue buffer +that _previously_ belonged to this transfer ... but which has been +potentially re-used several times by now. +This results in double-free's of the buffer's data, and failing +transmits as the buffer descriptor is scrubbed in mid-air. + +The correct way of handling this situation is to +1. scrub the queue buffer when it is prepared for re-use, and +2. later obtain the data addresses from the async-completion notifier + (ie. the AOB), instead of the queue buffer. + +All this only affects qeth devices used for af_iucv HiperTransport. + +Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") +Signed-off-by: Julian Wiedmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_core.h | 11 +++++++++++ + drivers/s390/net/qeth_core_main.c | 22 ++++++++++++++++------ + 2 files changed, 27 insertions(+), 6 deletions(-) + +--- a/drivers/s390/net/qeth_core.h ++++ b/drivers/s390/net/qeth_core.h +@@ -831,6 +831,17 @@ struct qeth_trap_id { + /*some helper functions*/ + #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "") + ++static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf, ++ unsigned int elements) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < elements; i++) ++ memset(&buf->element[i], 0, sizeof(struct qdio_buffer_element)); ++ buf->element[14].sflags = 0; ++ buf->element[15].sflags = 0; ++} ++ + /** + * qeth_get_elements_for_range() - find number of SBALEs to cover range. + * @start: Start of the address range. +--- a/drivers/s390/net/qeth_core_main.c ++++ b/drivers/s390/net/qeth_core_main.c +@@ -73,9 +73,6 @@ static void qeth_notify_skbs(struct qeth + struct qeth_qdio_out_buffer *buf, + enum iucv_tx_notify notification); + static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); +-static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, +- struct qeth_qdio_out_buffer *buf, +- enum qeth_qdio_buffer_states newbufstate); + static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); + + struct workqueue_struct *qeth_wq; +@@ -488,6 +485,7 @@ static void qeth_qdio_handle_aob(struct + struct qaob *aob; + struct qeth_qdio_out_buffer *buffer; + enum iucv_tx_notify notification; ++ unsigned int i; + + aob = (struct qaob *) phys_to_virt(phys_aob_addr); + QETH_CARD_TEXT(card, 5, "haob"); +@@ -512,10 +510,18 @@ static void qeth_qdio_handle_aob(struct + qeth_notify_skbs(buffer->q, buffer, notification); + + buffer->aob = NULL; +- qeth_clear_output_buffer(buffer->q, buffer, +- QETH_QDIO_BUF_HANDLED_DELAYED); ++ /* Free dangling allocations. The attached skbs are handled by ++ * qeth_cleanup_handled_pending(). 
++ */ ++ for (i = 0; ++ i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); ++ i++) { ++ if (aob->sba[i] && buffer->is_header[i]) ++ kmem_cache_free(qeth_core_header_cache, ++ (void *) aob->sba[i]); ++ } ++ atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED); + +- /* from here on: do not touch buffer anymore */ + qdio_release_aob(aob); + } + +@@ -3759,6 +3765,10 @@ void qeth_qdio_output_handler(struct ccw + QETH_CARD_TEXT(queue->card, 5, "aob"); + QETH_CARD_TEXT_(queue->card, 5, "%lx", + virt_to_phys(buffer->aob)); ++ ++ /* prepare the queue slot for re-use: */ ++ qeth_scrub_qdio_buffer(buffer->buffer, ++ QETH_MAX_BUFFER_ELEMENTS(card)); + if (qeth_init_qdio_out_buf(queue, bidx)) { + QETH_CARD_TEXT(card, 2, "outofbuf"); + qeth_schedule_recovery(card); diff --git a/queue-4.17/s390-qeth-fix-race-when-setting-mac-address.patch b/queue-4.17/s390-qeth-fix-race-when-setting-mac-address.patch new file mode 100644 index 00000000000..b1a6f7ddc00 --- /dev/null +++ b/queue-4.17/s390-qeth-fix-race-when-setting-mac-address.patch @@ -0,0 +1,75 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Julian Wiedmann +Date: Fri, 29 Jun 2018 19:45:51 +0200 +Subject: s390/qeth: fix race when setting MAC address + +From: Julian Wiedmann + +[ Upstream commit 4789a21880488048105590049fc41a99f53d565d ] + +When qeth_l2_set_mac_address() finds the card in a non-reachable state, +it merely copies the new MAC address into dev->dev_addr so that +__qeth_l2_set_online() can later register it with the HW. + +But __qeth_l2_set_online() may very well be running concurrently, so we +can't trust the card state without appropriate locking: +If the online sequence is past the point where it registers +dev->dev_addr (but not yet in SOFTSETUP state), any address change needs +to be properly programmed into the HW. Otherwise the netdevice ends up +with a different MAC address than what's set in the HW, and inbound +traffic is not forwarded as expected. 
+ +This is most likely to occur for OSD in LPAR, where +commit 21b1702af12e ("s390/qeth: improve fallback to random MAC address") +now triggers eg. systemd to immediately change the MAC when the netdevice +is registered with a NET_ADDR_RANDOM address. + +Fixes: bcacfcbc82b4 ("s390/qeth: fix MAC address update sequence") +Signed-off-by: Julian Wiedmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_l2_main.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/drivers/s390/net/qeth_l2_main.c ++++ b/drivers/s390/net/qeth_l2_main.c +@@ -523,27 +523,34 @@ static int qeth_l2_set_mac_address(struc + return -ERESTARTSYS; + } + ++ /* avoid racing against concurrent state change: */ ++ if (!mutex_trylock(&card->conf_mutex)) ++ return -EAGAIN; ++ + if (!qeth_card_hw_is_reachable(card)) { + ether_addr_copy(dev->dev_addr, addr->sa_data); +- return 0; ++ goto out_unlock; + } + + /* don't register the same address twice */ + if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && + (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) +- return 0; ++ goto out_unlock; + + /* add the new address, switch over, drop the old */ + rc = qeth_l2_send_setmac(card, addr->sa_data); + if (rc) +- return rc; ++ goto out_unlock; + ether_addr_copy(old_addr, dev->dev_addr); + ether_addr_copy(dev->dev_addr, addr->sa_data); + + if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) + qeth_l2_remove_mac(card, old_addr); + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; +- return 0; ++ ++out_unlock: ++ mutex_unlock(&card->conf_mutex); ++ return rc; + } + + static void qeth_promisc_to_bridge(struct qeth_card *card) diff --git a/queue-4.17/series b/queue-4.17/series index 6ff54032919..7e90cfbf970 100644 --- a/queue-4.17/series +++ b/queue-4.17/series @@ -5,3 +5,56 @@ pinctrl-sh-pfc-r8a77970-remove-sh_pfc_pin_cfg_drive_strength-flag.patch pinctrl-mt7622-fix-error-path-on-failing-at-groups-building.patch 
pinctrl-mt7622-stop-using-the-deprecated-pinctrl_add_gpio_range.patch pinctrl-mt7622-fix-a-kernel-panic-when-gpio-hog-is-being-applied.patch +alx-take-rtnl-before-calling-__alx_open-from-resume.patch +atm-preserve-value-of-skb-truesize-when-accounting-to-vcc.patch +atm-zatm-fix-potential-spectre-v1.patch +hv_netvsc-split-sub-channel-setup-into-async-and-sync.patch +ipv6-sr-fix-passing-wrong-flags-to-crypto_alloc_shash.patch +ipvlan-fix-ifla_mtu-ignored-on-newlink.patch +ixgbe-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch +net-dccp-avoid-crash-in-ccid3_hc_rx_send_feedback.patch +net-dccp-switch-rx_tstamp_last_feedback-to-monotonic-clock.patch +net-fix-use-after-free-in-gro-with-esp.patch +net-macb-fix-ptp-time-adjustment-for-large-negative-delta.patch +net-mlx5e-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch +net-mlx5e-don-t-attempt-to-dereference-the-ppriv-struct-if-not-being-eswitch-manager.patch +net-mlx5-e-switch-avoid-setup-attempt-if-not-being-e-switch-manager.patch +net-mlx5-fix-command-interface-race-in-polling-mode.patch +net-mlx5-fix-incorrect-raw-command-length-parsing.patch +net-mlx5-fix-required-capability-for-manipulating-mpfs.patch +net-mlx5-fix-wrong-size-allocation-for-qos-etc-tc-regitster.patch +net-mvneta-fix-the-rx-desc-dma-address-in-the-rx-path.patch +net-packet-fix-use-after-free.patch +net-sched-act_ife-fix-recursive-lock-and-idr-leak.patch +net-sched-act_ife-preserve-the-action-control-in-case-of-error.patch +net_sched-blackhole-tell-upper-qdisc-about-dropped-packets.patch +net-sungem-fix-rx-checksum-support.patch +net-tcp-fix-socket-lookups-with-so_bindtodevice.patch +qede-adverstise-software-timestamp-caps-when-phc-is-not-available.patch +qed-fix-setting-of-incorrect-eswitch-mode.patch +qed-fix-use-of-incorrect-size-in-memcpy-call.patch +qed-limit-msix-vectors-in-kdump-kernel-to-the-minimum-required-count.patch +qmi_wwan-add-support-for-the-dell-wireless-5821e-module.patch 
+r8152-napi-hangup-fix-after-disconnect.patch +s390-qeth-don-t-clobber-buffer-on-async-tx-completion.patch +stmmac-fix-dma-channel-hang-in-half-duplex-mode.patch +strparser-remove-early-eaten-to-fix-full-tcp-receive-buffer-stall.patch +tcp-fix-fast-open-key-endianness.patch +tcp-prevent-bogus-frto-undos-with-non-sack-flows.patch +vhost_net-validate-sock-before-trying-to-put-its-fd.patch +vsock-fix-loopback-on-big-endian-systems.patch +hinic-reset-irq-affinity-before-freeing-irq.patch +nfp-flower-fix-mpls-ether-type-detection.patch +net-macb-initialize-bp-queues.bp-for-at91rm9200.patch +net-use-dev_change_tx_queue_len-for-siocsiftxqlen.patch +nfp-reject-binding-to-shared-blocks.patch +xen-netfront-fix-mismatched-rtnl_unlock.patch +xen-netfront-update-features-after-registering-netdev.patch +enic-do-not-overwrite-error-code.patch +i40e-split-xdp_tx-tail-and-xdp_redirect-map-flushing.patch +ib-mlx5-avoid-dealing-with-vport-representors-if-not-being-e-switch-manager.patch +revert-s390-qeth-use-read-device-to-query-hypervisor-for-mac.patch +s390-qeth-avoid-using-is_multicast_ether_addr_64bits-on-u8.patch +s390-qeth-fix-race-when-setting-mac-address.patch +sfc-correctly-initialise-filter-rwsem-for-farch.patch +virtio_net-split-xdp_tx-kick-and-xdp_redirect-map-flushing.patch diff --git a/queue-4.17/sfc-correctly-initialise-filter-rwsem-for-farch.patch b/queue-4.17/sfc-correctly-initialise-filter-rwsem-for-farch.patch new file mode 100644 index 00000000000..835c824d910 --- /dev/null +++ b/queue-4.17/sfc-correctly-initialise-filter-rwsem-for-farch.patch @@ -0,0 +1,28 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Bert Kenward +Date: Fri, 29 Jun 2018 16:29:28 +0100 +Subject: sfc: correctly initialise filter rwsem for farch + +From: Bert Kenward + +[ Upstream commit cafb39600e7a73263122a0e2db052d691686378f ] + +Fixes: fc7a6c287ff3 ("sfc: use a semaphore to lock farch filters too") +Suggested-by: Joseph Korty +Signed-off-by: Bert Kenward +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/sfc/farch.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/sfc/farch.c ++++ b/drivers/net/ethernet/sfc/farch.c +@@ -2794,6 +2794,7 @@ int efx_farch_filter_table_probe(struct + if (!state) + return -ENOMEM; + efx->filter_state = state; ++ init_rwsem(&state->lock); + + table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP]; + table->id = EFX_FARCH_FILTER_TABLE_RX_IP; diff --git a/queue-4.17/stmmac-fix-dma-channel-hang-in-half-duplex-mode.patch b/queue-4.17/stmmac-fix-dma-channel-hang-in-half-duplex-mode.patch new file mode 100644 index 00000000000..ec01183bc29 --- /dev/null +++ b/queue-4.17/stmmac-fix-dma-channel-hang-in-half-duplex-mode.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Bhadram Varka +Date: Sun, 17 Jun 2018 20:02:05 +0530 +Subject: stmmac: fix DMA channel hang in half-duplex mode + +From: Bhadram Varka + +[ Upstream commit b6cfffa7ad923c73f317ea50fd4ebcb3b4b6669c ] + +HW does not support Half-duplex mode in multi-queue +scenario. Fix it by not advertising the Half-Duplex +mode if multi-queue enabled. + +Signed-off-by: Bhadram Varka +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -927,6 +927,7 @@ static void stmmac_check_pcs_mode(struct + static int stmmac_init_phy(struct net_device *dev) + { + struct stmmac_priv *priv = netdev_priv(dev); ++ u32 tx_cnt = priv->plat->tx_queues_to_use; + struct phy_device *phydev; + char phy_id_fmt[MII_BUS_ID_SIZE + 3]; + char bus_id[MII_BUS_ID_SIZE]; +@@ -968,6 +969,15 @@ static int stmmac_init_phy(struct net_de + SUPPORTED_1000baseT_Full); + + /* ++ * Half-duplex mode not supported with multiqueue ++ * half-duplex can only works with single queue ++ */ ++ if (tx_cnt > 1) ++ phydev->supported &= ~(SUPPORTED_1000baseT_Half | ++ SUPPORTED_100baseT_Half | ++ SUPPORTED_10baseT_Half); ++ ++ /* + * Broken HW is sometimes missing the pull-up resistor on the + * MDIO line, which results in reads to non-existent devices returning + * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent diff --git a/queue-4.17/strparser-remove-early-eaten-to-fix-full-tcp-receive-buffer-stall.patch b/queue-4.17/strparser-remove-early-eaten-to-fix-full-tcp-receive-buffer-stall.patch new file mode 100644 index 00000000000..fba87a32b77 --- /dev/null +++ b/queue-4.17/strparser-remove-early-eaten-to-fix-full-tcp-receive-buffer-stall.patch @@ -0,0 +1,73 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Doron Roberts-Kedes +Date: Tue, 26 Jun 2018 18:33:33 -0700 +Subject: strparser: Remove early eaten to fix full tcp receive buffer stall + +From: Doron Roberts-Kedes + +[ Upstream commit 977c7114ebda2e746a114840d3a875e0cdb826fb ] + +On receiving an incomplete message, the existing code stores the +remaining length of the cloned skb in the early_eaten field instead of +incrementing the value returned by __strp_recv. 
This defers invocation +of sock_rfree for the current skb until the next invocation of +__strp_recv, which returns early_eaten if early_eaten is non-zero. + +This behavior causes a stall when the current message occupies the very +tail end of a massive skb, and strp_peek/need_bytes indicates that the +remainder of the current message has yet to arrive on the socket. The +TCP receive buffer is totally full, causing the TCP window to go to +zero, so the remainder of the message will never arrive. + +Incrementing the value returned by __strp_recv by the amount otherwise +stored in early_eaten prevents stalls of this nature. + +Signed-off-by: Doron Roberts-Kedes +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/strparser/strparser.c | 17 +---------------- + 1 file changed, 1 insertion(+), 16 deletions(-) + +--- a/net/strparser/strparser.c ++++ b/net/strparser/strparser.c +@@ -35,7 +35,6 @@ struct _strp_msg { + */ + struct strp_msg strp; + int accum_len; +- int early_eaten; + }; + + static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) +@@ -115,20 +114,6 @@ static int __strp_recv(read_descriptor_t + head = strp->skb_head; + if (head) { + /* Message already in progress */ +- +- stm = _strp_msg(head); +- if (unlikely(stm->early_eaten)) { +- /* Already some number of bytes on the receive sock +- * data saved in skb_head, just indicate they +- * are consumed. +- */ +- eaten = orig_len <= stm->early_eaten ? +- orig_len : stm->early_eaten; +- stm->early_eaten -= eaten; +- +- return eaten; +- } +- + if (unlikely(orig_offset)) { + /* Getting data with a non-zero offset when a message is + * in progress is not expected. 
If it does happen, we +@@ -297,9 +282,9 @@ static int __strp_recv(read_descriptor_t + } + + stm->accum_len += cand_len; ++ eaten += cand_len; + strp->need_bytes = stm->strp.full_len - + stm->accum_len; +- stm->early_eaten = cand_len; + STRP_STATS_ADD(strp->stats.bytes, cand_len); + desc->count = 0; /* Stop reading socket */ + break; diff --git a/queue-4.17/tcp-fix-fast-open-key-endianness.patch b/queue-4.17/tcp-fix-fast-open-key-endianness.patch new file mode 100644 index 00000000000..52557abd099 --- /dev/null +++ b/queue-4.17/tcp-fix-fast-open-key-endianness.patch @@ -0,0 +1,75 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Yuchung Cheng +Date: Wed, 27 Jun 2018 16:04:48 -0700 +Subject: tcp: fix Fast Open key endianness + +From: Yuchung Cheng + +[ Upstream commit c860e997e9170a6d68f9d1e6e2cf61f572191aaf ] + +Fast Open key could be stored in different endian based on the CPU. +Previously hosts in different endianness in a server farm using +the same key config (sysctl value) would produce different cookies. +This patch fixes it by always storing it as little endian to keep +same API for LE hosts. + +Reported-by: Daniele Iamartino +Signed-off-by: Yuchung Cheng +Signed-off-by: Eric Dumazet +Signed-off-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/sysctl_net_ipv4.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/net/ipv4/sysctl_net_ipv4.c ++++ b/net/ipv4/sysctl_net_ipv4.c +@@ -263,8 +263,9 @@ static int proc_tcp_fastopen_key(struct + ipv4.sysctl_tcp_fastopen); + struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct tcp_fastopen_context *ctxt; +- int ret; + u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ ++ __le32 key[4]; ++ int ret, i; + + tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); + if (!tbl.data) +@@ -273,11 +274,14 @@ static int proc_tcp_fastopen_key(struct + rcu_read_lock(); + ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctxt) +- memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); ++ memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + else +- memset(user_key, 0, sizeof(user_key)); ++ memset(key, 0, sizeof(key)); + rcu_read_unlock(); + ++ for (i = 0; i < ARRAY_SIZE(key); i++) ++ user_key[i] = le32_to_cpu(key[i]); ++ + snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", + user_key[0], user_key[1], user_key[2], user_key[3]); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); +@@ -288,13 +292,17 @@ static int proc_tcp_fastopen_key(struct + ret = -EINVAL; + goto bad_key; + } +- tcp_fastopen_reset_cipher(net, NULL, user_key, ++ ++ for (i = 0; i < ARRAY_SIZE(user_key); i++) ++ key[i] = cpu_to_le32(user_key[i]); ++ ++ tcp_fastopen_reset_cipher(net, NULL, key, + TCP_FASTOPEN_KEY_LENGTH); + } + + bad_key: + pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", +- user_key[0], user_key[1], user_key[2], user_key[3], ++ user_key[0], user_key[1], user_key[2], user_key[3], + (char *)tbl.data, ret); + kfree(tbl.data); + return ret; diff --git a/queue-4.17/tcp-prevent-bogus-frto-undos-with-non-sack-flows.patch b/queue-4.17/tcp-prevent-bogus-frto-undos-with-non-sack-flows.patch new file mode 100644 index 00000000000..5c17dcfa284 --- /dev/null +++ 
b/queue-4.17/tcp-prevent-bogus-frto-undos-with-non-sack-flows.patch @@ -0,0 +1,64 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: "Ilpo Järvinen" +Date: Fri, 29 Jun 2018 13:07:53 +0300 +Subject: tcp: prevent bogus FRTO undos with non-SACK flows + +From: "Ilpo Järvinen" + +[ Upstream commit 1236f22fbae15df3736ab4a984c64c0c6ee6254c ] + +If SACK is not enabled and the first cumulative ACK after the RTO +retransmission covers more than the retransmitted skb, a spurious +FRTO undo will trigger (assuming FRTO is enabled for that RTO). +The reason is that any non-retransmitted segment acknowledged will +set FLAG_ORIG_SACK_ACKED in tcp_clean_rtx_queue even if there is +no indication that it would have been delivered for real (the +scoreboard is not kept with TCPCB_SACKED_ACKED bits in the non-SACK +case so the check for that bit won't help like it does with SACK). +Having FLAG_ORIG_SACK_ACKED set results in the spurious FRTO undo +in tcp_process_loss. + +We need to use a stricter condition for the non-SACK case and check +that none of the cumulatively ACKed segments were retransmitted +to prove that progress is due to original transmissions. Only then +keep FLAG_ORIG_SACK_ACKED set, allowing FRTO undo to proceed in +non-SACK case. + +(FLAG_ORIG_SACK_ACKED is planned to be renamed to FLAG_ORIG_PROGRESS +to better indicate its purpose but to keep this change minimal, it +will be done in another patch). + +Besides burstiness and congestion control violations, this problem +can result in an RTO loop: When the loss recovery is prematurely +undone, only new data will be transmitted (if available) and +the next retransmission can occur only after a new RTO which in case +of multiple losses (that are not for consecutive packets) requires +one RTO per loss to recover. + +Signed-off-by: Ilpo Järvinen +Tested-by: Neal Cardwell +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3149,6 +3149,15 @@ static int tcp_clean_rtx_queue(struct so + + if (tcp_is_reno(tp)) { + tcp_remove_reno_sacks(sk, pkts_acked); ++ ++ /* If any of the cumulatively ACKed segments was ++ * retransmitted, non-SACK case cannot confirm that ++ * progress was due to original transmission due to ++ * lack of TCPCB_SACKED_ACKED bits even if some of ++ * the packets may have been never retransmitted. ++ */ ++ if (flag & FLAG_RETRANS_DATA_ACKED) ++ flag &= ~FLAG_ORIG_SACK_ACKED; + } else { + int delta; + diff --git a/queue-4.17/vhost_net-validate-sock-before-trying-to-put-its-fd.patch b/queue-4.17/vhost_net-validate-sock-before-trying-to-put-its-fd.patch new file mode 100644 index 00000000000..1dfccdde4d3 --- /dev/null +++ b/queue-4.17/vhost_net-validate-sock-before-trying-to-put-its-fd.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Jason Wang +Date: Thu, 21 Jun 2018 13:11:31 +0800 +Subject: vhost_net: validate sock before trying to put its fd + +From: Jason Wang + +[ Upstream commit b8f1f65882f07913157c44673af7ec0b308d03eb ] + +Sock will be NULL if we pass -1 to vhost_net_set_backend(), but when +we meet errors during ubuf allocation, the code does not check for +NULL before calling sockfd_put(), this will lead to a NULL +dereference. Fix this by checking the sock pointer first. + +Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support") +Reported-by: Dan Carpenter +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/vhost/net.c ++++ b/drivers/vhost/net.c +@@ -1219,7 +1219,8 @@ err_used: + if (ubufs) + vhost_net_ubuf_put_wait_and_free(ubufs); + err_ubufs: +- sockfd_put(sock); ++ if (sock) ++ sockfd_put(sock); + err_vq: + mutex_unlock(&vq->mutex); + err: diff --git a/queue-4.17/virtio_net-split-xdp_tx-kick-and-xdp_redirect-map-flushing.patch b/queue-4.17/virtio_net-split-xdp_tx-kick-and-xdp_redirect-map-flushing.patch new file mode 100644 index 00000000000..56fc6df58f4 --- /dev/null +++ b/queue-4.17/virtio_net-split-xdp_tx-kick-and-xdp_redirect-map-flushing.patch @@ -0,0 +1,137 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Jesper Dangaard Brouer +Date: Tue, 26 Jun 2018 17:39:58 +0200 +Subject: virtio_net: split XDP_TX kick and XDP_REDIRECT map flushing + +From: Jesper Dangaard Brouer + +[ Upstream commit 2471c75efed32529698c26da499954f0253cb401 ] + +The driver was combining XDP_TX virtqueue_kick and XDP_REDIRECT +map flushing (xdp_do_flush_map). This is suboptimal, these two +flush operations should be kept separate. + +The suboptimal behavior was introduced in commit 9267c430c6b6 +("virtio-net: add missing virtqueue kick when flushing packets"). + +Fixes: 9267c430c6b6 ("virtio-net: add missing virtqueue kick when flushing packets") +Signed-off-by: Jesper Dangaard Brouer +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 30 +++++++++++++++++++----------- + 1 file changed, 19 insertions(+), 11 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -50,6 +50,10 @@ module_param(napi_tx, bool, 0644); + /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ + #define VIRTIO_XDP_HEADROOM 256 + ++/* Separating two types of XDP xmit */ ++#define VIRTIO_XDP_TX BIT(0) ++#define VIRTIO_XDP_REDIR BIT(1) ++ + /* RX packet size EWMA. The average packet size is used to determine the packet + * buffer size when refilling RX rings. As the entire RX ring may be refilled + * at once, the weight is chosen so that the EWMA will be insensitive to short- +@@ -547,7 +551,7 @@ static struct sk_buff *receive_small(str + struct receive_queue *rq, + void *buf, void *ctx, + unsigned int len, +- bool *xdp_xmit) ++ unsigned int *xdp_xmit) + { + struct sk_buff *skb; + struct bpf_prog *xdp_prog; +@@ -615,14 +619,14 @@ static struct sk_buff *receive_small(str + trace_xdp_exception(vi->dev, xdp_prog, act); + goto err_xdp; + } +- *xdp_xmit = true; ++ *xdp_xmit |= VIRTIO_XDP_TX; + rcu_read_unlock(); + goto xdp_xmit; + case XDP_REDIRECT: + err = xdp_do_redirect(dev, &xdp, xdp_prog); + if (err) + goto err_xdp; +- *xdp_xmit = true; ++ *xdp_xmit |= VIRTIO_XDP_REDIR; + rcu_read_unlock(); + goto xdp_xmit; + default: +@@ -684,7 +688,7 @@ static struct sk_buff *receive_mergeable + void *buf, + void *ctx, + unsigned int len, +- bool *xdp_xmit) ++ unsigned int *xdp_xmit) + { + struct virtio_net_hdr_mrg_rxbuf *hdr = buf; + u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); +@@ -772,7 +776,7 @@ static struct sk_buff *receive_mergeable + put_page(xdp_page); + goto err_xdp; + } +- *xdp_xmit = true; ++ *xdp_xmit |= VIRTIO_XDP_REDIR; + if (unlikely(xdp_page != page)) + put_page(page); + rcu_read_unlock(); +@@ -784,7 +788,7 @@ static struct sk_buff *receive_mergeable + put_page(xdp_page); + goto 
err_xdp; + } +- *xdp_xmit = true; ++ *xdp_xmit |= VIRTIO_XDP_TX; + if (unlikely(xdp_page != page)) + put_page(page); + rcu_read_unlock(); +@@ -893,7 +897,8 @@ xdp_xmit: + } + + static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, +- void *buf, unsigned int len, void **ctx, bool *xdp_xmit) ++ void *buf, unsigned int len, void **ctx, ++ unsigned int *xdp_xmit) + { + struct net_device *dev = vi->dev; + struct sk_buff *skb; +@@ -1186,7 +1191,8 @@ static void refill_work(struct work_stru + } + } + +-static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) ++static int virtnet_receive(struct receive_queue *rq, int budget, ++ unsigned int *xdp_xmit) + { + struct virtnet_info *vi = rq->vq->vdev->priv; + unsigned int len, received = 0, bytes = 0; +@@ -1275,7 +1281,7 @@ static int virtnet_poll(struct napi_stru + struct virtnet_info *vi = rq->vq->vdev->priv; + struct send_queue *sq; + unsigned int received, qp; +- bool xdp_xmit = false; ++ unsigned int xdp_xmit = 0; + + virtnet_poll_cleantx(rq); + +@@ -1285,12 +1291,14 @@ static int virtnet_poll(struct napi_stru + if (received < budget) + virtqueue_napi_complete(napi, rq->vq, received); + +- if (xdp_xmit) { ++ if (xdp_xmit & VIRTIO_XDP_REDIR) ++ xdp_do_flush_map(); ++ ++ if (xdp_xmit & VIRTIO_XDP_TX) { + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + + smp_processor_id(); + sq = &vi->sq[qp]; + virtqueue_kick(sq->vq); +- xdp_do_flush_map(); + } + + return received; diff --git a/queue-4.17/vsock-fix-loopback-on-big-endian-systems.patch b/queue-4.17/vsock-fix-loopback-on-big-endian-systems.patch new file mode 100644 index 00000000000..ed55a589a48 --- /dev/null +++ b/queue-4.17/vsock-fix-loopback-on-big-endian-systems.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Claudio Imbrenda +Date: Wed, 20 Jun 2018 15:51:51 +0200 +Subject: VSOCK: fix loopback on big-endian systems + +From: Claudio Imbrenda + +[ Upstream commit e5ab564c9ebee77794842ca7d7476147b83d6a27 
] + +The dst_cid and src_cid are 64 bits, therefore 64 bit accessors should be +used, and in fact in virtio_transport_common.c only 64 bit accessors are +used. Using 32 bit accessors for 64 bit values breaks big endian systems. + +This patch fixes a wrong use of le32_to_cpu in virtio_transport_send_pkt. + +Fixes: b9116823189e85ccf384 ("VSOCK: add loopback to virtio_transport") + +Signed-off-by: Claudio Imbrenda +Reviewed-by: Stefan Hajnoczi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/vmw_vsock/virtio_transport.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/vmw_vsock/virtio_transport.c ++++ b/net/vmw_vsock/virtio_transport.c +@@ -201,7 +201,7 @@ virtio_transport_send_pkt(struct virtio_ + return -ENODEV; + } + +- if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) ++ if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) + return virtio_transport_send_pkt_loopback(vsock, pkt); + + if (pkt->reply) diff --git a/queue-4.17/xen-netfront-fix-mismatched-rtnl_unlock.patch b/queue-4.17/xen-netfront-fix-mismatched-rtnl_unlock.patch new file mode 100644 index 00000000000..ebd9c7a8792 --- /dev/null +++ b/queue-4.17/xen-netfront-fix-mismatched-rtnl_unlock.patch @@ -0,0 +1,38 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Ross Lagerwall +Date: Thu, 21 Jun 2018 14:00:20 +0100 +Subject: xen-netfront: Fix mismatched rtnl_unlock + +From: Ross Lagerwall + +[ Upstream commit cb257783c2927b73614b20f915a91ff78aa6f3e8 ] + +Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") +Reported-by: Ben Hutchings +Signed-off-by: Ross Lagerwall +Reviewed-by: Juergen Gross +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -1810,7 +1810,7 @@ static int talk_to_netback(struct xenbus + err = xen_net_read_mac(dev, info->netdev->dev_addr); + if (err) { + xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); +- goto out; ++ goto out_unlocked; + } + + rtnl_lock(); +@@ -1925,6 +1925,7 @@ abort_transaction_no_dev_fatal: + xennet_destroy_queues(info); + out: + rtnl_unlock(); ++out_unlocked: + device_unregister(&dev->dev); + return err; + } diff --git a/queue-4.17/xen-netfront-update-features-after-registering-netdev.patch b/queue-4.17/xen-netfront-update-features-after-registering-netdev.patch new file mode 100644 index 00000000000..4c94a91a4ba --- /dev/null +++ b/queue-4.17/xen-netfront-update-features-after-registering-netdev.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Jul 19 08:32:06 CEST 2018 +From: Ross Lagerwall +Date: Thu, 21 Jun 2018 14:00:21 +0100 +Subject: xen-netfront: Update features after registering netdev + +From: Ross Lagerwall + +[ Upstream commit 45c8184c1bed1ca8a7f02918552063a00b909bf5 ] + +Update the features after calling register_netdev() otherwise the +device features are not set up correctly and it is not possible to change +the MTU of the device. After this change, the features reported by +ethtool match the device's features before the commit which introduced +the issue and it is possible to change the device's MTU. + +Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") +Reported-by: Liam Shepherd +Signed-off-by: Ross Lagerwall +Reviewed-by: Juergen Gross +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -1951,10 +1951,6 @@ static int xennet_connect(struct net_dev + /* talk_to_netback() sets the correct number of queues */ + num_queues = dev->real_num_tx_queues; + +- rtnl_lock(); +- netdev_update_features(dev); +- rtnl_unlock(); +- + if (dev->reg_state == NETREG_UNINITIALIZED) { + err = register_netdev(dev); + if (err) { +@@ -1964,6 +1960,10 @@ static int xennet_connect(struct net_dev + } + } + ++ rtnl_lock(); ++ netdev_update_features(dev); ++ rtnl_unlock(); ++ + /* + * All public and private state should now be sane. Get + * ready to start sending and receiving packets and give the driver