From: Greg Kroah-Hartman
Date: Fri, 14 Apr 2017 09:25:39 +0000 (+0200)
Subject: 4.4-stable patches
X-Git-Tag: v4.10.11~7
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=af3d8b4e397b249e80db3023dd9c3ad2d680caf2;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
blk-mq-avoid-memory-reclaim-when-remapping-queues.patch
ibmveth-set-correct-gso_size-and-gso_type.patch
net-mlx4_core-fix-racy-cq-completion-queue-free.patch
net-mlx4_core-fix-when-to-save-some-qp-context-flags-for-dynamic-vst-to-vgt-transitions.patch
net-mlx4_en-fix-bad-wqe-issue.patch
usb-hub-wait-for-connection-to-be-reestablished-after-port-reset.patch
---
diff --git a/queue-4.4/blk-mq-avoid-memory-reclaim-when-remapping-queues.patch b/queue-4.4/blk-mq-avoid-memory-reclaim-when-remapping-queues.patch
new file mode 100644
index 00000000000..beb048e7868
--- /dev/null
+++ b/queue-4.4/blk-mq-avoid-memory-reclaim-when-remapping-queues.patch
@@ -0,0 +1,109 @@
+From 36e1f3d107867b25c616c2fd294f5a1c9d4e5d09 Mon Sep 17 00:00:00 2001
+From: Gabriel Krisman Bertazi
+Date: Tue, 6 Dec 2016 13:31:44 -0200
+Subject: blk-mq: Avoid memory reclaim when remapping queues
+
+From: Gabriel Krisman Bertazi
+
+commit 36e1f3d107867b25c616c2fd294f5a1c9d4e5d09 upstream.
+
+While stressing memory and IO at the same time, we changed SMT settings
+and were able to consistently trigger deadlocks in the mm system, which
+froze the entire machine.
+
+I think that under memory stress conditions, the large allocations
+performed by blk_mq_init_rq_map may trigger a reclaim, which stalls
+waiting on the block layer remapping completion, thus deadlocking the
+system. The trace below was collected after the machine stalled,
+waiting for the hotplug event completion.
+
+The simplest fix for this is to make allocations in this path
+non-reclaimable, with GFP_NOIO. With this patch, we couldn't hit the
+issue anymore.
+
+This should apply cleanly on top of Jens's for-next branch.
+
+Changes since v1:
+ - Use GFP_NOIO instead of GFP_NOWAIT.
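
As a rough sketch of the rule this patch applies (illustrative only: the helper name and surrounding context below are assumed, and only the flag combination comes from the hunks further down), any allocation reachable from the queue-remap path is made with GFP_NOIO so that reclaim triggered by it cannot wait on block-layer progress:

/* Illustrative sketch only; helper name and call site are assumed. */
#include <linux/slab.h>
#include <linux/gfp.h>

static void *remap_path_zalloc(size_t count, size_t size, int node)
{
	/*
	 * GFP_NOIO: reclaim may still run, but it is not allowed to start
	 * new I/O, so it cannot block behind the queue remap in progress.
	 * __GFP_NOWARN | __GFP_NORETRY leave failure handling to the
	 * caller instead of retrying aggressively.
	 */
	return kzalloc_node(count * size,
			    GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
			    node);
}

The actual change simply swaps GFP_KERNEL for GFP_NOIO at the allocation sites shown in the hunks below.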
+ + Call Trace: +[c000000f0160aaf0] [c000000f0160ab50] 0xc000000f0160ab50 (unreliable) +[c000000f0160acc0] [c000000000016624] __switch_to+0x2e4/0x430 +[c000000f0160ad20] [c000000000b1a880] __schedule+0x310/0x9b0 +[c000000f0160ae00] [c000000000b1af68] schedule+0x48/0xc0 +[c000000f0160ae30] [c000000000b1b4b0] schedule_preempt_disabled+0x20/0x30 +[c000000f0160ae50] [c000000000b1d4fc] __mutex_lock_slowpath+0xec/0x1f0 +[c000000f0160aed0] [c000000000b1d678] mutex_lock+0x78/0xa0 +[c000000f0160af00] [d000000019413cac] xfs_reclaim_inodes_ag+0x33c/0x380 [xfs] +[c000000f0160b0b0] [d000000019415164] xfs_reclaim_inodes_nr+0x54/0x70 [xfs] +[c000000f0160b0f0] [d0000000194297f8] xfs_fs_free_cached_objects+0x38/0x60 [xfs] +[c000000f0160b120] [c0000000003172c8] super_cache_scan+0x1f8/0x210 +[c000000f0160b190] [c00000000026301c] shrink_slab.part.13+0x21c/0x4c0 +[c000000f0160b2d0] [c000000000268088] shrink_zone+0x2d8/0x3c0 +[c000000f0160b380] [c00000000026834c] do_try_to_free_pages+0x1dc/0x520 +[c000000f0160b450] [c00000000026876c] try_to_free_pages+0xdc/0x250 +[c000000f0160b4e0] [c000000000251978] __alloc_pages_nodemask+0x868/0x10d0 +[c000000f0160b6f0] [c000000000567030] blk_mq_init_rq_map+0x160/0x380 +[c000000f0160b7a0] [c00000000056758c] blk_mq_map_swqueue+0x33c/0x360 +[c000000f0160b820] [c000000000567904] blk_mq_queue_reinit+0x64/0xb0 +[c000000f0160b850] [c00000000056a16c] blk_mq_queue_reinit_notify+0x19c/0x250 +[c000000f0160b8a0] [c0000000000f5d38] notifier_call_chain+0x98/0x100 +[c000000f0160b8f0] [c0000000000c5fb0] __cpu_notify+0x70/0xe0 +[c000000f0160b930] [c0000000000c63c4] notify_prepare+0x44/0xb0 +[c000000f0160b9b0] [c0000000000c52f4] cpuhp_invoke_callback+0x84/0x250 +[c000000f0160ba10] [c0000000000c570c] cpuhp_up_callbacks+0x5c/0x120 +[c000000f0160ba60] [c0000000000c7cb8] _cpu_up+0xf8/0x1d0 +[c000000f0160bac0] [c0000000000c7eb0] do_cpu_up+0x120/0x150 +[c000000f0160bb40] [c0000000006fe024] cpu_subsys_online+0x64/0xe0 +[c000000f0160bb90] [c0000000006f5124] device_online+0xb4/0x120 +[c000000f0160bbd0] [c0000000006f5244] online_store+0xb4/0xc0 +[c000000f0160bc20] [c0000000006f0a68] dev_attr_store+0x68/0xa0 +[c000000f0160bc60] [c0000000003ccc30] sysfs_kf_write+0x80/0xb0 +[c000000f0160bca0] [c0000000003cbabc] kernfs_fop_write+0x17c/0x250 +[c000000f0160bcf0] [c00000000030fe6c] __vfs_write+0x6c/0x1e0 +[c000000f0160bd90] [c000000000311490] vfs_write+0xd0/0x270 +[c000000f0160bde0] [c0000000003131fc] SyS_write+0x6c/0x110 +[c000000f0160be30] [c000000000009204] system_call+0x38/0xec + +Signed-off-by: Gabriel Krisman Bertazi +Cc: Brian King +Cc: Douglas Miller +Cc: linux-block@vger.kernel.org +Cc: linux-scsi@vger.kernel.org +Signed-off-by: Jens Axboe +Signed-off-by: Sumit Semwal +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-mq.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -1470,7 +1470,7 @@ static struct blk_mq_tags *blk_mq_init_r + INIT_LIST_HEAD(&tags->page_list); + + tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), +- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, ++ GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + set->numa_node); + if (!tags->rqs) { + blk_mq_free_tags(tags); +@@ -1496,7 +1496,7 @@ static struct blk_mq_tags *blk_mq_init_r + + do { + page = alloc_pages_node(set->numa_node, +- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, ++ GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, + this_order); + if (page) + break; +@@ -1517,7 +1517,7 @@ static struct blk_mq_tags *blk_mq_init_r + * Allow kmemleak to 
scan these pages as they contain pointers
+ * to additional allocations like via ops->init_request().
+ */
+-	kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
++	kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
+	entries_per_page = order_to_size(this_order) / rq_size;
+	to_do = min(entries_per_page, set->queue_depth - i);
+	left -= to_do * rq_size;
diff --git a/queue-4.4/ibmveth-set-correct-gso_size-and-gso_type.patch b/queue-4.4/ibmveth-set-correct-gso_size-and-gso_type.patch
new file mode 100644
index 00000000000..c55dcc03232
--- /dev/null
+++ b/queue-4.4/ibmveth-set-correct-gso_size-and-gso_type.patch
@@ -0,0 +1,172 @@
+From 7b5967389f5a8dfb9d32843830f5e2717e20995d Mon Sep 17 00:00:00 2001
+From: Thomas Falcon
+Date: Thu, 8 Dec 2016 16:40:03 -0600
+Subject: ibmveth: set correct gso_size and gso_type
+
+From: Thomas Falcon
+
+commit 7b5967389f5a8dfb9d32843830f5e2717e20995d upstream.
+
+This patch is based on an earlier one submitted
+by Jon Maxwell with the following commit message:
+
+"We recently encountered a bug where a few customers using ibmveth on the
+same LPAR hit an issue where a TCP session hung when large receive was
+enabled. Closer analysis revealed that the session was stuck because
+one side was advertising a zero window repeatedly.
+
+We narrowed this down to the fact that the ibmveth driver did not set
+gso_size, which is translated by TCP into the MSS later up the stack.
+The MSS is used to calculate the TCP window size, and as that was
+abnormally large, it was calculating a zero window, even though the
+socket's receive buffer was completely empty."
+
+We rely on the Virtual I/O Server partition in a pseries
+environment to provide the MSS through the TCP header checksum
+field. The stipulation is that users should not disable checksum
+offloading if rx packet aggregation is enabled through VIOS.
+
+Some firmware offerings provide the MSS in the RX buffer.
+This is signalled by a bit in the RX queue descriptor.
+
+Reviewed-by: Brian King
+Reviewed-by: Pradeep Satyanarayana
+Reviewed-by: Marcelo Ricardo Leitner
+Reviewed-by: Jonathan Maxwell
+Reviewed-by: David Dai
+Signed-off-by: Thomas Falcon
+Signed-off-by: David S.
Miller +Signed-off-by: Sumit Semwal +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/ibm/ibmveth.c | 65 +++++++++++++++++++++++++++++++++++-- + drivers/net/ethernet/ibm/ibmveth.h | 1 + 2 files changed, 64 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/ibm/ibmveth.c ++++ b/drivers/net/ethernet/ibm/ibmveth.c +@@ -58,7 +58,7 @@ static struct kobj_type ktype_veth_pool; + + static const char ibmveth_driver_name[] = "ibmveth"; + static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver"; +-#define ibmveth_driver_version "1.05" ++#define ibmveth_driver_version "1.06" + + MODULE_AUTHOR("Santiago Leon "); + MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver"); +@@ -137,6 +137,11 @@ static inline int ibmveth_rxq_frame_offs + return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK; + } + ++static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter) ++{ ++ return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT; ++} ++ + static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter) + { + return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length); +@@ -1172,6 +1177,45 @@ map_failed: + goto retry_bounce; + } + ++static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt) ++{ ++ int offset = 0; ++ ++ /* only TCP packets will be aggregated */ ++ if (skb->protocol == htons(ETH_P_IP)) { ++ struct iphdr *iph = (struct iphdr *)skb->data; ++ ++ if (iph->protocol == IPPROTO_TCP) { ++ offset = iph->ihl * 4; ++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; ++ } else { ++ return; ++ } ++ } else if (skb->protocol == htons(ETH_P_IPV6)) { ++ struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data; ++ ++ if (iph6->nexthdr == IPPROTO_TCP) { ++ offset = sizeof(struct ipv6hdr); ++ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; ++ } else { ++ return; ++ } ++ } else { ++ return; ++ } ++ /* if mss is not set through Large Packet bit/mss in rx buffer, ++ * expect that the mss will be written to the tcp header checksum. 
++ */ ++ if (lrg_pkt) { ++ skb_shinfo(skb)->gso_size = mss; ++ } else if (offset) { ++ struct tcphdr *tcph = (struct tcphdr *)(skb->data + offset); ++ ++ skb_shinfo(skb)->gso_size = ntohs(tcph->check); ++ tcph->check = 0; ++ } ++} ++ + static int ibmveth_poll(struct napi_struct *napi, int budget) + { + struct ibmveth_adapter *adapter = +@@ -1180,6 +1224,7 @@ static int ibmveth_poll(struct napi_stru + int frames_processed = 0; + unsigned long lpar_rc; + struct iphdr *iph; ++ u16 mss = 0; + + restart_poll: + while (frames_processed < budget) { +@@ -1197,9 +1242,21 @@ restart_poll: + int length = ibmveth_rxq_frame_length(adapter); + int offset = ibmveth_rxq_frame_offset(adapter); + int csum_good = ibmveth_rxq_csum_good(adapter); ++ int lrg_pkt = ibmveth_rxq_large_packet(adapter); + + skb = ibmveth_rxq_get_buffer(adapter); + ++ /* if the large packet bit is set in the rx queue ++ * descriptor, the mss will be written by PHYP eight ++ * bytes from the start of the rx buffer, which is ++ * skb->data at this stage ++ */ ++ if (lrg_pkt) { ++ __be64 *rxmss = (__be64 *)(skb->data + 8); ++ ++ mss = (u16)be64_to_cpu(*rxmss); ++ } ++ + new_skb = NULL; + if (length < rx_copybreak) + new_skb = netdev_alloc_skb(netdev, length); +@@ -1233,11 +1290,15 @@ restart_poll: + if (iph->check == 0xffff) { + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); +- adapter->rx_large_packets++; + } + } + } + ++ if (length > netdev->mtu + ETH_HLEN) { ++ ibmveth_rx_mss_helper(skb, mss, lrg_pkt); ++ adapter->rx_large_packets++; ++ } ++ + napi_gro_receive(napi, skb); /* send it up */ + + netdev->stats.rx_packets++; +--- a/drivers/net/ethernet/ibm/ibmveth.h ++++ b/drivers/net/ethernet/ibm/ibmveth.h +@@ -209,6 +209,7 @@ struct ibmveth_rx_q_entry { + #define IBMVETH_RXQ_TOGGLE 0x80000000 + #define IBMVETH_RXQ_TOGGLE_SHIFT 31 + #define IBMVETH_RXQ_VALID 0x40000000 ++#define IBMVETH_RXQ_LRG_PKT 0x04000000 + #define IBMVETH_RXQ_NO_CSUM 0x02000000 + #define IBMVETH_RXQ_CSUM_GOOD 0x01000000 + #define IBMVETH_RXQ_OFF_MASK 0x0000FFFF diff --git a/queue-4.4/net-mlx4_core-fix-racy-cq-completion-queue-free.patch b/queue-4.4/net-mlx4_core-fix-racy-cq-completion-queue-free.patch new file mode 100644 index 00000000000..9c54aca07f3 --- /dev/null +++ b/queue-4.4/net-mlx4_core-fix-racy-cq-completion-queue-free.patch @@ -0,0 +1,150 @@ +From 291c566a28910614ce42d0ffe82196eddd6346f4 Mon Sep 17 00:00:00 2001 +From: Jack Morgenstein +Date: Mon, 16 Jan 2017 18:31:37 +0200 +Subject: net/mlx4_core: Fix racy CQ (Completion Queue) free + +From: Jack Morgenstein + +commit 291c566a28910614ce42d0ffe82196eddd6346f4 upstream. + +In function mlx4_cq_completion() and mlx4_cq_event(), the +radix_tree_lookup requires a rcu_read_lock. +This is mandatory: if another core frees the CQ, it could +run the radix_tree_node_rcu_free() call_rcu() callback while +its being used by the radix tree lookup function. + +Additionally, in function mlx4_cq_event(), since we are adding +the rcu lock around the radix-tree lookup, we no longer need to take +the spinlock. Also, the synchronize_irq() call for the async event +eliminates the need for incrementing the cq reference count in +mlx4_cq_event(). + +Other changes: +1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock: + we no longer take this spinlock in the interrupt context. + The spinlock here, therefore, simply protects against different + threads simultaneously invoking mlx4_cq_free() for different cq's. + +2. 
In function mlx4_cq_free(), we move the radix tree delete to before + the synchronize_irq() calls. This guarantees that we will not + access this cq during any subsequent interrupts, and therefore can + safely free the CQ after the synchronize_irq calls. The rcu_read_lock + in the interrupt handlers only needs to protect against corrupting the + radix tree; the interrupt handlers may access the cq outside the + rcu_read_lock due to the synchronize_irq calls which protect against + premature freeing of the cq. + +3. In function mlx4_cq_event(), we change the mlx_warn message to mlx4_dbg. + +4. We leave the cq reference count mechanism in place, because it is + still needed for the cq completion tasklet mechanism. + +Fixes: 6d90aa5cf17b ("net/mlx4_core: Make sure there are no pending async events when freeing CQ") +Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") +Signed-off-by: Jack Morgenstein +Signed-off-by: Matan Barak +Signed-off-by: Tariq Toukan +Signed-off-by: David S. Miller +Signed-off-by: Sumit Semwal +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/cq.c | 38 ++++++++++++++++---------------- + 1 file changed, 20 insertions(+), 18 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/cq.c ++++ b/drivers/net/ethernet/mellanox/mlx4/cq.c +@@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev + { + struct mlx4_cq *cq; + ++ rcu_read_lock(); + cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, + cqn & (dev->caps.num_cqs - 1)); ++ rcu_read_unlock(); ++ + if (!cq) { + mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); + return; + } + ++ /* Acessing the CQ outside of rcu_read_lock is safe, because ++ * the CQ is freed only after interrupt handling is completed. ++ */ + ++cq->arm_sn; + + cq->comp(cq); +@@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, + struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; + struct mlx4_cq *cq; + +- spin_lock(&cq_table->lock); +- ++ rcu_read_lock(); + cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); +- if (cq) +- atomic_inc(&cq->refcount); +- +- spin_unlock(&cq_table->lock); ++ rcu_read_unlock(); + + if (!cq) { +- mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn); ++ mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn); + return; + } + ++ /* Acessing the CQ outside of rcu_read_lock is safe, because ++ * the CQ is freed only after interrupt handling is completed. 
++ */ + cq->event(cq, event_type); +- +- if (atomic_dec_and_test(&cq->refcount)) +- complete(&cq->free); + } + + static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, +@@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, + if (err) + return err; + +- spin_lock_irq(&cq_table->lock); ++ spin_lock(&cq_table->lock); + err = radix_tree_insert(&cq_table->tree, cq->cqn, cq); +- spin_unlock_irq(&cq_table->lock); ++ spin_unlock(&cq_table->lock); + if (err) + goto err_icm; + +@@ -347,9 +349,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, + return 0; + + err_radix: +- spin_lock_irq(&cq_table->lock); ++ spin_lock(&cq_table->lock); + radix_tree_delete(&cq_table->tree, cq->cqn); +- spin_unlock_irq(&cq_table->lock); ++ spin_unlock(&cq_table->lock); + + err_icm: + mlx4_cq_free_icm(dev, cq->cqn); +@@ -368,15 +370,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, + if (err) + mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn); + ++ spin_lock(&cq_table->lock); ++ radix_tree_delete(&cq_table->tree, cq->cqn); ++ spin_unlock(&cq_table->lock); ++ + synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq); + if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq != + priv->eq_table.eq[MLX4_EQ_ASYNC].irq) + synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); + +- spin_lock_irq(&cq_table->lock); +- radix_tree_delete(&cq_table->tree, cq->cqn); +- spin_unlock_irq(&cq_table->lock); +- + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); + wait_for_completion(&cq->free); diff --git a/queue-4.4/net-mlx4_core-fix-when-to-save-some-qp-context-flags-for-dynamic-vst-to-vgt-transitions.patch b/queue-4.4/net-mlx4_core-fix-when-to-save-some-qp-context-flags-for-dynamic-vst-to-vgt-transitions.patch new file mode 100644 index 00000000000..08103ce0806 --- /dev/null +++ b/queue-4.4/net-mlx4_core-fix-when-to-save-some-qp-context-flags-for-dynamic-vst-to-vgt-transitions.patch @@ -0,0 +1,58 @@ +From 7c3945bc2073554bb2ecf983e073dee686679c53 Mon Sep 17 00:00:00 2001 +From: Jack Morgenstein +Date: Mon, 16 Jan 2017 18:31:38 +0200 +Subject: net/mlx4_core: Fix when to save some qp context flags for dynamic VST to VGT transitions + +From: Jack Morgenstein + +commit 7c3945bc2073554bb2ecf983e073dee686679c53 upstream. + +Save the qp context flags byte containing the flag disabling vlan stripping +in the RESET to INIT qp transition, rather than in the INIT to RTR +transition. Per the firmware spec, the flags in this byte are active +in the RESET to INIT transition. + +As a result of saving the flags in the incorrect qp transition, when +switching dynamically from VGT to VST and back to VGT, the vlan +remained stripped (as is required for VST) and did not return to +not-stripped (as is required for VGT). + +Fixes: f0f829bf42cd ("net/mlx4_core: Add immediate activate for VGT->VST->VGT") +Signed-off-by: Jack Morgenstein +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Sumit Semwal +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c ++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +@@ -2955,6 +2955,9 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4 + put_res(dev, slave, srqn, RES_SRQ); + qp->srq = srq; + } ++ ++ /* Save param3 for dynamic changes from VST back to VGT */ ++ qp->param3 = qpc->param3; + put_res(dev, slave, rcqn, RES_CQ); + put_res(dev, slave, mtt_base, RES_MTT); + res_end_move(dev, slave, RES_QP, qpn); +@@ -3747,7 +3750,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4 + int qpn = vhcr->in_modifier & 0x7fffff; + struct res_qp *qp; + u8 orig_sched_queue; +- __be32 orig_param3 = qpc->param3; + u8 orig_vlan_control = qpc->pri_path.vlan_control; + u8 orig_fvl_rx = qpc->pri_path.fvl_rx; + u8 orig_pri_path_fl = qpc->pri_path.fl; +@@ -3789,7 +3791,6 @@ out: + */ + if (!err) { + qp->sched_queue = orig_sched_queue; +- qp->param3 = orig_param3; + qp->vlan_control = orig_vlan_control; + qp->fvl_rx = orig_fvl_rx; + qp->pri_path_fl = orig_pri_path_fl; diff --git a/queue-4.4/net-mlx4_en-fix-bad-wqe-issue.patch b/queue-4.4/net-mlx4_en-fix-bad-wqe-issue.patch new file mode 100644 index 00000000000..5c21db05359 --- /dev/null +++ b/queue-4.4/net-mlx4_en-fix-bad-wqe-issue.patch @@ -0,0 +1,42 @@ +From 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc Mon Sep 17 00:00:00 2001 +From: Eugenia Emantayev +Date: Thu, 29 Dec 2016 18:37:10 +0200 +Subject: net/mlx4_en: Fix bad WQE issue + +From: Eugenia Emantayev + +commit 6496bbf0ec481966ef9ffe5b6660d8d1b55c60cc upstream. + +Single send WQE in RX buffer should be stamped with software +ownership in order to prevent the flow of QP in error in FW +once UPDATE_QP is called. + +Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Sumit Semwal +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +@@ -439,8 +439,14 @@ int mlx4_en_activate_rx_rings(struct mlx + ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; + + ring->stride = stride; +- if (ring->stride <= TXBB_SIZE) ++ if (ring->stride <= TXBB_SIZE) { ++ /* Stamp first unused send wqe */ ++ __be32 *ptr = (__be32 *)ring->buf; ++ __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT); ++ *ptr = stamp; ++ /* Move pointer to start of rx section */ + ring->buf += TXBB_SIZE; ++ } + + ring->log_stride = ffs(ring->stride) - 1; + ring->buf_size = ring->size * ring->stride; diff --git a/queue-4.4/series b/queue-4.4/series index e61bafc1a08..456d9525318 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -10,3 +10,9 @@ mips-select-have_irq_exit_on_irq_stack.patch mips-irq-stack-fix-erroneous-jal-to-plat_irq_dispatch.patch crypto-caam-fix-rng-deinstantiation-error-checking.patch net-packet-fix-overflow-in-check-for-priv-area-size.patch +blk-mq-avoid-memory-reclaim-when-remapping-queues.patch +usb-hub-wait-for-connection-to-be-reestablished-after-port-reset.patch +net-mlx4_en-fix-bad-wqe-issue.patch +net-mlx4_core-fix-racy-cq-completion-queue-free.patch +net-mlx4_core-fix-when-to-save-some-qp-context-flags-for-dynamic-vst-to-vgt-transitions.patch +ibmveth-set-correct-gso_size-and-gso_type.patch diff --git a/queue-4.4/usb-hub-wait-for-connection-to-be-reestablished-after-port-reset.patch b/queue-4.4/usb-hub-wait-for-connection-to-be-reestablished-after-port-reset.patch new file mode 100644 index 00000000000..c6b3960c933 --- /dev/null +++ b/queue-4.4/usb-hub-wait-for-connection-to-be-reestablished-after-port-reset.patch @@ -0,0 +1,65 @@ +From 22547c4cc4fe20698a6a85a55b8788859134b8e4 Mon Sep 17 00:00:00 2001 +From: Guenter Roeck +Date: Thu, 1 Dec 2016 13:49:59 -0800 +Subject: usb: hub: Wait for connection to be reestablished after port reset + +From: Guenter Roeck + +commit 22547c4cc4fe20698a6a85a55b8788859134b8e4 upstream. + +On a system with a defective USB device connected to an USB hub, +an endless sequence of port connect events was observed. The sequence +of events as observed is as follows: + +- Port reports connected event (port status=USB_PORT_STAT_CONNECTION). +- Event handler debounces port and resets it by calling hub_port_reset(). +- hub_port_reset() calls hub_port_wait_reset() to wait for the reset + to complete. +- The reset completes, but USB_PORT_STAT_CONNECTION is not immediately + set in the port status register. +- hub_port_wait_reset() returns -ENOTCONN. +- Port initialization sequence is aborted. +- A few milliseconds later, the port again reports a connected event, + and the sequence repeats. + +This continues either forever or, randomly, stops if the connection +is already re-established when the port status is read. It results in +a high rate of udev events. This in turn destabilizes userspace since +the above sequence holds the device mutex pretty much continuously +and prevents userspace from actually reading the device status. + +To prevent the problem from happening, let's wait for the connection +to be re-established after a port reset. If the device was actually +disconnected, the code will still return an error, but it will do so +only after the long reset timeout. 
+ +Cc: Douglas Anderson +Signed-off-by: Guenter Roeck +Acked-by: Alan Stern +Signed-off-by: Sumit Semwal +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/usb/core/hub.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -2602,8 +2602,15 @@ static int hub_port_wait_reset(struct us + if (ret < 0) + return ret; + +- /* The port state is unknown until the reset completes. */ +- if (!(portstatus & USB_PORT_STAT_RESET)) ++ /* ++ * The port state is unknown until the reset completes. ++ * ++ * On top of that, some chips may require additional time ++ * to re-establish a connection after the reset is complete, ++ * so also wait for the connection to be re-established. ++ */ ++ if (!(portstatus & USB_PORT_STAT_RESET) && ++ (portstatus & USB_PORT_STAT_CONNECTION)) + break; + + /* switch to the long delay after two short delay failures */
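
In outline, the tightened exit condition in hub_port_wait_reset() behaves like the sketch below. This is a hedged, self-contained illustration with invented helper names, bit values, and a simulated status source, not the hub.c code; only the combined exit condition mirrors the patch. The loop now succeeds only once the reset bit has cleared and the connection bit is present again, and a genuinely disconnected device still fails, just after the full timeout rather than on the first read that races the reconnect.

/* Hedged sketch: names, constants, and the simulated status source are
 * invented for illustration; only the exit condition mirrors the patch. */
#include <stdint.h>
#include <stdio.h>

#define PORT_STAT_CONNECTION 0x0001	/* illustrative bit values */
#define PORT_STAT_RESET      0x0010

/* Stand-in for reading the hub port status register. */
static uint16_t read_port_status(unsigned int reads)
{
	/* reset clears quickly, but the connection bit comes back later */
	if (reads < 2)
		return PORT_STAT_RESET;
	if (reads < 5)
		return 0;			/* reset done, link not back yet */
	return PORT_STAT_CONNECTION;		/* device reconnected */
}

static int wait_port_reset(unsigned int max_polls)
{
	for (unsigned int polls = 0; polls < max_polls; polls++) {
		uint16_t status = read_port_status(polls);

		/* Old behaviour gave up as soon as RESET cleared; the new
		 * behaviour also insists the CONNECTION bit is present. */
		if (!(status & PORT_STAT_RESET) &&
		     (status & PORT_STAT_CONNECTION))
			return 0;
	}
	return -1;	/* device really stayed disconnected */
}

int main(void)
{
	printf("wait result: %d\n", wait_port_reset(20));
	return 0;
}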