From 40149924ed3f7df96135120bd9c816972452bb4a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 1 Feb 2016 17:01:10 -0800
Subject: [PATCH] 4.4-stable patches

added patches:
	block-split-bios-to-max-possible-length.patch
	crypto-sun4i-ss-add-missing-statesize.patch
	hid-usbhid-fix-recursive-deadlock.patch
	nfsv4.1-pnfs-fixup-an-lo-plh_block_lgets-imbalance-in-layoutreturn.patch
	ocfs2-nfs-hangs-in-__ocfs2_cluster_lock-due-to-race-with-ocfs2_unblock_lock.patch
---
 ...ck-split-bios-to-max-possible-length.patch | 68 ++++++++++++++
 ...rypto-sun4i-ss-add-missing-statesize.patch | 42 +++++++++
 .../hid-usbhid-fix-recursive-deadlock.patch   | 61 ++++++++++++
 ...lock_lgets-imbalance-in-layoutreturn.patch | 31 +++++++
 ...-due-to-race-with-ocfs2_unblock_lock.patch | 93 +++++++++++++++++++
 queue-4.4/series                              |  5 +
 6 files changed, 300 insertions(+)
 create mode 100644 queue-4.4/block-split-bios-to-max-possible-length.patch
 create mode 100644 queue-4.4/crypto-sun4i-ss-add-missing-statesize.patch
 create mode 100644 queue-4.4/hid-usbhid-fix-recursive-deadlock.patch
 create mode 100644 queue-4.4/nfsv4.1-pnfs-fixup-an-lo-plh_block_lgets-imbalance-in-layoutreturn.patch
 create mode 100644 queue-4.4/ocfs2-nfs-hangs-in-__ocfs2_cluster_lock-due-to-race-with-ocfs2_unblock_lock.patch
 create mode 100644 queue-4.4/series

diff --git a/queue-4.4/block-split-bios-to-max-possible-length.patch b/queue-4.4/block-split-bios-to-max-possible-length.patch
new file mode 100644
index 00000000000..5120d2f9e0b
--- /dev/null
+++ b/queue-4.4/block-split-bios-to-max-possible-length.patch
@@ -0,0 +1,68 @@
+From e36f6204288088fda50d1c84830340ccb70f85ff Mon Sep 17 00:00:00 2001
+From: Keith Busch <keith.busch@intel.com>
+Date: Tue, 12 Jan 2016 15:08:39 -0700
+Subject: block: split bios to max possible length
+
+From: Keith Busch <keith.busch@intel.com>
+
+commit e36f6204288088fda50d1c84830340ccb70f85ff upstream.
+
+This splits bio in the middle of a vector to form the largest possible
+bio at the h/w's desired alignment, and guarantees the bio being split
+will have some data.
+
+The criteria for splitting is changed from the max sectors to the h/w's
+optimal sector alignment if it is provided. For h/w that advertise their
+block storage's underlying chunk size, it's a big performance win to not
+submit commands that cross them. If sector alignment is not provided,
+this patch uses the max sectors as before.
+
+This addresses the performance issue that commit d380561113 attempted to
+fix before it was reverted due to a splitting logic error.
+
+Signed-off-by: Keith Busch <keith.busch@intel.com>
+Cc: Jens Axboe <axboe@fb.com>
+Cc: Ming Lei <tom.leiming@gmail.com>
+Cc: Kent Overstreet <kent.overstreet@gmail.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-merge.c |   19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -81,9 +81,6 @@ static struct bio *blk_bio_segment_split
+ 	struct bio *new = NULL;
+ 
+ 	bio_for_each_segment(bv, bio, iter) {
+-		if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
+-			goto split;
+-
+ 		/*
+ 		 * If the queue doesn't support SG gaps and adding this
+ 		 * offset would create a gap, disallow it.
+@@ -91,6 +88,22 @@ static struct bio *blk_bio_segment_split
+ 		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
+ 			goto split;
+ 
++		if (sectors + (bv.bv_len >> 9) >
++				blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
++			/*
++			 * Consider this a new segment if we're splitting in
++			 * the middle of this vector.
++			 */
++			if (nsegs < queue_max_segments(q) &&
++			    sectors < blk_max_size_offset(q,
++						bio->bi_iter.bi_sector)) {
++				nsegs++;
++				sectors = blk_max_size_offset(q,
++						bio->bi_iter.bi_sector);
++			}
++			goto split;
++		}
++
+ 		if (bvprvp && blk_queue_cluster(q)) {
+ 			if (seg_size + bv.bv_len > queue_max_segment_size(q))
+ 				goto new_segment;
diff --git a/queue-4.4/crypto-sun4i-ss-add-missing-statesize.patch b/queue-4.4/crypto-sun4i-ss-add-missing-statesize.patch
new file mode 100644
index 00000000000..784f22b3e01
--- /dev/null
+++ b/queue-4.4/crypto-sun4i-ss-add-missing-statesize.patch
@@ -0,0 +1,42 @@
+From 4f9ea86604e3ba64edd2817795798168fbb3c1a6 Mon Sep 17 00:00:00 2001
+From: LABBE Corentin <clabbe.montjoie@gmail.com>
+Date: Mon, 16 Nov 2015 09:35:54 +0100
+Subject: crypto: sun4i-ss - add missing statesize
+
+From: LABBE Corentin <clabbe.montjoie@gmail.com>
+
+commit 4f9ea86604e3ba64edd2817795798168fbb3c1a6 upstream.
+
+sun4i-ss implementation of md5/sha1 is via ahash algorithms.
+Commit 8996eafdcbad ("crypto: ahash - ensure statesize is non-zero")
+made it impossible to load them without giving statesize. This patch
+specifies statesize for sha1 and md5.
+
+Fixes: 6298e948215f ("crypto: sunxi-ss - Add Allwinner Security System crypto accelerator")
+Tested-by: Chen-Yu Tsai <wens@csie.org>
+Signed-off-by: LABBE Corentin <clabbe.montjoie@gmail.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/crypto/sunxi-ss/sun4i-ss-core.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
++++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+@@ -39,6 +39,7 @@ static struct sun4i_ss_alg_template ss_a
+ 		.import = sun4i_hash_import_md5,
+ 		.halg = {
+ 			.digestsize = MD5_DIGEST_SIZE,
++			.statesize = sizeof(struct md5_state),
+ 			.base = {
+ 				.cra_name = "md5",
+ 				.cra_driver_name = "md5-sun4i-ss",
+@@ -66,6 +67,7 @@ static struct sun4i_ss_alg_template ss_a
+ 		.import = sun4i_hash_import_sha1,
+ 		.halg = {
+ 			.digestsize = SHA1_DIGEST_SIZE,
++			.statesize = sizeof(struct sha1_state),
+ 			.base = {
+ 				.cra_name = "sha1",
+ 				.cra_driver_name = "sha1-sun4i-ss",
diff --git a/queue-4.4/hid-usbhid-fix-recursive-deadlock.patch b/queue-4.4/hid-usbhid-fix-recursive-deadlock.patch
new file mode 100644
index 00000000000..e74292abbd5
--- /dev/null
+++ b/queue-4.4/hid-usbhid-fix-recursive-deadlock.patch
@@ -0,0 +1,61 @@
+From e470127e9606b1fa151c4184243e61296d1e0c0f Mon Sep 17 00:00:00 2001
+From: Ioan-Adrian Ratiu <adi@adirat.com>
+Date: Fri, 20 Nov 2015 22:19:02 +0200
+Subject: HID: usbhid: fix recursive deadlock
+
+From: Ioan-Adrian Ratiu <adi@adirat.com>
+
+commit e470127e9606b1fa151c4184243e61296d1e0c0f upstream.
+
+The critical section protected by usbhid->lock in hid_ctrl() is too
+big and because of this it causes a recursive deadlock. "Too big" means
+the case statement and the call to hid_input_report() do not need to be
+protected by the spinlock (no URB operations are done inside them).
+
+The deadlock happens because in certain rare cases drivers try to grab
+the lock while handling the ctrl irq which grabs the lock before them
+as described above. For example newer wacom tablets like 056a:033c try
+to reschedule proximity reads from wacom_intuos_schedule_prox_event()
+calling hid_hw_request() -> usbhid_request() -> usbhid_submit_report()
+which tries to grab the usbhid lock already held by hid_ctrl().
+
+There are two ways to get out of this deadlock:
+    1. Make the drivers work "around" the ctrl critical region, in the
+    wacom case for ex. by delaying the scheduling of the proximity read
+    request itself to a workqueue.
+    2. Shrink the critical region so the usbhid lock protects only the
+    instructions which modify usbhid state, calling hid_input_report()
+    with the spinlock unlocked, allowing the device driver to grab the
+    lock first, finish and then grab the lock afterwards in hid_ctrl().
+
+This patch implements the 2nd solution.
+
+Signed-off-by: Ioan-Adrian Ratiu <adi@adirat.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/hid/usbhid/hid-core.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/hid/usbhid/hid-core.c
++++ b/drivers/hid/usbhid/hid-core.c
+@@ -477,8 +477,6 @@ static void hid_ctrl(struct urb *urb)
+ 	struct usbhid_device *usbhid = hid->driver_data;
+ 	int unplug = 0, status = urb->status;
+ 
+-	spin_lock(&usbhid->lock);
+-
+ 	switch (status) {
+ 	case 0:			/* success */
+ 		if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_IN)
+@@ -498,6 +496,8 @@ static void hid_ctrl(struct urb *urb)
+ 		hid_warn(urb->dev, "ctrl urb status %d received\n", status);
+ 	}
+ 
++	spin_lock(&usbhid->lock);
++
+ 	if (unplug) {
+ 		usbhid->ctrltail = usbhid->ctrlhead;
+ 	} else {
diff --git a/queue-4.4/nfsv4.1-pnfs-fixup-an-lo-plh_block_lgets-imbalance-in-layoutreturn.patch b/queue-4.4/nfsv4.1-pnfs-fixup-an-lo-plh_block_lgets-imbalance-in-layoutreturn.patch
new file mode 100644
index 00000000000..b5b7ef4015e
--- /dev/null
+++ b/queue-4.4/nfsv4.1-pnfs-fixup-an-lo-plh_block_lgets-imbalance-in-layoutreturn.patch
@@ -0,0 +1,31 @@
+From 1a093ceb053832c25b92f3cf26b957543c7baf9b Mon Sep 17 00:00:00 2001
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+Date: Mon, 28 Dec 2015 11:27:15 -0500
+Subject: NFSv4.1/pnfs: Fixup an lo->plh_block_lgets imbalance in layoutreturn
+
+From: Trond Myklebust <trond.myklebust@primarydata.com>
+
+commit 1a093ceb053832c25b92f3cf26b957543c7baf9b upstream.
+
+Since commit 2d8ae84fbc32, nothing is bumping lo->plh_block_lgets in the
+layoutreturn path, so it should not be touched in nfs4_layoutreturn_release
+either.
+
+Fixes: 2d8ae84fbc32 ("NFSv4.1/pnfs: Remove redundant lo->plh_block_lgets...")
+Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nfs/nfs4proc.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -8054,7 +8054,6 @@ static void nfs4_layoutreturn_release(vo
+ 		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+ 	pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
+ 	pnfs_clear_layoutreturn_waitbit(lo);
+-	lo->plh_block_lgets--;
+ 	spin_unlock(&lo->plh_inode->i_lock);
+ 	pnfs_free_lseg_list(&freeme);
+ 	pnfs_put_layout_hdr(lrp->args.layout);
diff --git a/queue-4.4/ocfs2-nfs-hangs-in-__ocfs2_cluster_lock-due-to-race-with-ocfs2_unblock_lock.patch b/queue-4.4/ocfs2-nfs-hangs-in-__ocfs2_cluster_lock-due-to-race-with-ocfs2_unblock_lock.patch
new file mode 100644
index 00000000000..2b87cbf8c9c
--- /dev/null
+++ b/queue-4.4/ocfs2-nfs-hangs-in-__ocfs2_cluster_lock-due-to-race-with-ocfs2_unblock_lock.patch
@@ -0,0 +1,93 @@
+From b1b1e15ef6b80facf76d6757649dfd7295eda29f Mon Sep 17 00:00:00 2001
+From: Tariq Saeed <tariq.x.saeed@oracle.com>
+Date: Thu, 21 Jan 2016 16:40:39 -0800
+Subject: ocfs2: NFS hangs in __ocfs2_cluster_lock due to race with ocfs2_unblock_lock
+
+From: Tariq Saeed <tariq.x.saeed@oracle.com>
+
+commit b1b1e15ef6b80facf76d6757649dfd7295eda29f upstream.
+
+NFS on a 2 node ocfs2 cluster each node exporting dir.  The lock causing
+the hang is the global bit map inode lock.  Node 1 is master, has the
+lock granted in PR mode; Node 2 is in the converting list (PR -> EX).
+There are no holders of the lock on the master node so it should
+downconvert to NL and grant EX to node 2 but that does not happen.
+BLOCKED + QUEUED in lock res are set and it is on osb blocked list.
+Threads are waiting in __ocfs2_cluster_lock on BLOCKED.  One thread
+wants EX, rest want PR.  So it is as though the downconvert thread needs
+to be kicked to complete the conv.
+
+The hang is caused by an EX req coming into __ocfs2_cluster_lock on the
+heels of a PR req after it sets BUSY (drops l_lock, releasing EX
+thread), forcing the incoming EX to wait on BUSY without doing anything.
+PR has called ocfs2_dlm_lock, which sets the node 1 lock from NL -> PR,
+queues ast.
+
+At this time, upconvert (PR -> EX) arrives from node 2, finds conflict
+with node 1 lock in PR, so the lock res is put on dlm thread's dirty
+list.
+
+After return from ocfs2_dlm_lock, PR thread now waits behind EX on BUSY till
+awoken by ast.
+
+Now it is dlm_thread that serially runs dlm_shuffle_lists, ast, bast, in
+that order.  dlm_shuffle_lists queues a bast on behalf of node 2 (which
+will be run by dlm_thread right after the ast).  ast does its part, sets
+UPCONVERT_FINISHING, clears BUSY and wakes its waiters.  Next,
+dlm_thread runs bast.  It sets BLOCKED and kicks dc thread.  dc thread
+runs ocfs2_unblock_lock, but since UPCONVERT_FINISHING is set, skips doing
+anything and requeues.
+
+Inside of __ocfs2_cluster_lock, since EX has been waiting on BUSY ahead
+of PR, it wakes up first, finds BLOCKED set and skips doing anything but
+clearing UPCONVERT_FINISHING (which was actually "meant" for the PR
+thread), and this time waits on BLOCKED.  Next, the PR thread comes out
+of wait but since UPCONVERT_FINISHING is not set, it skips updating the
+l_ro_holders and goes straight to wait on BLOCKED.  So there, we have a
+hang! Threads in __ocfs2_cluster_lock wait on BLOCKED, lock res in osb
+blocked list.  Only when dc thread is awoken, it will run
+ocfs2_unblock_lock and things will unhang.
+
+One way to fix this is to wake the dc thread on the flag after clearing
+UPCONVERT_FINISHING
+
+Orabug: 20933419
+Signed-off-by: Tariq Saeed <tariq.x.saeed@oracle.com>
+Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
+Reviewed-by: Wengang Wang <wen.gang.wang@oracle.com>
+Reviewed-by: Mark Fasheh <mfasheh@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
+Cc: Eric Ren <zren@suse.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/dlmglue.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct o
+ 	unsigned int gen;
+ 	int noqueue_attempted = 0;
+ 	int dlm_locked = 0;
++	int kick_dc = 0;
+ 
+ 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
+ 		mlog_errno(-EINVAL);
+@@ -1524,7 +1525,12 @@ update_holders:
+ unlock:
+ 	lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+ 
++	/* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
++	kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
++
+ 	spin_unlock_irqrestore(&lockres->l_lock, flags);
++	if (kick_dc)
++		ocfs2_wake_downconvert_thread(osb);
+ out:
+ 	/*
+ 	 * This is helping work around a lock inversion between the page lock
diff --git a/queue-4.4/series b/queue-4.4/series
new file mode 100644
index 00000000000..180619e4d7e
--- /dev/null
+++ b/queue-4.4/series
@@ -0,0 +1,5 @@
+crypto-sun4i-ss-add-missing-statesize.patch
+nfsv4.1-pnfs-fixup-an-lo-plh_block_lgets-imbalance-in-layoutreturn.patch
+block-split-bios-to-max-possible-length.patch
+ocfs2-nfs-hangs-in-__ocfs2_cluster_lock-due-to-race-with-ocfs2_unblock_lock.patch
+hid-usbhid-fix-recursive-deadlock.patch
-- 
2.47.3