From: Greg Kroah-Hartman
Date: Fri, 9 Sep 2016 14:37:36 +0000 (+0200)
Subject: 4.7-stable patches
X-Git-Tag: v3.14.79~7
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b4c040fb40eb50ba0afcc78a403e9d655c2f9bcb;p=thirdparty%2Fkernel%2Fstable-queue.git

4.7-stable patches

added patches:
	block-fix-race-triggered-by-blk_set_queue_dying.patch
	block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch
	cdc-acm-added-sanity-checking-for-probe.patch
	cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch
---

diff --git a/queue-4.7/block-fix-race-triggered-by-blk_set_queue_dying.patch b/queue-4.7/block-fix-race-triggered-by-blk_set_queue_dying.patch
new file mode 100644
index 00000000000..aca38ca49ae
--- /dev/null
+++ b/queue-4.7/block-fix-race-triggered-by-blk_set_queue_dying.patch
@@ -0,0 +1,36 @@
+From 1b856086813be9371929b6cc62045f9fd470f5a0 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche
+Date: Tue, 16 Aug 2016 16:48:36 -0700
+Subject: block: Fix race triggered by blk_set_queue_dying()
+
+From: Bart Van Assche
+
+commit 1b856086813be9371929b6cc62045f9fd470f5a0 upstream.
+
+blk_set_queue_dying() can be called while another thread is
+submitting I/O or changing queue flags, e.g. through dm_stop_queue().
+Hence protect the QUEUE_FLAG_DYING flag change with locking.
+
+Signed-off-by: Bart Van Assche
+Cc: Christoph Hellwig
+Cc: Mike Snitzer
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ block/blk-core.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
+ 
+ void blk_set_queue_dying(struct request_queue *q)
+ {
+-	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
++	spin_lock_irq(q->queue_lock);
++	queue_flag_set(QUEUE_FLAG_DYING, q);
++	spin_unlock_irq(q->queue_lock);
+ 
+ 	if (q->mq_ops)
+ 		blk_mq_wake_waiters(q);
diff --git a/queue-4.7/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch b/queue-4.7/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch
new file mode 100644
index 00000000000..02198f0739e
--- /dev/null
+++ b/queue-4.7/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch
@@ -0,0 +1,84 @@
+From 4d70dca4eadf2f95abe389116ac02b8439c2d16c Mon Sep 17 00:00:00 2001
+From: Ming Lei
+Date: Tue, 23 Aug 2016 21:49:45 +0800
+Subject: block: make sure a big bio is split into at most 256 bvecs
+
+From: Ming Lei
+
+commit 4d70dca4eadf2f95abe389116ac02b8439c2d16c upstream.
+
+After arbitrary bio size was introduced, the incoming bio may
+be very big. We have to split the bio into small bios so that
+each holds at most BIO_MAX_PAGES bvecs for safety reason, such
+as bio_clone().
+
+This patch fixes the following kernel crash:
+
+> [  172.660142] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
+> [  172.660229] IP: [] bio_trim+0xf/0x2a
+> [  172.660289] PGD 7faf3e067 PUD 7f9279067 PMD 0
+> [  172.660399] Oops: 0000 [#1] SMP
+> [...]
+> [  172.664780] Call Trace:
+> [  172.664813] [] ? raid1_make_request+0x2e8/0xad7 [raid1]
+> [  172.664846] [] ? blk_queue_split+0x377/0x3d4
+> [  172.664880] [] ? md_make_request+0xf6/0x1e9 [md_mod]
+> [  172.664912] [] ? generic_make_request+0xb5/0x155
+> [  172.664947] [] ? prio_io+0x85/0x95 [bcache]
+> [  172.664981] [] ? register_cache_set+0x355/0x8d0 [bcache]
+> [  172.665016] [] ? register_bcache+0x1006/0x1174 [bcache]
+
+The issue can be reproduced by the following steps:
+ - create one raid1 over two virtio-blk
+ - build bcache device over the above raid1 and another cache device
+   and bucket size is set as 2Mbytes
+ - set cache mode as writeback
+ - run random write over ext4 on the bcache device
+
+Fixes: 54efd50(block: make generic_make_request handle arbitrarily sized bios)
+Reported-by: Sebastian Roesner
+Reported-by: Eric Wheeler
+Cc: Shaohua Li
+Acked-by: Kent Overstreet
+Signed-off-by: Ming Lei
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ block/blk-merge.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -94,9 +94,31 @@ static struct bio *blk_bio_segment_split
+ 	bool do_split = true;
+ 	struct bio *new = NULL;
+ 	const unsigned max_sectors = get_max_io_size(q, bio);
++	unsigned bvecs = 0;
+ 
+ 	bio_for_each_segment(bv, bio, iter) {
+ 		/*
++		 * With arbitrary bio size, the incoming bio may be very
++		 * big. We have to split the bio into small bios so that
++		 * each holds at most BIO_MAX_PAGES bvecs because
++		 * bio_clone() can fail to allocate big bvecs.
++		 *
++		 * It should have been better to apply the limit per
++		 * request queue in which bio_clone() is involved,
++		 * instead of globally. The biggest blocker is the
++		 * bio_clone() in bio bounce.
++		 *
++		 * If bio is splitted by this reason, we should have
++		 * allowed to continue bios merging, but don't do
++		 * that now for making the change simple.
++		 *
++		 * TODO: deal with bio bounce's bio_clone() gracefully
++		 * and convert the global limit into per-queue limit.
++		 */
++		if (bvecs++ >= BIO_MAX_PAGES)
++			goto split;
++
++		/*
+ 		 * If the queue doesn't support SG gaps and adding this
+ 		 * offset would create a gap, disallow it.
+		 */
diff --git a/queue-4.7/cdc-acm-added-sanity-checking-for-probe.patch b/queue-4.7/cdc-acm-added-sanity-checking-for-probe.patch
new file mode 100644
index 00000000000..3c78cecdf90
--- /dev/null
+++ b/queue-4.7/cdc-acm-added-sanity-checking-for-probe.patch
@@ -0,0 +1,98 @@
+From oneukum@suse.com Fri Sep 9 16:15:08 2016
+From: Oliver Neukum
+Date: Thu, 8 Sep 2016 11:27:30 +0200
+Subject: cdc-acm: added sanity checking for probe()
+To: stable@vger.kernel.org, jslaby@suse.com
+Cc: Oliver Neukum
+Message-ID: <1473326850-5056-1-git-send-email-oneukum@suse.com>
+
+From: Oliver Neukum
+
+This is an alternative to eccf2a4e6b64d249929acc1f7aaa2ab0fb199d3d,
+which inadvertently fixed an oops in probe() caused by a device
+returning malformed descriptors. The problem allows a malicious
+device to attack the kernel.
+That patch in v4.8 is too extensive to backport to stable.
+Thus this alternative fix is needed up to v4.7.
+
+Signed-off-by: Oliver Neukum
+Reported-by: Binyamin Sharet
+Tested-by: Binyamin Sharet
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/usb/class/cdc-acm.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -1196,6 +1196,8 @@ static int acm_probe(struct usb_interfac
+ 	}
+ 
+ 	if (!buflen) {
++		if (!intf->cur_altsetting || !intf->cur_altsetting->endpoint)
++			return -EINVAL;
+ 		if (intf->cur_altsetting->endpoint &&
+ 		    intf->cur_altsetting->endpoint->extralen &&
+ 		    intf->cur_altsetting->endpoint->extra) {
+@@ -1276,6 +1278,8 @@ next_desc:
+ 		data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
+ 		control_interface = intf;
+ 	} else {
++		if (!intf->cur_altsetting)
++			return -ENODEV;
+ 		if (intf->cur_altsetting->desc.bNumEndpoints != 3) {
+ 			dev_dbg(&intf->dev,"No union descriptor, giving up\n");
+ 			return -ENODEV;
+@@ -1305,15 +1309,22 @@ next_desc:
+ 		combined_interfaces = 1;
+ 		/* a popular other OS doesn't use it */
+ 		quirks |= NO_CAP_LINE;
++		if (!data_interface->cur_altsetting)
++			return -EINVAL;
+ 		if (data_interface->cur_altsetting->desc.bNumEndpoints != 3) {
+ 			dev_err(&intf->dev, "This needs exactly 3 endpoints\n");
+ 			return -EINVAL;
+ 		}
+ look_for_collapsed_interface:
++		if (!data_interface->cur_altsetting)
++			return -EINVAL;
+ 	for (i = 0; i < 3; i++) {
+ 		struct usb_endpoint_descriptor *ep;
+ 		ep = &data_interface->cur_altsetting->endpoint[i].desc;
+ 
++		if (!ep)
++			return -ENODEV;
++
+ 		if (usb_endpoint_is_int_in(ep))
+ 			epctrl = ep;
+ 		else if (usb_endpoint_is_bulk_out(ep))
+@@ -1332,8 +1343,12 @@ look_for_collapsed_interface:
+ skip_normal_probe:
+ 
+ 	/*workaround for switched interfaces */
++	if (!data_interface->cur_altsetting)
++		return -EINVAL;
+ 	if (data_interface->cur_altsetting->desc.bInterfaceClass
+ 			!= CDC_DATA_INTERFACE_TYPE) {
++		if (!control_interface->cur_altsetting)
++			return -EINVAL;
+ 		if (control_interface->cur_altsetting->desc.bInterfaceClass
+ 				== CDC_DATA_INTERFACE_TYPE) {
+ 			dev_dbg(&intf->dev,
+@@ -1356,6 +1371,7 @@ skip_normal_probe:
+ 
+ 
+ 	if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 ||
++	    !control_interface->cur_altsetting ||
+ 	    control_interface->cur_altsetting->desc.bNumEndpoints == 0)
+ 		return -EINVAL;
+ 
+@@ -1363,6 +1379,8 @@ skip_normal_probe:
+ 	epread = &data_interface->cur_altsetting->endpoint[0].desc;
+ 	epwrite = &data_interface->cur_altsetting->endpoint[1].desc;
+ 
++	if (!epctrl || !epread || !epwrite)
++		return -ENODEV;
+ 
+ 	/* workaround for switched endpoints */
+ 	if (!usb_endpoint_dir_in(epread)) {
diff --git a/queue-4.7/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch b/queue-4.7/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch
new file mode 100644
index 00000000000..1ff0e0c2935
--- /dev/null
+++ b/queue-4.7/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch
@@ -0,0 +1,111 @@
+From 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 Mon Sep 17 00:00:00 2001
+From: Balbir Singh
+Date: Wed, 10 Aug 2016 15:43:06 -0400
+Subject: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
+
+From: Balbir Singh
+
+commit 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 upstream.
+
+cgroup_threadgroup_rwsem is acquired in read mode during process exit
+and fork. It is also grabbed in write mode during
+__cgroups_proc_write(). I've recently run into a scenario with lots
+of memory pressure and OOM and I am beginning to see
+
+systemd
+
+ __switch_to+0x1f8/0x350
+ __schedule+0x30c/0x990
+ schedule+0x48/0xc0
+ percpu_down_write+0x114/0x170
+ __cgroup_procs_write.isra.12+0xb8/0x3c0
+ cgroup_file_write+0x74/0x1a0
+ kernfs_fop_write+0x188/0x200
+ __vfs_write+0x6c/0xe0
+ vfs_write+0xc0/0x230
+ SyS_write+0x6c/0x110
+ system_call+0x38/0xb4
+
+This thread is waiting on the reader of cgroup_threadgroup_rwsem to
+exit. The reader itself is under memory pressure and has gone into
+reclaim after fork. There are times the reader also ends up waiting on
+oom_lock as well.
+
+ __switch_to+0x1f8/0x350
+ __schedule+0x30c/0x990
+ schedule+0x48/0xc0
+ jbd2_log_wait_commit+0xd4/0x180
+ ext4_evict_inode+0x88/0x5c0
+ evict+0xf8/0x2a0
+ dispose_list+0x50/0x80
+ prune_icache_sb+0x6c/0x90
+ super_cache_scan+0x190/0x210
+ shrink_slab.part.15+0x22c/0x4c0
+ shrink_zone+0x288/0x3c0
+ do_try_to_free_pages+0x1dc/0x590
+ try_to_free_pages+0xdc/0x260
+ __alloc_pages_nodemask+0x72c/0xc90
+ alloc_pages_current+0xb4/0x1a0
+ page_table_alloc+0xc0/0x170
+ __pte_alloc+0x58/0x1f0
+ copy_page_range+0x4ec/0x950
+ copy_process.isra.5+0x15a0/0x1870
+ _do_fork+0xa8/0x4b0
+ ppc_clone+0x8/0xc
+
+In the meanwhile, all processes exiting/forking are blocked, almost
+stalling the system.
+
+This patch moves the threadgroup_change_begin from before
+cgroup_fork() to just before cgroup_canfork(). There is no need to
+worry about threadgroup changes till the task is actually added to the
+threadgroup. This avoids having to call reclaim with
+cgroup_threadgroup_rwsem held.
+
+tj: Subject and description edits.
+
+Signed-off-by: Balbir Singh
+Acked-by: Zefan Li
+Cc: Oleg Nesterov
+Cc: Andrew Morton
+Signed-off-by: Tejun Heo
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/fork.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1406,7 +1406,6 @@ static struct task_struct *copy_process(
+ 	p->real_start_time = ktime_get_boot_ns();
+ 	p->io_context = NULL;
+ 	p->audit_context = NULL;
+-	threadgroup_change_begin(current);
+ 	cgroup_fork(p);
+ #ifdef CONFIG_NUMA
+ 	p->mempolicy = mpol_dup(p->mempolicy);
+@@ -1558,6 +1557,7 @@ static struct task_struct *copy_process(
+ 	INIT_LIST_HEAD(&p->thread_group);
+ 	p->task_works = NULL;
+ 
++	threadgroup_change_begin(current);
+ 	/*
+ 	 * Ensure that the cgroup subsystem policies allow the new process to be
+ 	 * forked. It should be noted the the new process's css_set can be changed
+@@ -1658,6 +1658,7 @@ static struct task_struct *copy_process(
+ bad_fork_cancel_cgroup:
+ 	cgroup_cancel_fork(p);
+ bad_fork_free_pid:
++	threadgroup_change_end(current);
+ 	if (pid != &init_struct_pid)
+ 		free_pid(pid);
+ bad_fork_cleanup_thread:
+@@ -1690,7 +1691,6 @@ bad_fork_cleanup_policy:
+ 	mpol_put(p->mempolicy);
+ bad_fork_cleanup_threadgroup_lock:
+ #endif
+-	threadgroup_change_end(current);
+ 	delayacct_tsk_free(p);
+ bad_fork_cleanup_count:
+ 	atomic_dec(&p->cred->user->processes);
diff --git a/queue-4.7/series b/queue-4.7/series
index d7bd01a5da9..c44cffba8a0 100644
--- a/queue-4.7/series
+++ b/queue-4.7/series
@@ -11,3 +11,7 @@ ext4-fix-xattr-shifting-when-expanding-inodes-part-2.patch
 ext4-properly-align-shifted-xattrs-when-expanding-inodes.patch
 ext4-avoid-deadlock-when-expanding-inode-size.patch
 ext4-avoid-modifying-checksum-fields-directly-during-checksum-verification.patch
+block-fix-race-triggered-by-blk_set_queue_dying.patch
+block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch
+cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch
+cdc-acm-added-sanity-checking-for-probe.patch