]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.7-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 9 Sep 2016 14:37:36 +0000 (16:37 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 9 Sep 2016 14:37:36 +0000 (16:37 +0200)
added patches:
block-fix-race-triggered-by-blk_set_queue_dying.patch
block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch
cdc-acm-added-sanity-checking-for-probe.patch
cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch

queue-4.7/block-fix-race-triggered-by-blk_set_queue_dying.patch [new file with mode: 0644]
queue-4.7/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch [new file with mode: 0644]
queue-4.7/cdc-acm-added-sanity-checking-for-probe.patch [new file with mode: 0644]
queue-4.7/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch [new file with mode: 0644]
queue-4.7/series

diff --git a/queue-4.7/block-fix-race-triggered-by-blk_set_queue_dying.patch b/queue-4.7/block-fix-race-triggered-by-blk_set_queue_dying.patch
new file mode 100644 (file)
index 0000000..aca38ca
--- /dev/null
@@ -0,0 +1,36 @@
+From 1b856086813be9371929b6cc62045f9fd470f5a0 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+Date: Tue, 16 Aug 2016 16:48:36 -0700
+Subject: block: Fix race triggered by blk_set_queue_dying()
+
+From: Bart Van Assche <bart.vanassche@sandisk.com>
+
+commit 1b856086813be9371929b6cc62045f9fd470f5a0 upstream.
+
+blk_set_queue_dying() can be called while another thread is
+submitting I/O or changing queue flags, e.g. through dm_stop_queue().
+Hence protect the QUEUE_FLAG_DYING flag change with locking.
+
+Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-core.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
+ void blk_set_queue_dying(struct request_queue *q)
+ {
+-      queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
++      spin_lock_irq(q->queue_lock);
++      queue_flag_set(QUEUE_FLAG_DYING, q);
++      spin_unlock_irq(q->queue_lock);
+       if (q->mq_ops)
+               blk_mq_wake_waiters(q);
diff --git a/queue-4.7/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch b/queue-4.7/block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch
new file mode 100644 (file)
index 0000000..02198f0
--- /dev/null
@@ -0,0 +1,84 @@
+From 4d70dca4eadf2f95abe389116ac02b8439c2d16c Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@canonical.com>
+Date: Tue, 23 Aug 2016 21:49:45 +0800
+Subject: block: make sure a big bio is split into at most 256 bvecs
+
+From: Ming Lei <ming.lei@canonical.com>
+
+commit 4d70dca4eadf2f95abe389116ac02b8439c2d16c upstream.
+
+After arbitrary bio size was introduced, the incoming bio may
+be very big. We have to split the bio into small bios so that
+each holds at most BIO_MAX_PAGES bvecs for safety reason, such
+as bio_clone().
+
+This patch fixes the following kernel crash:
+
+> [  172.660142] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028
+> [  172.660229] IP: [<ffffffff811e53b4>] bio_trim+0xf/0x2a
+> [  172.660289] PGD 7faf3e067 PUD 7f9279067 PMD 0
+> [  172.660399] Oops: 0000 [#1] SMP
+> [...]
+> [  172.664780] Call Trace:
+> [  172.664813]  [<ffffffffa007f3be>] ? raid1_make_request+0x2e8/0xad7 [raid1]
+> [  172.664846]  [<ffffffff811f07da>] ? blk_queue_split+0x377/0x3d4
+> [  172.664880]  [<ffffffffa005fb5f>] ? md_make_request+0xf6/0x1e9 [md_mod]
+> [  172.664912]  [<ffffffff811eb860>] ? generic_make_request+0xb5/0x155
+> [  172.664947]  [<ffffffffa0445c89>] ? prio_io+0x85/0x95 [bcache]
+> [  172.664981]  [<ffffffffa0448252>] ? register_cache_set+0x355/0x8d0 [bcache]
+> [  172.665016]  [<ffffffffa04497d3>] ? register_bcache+0x1006/0x1174 [bcache]
+
+The issue can be reproduced by the following steps:
+       - create one raid1 over two virtio-blk
+       - build bcache device over the above raid1 and another cache device
+       and bucket size is set as 2Mbytes
+       - set cache mode as writeback
+       - run random write over ext4 on the bcache device
+
+Fixes: 54efd50(block: make generic_make_request handle arbitrarily sized bios)
+Reported-by: Sebastian Roesner <sroesner-kernelorg@roesner-online.de>
+Reported-by: Eric Wheeler <bcache@lists.ewheeler.net>
+Cc: Shaohua Li <shli@fb.com>
+Acked-by: Kent Overstreet <kent.overstreet@gmail.com>
+Signed-off-by: Ming Lei <ming.lei@canonical.com>
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-merge.c |   22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -94,9 +94,31 @@ static struct bio *blk_bio_segment_split
+       bool do_split = true;
+       struct bio *new = NULL;
+       const unsigned max_sectors = get_max_io_size(q, bio);
++      unsigned bvecs = 0;
+       bio_for_each_segment(bv, bio, iter) {
+               /*
++               * With arbitrary bio size, the incoming bio may be very
++               * big. We have to split the bio into small bios so that
++               * each holds at most BIO_MAX_PAGES bvecs because
++               * bio_clone() can fail to allocate big bvecs.
++               *
++               * It should have been better to apply the limit per
++               * request queue in which bio_clone() is involved,
++               * instead of globally. The biggest blocker is the
++               * bio_clone() in bio bounce.
++               *
++               * If bio is splitted by this reason, we should have
++               * allowed to continue bios merging, but don't do
++               * that now for making the change simple.
++               *
++               * TODO: deal with bio bounce's bio_clone() gracefully
++               * and convert the global limit into per-queue limit.
++               */
++              if (bvecs++ >= BIO_MAX_PAGES)
++                      goto split;
++
++              /*
+                * If the queue doesn't support SG gaps and adding this
+                * offset would create a gap, disallow it.
+                */
diff --git a/queue-4.7/cdc-acm-added-sanity-checking-for-probe.patch b/queue-4.7/cdc-acm-added-sanity-checking-for-probe.patch
new file mode 100644 (file)
index 0000000..3c78cec
--- /dev/null
@@ -0,0 +1,98 @@
+From oneukum@suse.com  Fri Sep  9 16:15:08 2016
+From: Oliver Neukum <oneukum@suse.com>
+Date: Thu,  8 Sep 2016 11:27:30 +0200
+Subject: cdc-acm: added sanity checking for probe()
+To: stable@vger.kernel.org, jslaby@suse.com
+Cc: Oliver Neukum <oneukum@suse.com>
+Message-ID: <1473326850-5056-1-git-send-email-oneukum@suse.com>
+
+From: Oliver Neukum <oneukum@suse.com>
+
+This is an alternative to eccf2a4e6b64d249929acc1f7aaa2ab0fb199d3d
+which inadvertently fixes an oops in probe caused by a device returning
+malformed descriptors. The problem allows a malicious device to
+attack the kernel.
+That patch in v4.8 is too extensive to backport to stable.
+Thus this alternative fix is needed up to v4.7
+
+Signed-off-by: Oliver Neukum <oneukum@suse.com>
+Reported-by: Binyamin Sharet <bsharet@cisco.com>
+Tested-by: Binyamin Sharet <bsharet@cisco.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/class/cdc-acm.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -1196,6 +1196,8 @@ static int acm_probe(struct usb_interfac
+       }
+       if (!buflen) {
++              if (!intf->cur_altsetting || !intf->cur_altsetting->endpoint)
++                      return -EINVAL;
+               if (intf->cur_altsetting->endpoint &&
+                               intf->cur_altsetting->endpoint->extralen &&
+                               intf->cur_altsetting->endpoint->extra) {
+@@ -1276,6 +1278,8 @@ next_desc:
+                               data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
+                       control_interface = intf;
+               } else {
++                      if (!intf->cur_altsetting)
++                              return -ENODEV;
+                       if (intf->cur_altsetting->desc.bNumEndpoints != 3) {
+                               dev_dbg(&intf->dev,"No union descriptor, giving up\n");
+                               return -ENODEV;
+@@ -1305,15 +1309,22 @@ next_desc:
+               combined_interfaces = 1;
+               /* a popular other OS doesn't use it */
+               quirks |= NO_CAP_LINE;
++              if (!data_interface->cur_altsetting)
++                      return -EINVAL;
+               if (data_interface->cur_altsetting->desc.bNumEndpoints != 3) {
+                       dev_err(&intf->dev, "This needs exactly 3 endpoints\n");
+                       return -EINVAL;
+               }
+ look_for_collapsed_interface:
++              if (!data_interface->cur_altsetting)
++                      return -EINVAL;
+               for (i = 0; i < 3; i++) {
+                       struct usb_endpoint_descriptor *ep;
+                       ep = &data_interface->cur_altsetting->endpoint[i].desc;
++                      if (!ep)
++                              return -ENODEV;
++
+                       if (usb_endpoint_is_int_in(ep))
+                               epctrl = ep;
+                       else if (usb_endpoint_is_bulk_out(ep))
+@@ -1332,8 +1343,12 @@ look_for_collapsed_interface:
+ skip_normal_probe:
+       /*workaround for switched interfaces */
++      if (!data_interface->cur_altsetting)
++              return -EINVAL;
+       if (data_interface->cur_altsetting->desc.bInterfaceClass
+                                               != CDC_DATA_INTERFACE_TYPE) {
++              if (!control_interface->cur_altsetting)
++                      return -EINVAL;
+               if (control_interface->cur_altsetting->desc.bInterfaceClass
+                                               == CDC_DATA_INTERFACE_TYPE) {
+                       dev_dbg(&intf->dev,
+@@ -1356,6 +1371,7 @@ skip_normal_probe:
+       if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 ||
++          !control_interface->cur_altsetting ||
+           control_interface->cur_altsetting->desc.bNumEndpoints == 0)
+               return -EINVAL;
+@@ -1363,6 +1379,8 @@ skip_normal_probe:
+       epread = &data_interface->cur_altsetting->endpoint[0].desc;
+       epwrite = &data_interface->cur_altsetting->endpoint[1].desc;
++      if (!epctrl || !epread || !epwrite)
++              return -ENODEV;
+       /* workaround for switched endpoints */
+       if (!usb_endpoint_dir_in(epread)) {
diff --git a/queue-4.7/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch b/queue-4.7/cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch
new file mode 100644 (file)
index 0000000..1ff0e0c
--- /dev/null
@@ -0,0 +1,111 @@
+From 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 Mon Sep 17 00:00:00 2001
+From: Balbir Singh <bsingharora@gmail.com>
+Date: Wed, 10 Aug 2016 15:43:06 -0400
+Subject: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
+
+From: Balbir Singh <bsingharora@gmail.com>
+
+commit 568ac888215c7fb2fabe8ea739b00ec3c1f5d440 upstream.
+
+cgroup_threadgroup_rwsem is acquired in read mode during process exit
+and fork.  It is also grabbed in write mode during
+__cgroups_proc_write().  I've recently run into a scenario with lots
+of memory pressure and OOM and I am beginning to see
+
+systemd
+
+ __switch_to+0x1f8/0x350
+ __schedule+0x30c/0x990
+ schedule+0x48/0xc0
+ percpu_down_write+0x114/0x170
+ __cgroup_procs_write.isra.12+0xb8/0x3c0
+ cgroup_file_write+0x74/0x1a0
+ kernfs_fop_write+0x188/0x200
+ __vfs_write+0x6c/0xe0
+ vfs_write+0xc0/0x230
+ SyS_write+0x6c/0x110
+ system_call+0x38/0xb4
+
+This thread is waiting on the reader of cgroup_threadgroup_rwsem to
+exit.  The reader itself is under memory pressure and has gone into
+reclaim after fork. There are times the reader also ends up waiting on
+oom_lock as well.
+
+ __switch_to+0x1f8/0x350
+ __schedule+0x30c/0x990
+ schedule+0x48/0xc0
+ jbd2_log_wait_commit+0xd4/0x180
+ ext4_evict_inode+0x88/0x5c0
+ evict+0xf8/0x2a0
+ dispose_list+0x50/0x80
+ prune_icache_sb+0x6c/0x90
+ super_cache_scan+0x190/0x210
+ shrink_slab.part.15+0x22c/0x4c0
+ shrink_zone+0x288/0x3c0
+ do_try_to_free_pages+0x1dc/0x590
+ try_to_free_pages+0xdc/0x260
+ __alloc_pages_nodemask+0x72c/0xc90
+ alloc_pages_current+0xb4/0x1a0
+ page_table_alloc+0xc0/0x170
+ __pte_alloc+0x58/0x1f0
+ copy_page_range+0x4ec/0x950
+ copy_process.isra.5+0x15a0/0x1870
+ _do_fork+0xa8/0x4b0
+ ppc_clone+0x8/0xc
+
+In the meanwhile, all processes exiting/forking are blocked almost
+stalling the system.
+
+This patch moves the threadgroup_change_begin from before
+cgroup_fork() to just before cgroup_canfork().  There is no need to
+worry about threadgroup changes till the task is actually added to the
+threadgroup.  This avoids having to call reclaim with
+cgroup_threadgroup_rwsem held.
+
+tj: Subject and description edits.
+
+Signed-off-by: Balbir Singh <bsingharora@gmail.com>
+Acked-by: Zefan Li <lizefan@huawei.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/fork.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1406,7 +1406,6 @@ static struct task_struct *copy_process(
+       p->real_start_time = ktime_get_boot_ns();
+       p->io_context = NULL;
+       p->audit_context = NULL;
+-      threadgroup_change_begin(current);
+       cgroup_fork(p);
+ #ifdef CONFIG_NUMA
+       p->mempolicy = mpol_dup(p->mempolicy);
+@@ -1558,6 +1557,7 @@ static struct task_struct *copy_process(
+       INIT_LIST_HEAD(&p->thread_group);
+       p->task_works = NULL;
++      threadgroup_change_begin(current);
+       /*
+        * Ensure that the cgroup subsystem policies allow the new process to be
+        * forked. It should be noted the the new process's css_set can be changed
+@@ -1658,6 +1658,7 @@ static struct task_struct *copy_process(
+ bad_fork_cancel_cgroup:
+       cgroup_cancel_fork(p);
+ bad_fork_free_pid:
++      threadgroup_change_end(current);
+       if (pid != &init_struct_pid)
+               free_pid(pid);
+ bad_fork_cleanup_thread:
+@@ -1690,7 +1691,6 @@ bad_fork_cleanup_policy:
+       mpol_put(p->mempolicy);
+ bad_fork_cleanup_threadgroup_lock:
+ #endif
+-      threadgroup_change_end(current);
+       delayacct_tsk_free(p);
+ bad_fork_cleanup_count:
+       atomic_dec(&p->cred->user->processes);
index d7bd01a5da9299f91ed13cc083909844893d3e4b..c44cffba8a0ffa97ab79ab3ed422e3be8af4afe3 100644 (file)
@@ -11,3 +11,7 @@ ext4-fix-xattr-shifting-when-expanding-inodes-part-2.patch
 ext4-properly-align-shifted-xattrs-when-expanding-inodes.patch
 ext4-avoid-deadlock-when-expanding-inode-size.patch
 ext4-avoid-modifying-checksum-fields-directly-during-checksum-verification.patch
+block-fix-race-triggered-by-blk_set_queue_dying.patch
+block-make-sure-a-big-bio-is-split-into-at-most-256-bvecs.patch
+cgroup-reduce-read-locked-section-of-cgroup_threadgroup_rwsem-during-fork.patch
+cdc-acm-added-sanity-checking-for-probe.patch