3.15-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 7 Jul 2014 20:30:21 +0000 (13:30 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 7 Jul 2014 20:30:21 +0000 (13:30 -0700)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Jul 2014 20:30:21 +0000 (13:30 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Jul 2014 20:30:21 +0000 (13:30 -0700)
diff --git a/queue-3.15/blkcg-fix-use-after-free-in-__blkg_release_rcu-by-making-blkcg_gq-refcnt-an-atomic_t.patch b/queue-3.15/blkcg-fix-use-after-free-in-__blkg_release_rcu-by-making-blkcg_gq-refcnt-an-atomic_t.patch

new file mode 100644 (file)

index 0000000..3a802b4
--- /dev/null
+++ b/queue-3.15/blkcg-fix-use-after-free-in-__blkg_release_rcu-by-making-blkcg_gq-refcnt-an-atomic_t.patch
@@ -0,0 +1,169 @@
+From a5049a8ae34950249a7ae94c385d7c5c98914412 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 19 Jun 2014 17:42:57 -0400
+Subject: blkcg: fix use-after-free in __blkg_release_rcu() by making blkcg_gq refcnt an atomic_t
+
+From: Tejun Heo <tj@kernel.org>
+
+commit a5049a8ae34950249a7ae94c385d7c5c98914412 upstream.
+
+Hello,
+
+So, this patch should do.  Joe, Vivek, can one of you guys please
+verify that the oops goes away with this patch?
+
+Jens, the original thread can be read at
+
+  http://thread.gmane.org/gmane.linux.kernel/1720729
+
+The fix converts blkg->refcnt from int to atomic_t.  It does add some
+overhead but it should be minute compared to everything else which is
+going on and the involved cacheline bouncing, so I think it's highly
+unlikely to cause any noticeable difference.  Also, the refcnt in
+question should be converted to a percpu_ref for blk-mq anyway, so the
+atomic_t is likely to go away pretty soon anyway.
+
+Thanks.
+
+------- 8< -------
+__blkg_release_rcu() may be invoked after the associated request_queue
+is released with a RCU grace period in between.  As such, the function
+and callbacks invoked from it must not dereference the associated
+request_queue.  This is clearly indicated in the comment above the
+function.
+
+Unfortunately, while trying to fix a different issue, 2a4fd070ee85
+("blkcg: move bulk of blkcg_gq release operations to the RCU
+callback") ignored this and added [un]locking of @blkg->q->queue_lock
+to __blkg_release_rcu().  This of course can cause oops as the
+request_queue may be long gone by the time this code gets executed.
+
+  general protection fault: 0000 [#1] SMP
+  CPU: 21 PID: 30 Comm: rcuos/21 Not tainted 3.15.0 #1
+  Hardware name: Stratus ftServer 6400/G7LAZ, BIOS BIOS Version 6.3:57 12/25/2013
+  task: ffff880854021de0 ti: ffff88085403c000 task.ti: ffff88085403c000
+  RIP: 0010:[<ffffffff8162e9e5>]  [<ffffffff8162e9e5>] _raw_spin_lock_irq+0x15/0x60
+  RSP: 0018:ffff88085403fdf0  EFLAGS: 00010086
+  RAX: 0000000000020000 RBX: 0000000000000010 RCX: 0000000000000000
+  RDX: 000060ef80008248 RSI: 0000000000000286 RDI: 6b6b6b6b6b6b6b6b
+  RBP: ffff88085403fdf0 R08: 0000000000000286 R09: 0000000000009f39
+  R10: 0000000000020001 R11: 0000000000020001 R12: ffff88103c17a130
+  R13: ffff88103c17a080 R14: 0000000000000000 R15: 0000000000000000
+  FS:  0000000000000000(0000) GS:ffff88107fca0000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00000000006e5ab8 CR3: 000000000193d000 CR4: 00000000000407e0
+  Stack:
+   ffff88085403fe18 ffffffff812cbfc2 ffff88103c17a130 0000000000000000
+   ffff88103c17a130 ffff88085403fec0 ffffffff810d1d28 ffff880854021de0
+   ffff880854021de0 ffff88107fcaec58 ffff88085403fe80 ffff88107fcaec30
+  Call Trace:
+   [<ffffffff812cbfc2>] __blkg_release_rcu+0x72/0x150
+   [<ffffffff810d1d28>] rcu_nocb_kthread+0x1e8/0x300
+   [<ffffffff81091d81>] kthread+0xe1/0x100
+   [<ffffffff8163813c>] ret_from_fork+0x7c/0xb0
+  Code: ff 47 04 48 8b 7d 08 be 00 02 00 00 e8 55 48 a4 ff 5d c3 0f 1f 00 66 66 66 66 90 55 48 89 e5
+  +fa 66 66 90 66 66 90 b8 00 00 02 00 <f0> 0f c1 07 89 c2 c1 ea 10 66 39 c2 75 02 5d c3 83 e2 fe 0f
+  +b7
+  RIP  [<ffffffff8162e9e5>] _raw_spin_lock_irq+0x15/0x60
+   RSP <ffff88085403fdf0>
+
+The request_queue locking was added because blkcg_gq->refcnt is an int
+protected with the queue lock and __blkg_release_rcu() needs to put
+the parent.  Let's fix it by making blkcg_gq->refcnt an atomic_t and
+dropping queue locking in the function.
+
+Given the general heavy weight of the current request_queue and blkcg
+operations, this is unlikely to cause any noticeable overhead.
+Moreover, blkcg_gq->refcnt is likely to be converted to percpu_ref in
+the near future, so whatever (most likely negligible) overhead it may
+add is temporary.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Joe Lawrence <joe.lawrence@stratus.com>
+Acked-by: Vivek Goyal <vgoyal@redhat.com>
+Link: http://lkml.kernel.org/g/alpine.DEB.2.02.1406081816540.17948@jlaw-desktop.mno.stratus.com
+Signed-off-by: Jens Axboe <axboe@fb.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ block/blk-cgroup.c |    7 ++-----
+ block/blk-cgroup.h |   17 +++++++----------
+ 2 files changed, 9 insertions(+), 15 deletions(-)
+
+--- a/block/blk-cgroup.c
++++ b/block/blk-cgroup.c
+@@ -80,7 +80,7 @@ static struct blkcg_gq *blkg_alloc(struc
+       blkg->q = q;
+       INIT_LIST_HEAD(&blkg->q_node);
+       blkg->blkcg = blkcg;
+-      blkg->refcnt = 1;
++      atomic_set(&blkg->refcnt, 1);
+ 
+       /* root blkg uses @q->root_rl, init rl only for !root blkgs */
+       if (blkcg != &blkcg_root) {
+@@ -399,11 +399,8 @@ void __blkg_release_rcu(struct rcu_head
+ 
+       /* release the blkcg and parent blkg refs this blkg has been holding */
+       css_put(&blkg->blkcg->css);
+-      if (blkg->parent) {
+-              spin_lock_irq(blkg->q->queue_lock);
++      if (blkg->parent)
+               blkg_put(blkg->parent);
+-              spin_unlock_irq(blkg->q->queue_lock);
+-      }
+ 
+       blkg_free(blkg);
+ }
+--- a/block/blk-cgroup.h
++++ b/block/blk-cgroup.h
+@@ -18,6 +18,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/radix-tree.h>
+ #include <linux/blkdev.h>
++#include <linux/atomic.h>
+ 
+ /* Max limits for throttle policy */
+ #define THROTL_IOPS_MAX               UINT_MAX
+@@ -104,7 +105,7 @@ struct blkcg_gq {
+       struct request_list             rl;
+ 
+       /* reference count */
+-      int                             refcnt;
++      atomic_t                        refcnt;
+ 
+       /* is this blkg online? protected by both blkcg and q locks */
+       bool                            online;
+@@ -257,13 +258,12 @@ static inline int blkg_path(struct blkcg
+  * blkg_get - get a blkg reference
+  * @blkg: blkg to get
+  *
+- * The caller should be holding queue_lock and an existing reference.
++ * The caller should be holding an existing reference.
+  */
+ static inline void blkg_get(struct blkcg_gq *blkg)
+ {
+-      lockdep_assert_held(blkg->q->queue_lock);
+-      WARN_ON_ONCE(!blkg->refcnt);
+-      blkg->refcnt++;
++      WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
++      atomic_inc(&blkg->refcnt);
+ }
+ 
+ void __blkg_release_rcu(struct rcu_head *rcu);
+@@ -271,14 +271,11 @@ void __blkg_release_rcu(struct rcu_head
+ /**
+  * blkg_put - put a blkg reference
+  * @blkg: blkg to put
+- *
+- * The caller should be holding queue_lock.
+  */
+ static inline void blkg_put(struct blkcg_gq *blkg)
+ {
+-      lockdep_assert_held(blkg->q->queue_lock);
+-      WARN_ON_ONCE(blkg->refcnt <= 0);
+-      if (!--blkg->refcnt)
++      WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
++      if (atomic_dec_and_test(&blkg->refcnt))
+               call_rcu(&blkg->rcu_head, __blkg_release_rcu);
+ }
+ 
diff --git a/queue-3.15/ext4-fix-buffer-double-free-in-ext4_alloc_branch.patch b/queue-3.15/ext4-fix-buffer-double-free-in-ext4_alloc_branch.patch

new file mode 100644 (file)

index 0000000..668e302
--- /dev/null
+++ b/queue-3.15/ext4-fix-buffer-double-free-in-ext4_alloc_branch.patch
@@ -0,0 +1,50 @@
+From c5c7b8ddfbf8cb3b2291e515a34ab1b8982f5a2d Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Sun, 15 Jun 2014 23:46:28 -0400
+Subject: ext4: Fix buffer double free in ext4_alloc_branch()
+
+From: Jan Kara <jack@suse.cz>
+
+commit c5c7b8ddfbf8cb3b2291e515a34ab1b8982f5a2d upstream.
+
+Error recovery in ext4_alloc_branch() calls ext4_forget() even for
+buffer corresponding to indirect block it did not allocate. This leads
+to brelse() being called twice for that buffer (once from ext4_forget()
+and once from cleanup in ext4_ind_map_blocks()) leading to buffer use
+count misaccounting. Eventually (but often much later because there
+are other users of the buffer) we will see messages like:
+VFS: brelse: Trying to free free buffer
+
+Another manifestation of this problem is an error:
+JBD2 unexpected failure: jbd2_journal_revoke: !buffer_revoked(bh);
+inconsistent data on disk
+
+The fix is easy - don't forget buffer we did not allocate. Also add an
+explanatory comment because the indexing at ext4_alloc_branch() is
+somewhat subtle.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/indirect.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/indirect.c
++++ b/fs/ext4/indirect.c
+@@ -389,7 +389,13 @@ static int ext4_alloc_branch(handle_t *h
+       return 0;
+ failed:
+       for (; i >= 0; i--) {
+-              if (i != indirect_blks && branch[i].bh)
++              /*
++               * We want to ext4_forget() only freshly allocated indirect
++               * blocks.  Buffer for new_blocks[i-1] is at branch[i].bh and
++               * buffer at branch[0].bh is indirect block / inode already
++               * existing before ext4_alloc_branch() was called.
++               */
++              if (i > 0 && i != indirect_blks && branch[i].bh)
+                       ext4_forget(handle, 1, inode, branch[i].bh,
+                                   branch[i].bh->b_blocknr);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i],
diff --git a/queue-3.15/ext4-fix-hole-punching-for-files-with-indirect-blocks.patch b/queue-3.15/ext4-fix-hole-punching-for-files-with-indirect-blocks.patch

new file mode 100644 (file)

index 0000000..b00a35d
--- /dev/null
+++ b/queue-3.15/ext4-fix-hole-punching-for-files-with-indirect-blocks.patch
@@ -0,0 +1,57 @@
+From a93cd4cf86466caa49cfe64607bea7f0bde3f916 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 26 Jun 2014 12:30:54 -0400
+Subject: ext4: Fix hole punching for files with indirect blocks
+
+From: Jan Kara <jack@suse.cz>
+
+commit a93cd4cf86466caa49cfe64607bea7f0bde3f916 upstream.
+
+Hole punching code for files with indirect blocks wrongly computed
+number of blocks which need to be cleared when traversing the indirect
+block tree. That could result in punching more blocks than actually
+requested and thus effectively cause a data loss. For example:
+
+fallocate -n -p 10240000 4096
+
+will punch the range 10240000 - 12632064 instead of the range 10240000 -
+10244096. Fix the calculation.
+
+Fixes: 8bad6fc813a3a5300f51369c39d315679fd88c72
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/indirect.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/indirect.c
++++ b/fs/ext4/indirect.c
+@@ -1318,16 +1318,24 @@ static int free_hole_blocks(handle_t *ha
+               blk = *i_data;
+               if (level > 0) {
+                       ext4_lblk_t first2;
++                      ext4_lblk_t count2;
++
+                       bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
+                       if (!bh) {
+                               EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
+                                                      "Read failure");
+                               return -EIO;
+                       }
+-                      first2 = (first > offset) ? first - offset : 0;
++                      if (first > offset) {
++                              first2 = first - offset;
++                              count2 = count;
++                      } else {
++                              first2 = 0;
++                              count2 = count - (offset - first);
++                      }
+                       ret = free_hole_blocks(handle, inode, bh,
+                                              (__le32 *)bh->b_data, level - 1,
+-                                             first2, count - offset,
++                                             first2, count2,
+                                              inode->i_sb->s_blocksize >> 2);
+                       if (ret) {
+                               brelse(bh);
diff --git a/queue-3.15/fs-seq_file-fallback-to-vmalloc-allocation.patch b/queue-3.15/fs-seq_file-fallback-to-vmalloc-allocation.patch

new file mode 100644 (file)

index 0000000..991e8e1
--- /dev/null
+++ b/queue-3.15/fs-seq_file-fallback-to-vmalloc-allocation.patch
@@ -0,0 +1,156 @@
+From 058504edd02667eef8fac9be27ab3ea74332e9b4 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Wed, 2 Jul 2014 15:22:37 -0700
+Subject: fs/seq_file: fallback to vmalloc allocation
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit 058504edd02667eef8fac9be27ab3ea74332e9b4 upstream.
+
+There are a couple of seq_files which use the single_open() interface.
+This interface requires that the whole output must fit into a single
+buffer.
+
+E.g.  for /proc/stat allocation failures have been observed because an
+order-4 memory allocation failed due to memory fragmentation.  In such
+situations reading /proc/stat is not possible anymore.
+
+Therefore change the seq_file code to fallback to vmalloc allocations
+which will usually result in a couple of order-0 allocations and hence
+also work if memory is fragmented.
+
+For reference a call trace where reading from /proc/stat failed:
+
+  sadc: page allocation failure: order:4, mode:0x1040d0
+  CPU: 1 PID: 192063 Comm: sadc Not tainted 3.10.0-123.el7.s390x #1
+  [...]
+  Call Trace:
+    show_stack+0x6c/0xe8
+    warn_alloc_failed+0xd6/0x138
+    __alloc_pages_nodemask+0x9da/0xb68
+    __get_free_pages+0x2e/0x58
+    kmalloc_order_trace+0x44/0xc0
+    stat_open+0x5a/0xd8
+    proc_reg_open+0x8a/0x140
+    do_dentry_open+0x1bc/0x2c8
+    finish_open+0x46/0x60
+    do_last+0x382/0x10d0
+    path_openat+0xc8/0x4f8
+    do_filp_open+0x46/0xa8
+    do_sys_open+0x114/0x1f0
+    sysc_tracego+0x14/0x1a
+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Tested-by: David Rientjes <rientjes@google.com>
+Cc: Ian Kent <raven@themaw.net>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Thorsten Diehl <thorsten.diehl@de.ibm.com>
+Cc: Andrea Righi <andrea@betterlinux.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Stefan Bader <stefan.bader@canonical.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/seq_file.c |   30 +++++++++++++++++++++---------
+ 1 file changed, 21 insertions(+), 9 deletions(-)
+
+--- a/fs/seq_file.c
++++ b/fs/seq_file.c
+@@ -8,8 +8,10 @@
+ #include <linux/fs.h>
+ #include <linux/export.h>
+ #include <linux/seq_file.h>
++#include <linux/vmalloc.h>
+ #include <linux/slab.h>
+ #include <linux/cred.h>
++#include <linux/mm.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/page.h>
+@@ -30,6 +32,16 @@ static void seq_set_overflow(struct seq_
+       m->count = m->size;
+ }
+ 
++static void *seq_buf_alloc(unsigned long size)
++{
++      void *buf;
++
++      buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
++      if (!buf && size > PAGE_SIZE)
++              buf = vmalloc(size);
++      return buf;
++}
++
+ /**
+  *    seq_open -      initialize sequential file
+  *    @file: file we initialize
+@@ -96,7 +108,7 @@ static int traverse(struct seq_file *m,
+               return 0;
+       }
+       if (!m->buf) {
+-              m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
++              m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
+               if (!m->buf)
+                       return -ENOMEM;
+       }
+@@ -135,9 +147,9 @@ static int traverse(struct seq_file *m,
+ 
+ Eoverflow:
+       m->op->stop(m, p);
+-      kfree(m->buf);
++      kvfree(m->buf);
+       m->count = 0;
+-      m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
++      m->buf = seq_buf_alloc(m->size <<= 1);
+       return !m->buf ? -ENOMEM : -EAGAIN;
+ }
+ 
+@@ -192,7 +204,7 @@ ssize_t seq_read(struct file *file, char
+ 
+       /* grab buffer if we didn't have one */
+       if (!m->buf) {
+-              m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
++              m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
+               if (!m->buf)
+                       goto Enomem;
+       }
+@@ -232,9 +244,9 @@ ssize_t seq_read(struct file *file, char
+               if (m->count < m->size)
+                       goto Fill;
+               m->op->stop(m, p);
+-              kfree(m->buf);
++              kvfree(m->buf);
+               m->count = 0;
+-              m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
++              m->buf = seq_buf_alloc(m->size <<= 1);
+               if (!m->buf)
+                       goto Enomem;
+               m->version = 0;
+@@ -350,7 +362,7 @@ EXPORT_SYMBOL(seq_lseek);
+ int seq_release(struct inode *inode, struct file *file)
+ {
+       struct seq_file *m = file->private_data;
+-      kfree(m->buf);
++      kvfree(m->buf);
+       kfree(m);
+       return 0;
+ }
+@@ -605,13 +617,13 @@ EXPORT_SYMBOL(single_open);
+ int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
+               void *data, size_t size)
+ {
+-      char *buf = kmalloc(size, GFP_KERNEL);
++      char *buf = seq_buf_alloc(size);
+       int ret;
+       if (!buf)
+               return -ENOMEM;
+       ret = single_open(file, show, data);
+       if (ret) {
+-              kfree(buf);
++              kvfree(buf);
+               return ret;
+       }
+       ((struct seq_file *)file->private_data)->buf = buf;
diff --git a/queue-3.15/mm-page_alloc-fix-cma-area-initialisation-when-pageblock-max_order.patch b/queue-3.15/mm-page_alloc-fix-cma-area-initialisation-when-pageblock-max_order.patch

new file mode 100644 (file)

index 0000000..aee8432
--- /dev/null
+++ b/queue-3.15/mm-page_alloc-fix-cma-area-initialisation-when-pageblock-max_order.patch
@@ -0,0 +1,96 @@
+From dc78327c0ea7da5186d8cbc1647bd6088c5c9fa5 Mon Sep 17 00:00:00 2001
+From: Michal Nazarewicz <mina86@mina86.com>
+Date: Wed, 2 Jul 2014 15:22:35 -0700
+Subject: mm: page_alloc: fix CMA area initialisation when pageblock > MAX_ORDER
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Michal Nazarewicz <mina86@mina86.com>
+
+commit dc78327c0ea7da5186d8cbc1647bd6088c5c9fa5 upstream.
+
+With a kernel configured with ARM64_64K_PAGES && !TRANSPARENT_HUGEPAGE,
+the following is triggered at early boot:
+
+  SMP: Total of 8 processors activated.
+  devtmpfs: initialized
+  Unable to handle kernel NULL pointer dereference at virtual address 00000008
+  pgd = fffffe0000050000
+  [00000008] *pgd=00000043fba00003, *pmd=00000043fba00003, *pte=00e0000078010407
+  Internal error: Oops: 96000006 [#1] SMP
+  Modules linked in:
+  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.15.0-rc864k+ #44
+  task: fffffe03bc040000 ti: fffffe03bc080000 task.ti: fffffe03bc080000
+  PC is at __list_add+0x10/0xd4
+  LR is at free_one_page+0x270/0x638
+  ...
+  Call trace:
+    __list_add+0x10/0xd4
+    free_one_page+0x26c/0x638
+    __free_pages_ok.part.52+0x84/0xbc
+    __free_pages+0x74/0xbc
+    init_cma_reserved_pageblock+0xe8/0x104
+    cma_init_reserved_areas+0x190/0x1e4
+    do_one_initcall+0xc4/0x154
+    kernel_init_freeable+0x204/0x2a8
+    kernel_init+0xc/0xd4
+
+This happens because init_cma_reserved_pageblock() calls
+__free_one_page() with pageblock_order as page order but it is bigger
+than MAX_ORDER.  This in turn causes accesses past zone->free_list[].
+
+Fix the problem by changing init_cma_reserved_pageblock() such that it
+splits pageblock into individual MAX_ORDER pages if pageblock is bigger
+than a MAX_ORDER page.
+
+In cases where !CONFIG_HUGETLB_PAGE_SIZE_VARIABLE, which is all
+architectures except for ia64, powerpc and tile at the moment, the
+“pageblock_order > MAX_ORDER” condition will be optimised out since both
+sides of the operator are constants.  In cases where pageblock size is
+variable, the performance degradation should not be significant anyway
+since init_cma_reserved_pageblock() is called only at boot time at most
+MAX_CMA_AREAS times which by default is eight.
+
+Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
+Reported-by: Mark Salter <msalter@redhat.com>
+Tested-by: Mark Salter <msalter@redhat.com>
+Tested-by: Christopher Covington <cov@codeaurora.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -800,9 +800,21 @@ void __init init_cma_reserved_pageblock(
+               set_page_count(p, 0);
+       } while (++p, --i);
+ 
+-      set_page_refcounted(page);
+       set_pageblock_migratetype(page, MIGRATE_CMA);
+-      __free_pages(page, pageblock_order);
++
++      if (pageblock_order >= MAX_ORDER) {
++              i = pageblock_nr_pages;
++              p = page;
++              do {
++                      set_page_refcounted(p);
++                      __free_pages(p, MAX_ORDER - 1);
++                      p += MAX_ORDER_NR_PAGES;
++              } while (i -= MAX_ORDER_NR_PAGES);
++      } else {
++              set_page_refcounted(page);
++              __free_pages(page, pageblock_order);
++      }
++
+       adjust_managed_page_count(page, pageblock_nr_pages);
+ }
+ #endif
diff --git a/queue-3.15/proc-stat-convert-to-single_open_size.patch b/queue-3.15/proc-stat-convert-to-single_open_size.patch

new file mode 100644 (file)

index 0000000..f20c816
--- /dev/null
+++ b/queue-3.15/proc-stat-convert-to-single_open_size.patch
@@ -0,0 +1,80 @@
+From f74373a5cc7a0155d232c4e999648c7a95435bb2 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+Date: Wed, 2 Jul 2014 15:22:37 -0700
+Subject: /proc/stat: convert to single_open_size()
+
+From: Heiko Carstens <heiko.carstens@de.ibm.com>
+
+commit f74373a5cc7a0155d232c4e999648c7a95435bb2 upstream.
+
+These two patches are supposed to "fix" failed order-4 memory
+allocations which have been observed when reading /proc/stat.  The
+problem has been observed on s390 as well as on x86.
+
+To address the problem change the seq_file memory allocations to
+fallback to use vmalloc, so that allocations also work if memory is
+fragmented.
+
+This approach seems to be simpler and less intrusive than changing
+/proc/stat to use an iterator.  Also it "fixes" other users as well,
+which use seq_file's single_open() interface.
+
+This patch (of 2):
+
+Use seq_file's single_open_size() to preallocate a buffer that is large
+enough to hold the whole output, instead of open coding it.  Also
+calculate the requested size using the number of online cpus instead of
+possible cpus, since the size of the output only depends on the number
+of online cpus.
+
+Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Cc: Ian Kent <raven@themaw.net>
+Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+Cc: Thorsten Diehl <thorsten.diehl@de.ibm.com>
+Cc: Andrea Righi <andrea@betterlinux.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Stefan Bader <stefan.bader@canonical.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/stat.c |   22 ++--------------------
+ 1 file changed, 2 insertions(+), 20 deletions(-)
+
+--- a/fs/proc/stat.c
++++ b/fs/proc/stat.c
+@@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p,
+ 
+ static int stat_open(struct inode *inode, struct file *file)
+ {
+-      size_t size = 1024 + 128 * num_possible_cpus();
+-      char *buf;
+-      struct seq_file *m;
+-      int res;
++      size_t size = 1024 + 128 * num_online_cpus();
+ 
+       /* minimum size to display an interrupt count : 2 bytes */
+       size += 2 * nr_irqs;
+-
+-      /* don't ask for more than the kmalloc() max size */
+-      if (size > KMALLOC_MAX_SIZE)
+-              size = KMALLOC_MAX_SIZE;
+-      buf = kmalloc(size, GFP_KERNEL);
+-      if (!buf)
+-              return -ENOMEM;
+-
+-      res = single_open(file, show_stat, NULL);
+-      if (!res) {
+-              m = file->private_data;
+-              m->buf = buf;
+-              m->size = ksize(buf);
+-      } else
+-              kfree(buf);
+-      return res;
++      return single_open_size(file, show_stat, NULL, size);
+ }
+ 
+ static const struct file_operations proc_stat_operations = {
diff --git a/queue-3.15/series b/queue-3.15/series

index 1d1fc1bfb255e0828812aecc4e512c0a979d6ff5..fb2d2c3a6264b4d1c2f19a6f426b62989f10c490 100644 (file)
--- a/queue-3.15/series
+++ b/queue-3.15/series
@@ -86,3 +86,10 @@ cifs-fix-mount-failure-with-broken-pathnames-when-smb3-mount-with-mapchars-optio
  regulator-tps65218-add-the-missing-of_node-assignment-in-probe.patch
  regulator-tps65218-correct-the-the-config-register-for-ldo1.patch
  iommu-vt-d-fix-bug-in-handling-multiple-rmrrs-for-the-same-pci-device.patch
+blkcg-fix-use-after-free-in-__blkg_release_rcu-by-making-blkcg_gq-refcnt-an-atomic_t.patch
+ext4-fix-buffer-double-free-in-ext4_alloc_branch.patch
+ext4-fix-hole-punching-for-files-with-indirect-blocks.patch
+fs-seq_file-fallback-to-vmalloc-allocation.patch
+mm-page_alloc-fix-cma-area-initialisation-when-pageblock-max_order.patch
+zram-revalidate-disk-after-capacity-change.patch
+proc-stat-convert-to-single_open_size.patch
diff --git a/queue-3.15/zram-revalidate-disk-after-capacity-change.patch b/queue-3.15/zram-revalidate-disk-after-capacity-change.patch

new file mode 100644 (file)

index 0000000..bc86390
--- /dev/null
+++ b/queue-3.15/zram-revalidate-disk-after-capacity-change.patch
@@ -0,0 +1,69 @@
+From 2e32baea46ce542c561a519414c840295b229c8f Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Wed, 2 Jul 2014 15:22:36 -0700
+Subject: zram: revalidate disk after capacity change
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit 2e32baea46ce542c561a519414c840295b229c8f upstream.
+
+Alexander reported that mkswap on /dev/zram0 fails if another process
+has the block device file open.
+
+Step is as follows,
+
+0. Reset the unused zram device.
+1. Use a program that opens /dev/zram0 with O_RDWR and sleeps
+   until killed.
+2. While that program sleeps, echo the correct value to
+   /sys/block/zram0/disksize.
+3. Verify (e.g. in /proc/partitions) that the disk size is applied
+   correctly. It is.
+4. While that program still sleeps, attempt to mkswap /dev/zram0.
+   This fails: mkswap: error: swap area needs to be at least 40 KiB
+
+When I investigated, the size mkswap obtained via
+ioctl(fd, BLKGETSIZE64, xxx) for the blockdev was zero, although zram0
+had been given the right size in step 2.
+
+The reason is zram didn't revalidate the disk after changing capacity, so
+the blockdev inode's size stays stale until every open file is closed.
+
+This patch should fix the BUG.
+
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Reported-by: Alexander E. Patrakov <patrakov@gmail.com>
+Tested-by: Alexander E. Patrakov <patrakov@gmail.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Nitin Gupta <ngupta@vflare.org>
+Acked-by: Jerome Marchand <jmarchan@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -622,8 +622,10 @@ static void zram_reset_device(struct zra
+       memset(&zram->stats, 0, sizeof(zram->stats));
+ 
+       zram->disksize = 0;
+-      if (reset_capacity)
++      if (reset_capacity) {
+               set_capacity(zram->disk, 0);
++              revalidate_disk(zram->disk);
++      }
+       up_write(&zram->init_lock);
+ }
+ 
+@@ -664,6 +666,7 @@ static ssize_t disksize_store(struct dev
+       zram->comp = comp;
+       zram->disksize = disksize;
+       set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
++      revalidate_disk(zram->disk);
+       up_write(&zram->init_lock);
+       return len;
+
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 7 Jul 2014 20:30:21 +0000 (13:30 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 7 Jul 2014 20:30:21 +0000 (13:30 -0700)
queue-3.15/blkcg-fix-use-after-free-in-__blkg_release_rcu-by-making-blkcg_gq-refcnt-an-atomic_t.patch	[new file with mode: 0644]	patch \| blob
queue-3.15/ext4-fix-buffer-double-free-in-ext4_alloc_branch.patch	[new file with mode: 0644]	patch \| blob
queue-3.15/ext4-fix-hole-punching-for-files-with-indirect-blocks.patch	[new file with mode: 0644]	patch \| blob
queue-3.15/fs-seq_file-fallback-to-vmalloc-allocation.patch	[new file with mode: 0644]	patch \| blob
queue-3.15/mm-page_alloc-fix-cma-area-initialisation-when-pageblock-max_order.patch	[new file with mode: 0644]	patch \| blob
queue-3.15/proc-stat-convert-to-single_open_size.patch	[new file with mode: 0644]	patch \| blob
queue-3.15/series		patch \| blob \| blame \| history
queue-3.15/zram-revalidate-disk-after-capacity-change.patch	[new file with mode: 0644]	patch \| blob