]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.13-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Mar 2014 22:35:56 +0000 (22:35 +0000)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Mar 2014 22:35:56 +0000 (22:35 +0000)
added patches:
memcg-fix-endless-loop-in-__mem_cgroup_iter_next.patch
mm-include-vm_mixedmap-flag-in-the-vm_special-list-to-avoid-m-un-locking.patch
mm-page_alloc-exempt-gfp_thisnode-allocations-from-zone-fairness.patch
ocfs2-fix-quota-file-corruption.patch
ocfs2-syncs-the-wrong-range.patch
zram-avoid-null-access-when-fail-to-alloc-meta.patch

queue-3.13/memcg-fix-endless-loop-in-__mem_cgroup_iter_next.patch [new file with mode: 0644]
queue-3.13/mm-include-vm_mixedmap-flag-in-the-vm_special-list-to-avoid-m-un-locking.patch [new file with mode: 0644]
queue-3.13/mm-page_alloc-exempt-gfp_thisnode-allocations-from-zone-fairness.patch [new file with mode: 0644]
queue-3.13/ocfs2-fix-quota-file-corruption.patch [new file with mode: 0644]
queue-3.13/ocfs2-syncs-the-wrong-range.patch [new file with mode: 0644]
queue-3.13/series [new file with mode: 0644]
queue-3.13/zram-avoid-null-access-when-fail-to-alloc-meta.patch [new file with mode: 0644]

diff --git a/queue-3.13/memcg-fix-endless-loop-in-__mem_cgroup_iter_next.patch b/queue-3.13/memcg-fix-endless-loop-in-__mem_cgroup_iter_next.patch
new file mode 100644 (file)
index 0000000..20a6028
--- /dev/null
@@ -0,0 +1,48 @@
+From ce48225fe3b1b0d1fc9fceb96ac3d8a879e45114 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 3 Mar 2014 15:38:24 -0800
+Subject: memcg: fix endless loop in __mem_cgroup_iter_next()
+
+From: Hugh Dickins <hughd@google.com>
+
+commit ce48225fe3b1b0d1fc9fceb96ac3d8a879e45114 upstream.
+
+Commit 0eef615665ed ("memcg: fix css reference leak and endless loop in
+mem_cgroup_iter") got the interaction with the commit a few before it
+d8ad30559715 ("mm/memcg: iteration skip memcgs not yet fully
+initialized") slightly wrong, and we didn't notice at the time.
+
+It's elusive, and harder to get than the original, but for a couple of
+days before rc1, I several times saw an endless loop similar to that
+supposedly being fixed.
+
+This time it was a tighter loop in __mem_cgroup_iter_next(): because we
+can get here when our root has already been offlined, and the ordering
+of conditions was such that we then just cycled around forever.
+
+Fixes: 0eef615665ed ("memcg: fix css reference leak and endless loop in mem_cgroup_iter").
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Michal Hocko <mhocko@suse.cz>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Greg Thelen <gthelen@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -1108,8 +1108,8 @@ skip_node:
+        * skipping css reference should be safe.
+        */
+       if (next_css) {
+-              if ((next_css->flags & CSS_ONLINE) &&
+-                              (next_css == &root->css || css_tryget(next_css)))
++              if ((next_css == &root->css) ||
++                  ((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
+                       return mem_cgroup_from_css(next_css);
+               prev_css = next_css;
diff --git a/queue-3.13/mm-include-vm_mixedmap-flag-in-the-vm_special-list-to-avoid-m-un-locking.patch b/queue-3.13/mm-include-vm_mixedmap-flag-in-the-vm_special-list-to-avoid-m-un-locking.patch
new file mode 100644 (file)
index 0000000..23e4978
--- /dev/null
@@ -0,0 +1,103 @@
+From 9050d7eba40b3d79551668f54e68fd6f51945ef3 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Mon, 3 Mar 2014 15:38:27 -0800
+Subject: mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 9050d7eba40b3d79551668f54e68fd6f51945ef3 upstream.
+
+Daniel Borkmann reported a VM_BUG_ON assertion failing:
+
+  ------------[ cut here ]------------
+  kernel BUG at mm/mlock.c:528!
+  invalid opcode: 0000 [#1] SMP
+  Modules linked in: ccm arc4 iwldvm [...]
+   video
+  CPU: 3 PID: 2266 Comm: netsniff-ng Not tainted 3.14.0-rc2+ #8
+  Hardware name: LENOVO 2429BP3/2429BP3, BIOS G4ET37WW (1.12 ) 05/29/2012
+  task: ffff8801f87f9820 ti: ffff88002cb44000 task.ti: ffff88002cb44000
+  RIP: 0010:[<ffffffff81171ad0>]  [<ffffffff81171ad0>] munlock_vma_pages_range+0x2e0/0x2f0
+  Call Trace:
+    do_munmap+0x18f/0x3b0
+    vm_munmap+0x41/0x60
+    SyS_munmap+0x22/0x30
+    system_call_fastpath+0x1a/0x1f
+  RIP   munlock_vma_pages_range+0x2e0/0x2f0
+  ---[ end trace a0088dcf07ae10f2 ]---
+
+because munlock_vma_pages_range() thinks it's unexpectedly in the middle
+of a THP page.  This can be reproduced with default config since 3.11
+kernels.  A reproducer can be found in the kernel's selftest directory
+for networking by running ./psock_tpacket.
+
+The problem is that an order=2 compound page (allocated by
+alloc_one_pg_vec_page()) is part of the munlocked VM_MIXEDMAP vma (mapped
+by packet_mmap()) and mistaken for a THP page and assumed to be order=9.
+
+The checks for THP in munlock came with commit ff6a6da60b89 ("mm:
+accelerate munlock() treatment of THP pages"), i.e.  since 3.9, but did
+not trigger a bug.  It just makes munlock_vma_pages_range() skip such
+compound pages until the next 512-pages-aligned page, when it encounters
+a head page.  This is however not a problem for vma's where mlocking has
+no effect anyway, but it can distort the accounting.
+
+Since commit 7225522bb429 ("mm: munlock: batch non-THP page isolation
+and munlock+putback using pagevec") this can trigger a VM_BUG_ON in
+PageTransHuge() check.
+
+This patch fixes the issue by adding VM_MIXEDMAP flag to VM_SPECIAL, a
+list of flags that make vma's non-mlockable and non-mergeable.  The
+reasoning is that VM_MIXEDMAP vma's are similar to VM_PFNMAP, which is
+already on the VM_SPECIAL list, and both are intended for non-LRU pages
+where mlocking makes no sense anyway.  Related Lkml discussion can be
+found in [2].
+
+ [1] tools/testing/selftests/net/psock_tpacket
+ [2] https://lkml.org/lkml/2014/1/10/427
+
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Reported-by: Daniel Borkmann <dborkman@redhat.com>
+Tested-by: Daniel Borkmann <dborkman@redhat.com>
+Cc: Thomas Hellstrom <thellstrom@vmware.com>
+Cc: John David Anglin <dave.anglin@bell.net>
+Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
+Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Cc: Carsten Otte <cotte@de.ibm.com>
+Cc: Jared Hulbert <jaredeh@gmail.com>
+Tested-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Rik van Riel <riel@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm.h |    2 +-
+ mm/huge_memory.c   |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -165,7 +165,7 @@ extern unsigned int kobjsize(const void
+  * Special vmas that are non-mergable, non-mlock()able.
+  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
+  */
+-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP)
++#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+ /*
+  * mapping from the currently active vm_flags protection bits (the
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1960,7 +1960,7 @@ out:
+       return ret;
+ }
+-#define VM_NO_THP (VM_SPECIAL|VM_MIXEDMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
++#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
+ int hugepage_madvise(struct vm_area_struct *vma,
+                    unsigned long *vm_flags, int advice)
diff --git a/queue-3.13/mm-page_alloc-exempt-gfp_thisnode-allocations-from-zone-fairness.patch b/queue-3.13/mm-page_alloc-exempt-gfp_thisnode-allocations-from-zone-fairness.patch
new file mode 100644 (file)
index 0000000..6fc76ba
--- /dev/null
@@ -0,0 +1,92 @@
+From 27329369c9ecf37771b2a65202cbf5578cff3331 Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Mon, 3 Mar 2014 15:38:41 -0800
+Subject: mm: page_alloc: exempt GFP_THISNODE allocations from zone fairness
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 27329369c9ecf37771b2a65202cbf5578cff3331 upstream.
+
+Jan Stancek reports manual page migration encountering allocation
+failures after some pages when there is still plenty of memory free, and
+bisected the problem down to commit 81c0a2bb515f ("mm: page_alloc: fair
+zone allocator policy").
+
+The problem is that GFP_THISNODE obeys the zone fairness allocation
+batches on one hand, but doesn't reset them and wake kswapd on the other
+hand.  After a few of those allocations, the batches are exhausted and
+the allocations fail.
+
+Fixing this means either having GFP_THISNODE wake up kswapd, or
+GFP_THISNODE not participating in zone fairness at all.  The latter
+seems safer as an acute bugfix, we can clean up later.
+
+Reported-by: Jan Stancek <jstancek@redhat.com>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Rik van Riel <riel@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/page_alloc.c |   26 ++++++++++++++++++++++----
+ 1 file changed, 22 insertions(+), 4 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -1211,6 +1211,15 @@ void drain_zone_pages(struct zone *zone,
+       }
+       local_irq_restore(flags);
+ }
++static bool gfp_thisnode_allocation(gfp_t gfp_mask)
++{
++      return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
++}
++#else
++static bool gfp_thisnode_allocation(gfp_t gfp_mask)
++{
++      return false;
++}
+ #endif
+ /*
+@@ -1547,7 +1556,13 @@ again:
+                                         get_pageblock_migratetype(page));
+       }
+-      __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
++      /*
++       * NOTE: GFP_THISNODE allocations do not partake in the kswapd
++       * aging protocol, so they can't be fair.
++       */
++      if (!gfp_thisnode_allocation(gfp_flags))
++              __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
++
+       __count_zone_vm_events(PGALLOC, zone, 1 << order);
+       zone_statistics(preferred_zone, zone, gfp_flags);
+       local_irq_restore(flags);
+@@ -1919,8 +1934,12 @@ zonelist_scan:
+                * ultimately fall back to remote zones that do not
+                * partake in the fairness round-robin cycle of this
+                * zonelist.
++               *
++               * NOTE: GFP_THISNODE allocations do not partake in
++               * the kswapd aging protocol, so they can't be fair.
+                */
+-              if (alloc_flags & ALLOC_WMARK_LOW) {
++              if ((alloc_flags & ALLOC_WMARK_LOW) &&
++                  !gfp_thisnode_allocation(gfp_mask)) {
+                       if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+                               continue;
+                       if (!zone_local(preferred_zone, zone))
+@@ -2486,8 +2505,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
+        * allowed per node queues are empty and that nodes are
+        * over allocated.
+        */
+-      if (IS_ENABLED(CONFIG_NUMA) &&
+-                      (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
++      if (gfp_thisnode_allocation(gfp_mask))
+               goto nopage;
+ restart:
diff --git a/queue-3.13/ocfs2-fix-quota-file-corruption.patch b/queue-3.13/ocfs2-fix-quota-file-corruption.patch
new file mode 100644 (file)
index 0000000..fba97ec
--- /dev/null
@@ -0,0 +1,88 @@
+From 15c34a760630ca2c803848fba90ca0646a9907dd Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 3 Mar 2014 15:38:32 -0800
+Subject: ocfs2: fix quota file corruption
+
+From: Jan Kara <jack@suse.cz>
+
+commit 15c34a760630ca2c803848fba90ca0646a9907dd upstream.
+
+Global quota files are accessed from different nodes.  Thus we cannot
+cache offset of quota structure in the quota file after we drop our node
+reference count to it because after that moment quota structure may be
+freed and reallocated elsewhere by a different node resulting in
+corruption of quota file.
+
+Fix the problem by clearing dq_off when we are releasing dquot structure.
+We also remove the DB_READ_B handling because it is useless -
+DQ_ACTIVE_B is set iff DQ_READ_B is set.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Cc: Goldwyn Rodrigues <rgoldwyn@suse.de>
+Cc: Joel Becker <jlbec@evilplan.org>
+Reviewed-by: Mark Fasheh <mfasheh@suse.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/quota_global.c |   27 +++++++++++++++++----------
+ fs/ocfs2/quota_local.c  |    4 ----
+ 2 files changed, 17 insertions(+), 14 deletions(-)
+
+--- a/fs/ocfs2/quota_global.c
++++ b/fs/ocfs2/quota_global.c
+@@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dq
+        */
+       if (status < 0)
+               mlog_errno(status);
++      /*
++       * Clear dq_off so that we search for the structure in quota file next
++       * time we acquire it. The structure might be deleted and reallocated
++       * elsewhere by another node while our dquot structure is on freelist.
++       */
++      dquot->dq_off = 0;
+       clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+ out_trans:
+       ocfs2_commit_trans(osb, handle);
+@@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dq
+       status = ocfs2_lock_global_qf(info, 1);
+       if (status < 0)
+               goto out;
+-      if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
+-              status = ocfs2_qinfo_lock(info, 0);
+-              if (status < 0)
+-                      goto out_dq;
+-              status = qtree_read_dquot(&info->dqi_gi, dquot);
+-              ocfs2_qinfo_unlock(info, 0);
+-              if (status < 0)
+-                      goto out_dq;
+-      }
+-      set_bit(DQ_READ_B, &dquot->dq_flags);
++      status = ocfs2_qinfo_lock(info, 0);
++      if (status < 0)
++              goto out_dq;
++      /*
++       * We always want to read dquot structure from disk because we don't
++       * know what happened with it while it was on freelist.
++       */
++      status = qtree_read_dquot(&info->dqi_gi, dquot);
++      ocfs2_qinfo_unlock(info, 0);
++      if (status < 0)
++              goto out_dq;
+       OCFS2_DQUOT(dquot)->dq_use_count++;
+       OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+--- a/fs/ocfs2/quota_local.c
++++ b/fs/ocfs2/quota_local.c
+@@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *
+       ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
+ out:
+-      /* Clear the read bit so that next time someone uses this
+-       * dquot he reads fresh info from disk and allocates local
+-       * dquot structure */
+-      clear_bit(DQ_READ_B, &dquot->dq_flags);
+       return status;
+ }
diff --git a/queue-3.13/ocfs2-syncs-the-wrong-range.patch b/queue-3.13/ocfs2-syncs-the-wrong-range.patch
new file mode 100644 (file)
index 0000000..6d6c451
--- /dev/null
@@ -0,0 +1,40 @@
+From 1b56e98990bcdbb20b9fab163654b9315bf158e8 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 10 Feb 2014 15:18:55 -0500
+Subject: ocfs2 syncs the wrong range...
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 1b56e98990bcdbb20b9fab163654b9315bf158e8 upstream.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ocfs2/file.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -2370,8 +2370,8 @@ out_dio:
+       if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
+           ((file->f_flags & O_DIRECT) && !direct_io)) {
+-              ret = filemap_fdatawrite_range(file->f_mapping, pos,
+-                                             pos + count - 1);
++              ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
++                                             *ppos + count - 1);
+               if (ret < 0)
+                       written = ret;
+@@ -2384,8 +2384,8 @@ out_dio:
+               }
+               if (!ret)
+-                      ret = filemap_fdatawait_range(file->f_mapping, pos,
+-                                                    pos + count - 1);
++                      ret = filemap_fdatawait_range(file->f_mapping, *ppos,
++                                                    *ppos + count - 1);
+       }
+       /*
diff --git a/queue-3.13/series b/queue-3.13/series
new file mode 100644 (file)
index 0000000..893ca5e
--- /dev/null
@@ -0,0 +1,6 @@
+zram-avoid-null-access-when-fail-to-alloc-meta.patch
+mm-page_alloc-exempt-gfp_thisnode-allocations-from-zone-fairness.patch
+mm-include-vm_mixedmap-flag-in-the-vm_special-list-to-avoid-m-un-locking.patch
+ocfs2-fix-quota-file-corruption.patch
+ocfs2-syncs-the-wrong-range.patch
+memcg-fix-endless-loop-in-__mem_cgroup_iter_next.patch
diff --git a/queue-3.13/zram-avoid-null-access-when-fail-to-alloc-meta.patch b/queue-3.13/zram-avoid-null-access-when-fail-to-alloc-meta.patch
new file mode 100644 (file)
index 0000000..1d94a7e
--- /dev/null
@@ -0,0 +1,33 @@
+From db5d711e2db776f18219b033e5dc4fb7e4264dd7 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Mon, 3 Mar 2014 15:38:34 -0800
+Subject: zram: avoid null access when fail to alloc meta
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit db5d711e2db776f18219b033e5dc4fb7e4264dd7 upstream.
+
+zram_meta_alloc could fail so caller should check it.  Otherwise, your
+system will hang.
+
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Jerome Marchand <jmarchan@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/staging/zram/zram_drv.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/staging/zram/zram_drv.c
++++ b/drivers/staging/zram/zram_drv.c
+@@ -621,6 +621,8 @@ static ssize_t disksize_store(struct dev
+       disksize = PAGE_ALIGN(disksize);
+       meta = zram_meta_alloc(disksize);
++      if (!meta)
++              return -ENOMEM;
+       down_write(&zram->init_lock);
+       if (zram->init_done) {
+               up_write(&zram->init_lock);