]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.12-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 7 Feb 2014 00:09:44 +0000 (16:09 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 7 Feb 2014 00:09:44 +0000 (16:09 -0800)
added patches:
memcg-fix-css-reference-leak-and-endless-loop-in-mem_cgroup_iter.patch
memcg-fix-endless-loop-caused-by-mem_cgroup_iter.patch
mm-don-t-lose-the-soft_dirty-flag-on-mprotect.patch
mm-ignore-vm_softdirty-on-vma-merging.patch
mm-munlock-fix-potential-race-with-thp-page-split.patch
mmc-atmel-mci-fix-timeout-errors-in-sdio-mode-when-using-dma.patch
mmc-core-sd-implement-proper-support-for-sd3.0-au-sizes.patch
mmc-fix-host-release-issue-after-discard-operation.patch

queue-3.12/memcg-fix-css-reference-leak-and-endless-loop-in-mem_cgroup_iter.patch [new file with mode: 0644]
queue-3.12/memcg-fix-endless-loop-caused-by-mem_cgroup_iter.patch [new file with mode: 0644]
queue-3.12/mm-don-t-lose-the-soft_dirty-flag-on-mprotect.patch [new file with mode: 0644]
queue-3.12/mm-ignore-vm_softdirty-on-vma-merging.patch [new file with mode: 0644]
queue-3.12/mm-munlock-fix-potential-race-with-thp-page-split.patch [new file with mode: 0644]
queue-3.12/mmc-atmel-mci-fix-timeout-errors-in-sdio-mode-when-using-dma.patch [new file with mode: 0644]
queue-3.12/mmc-core-sd-implement-proper-support-for-sd3.0-au-sizes.patch [new file with mode: 0644]
queue-3.12/mmc-fix-host-release-issue-after-discard-operation.patch [new file with mode: 0644]
queue-3.12/series

diff --git a/queue-3.12/memcg-fix-css-reference-leak-and-endless-loop-in-mem_cgroup_iter.patch b/queue-3.12/memcg-fix-css-reference-leak-and-endless-loop-in-mem_cgroup_iter.patch
new file mode 100644 (file)
index 0000000..8983e2a
--- /dev/null
@@ -0,0 +1,79 @@
+From 0eef615665ede1e0d603ea9ecca88c1da6f02234 Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.cz>
+Date: Thu, 23 Jan 2014 15:53:37 -0800
+Subject: memcg: fix css reference leak and endless loop in mem_cgroup_iter
+
+From: Michal Hocko <mhocko@suse.cz>
+
+commit 0eef615665ede1e0d603ea9ecca88c1da6f02234 upstream.
+
+Commit 19f39402864e ("memcg: simplify mem_cgroup_iter") has reorganized
+mem_cgroup_iter code in order to simplify it.  A part of that change was
+dropping an optimization which didn't call css_tryget on the root of the
+walked tree.  The patch however didn't change the css_put part in
+mem_cgroup_iter which excludes root.
+
+This wasn't an issue at the time because __mem_cgroup_iter_next bailed
+out for root early without taking a reference as cgroup iterators
+(css_next_descendant_pre) didn't visit root themselves.
+
+Nevertheless cgroup iterators have been reworked to visit root by commit
+bd8815a6d802 ("cgroup: make css_for_each_descendant() and friends
+include the origin css in the iteration") when the root bypass have been
+dropped in __mem_cgroup_iter_next.  This means that css_put is not
+called for root and so css along with mem_cgroup and other cgroup
+internal object tied by css lifetime are never freed.
+
+Fix the issue by reintroducing root check in __mem_cgroup_iter_next and
+do not take css reference for it.
+
+This reference counting magic protects us also from another issue, an
+endless loop reported by Hugh Dickins when reclaim races with root
+removal and css_tryget called by iterator internally would fail.  There
+would be no other nodes to visit so __mem_cgroup_iter_next would return
+NULL and mem_cgroup_iter would interpret it as "start looping from root
+again" and so mem_cgroup_iter would loop forever internally.
+
+Signed-off-by: Michal Hocko <mhocko@suse.cz>
+Reported-by: Hugh Dickins <hughd@google.com>
+Tested-by: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Greg Thelen <gthelen@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c |   18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -1079,14 +1079,22 @@ skip_node:
+        * skipped and we should continue the tree walk.
+        * last_visited css is safe to use because it is
+        * protected by css_get and the tree walk is rcu safe.
++       *
++       * We do not take a reference on the root of the tree walk
++       * because we might race with the root removal when it would
++       * be the only node in the iterated hierarchy and mem_cgroup_iter
++       * would end up in an endless loop because it expects that at
++       * least one valid node will be returned. Root cannot disappear
++       * because caller of the iterator should hold it already so
++       * skipping css reference should be safe.
+        */
+       if (next_css) {
+-              if ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))
++              if ((next_css->flags & CSS_ONLINE) &&
++                              (next_css == &root->css || css_tryget(next_css)))
+                       return mem_cgroup_from_css(next_css);
+-              else {
+-                      prev_css = next_css;
+-                      goto skip_node;
+-              }
++
++              prev_css = next_css;
++              goto skip_node;
+       }
+       return NULL;
diff --git a/queue-3.12/memcg-fix-endless-loop-caused-by-mem_cgroup_iter.patch b/queue-3.12/memcg-fix-endless-loop-caused-by-mem_cgroup_iter.patch
new file mode 100644 (file)
index 0000000..a6e7532
--- /dev/null
@@ -0,0 +1,98 @@
+From ecc736fc3c71c411a9d201d8588c9e7e049e5d8c Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.cz>
+Date: Thu, 23 Jan 2014 15:53:35 -0800
+Subject: memcg: fix endless loop caused by mem_cgroup_iter
+
+From: Michal Hocko <mhocko@suse.cz>
+
+commit ecc736fc3c71c411a9d201d8588c9e7e049e5d8c upstream.
+
+Hugh has reported an endless loop when the hardlimit reclaim sees the
+same group all the time.  This might happen when the reclaim races with
+the memcg removal.
+
+shrink_zone
+                                                [rmdir root]
+  mem_cgroup_iter(root, NULL, reclaim)
+    // prev = NULL
+    rcu_read_lock()
+    mem_cgroup_iter_load
+      last_visited = iter->last_visited   // gets root || NULL
+      css_tryget(last_visited)            // failed
+      last_visited = NULL                 [1]
+    memcg = root = __mem_cgroup_iter_next(root, NULL)
+    mem_cgroup_iter_update
+      iter->last_visited = root;
+    reclaim->generation = iter->generation
+
+ mem_cgroup_iter(root, root, reclaim)
+   // prev = root
+   rcu_read_lock
+    mem_cgroup_iter_load
+      last_visited = iter->last_visited   // gets root
+      css_tryget(last_visited)            // failed
+    [1]
+
+The issue seemed to be introduced by commit 5f5781619718 ("memcg: relax
+memcg iter caching") which has replaced unconditional css_get/css_put by
+css_tryget/css_put for the cached iterator.
+
+This patch fixes the issue by skipping css_tryget on the root of the
+tree walk in mem_cgroup_iter_load and symmetrically doesn't release it
+in mem_cgroup_iter_update.
+
+Signed-off-by: Michal Hocko <mhocko@suse.cz>
+Reported-by: Hugh Dickins <hughd@google.com>
+Tested-by: Hugh Dickins <hughd@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Greg Thelen <gthelen@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memcontrol.c |   17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -1120,7 +1120,15 @@ mem_cgroup_iter_load(struct mem_cgroup_r
+       if (iter->last_dead_count == *sequence) {
+               smp_rmb();
+               position = iter->last_visited;
+-              if (position && !css_tryget(&position->css))
++
++              /*
++               * We cannot take a reference to root because we might race
++               * with root removal and returning NULL would end up in
++               * an endless loop on the iterator user level when root
++               * would be returned all the time.
++               */
++              if (position && position != root &&
++                              !css_tryget(&position->css))
+                       position = NULL;
+       }
+       return position;
+@@ -1129,9 +1137,11 @@ mem_cgroup_iter_load(struct mem_cgroup_r
+ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
+                                  struct mem_cgroup *last_visited,
+                                  struct mem_cgroup *new_position,
++                                 struct mem_cgroup *root,
+                                  int sequence)
+ {
+-      if (last_visited)
++      /* root reference counting symmetric to mem_cgroup_iter_load */
++      if (last_visited && last_visited != root)
+               css_put(&last_visited->css);
+       /*
+        * We store the sequence count from the time @last_visited was
+@@ -1206,7 +1216,8 @@ struct mem_cgroup *mem_cgroup_iter(struc
+               memcg = __mem_cgroup_iter_next(root, last_visited);
+               if (reclaim) {
+-                      mem_cgroup_iter_update(iter, last_visited, memcg, seq);
++                      mem_cgroup_iter_update(iter, last_visited, memcg, root,
++                                      seq);
+                       if (!memcg)
+                               iter->generation++;
diff --git a/queue-3.12/mm-don-t-lose-the-soft_dirty-flag-on-mprotect.patch b/queue-3.12/mm-don-t-lose-the-soft_dirty-flag-on-mprotect.patch
new file mode 100644 (file)
index 0000000..ddcb84c
--- /dev/null
@@ -0,0 +1,52 @@
+From 24f91eba18bbfdb27e71a1aae5b3a61b67fcd091 Mon Sep 17 00:00:00 2001
+From: Andrey Vagin <avagin@openvz.org>
+Date: Thu, 30 Jan 2014 15:46:10 -0800
+Subject: mm: don't lose the SOFT_DIRTY flag on mprotect
+
+From: Andrey Vagin <avagin@openvz.org>
+
+commit 24f91eba18bbfdb27e71a1aae5b3a61b67fcd091 upstream.
+
+The SOFT_DIRTY bit shows that the content of memory was changed after a
+defined point in the past.  mprotect() doesn't change the content of
+memory, so it must not change the SOFT_DIRTY bit.
+
+This bug causes a malfunction: on the first iteration all pages are
+dumped.  On other iterations only pages with the SOFT_DIRTY bit are
+dumped.  So if the SOFT_DIRTY bit is cleared from a page by mistake, the
+page is not dumped and its content will be restored incorrectly.
+
+This patch does nothing with _PAGE_SWP_SOFT_DIRTY, because pte_modify()
+is called only for present pages.
+
+Fixes commit 0f8975ec4db2 ("mm: soft-dirty bits for user memory changes
+tracking").
+
+Signed-off-by: Andrey Vagin <avagin@openvz.org>
+Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Cc: Borislav Petkov <bp@suse.de>
+Cc: Wen Congyang <wency@cn.fujitsu.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pgtable_types.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -121,7 +121,8 @@
+ /* Set of bits not changed in pte_modify */
+ #define _PAGE_CHG_MASK        (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |         \
+-                       _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
++                       _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
++                       _PAGE_SOFT_DIRTY)
+ #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+ #define _PAGE_CACHE_MASK      (_PAGE_PCD | _PAGE_PWT)
diff --git a/queue-3.12/mm-ignore-vm_softdirty-on-vma-merging.patch b/queue-3.12/mm-ignore-vm_softdirty-on-vma-merging.patch
new file mode 100644 (file)
index 0000000..2a79ac0
--- /dev/null
@@ -0,0 +1,83 @@
+From 34228d473efe764d4db7c0536375f0c993e6e06a Mon Sep 17 00:00:00 2001
+From: Cyrill Gorcunov <gorcunov@gmail.com>
+Date: Thu, 23 Jan 2014 15:53:42 -0800
+Subject: mm: ignore VM_SOFTDIRTY on VMA merging
+
+From: Cyrill Gorcunov <gorcunov@gmail.com>
+
+commit 34228d473efe764d4db7c0536375f0c993e6e06a upstream.
+
+The VM_SOFTDIRTY bit affects vma merge routine: if two VMAs has all bits
+in vm_flags matched except the dirty bit, the kernel can no longer merge them
+and this forces the kernel to generate new VMAs instead.
+
+It finally may lead to the situation when userspace application reaches
+vm.max_map_count limit and get crashed in worse case
+
+ | (gimp:11768): GLib-ERROR **: gmem.c:110: failed to allocate 4096 bytes
+ |
+ | (file-tiff-load:12038): LibGimpBase-WARNING **: file-tiff-load: gimp_wire_read(): error
+ | xinit: connection to X server lost
+ |
+ | waiting for X server to shut down
+ | /usr/lib64/gimp/2.0/plug-ins/file-tiff-load terminated: Hangup
+ | /usr/lib64/gimp/2.0/plug-ins/script-fu terminated: Hangup
+ | /usr/lib64/gimp/2.0/plug-ins/script-fu terminated: Hangup
+
+  https://bugzilla.kernel.org/show_bug.cgi?id=67651
+  https://bugzilla.gnome.org/show_bug.cgi?id=719619#c0
+
+Initial problem came from missed VM_SOFTDIRTY in do_brk() routine but
+even if we would set up VM_SOFTDIRTY here, there is still a way to
+prevent VMAs from merging: one can call
+
+ | echo 4 > /proc/$PID/clear_refs
+
+and clear all VM_SOFTDIRTY over all VMAs presented in memory map, then
+new do_brk() will try to extend old VMA and finds that dirty bit doesn't
+match thus new VMA will be generated.
+
+As discussed with Pavel, the right approach should be to ignore
+VM_SOFTDIRTY bit when we're trying to merge VMAs and if merge succeeded
+we mark extended VMA with dirty bit where needed.
+
+Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Reported-by: Bastian Hougaard <gnome@rvzt.net>
+Reported-by: Mel Gorman <mgorman@suse.de>
+Cc: Pavel Emelyanov <xemul@parallels.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mmap.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -895,7 +895,15 @@ again:                    remove_next = 1 + (end > next->
+ static inline int is_mergeable_vma(struct vm_area_struct *vma,
+                       struct file *file, unsigned long vm_flags)
+ {
+-      if (vma->vm_flags ^ vm_flags)
++      /*
++       * VM_SOFTDIRTY should not prevent from VMA merging, if we
++       * match the flags but dirty bit -- the caller should mark
++       * merged VMA as dirty. If dirty bit won't be excluded from
++       * comparison, we increase pressue on the memory system forcing
++       * the kernel to generate new VMAs when old one could be
++       * extended instead.
++       */
++      if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
+               return 0;
+       if (vma->vm_file != file)
+               return 0;
+@@ -1084,7 +1092,7 @@ static int anon_vma_compatible(struct vm
+       return a->vm_end == b->vm_start &&
+               mpol_equal(vma_policy(a), vma_policy(b)) &&
+               a->vm_file == b->vm_file &&
+-              !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
++              !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
+               b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+ }
diff --git a/queue-3.12/mm-munlock-fix-potential-race-with-thp-page-split.patch b/queue-3.12/mm-munlock-fix-potential-race-with-thp-page-split.patch
new file mode 100644 (file)
index 0000000..040801d
--- /dev/null
@@ -0,0 +1,207 @@
+From 01cc2e58697e34c6ee9a40fb6cebc18bf5a1923f Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 23 Jan 2014 15:52:50 -0800
+Subject: mm: munlock: fix potential race with THP page split
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 01cc2e58697e34c6ee9a40fb6cebc18bf5a1923f upstream.
+
+Since commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP
+pages") munlock skips tail pages of a munlocked THP page.  There is some
+attempt to prevent bad consequences of racing with a THP page split, but
+code inspection indicates that there are two problems that may lead to a
+non-fatal, yet wrong outcome.
+
+First, __split_huge_page_refcount() copies flags including PageMlocked
+from the head page to the tail pages.  Clearing PageMlocked by
+munlock_vma_page() in the middle of this operation might result in part
+of tail pages left with PageMlocked flag.  As the head page still
+appears to be a THP page until all tail pages are processed,
+munlock_vma_page() might think it munlocked the whole THP page and skip
+all the former tail pages.  Before ff6a6da60, those pages would be
+cleared in further iterations of munlock_vma_pages_range(), but NR_MLOCK
+would still become undercounted (related the next point).
+
+Second, NR_MLOCK accounting is based on call to hpage_nr_pages() after
+the PageMlocked is cleared.  The accounting might also become
+inconsistent due to race with __split_huge_page_refcount()
+
+- undercount when HUGE_PMD_NR is subtracted, but some tail pages are
+  left with PageMlocked set and counted again (only possible before
+  ff6a6da60)
+
+- overcount when hpage_nr_pages() sees a normal page (split has already
+  finished), but the parallel split has meanwhile cleared PageMlocked from
+  additional tail pages
+
+This patch prevents both problems via extending the scope of lru_lock in
+munlock_vma_page().  This is convenient because:
+
+- __split_huge_page_refcount() takes lru_lock for its whole operation
+
+- munlock_vma_page() typically takes lru_lock anyway for page isolation
+
+As this becomes a second function where page isolation is done with
+lru_lock already held, factor this out to a new
+__munlock_isolate_lru_page() function and clean up the code around.
+
+[akpm@linux-foundation.org: avoid a coding-style ugly]
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Sasha Levin <sasha.levin@oracle.com>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mlock.c |  104 +++++++++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 60 insertions(+), 44 deletions(-)
+
+--- a/mm/mlock.c
++++ b/mm/mlock.c
+@@ -91,6 +91,26 @@ void mlock_vma_page(struct page *page)
+ }
+ /*
++ * Isolate a page from LRU with optional get_page() pin.
++ * Assumes lru_lock already held and page already pinned.
++ */
++static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
++{
++      if (PageLRU(page)) {
++              struct lruvec *lruvec;
++
++              lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
++              if (getpage)
++                      get_page(page);
++              ClearPageLRU(page);
++              del_page_from_lru_list(page, lruvec, page_lru(page));
++              return true;
++      }
++
++      return false;
++}
++
++/*
+  * Finish munlock after successful page isolation
+  *
+  * Page must be locked. This is a wrapper for try_to_munlock()
+@@ -126,9 +146,9 @@ static void __munlock_isolated_page(stru
+ static void __munlock_isolation_failed(struct page *page)
+ {
+       if (PageUnevictable(page))
+-              count_vm_event(UNEVICTABLE_PGSTRANDED);
++              __count_vm_event(UNEVICTABLE_PGSTRANDED);
+       else
+-              count_vm_event(UNEVICTABLE_PGMUNLOCKED);
++              __count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+ }
+ /**
+@@ -152,28 +172,34 @@ static void __munlock_isolation_failed(s
+ unsigned int munlock_vma_page(struct page *page)
+ {
+       unsigned int nr_pages;
++      struct zone *zone = page_zone(page);
+       BUG_ON(!PageLocked(page));
+-      if (TestClearPageMlocked(page)) {
+-              nr_pages = hpage_nr_pages(page);
+-              mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
+-              if (!isolate_lru_page(page))
+-                      __munlock_isolated_page(page);
+-              else
+-                      __munlock_isolation_failed(page);
+-      } else {
+-              nr_pages = hpage_nr_pages(page);
+-      }
+-
+       /*
+-       * Regardless of the original PageMlocked flag, we determine nr_pages
+-       * after touching the flag. This leaves a possible race with a THP page
+-       * split, such that a whole THP page was munlocked, but nr_pages == 1.
+-       * Returning a smaller mask due to that is OK, the worst that can
+-       * happen is subsequent useless scanning of the former tail pages.
+-       * The NR_MLOCK accounting can however become broken.
++       * Serialize with any parallel __split_huge_page_refcount() which
++       * might otherwise copy PageMlocked to part of the tail pages before
++       * we clear it in the head page. It also stabilizes hpage_nr_pages().
+        */
++      spin_lock_irq(&zone->lru_lock);
++
++      nr_pages = hpage_nr_pages(page);
++      if (!TestClearPageMlocked(page))
++              goto unlock_out;
++
++      __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
++
++      if (__munlock_isolate_lru_page(page, true)) {
++              spin_unlock_irq(&zone->lru_lock);
++              __munlock_isolated_page(page);
++              goto out;
++      }
++      __munlock_isolation_failed(page);
++
++unlock_out:
++      spin_unlock_irq(&zone->lru_lock);
++
++out:
+       return nr_pages - 1;
+ }
+@@ -310,34 +336,24 @@ static void __munlock_pagevec(struct pag
+               struct page *page = pvec->pages[i];
+               if (TestClearPageMlocked(page)) {
+-                      struct lruvec *lruvec;
+-                      int lru;
+-
+-                      if (PageLRU(page)) {
+-                              lruvec = mem_cgroup_page_lruvec(page, zone);
+-                              lru = page_lru(page);
+-                              /*
+-                               * We already have pin from follow_page_mask()
+-                               * so we can spare the get_page() here.
+-                               */
+-                              ClearPageLRU(page);
+-                              del_page_from_lru_list(page, lruvec, lru);
+-                      } else {
+-                              __munlock_isolation_failed(page);
+-                              goto skip_munlock;
+-                      }
+-
+-              } else {
+-skip_munlock:
+                       /*
+-                       * We won't be munlocking this page in the next phase
+-                       * but we still need to release the follow_page_mask()
+-                       * pin. We cannot do it under lru_lock however. If it's
+-                       * the last pin, __page_cache_release would deadlock.
++                       * We already have pin from follow_page_mask()
++                       * so we can spare the get_page() here.
+                        */
+-                      pagevec_add(&pvec_putback, pvec->pages[i]);
+-                      pvec->pages[i] = NULL;
++                      if (__munlock_isolate_lru_page(page, false))
++                              continue;
++                      else
++                              __munlock_isolation_failed(page);
+               }
++
++              /*
++               * We won't be munlocking this page in the next phase
++               * but we still need to release the follow_page_mask()
++               * pin. We cannot do it under lru_lock however. If it's
++               * the last pin, __page_cache_release() would deadlock.
++               */
++              pagevec_add(&pvec_putback, pvec->pages[i]);
++              pvec->pages[i] = NULL;
+       }
+       delta_munlocked = -nr + pagevec_count(&pvec_putback);
+       __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
diff --git a/queue-3.12/mmc-atmel-mci-fix-timeout-errors-in-sdio-mode-when-using-dma.patch b/queue-3.12/mmc-atmel-mci-fix-timeout-errors-in-sdio-mode-when-using-dma.patch
new file mode 100644 (file)
index 0000000..9cbf8ce
--- /dev/null
@@ -0,0 +1,50 @@
+From 66b512eda74d59b17eac04c4da1b38d82059e6c9 Mon Sep 17 00:00:00 2001
+From: Ludovic Desroches <ludovic.desroches@atmel.com>
+Date: Wed, 20 Nov 2013 16:01:11 +0100
+Subject: mmc: atmel-mci: fix timeout errors in SDIO mode when using DMA
+
+From: Ludovic Desroches <ludovic.desroches@atmel.com>
+
+commit 66b512eda74d59b17eac04c4da1b38d82059e6c9 upstream.
+
+With some SDIO devices, timeout errors can happen when reading data.
+To solve this issue, the DMA transfer has to be activated before sending
+the command to the device. This order is incorrect in PDC mode. So we
+have to take care if we are using DMA or PDC to know when to send the
+MMC command.
+
+Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
+Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
+Signed-off-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/atmel-mci.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/drivers/mmc/host/atmel-mci.c
++++ b/drivers/mmc/host/atmel-mci.c
+@@ -1193,11 +1193,22 @@ static void atmci_start_request(struct a
+       iflags |= ATMCI_CMDRDY;
+       cmd = mrq->cmd;
+       cmdflags = atmci_prepare_command(slot->mmc, cmd);
+-      atmci_send_command(host, cmd, cmdflags);
++
++      /*
++       * DMA transfer should be started before sending the command to avoid
++       * unexpected errors especially for read operations in SDIO mode.
++       * Unfortunately, in PDC mode, command has to be sent before starting
++       * the transfer.
++       */
++      if (host->submit_data != &atmci_submit_data_dma)
++              atmci_send_command(host, cmd, cmdflags);
+       if (data)
+               host->submit_data(host, data);
++      if (host->submit_data == &atmci_submit_data_dma)
++              atmci_send_command(host, cmd, cmdflags);
++
+       if (mrq->stop) {
+               host->stop_cmdr = atmci_prepare_command(slot->mmc, mrq->stop);
+               host->stop_cmdr |= ATMCI_CMDR_STOP_XFER;
diff --git a/queue-3.12/mmc-core-sd-implement-proper-support-for-sd3.0-au-sizes.patch b/queue-3.12/mmc-core-sd-implement-proper-support-for-sd3.0-au-sizes.patch
new file mode 100644 (file)
index 0000000..dce3fa8
--- /dev/null
@@ -0,0 +1,98 @@
+From 9288cac05405a7da406097a44721aa4004609b4d Mon Sep 17 00:00:00 2001
+From: Wolfram Sang <wsa@the-dreams.de>
+Date: Tue, 26 Nov 2013 02:16:25 +0100
+Subject: mmc: core: sd: implement proper support for sd3.0 au sizes
+
+From: Wolfram Sang <wsa@the-dreams.de>
+
+commit 9288cac05405a7da406097a44721aa4004609b4d upstream.
+
+This reverts and updates commit 77776fd0a4cc541b9 ("mmc: sd: fix the
+maximum au_size for SD3.0"). The au_size for SD3.0 cannot be achieved
+by a simple bit shift, so this needs to be implemented differently.
+Also, don't print the warning in case of 0 since 'not defined' is
+different from 'invalid'.
+
+Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
+Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
+Reviewed-by: H Hartley Sweeten <hsweeten@visionengravers.com>
+Signed-off-by: Chris Ball <chris@printf.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/core/sd.c |   37 ++++++++++++++++++++++---------------
+ 1 file changed, 22 insertions(+), 15 deletions(-)
+
+--- a/drivers/mmc/core/sd.c
++++ b/drivers/mmc/core/sd.c
+@@ -11,6 +11,7 @@
+  */
+ #include <linux/err.h>
++#include <linux/sizes.h>
+ #include <linux/slab.h>
+ #include <linux/stat.h>
+@@ -44,6 +45,13 @@ static const unsigned int tacc_mant[] =
+       35,     40,     45,     50,     55,     60,     70,     80,
+ };
++static const unsigned int sd_au_size[] = {
++      0,              SZ_16K / 512,           SZ_32K / 512,   SZ_64K / 512,
++      SZ_128K / 512,  SZ_256K / 512,          SZ_512K / 512,  SZ_1M / 512,
++      SZ_2M / 512,    SZ_4M / 512,            SZ_8M / 512,    (SZ_8M + SZ_4M) / 512,
++      SZ_16M / 512,   (SZ_16M + SZ_8M) / 512, SZ_32M / 512,   SZ_64M / 512,
++};
++
+ #define UNSTUFF_BITS(resp,start,size)                                 \
+       ({                                                              \
+               const int __size = size;                                \
+@@ -215,7 +223,7 @@ static int mmc_decode_scr(struct mmc_car
+ static int mmc_read_ssr(struct mmc_card *card)
+ {
+       unsigned int au, es, et, eo;
+-      int err, i, max_au;
++      int err, i;
+       u32 *ssr;
+       if (!(card->csd.cmdclass & CCC_APP_SPEC)) {
+@@ -239,26 +247,25 @@ static int mmc_read_ssr(struct mmc_card
+       for (i = 0; i < 16; i++)
+               ssr[i] = be32_to_cpu(ssr[i]);
+-      /* SD3.0 increases max AU size to 64MB (0xF) from 4MB (0x9) */
+-      max_au = card->scr.sda_spec3 ? 0xF : 0x9;
+-
+       /*
+        * UNSTUFF_BITS only works with four u32s so we have to offset the
+        * bitfield positions accordingly.
+        */
+       au = UNSTUFF_BITS(ssr, 428 - 384, 4);
+-      if (au > 0 && au <= max_au) {
+-              card->ssr.au = 1 << (au + 4);
+-              es = UNSTUFF_BITS(ssr, 408 - 384, 16);
+-              et = UNSTUFF_BITS(ssr, 402 - 384, 6);
+-              eo = UNSTUFF_BITS(ssr, 400 - 384, 2);
+-              if (es && et) {
+-                      card->ssr.erase_timeout = (et * 1000) / es;
+-                      card->ssr.erase_offset = eo * 1000;
++      if (au) {
++              if (au <= 9 || card->scr.sda_spec3) {
++                      card->ssr.au = sd_au_size[au];
++                      es = UNSTUFF_BITS(ssr, 408 - 384, 16);
++                      et = UNSTUFF_BITS(ssr, 402 - 384, 6);
++                      if (es && et) {
++                              eo = UNSTUFF_BITS(ssr, 400 - 384, 2);
++                              card->ssr.erase_timeout = (et * 1000) / es;
++                              card->ssr.erase_offset = eo * 1000;
++                      }
++              } else {
++                      pr_warning("%s: SD Status: Invalid Allocation Unit size.\n",
++                                 mmc_hostname(card->host));
+               }
+-      } else {
+-              pr_warning("%s: SD Status: Invalid Allocation Unit "
+-                      "size.\n", mmc_hostname(card->host));
+       }
+ out:
+       kfree(ssr);
diff --git a/queue-3.12/mmc-fix-host-release-issue-after-discard-operation.patch b/queue-3.12/mmc-fix-host-release-issue-after-discard-operation.patch
new file mode 100644 (file)
index 0000000..22eb886
--- /dev/null
@@ -0,0 +1,66 @@
+From f662ae48ae67dfd42739e65750274fe8de46240a Mon Sep 17 00:00:00 2001
+From: Ray Jui <rjui@broadcom.com>
+Date: Sat, 26 Oct 2013 11:03:44 -0700
+Subject: mmc: fix host release issue after discard operation
+
+From: Ray Jui <rjui@broadcom.com>
+
+commit f662ae48ae67dfd42739e65750274fe8de46240a upstream.
+
+Under function mmc_blk_issue_rq, after an MMC discard operation,
+the MMC request data structure may be freed in memory. Later in
+the same function, the check of req->cmd_flags & MMC_REQ_SPECIAL_MASK
+is dangerous and invalid. It causes the MMC host not to be released
+when it should.
+
+This patch fixes the issue by marking the special request down before
+the discard/flush operation.
+
+Reported by: Harold (SoonYeal) Yang <haroldsy@broadcom.com>
+Signed-off-by: Ray Jui <rjui@broadcom.com>
+Reviewed-by: Seungwon Jeon <tgih.jun@samsung.com>
+Acked-by: Seungwon Jeon <tgih.jun@samsung.com>
+Signed-off-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/card/block.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/mmc/card/block.c
++++ b/drivers/mmc/card/block.c
+@@ -1959,6 +1959,7 @@ static int mmc_blk_issue_rq(struct mmc_q
+       struct mmc_card *card = md->queue.card;
+       struct mmc_host *host = card->host;
+       unsigned long flags;
++      unsigned int cmd_flags = req ? req->cmd_flags : 0;
+       if (req && !mq->mqrq_prev->req)
+               /* claim host only for the first request */
+@@ -1974,7 +1975,7 @@ static int mmc_blk_issue_rq(struct mmc_q
+       }
+       mq->flags &= ~MMC_QUEUE_NEW_REQUEST;
+-      if (req && req->cmd_flags & REQ_DISCARD) {
++      if (cmd_flags & REQ_DISCARD) {
+               /* complete ongoing async transfer before issuing discard */
+               if (card->host->areq)
+                       mmc_blk_issue_rw_rq(mq, NULL);
+@@ -1983,7 +1984,7 @@ static int mmc_blk_issue_rq(struct mmc_q
+                       ret = mmc_blk_issue_secdiscard_rq(mq, req);
+               else
+                       ret = mmc_blk_issue_discard_rq(mq, req);
+-      } else if (req && req->cmd_flags & REQ_FLUSH) {
++      } else if (cmd_flags & REQ_FLUSH) {
+               /* complete ongoing async transfer before issuing flush */
+               if (card->host->areq)
+                       mmc_blk_issue_rw_rq(mq, NULL);
+@@ -1999,7 +2000,7 @@ static int mmc_blk_issue_rq(struct mmc_q
+ out:
+       if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) ||
+-           (req && (req->cmd_flags & MMC_REQ_SPECIAL_MASK)))
++           (cmd_flags & MMC_REQ_SPECIAL_MASK))
+               /*
+                * Release host when there are no more requests
+                * and after special request(discard, flush) is done.
index 0ac9d1de8d6bef390755d06ba81f306598d452ab..5a49429c3ac614d82fb17f6b0339dab48734a0f4 100644 (file)
@@ -14,3 +14,11 @@ mm-memory-failure.c-shift-page-lock-from-head-page-to-tail-page-after-thp-split.
 mm-memcg-iteration-skip-memcgs-not-yet-fully-initialized.patch
 mm-page-writeback.c-fix-dirty_balance_reserve-subtraction-from-dirtyable-memory.patch
 mm-page-writeback.c-do-not-count-anon-pages-as-dirtyable-memory.patch
+mm-munlock-fix-potential-race-with-thp-page-split.patch
+memcg-fix-endless-loop-caused-by-mem_cgroup_iter.patch
+memcg-fix-css-reference-leak-and-endless-loop-in-mem_cgroup_iter.patch
+mm-ignore-vm_softdirty-on-vma-merging.patch
+mm-don-t-lose-the-soft_dirty-flag-on-mprotect.patch
+mmc-fix-host-release-issue-after-discard-operation.patch
+mmc-atmel-mci-fix-timeout-errors-in-sdio-mode-when-using-dma.patch
+mmc-core-sd-implement-proper-support-for-sd3.0-au-sizes.patch