]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.18-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 14 Jan 2015 06:55:11 +0000 (22:55 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 14 Jan 2015 06:55:11 +0000 (22:55 -0800)
added patches:
drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch
exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch
mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch
mm-propagate-error-from-stack-expansion-even-for-guard-page.patch
mm-protect-set_page_dirty-from-ongoing-truncation.patch
mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch
mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch
nouveau-bring-back-legacy-mmap-handler.patch
regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch
spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch

queue-3.18/drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch [new file with mode: 0644]
queue-3.18/exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch [new file with mode: 0644]
queue-3.18/mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch [new file with mode: 0644]
queue-3.18/mm-propagate-error-from-stack-expansion-even-for-guard-page.patch [new file with mode: 0644]
queue-3.18/mm-protect-set_page_dirty-from-ongoing-truncation.patch [new file with mode: 0644]
queue-3.18/mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch [new file with mode: 0644]
queue-3.18/mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch [new file with mode: 0644]
queue-3.18/nouveau-bring-back-legacy-mmap-handler.patch [new file with mode: 0644]
queue-3.18/regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch [new file with mode: 0644]
queue-3.18/series
queue-3.18/spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch [new file with mode: 0644]

diff --git a/queue-3.18/drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch b/queue-3.18/drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch
new file mode 100644 (file)
index 0000000..b9930a6
--- /dev/null
@@ -0,0 +1,61 @@
+From ff4c0d5213b015e60aa87c1352604f10ba9c3e12 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= <bonbons@linux-vserver.org>
+Date: Sun, 21 Dec 2014 17:43:31 +0100
+Subject: drm/nouveau/nouveau: Do not BUG_ON(!spin_is_locked()) on UP
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= <bonbons@linux-vserver.org>
+
+commit ff4c0d5213b015e60aa87c1352604f10ba9c3e12 upstream.
+
+On !SMP systems spinlocks do not exist. Thus checking if they
+are active will always fail.
+
+Use
+  assert_spin_locked(lock);
+instead of
+  BUG_ON(!spin_is_locked(lock));
+to not BUG() on all UP systems.
+
+Signed-off-by: Bruno Prémont <bonbons@linux-vserver.org>
+Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/nouveau/core/core/event.c  |    4 ++--
+ drivers/gpu/drm/nouveau/core/core/notify.c |    2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/nouveau/core/core/event.c
++++ b/drivers/gpu/drm/nouveau/core/core/event.c
+@@ -26,7 +26,7 @@
+ void
+ nvkm_event_put(struct nvkm_event *event, u32 types, int index)
+ {
+-      BUG_ON(!spin_is_locked(&event->refs_lock));
++      assert_spin_locked(&event->refs_lock);
+       while (types) {
+               int type = __ffs(types); types &= ~(1 << type);
+               if (--event->refs[index * event->types_nr + type] == 0) {
+@@ -39,7 +39,7 @@ nvkm_event_put(struct nvkm_event *event,
+ void
+ nvkm_event_get(struct nvkm_event *event, u32 types, int index)
+ {
+-      BUG_ON(!spin_is_locked(&event->refs_lock));
++      assert_spin_locked(&event->refs_lock);
+       while (types) {
+               int type = __ffs(types); types &= ~(1 << type);
+               if (++event->refs[index * event->types_nr + type] == 1) {
+--- a/drivers/gpu/drm/nouveau/core/core/notify.c
++++ b/drivers/gpu/drm/nouveau/core/core/notify.c
+@@ -98,7 +98,7 @@ nvkm_notify_send(struct nvkm_notify *not
+       struct nvkm_event *event = notify->event;
+       unsigned long flags;
+-      BUG_ON(!spin_is_locked(&event->list_lock));
++      assert_spin_locked(&event->list_lock);
+       BUG_ON(size != notify->size);
+       spin_lock_irqsave(&event->refs_lock, flags);
diff --git a/queue-3.18/exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch b/queue-3.18/exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch
new file mode 100644 (file)
index 0000000..e51b5a5
--- /dev/null
@@ -0,0 +1,71 @@
+From 3245d6acab981a2388ffb877c7ecc97e763c59d4 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Thu, 8 Jan 2015 14:32:12 -0800
+Subject: exit: fix race between wait_consider_task() and wait_task_zombie()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 3245d6acab981a2388ffb877c7ecc97e763c59d4 upstream.
+
+wait_consider_task() checks EXIT_ZOMBIE after EXIT_DEAD/EXIT_TRACE and
+both checks can fail if we race with EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE
+change in between, gcc needs to reload p->exit_state after
+security_task_wait().  In this case ->notask_error will be wrongly
+cleared and do_wait() can hang forever if it was the last eligible
+child.
+
+Many thanks to Arne who carefully investigated the problem.
+
+Note: this bug is very old but it was pure theoretical until commit
+b3ab03160dfa ("wait: completely ignore the EXIT_DEAD tasks").  Before
+this commit "-O2" was probably enough to guarantee that compiler won't
+read ->exit_state twice.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Arne Goedeke <el@laramies.com>
+Tested-by: Arne Goedeke <el@laramies.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/exit.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1302,9 +1302,15 @@ static int wait_task_continued(struct wa
+ static int wait_consider_task(struct wait_opts *wo, int ptrace,
+                               struct task_struct *p)
+ {
++      /*
++       * We can race with wait_task_zombie() from another thread.
++       * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
++       * can't confuse the checks below.
++       */
++      int exit_state = ACCESS_ONCE(p->exit_state);
+       int ret;
+-      if (unlikely(p->exit_state == EXIT_DEAD))
++      if (unlikely(exit_state == EXIT_DEAD))
+               return 0;
+       ret = eligible_child(wo, p);
+@@ -1325,7 +1331,7 @@ static int wait_consider_task(struct wai
+               return 0;
+       }
+-      if (unlikely(p->exit_state == EXIT_TRACE)) {
++      if (unlikely(exit_state == EXIT_TRACE)) {
+               /*
+                * ptrace == 0 means we are the natural parent. In this case
+                * we should clear notask_error, debugger will notify us.
+@@ -1352,7 +1358,7 @@ static int wait_consider_task(struct wai
+       }
+       /* slay zombie? */
+-      if (p->exit_state == EXIT_ZOMBIE) {
++      if (exit_state == EXIT_ZOMBIE) {
+               /* we don't reap group leaders with subthreads */
+               if (!delay_group_leader(p)) {
+                       /*
diff --git a/queue-3.18/mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch b/queue-3.18/mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch
new file mode 100644 (file)
index 0000000..e4c2a4a
--- /dev/null
@@ -0,0 +1,54 @@
+From 690eac53daff34169a4d74fc7bfbd388c4896abb Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 11 Jan 2015 11:33:57 -0800
+Subject: mm: Don't count the stack guard page towards RLIMIT_STACK
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 690eac53daff34169a4d74fc7bfbd388c4896abb upstream.
+
+Commit fee7e49d4514 ("mm: propagate error from stack expansion even for
+guard page") made sure that we return the error properly for stack
+growth conditions.  It also theorized that counting the guard page
+towards the stack limit might break something, but also said "Let's see
+if anybody notices".
+
+Somebody did notice.  Apparently android-x86 sets the stack limit very
+close to the limit indeed, and including the guard page in the rlimit
+check causes the android 'zygote' process problems.
+
+So this adds the (fairly trivial) code to make the stack rlimit check be
+against the actual real stack size, rather than the size of the vma that
+includes the guard page.
+
+Reported-and-tested-by: Chih-Wei Huang <cwhuang@android-x86.org>
+Cc: Jay Foad <jay.foad@gmail.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/mmap.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -2099,14 +2099,17 @@ static int acct_stack_growth(struct vm_a
+ {
+       struct mm_struct *mm = vma->vm_mm;
+       struct rlimit *rlim = current->signal->rlim;
+-      unsigned long new_start;
++      unsigned long new_start, actual_size;
+       /* address space limit tests */
+       if (!may_expand_vm(mm, grow))
+               return -ENOMEM;
+       /* Stack limit test */
+-      if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
++      actual_size = size;
++      if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
++              actual_size -= PAGE_SIZE;
++      if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+               return -ENOMEM;
+       /* mlock limit tests */
diff --git a/queue-3.18/mm-propagate-error-from-stack-expansion-even-for-guard-page.patch b/queue-3.18/mm-propagate-error-from-stack-expansion-even-for-guard-page.patch
new file mode 100644 (file)
index 0000000..4ce03c7
--- /dev/null
@@ -0,0 +1,70 @@
+From fee7e49d45149fba60156f5b59014f764d3e3728 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 6 Jan 2015 13:00:05 -0800
+Subject: mm: propagate error from stack expansion even for guard page
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit fee7e49d45149fba60156f5b59014f764d3e3728 upstream.
+
+Jay Foad reports that the address sanitizer test (asan) sometimes gets
+confused by a stack pointer that ends up being outside the stack vma
+that is reported by /proc/maps.
+
+This happens due to an interaction between RLIMIT_STACK and the guard
+page: when we do the guard page check, we ignore the potential error
+from the stack expansion, which effectively results in a missing guard
+page, since the expected stack expansion won't have been done.
+
+And since /proc/maps explicitly ignores the guard page (commit
+d7824370e263: "mm: fix up some user-visible effects of the stack guard
+page"), the stack pointer ends up being outside the reported stack area.
+
+This is the minimal patch: it just propagates the error.  It also
+effectively makes the guard page part of the stack limit, which in turn
+means that the actual real stack is one page less than the stack limit.
+
+Let's see if anybody notices.  We could teach acct_stack_growth() to
+allow an extra page for a grow-up/grow-down stack in the rlimit test,
+but I don't want to add more complexity if it isn't needed.
+
+Reported-and-tested-by: Jay Foad <jay.foad@gmail.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/mm.h |    2 +-
+ mm/memory.c        |    4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1936,7 +1936,7 @@ extern int expand_downwards(struct vm_ar
+ #if VM_GROWSUP
+ extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
+ #else
+-  #define expand_upwards(vma, address) do { } while (0)
++  #define expand_upwards(vma, address) (0)
+ #endif
+ /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2613,7 +2613,7 @@ static inline int check_stack_guard_page
+               if (prev && prev->vm_end == address)
+                       return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+-              expand_downwards(vma, address - PAGE_SIZE);
++              return expand_downwards(vma, address - PAGE_SIZE);
+       }
+       if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+               struct vm_area_struct *next = vma->vm_next;
+@@ -2622,7 +2622,7 @@ static inline int check_stack_guard_page
+               if (next && next->vm_start == address + PAGE_SIZE)
+                       return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+-              expand_upwards(vma, address + PAGE_SIZE);
++              return expand_upwards(vma, address + PAGE_SIZE);
+       }
+       return 0;
+ }
diff --git a/queue-3.18/mm-protect-set_page_dirty-from-ongoing-truncation.patch b/queue-3.18/mm-protect-set_page_dirty-from-ongoing-truncation.patch
new file mode 100644 (file)
index 0000000..1b0e00d
--- /dev/null
@@ -0,0 +1,175 @@
+From 2d6d7f98284648c5ed113fe22a132148950b140f Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Thu, 8 Jan 2015 14:32:18 -0800
+Subject: mm: protect set_page_dirty() from ongoing truncation
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 2d6d7f98284648c5ed113fe22a132148950b140f upstream.
+
+Tejun, while reviewing the code, spotted the following race condition
+between the dirtying and truncation of a page:
+
+__set_page_dirty_nobuffers()       __delete_from_page_cache()
+  if (TestSetPageDirty(page))
+                                     page->mapping = NULL
+                                    if (PageDirty())
+                                      dec_zone_page_state(page, NR_FILE_DIRTY);
+                                      dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+    if (page->mapping)
+      account_page_dirtied(page)
+        __inc_zone_page_state(page, NR_FILE_DIRTY);
+       __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+
+which results in an imbalance of NR_FILE_DIRTY and BDI_RECLAIMABLE.
+
+Dirtiers usually lock out truncation, either by holding the page lock
+directly, or in case of zap_pte_range(), by pinning the mapcount with
+the page table lock held.  The notable exception to this rule, though,
+is do_wp_page(), for which this race exists.  However, do_wp_page()
+already waits for a locked page to unlock before setting the dirty bit,
+in order to prevent a race where clear_page_dirty() misses the page bit
+in the presence of dirty ptes.  Upgrade that wait to a fully locked
+set_page_dirty() to also cover the situation explained above.
+
+Afterwards, the code in set_page_dirty() dealing with a truncation race
+is no longer needed.  Remove it.
+
+Reported-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/writeback.h |    1 -
+ mm/memory.c               |   27 +++++++++++++++++----------
+ mm/page-writeback.c       |   43 ++++++++++++-------------------------------
+ 3 files changed, 29 insertions(+), 42 deletions(-)
+
+--- a/include/linux/writeback.h
++++ b/include/linux/writeback.h
+@@ -177,7 +177,6 @@ int write_cache_pages(struct address_spa
+                     struct writeback_control *wbc, writepage_t writepage,
+                     void *data);
+ int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
+-void set_page_dirty_balance(struct page *page);
+ void writeback_set_ratelimit(void);
+ void tag_pages_for_writeback(struct address_space *mapping,
+                            pgoff_t start, pgoff_t end);
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2150,17 +2150,24 @@ reuse:
+               if (!dirty_page)
+                       return ret;
+-              /*
+-               * Yes, Virginia, this is actually required to prevent a race
+-               * with clear_page_dirty_for_io() from clearing the page dirty
+-               * bit after it clear all dirty ptes, but before a racing
+-               * do_wp_page installs a dirty pte.
+-               *
+-               * do_shared_fault is protected similarly.
+-               */
+               if (!page_mkwrite) {
+-                      wait_on_page_locked(dirty_page);
+-                      set_page_dirty_balance(dirty_page);
++                      struct address_space *mapping;
++                      int dirtied;
++
++                      lock_page(dirty_page);
++                      dirtied = set_page_dirty(dirty_page);
++                      VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
++                      mapping = dirty_page->mapping;
++                      unlock_page(dirty_page);
++
++                      if (dirtied && mapping) {
++                              /*
++                               * Some device drivers do not set page.mapping
++                               * but still dirty their pages
++                               */
++                              balance_dirty_pages_ratelimited(mapping);
++                      }
++
+                       /* file_update_time outside page_lock */
+                       if (vma->vm_file)
+                               file_update_time(vma->vm_file);
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -1541,16 +1541,6 @@ pause:
+               bdi_start_background_writeback(bdi);
+ }
+-void set_page_dirty_balance(struct page *page)
+-{
+-      if (set_page_dirty(page)) {
+-              struct address_space *mapping = page_mapping(page);
+-
+-              if (mapping)
+-                      balance_dirty_pages_ratelimited(mapping);
+-      }
+-}
+-
+ static DEFINE_PER_CPU(int, bdp_ratelimits);
+ /*
+@@ -2123,32 +2113,25 @@ EXPORT_SYMBOL(account_page_dirtied);
+  * page dirty in that case, but not all the buffers.  This is a "bottom-up"
+  * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
+  *
+- * Most callers have locked the page, which pins the address_space in memory.
+- * But zap_pte_range() does not lock the page, however in that case the
+- * mapping is pinned by the vma's ->vm_file reference.
+- *
+- * We take care to handle the case where the page was truncated from the
+- * mapping by re-checking page_mapping() inside tree_lock.
++ * The caller must ensure this doesn't race with truncation.  Most will simply
++ * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
++ * the pte lock held, which also locks out truncation.
+  */
+ int __set_page_dirty_nobuffers(struct page *page)
+ {
+       if (!TestSetPageDirty(page)) {
+               struct address_space *mapping = page_mapping(page);
+-              struct address_space *mapping2;
+               unsigned long flags;
+               if (!mapping)
+                       return 1;
+               spin_lock_irqsave(&mapping->tree_lock, flags);
+-              mapping2 = page_mapping(page);
+-              if (mapping2) { /* Race with truncate? */
+-                      BUG_ON(mapping2 != mapping);
+-                      WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+-                      account_page_dirtied(page, mapping);
+-                      radix_tree_tag_set(&mapping->page_tree,
+-                              page_index(page), PAGECACHE_TAG_DIRTY);
+-              }
++              BUG_ON(page_mapping(page) != mapping);
++              WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
++              account_page_dirtied(page, mapping);
++              radix_tree_tag_set(&mapping->page_tree, page_index(page),
++                                 PAGECACHE_TAG_DIRTY);
+               spin_unlock_irqrestore(&mapping->tree_lock, flags);
+               if (mapping->host) {
+                       /* !PageAnon && !swapper_space */
+@@ -2305,12 +2288,10 @@ int clear_page_dirty_for_io(struct page
+               /*
+                * We carefully synchronise fault handlers against
+                * installing a dirty pte and marking the page dirty
+-               * at this point. We do this by having them hold the
+-               * page lock at some point after installing their
+-               * pte, but before marking the page dirty.
+-               * Pages are always locked coming in here, so we get
+-               * the desired exclusion. See mm/memory.c:do_wp_page()
+-               * for more comments.
++               * at this point.  We do this by having them hold the
++               * page lock while dirtying the page, and pages are
++               * always locked coming in here, so we get the desired
++               * exclusion.
+                */
+               if (TestClearPageDirty(page)) {
+                       dec_zone_page_state(page, NR_FILE_DIRTY);
diff --git a/queue-3.18/mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch b/queue-3.18/mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch
new file mode 100644 (file)
index 0000000..7965774
--- /dev/null
@@ -0,0 +1,109 @@
+From 9e5e3661727eaf960d3480213f8e87c8d67b6956 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Thu, 8 Jan 2015 14:32:40 -0800
+Subject: mm, vmscan: prevent kswapd livelock due to pfmemalloc-throttled process being killed
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 9e5e3661727eaf960d3480213f8e87c8d67b6956 upstream.
+
+Charles Shirron and Paul Cassella from Cray Inc have reported kswapd
+stuck in a busy loop with nothing left to balance, but
+kswapd_try_to_sleep() failing to sleep.  Their analysis found the cause
+to be a combination of several factors:
+
+1. A process is waiting in throttle_direct_reclaim() on pgdat->pfmemalloc_wait
+
+2. The process has been killed (by OOM in this case), but has not yet been
+   scheduled to remove itself from the waitqueue and die.
+
+3. kswapd checks for throttled processes in prepare_kswapd_sleep():
+
+        if (waitqueue_active(&pgdat->pfmemalloc_wait)) {
+                wake_up(&pgdat->pfmemalloc_wait);
+               return false; // kswapd will not go to sleep
+       }
+
+   However, for a process that was already killed, wake_up() does not remove
+   the process from the waitqueue, since try_to_wake_up() checks its state
+   first and returns false when the process is no longer waiting.
+
+4. kswapd is running on the same CPU as the only CPU that the process is
+   allowed to run on (through cpus_allowed, or possibly single-cpu system).
+
+5. CONFIG_PREEMPT_NONE=y kernel is used. If there's nothing to balance, kswapd
+   encounters no voluntary preemption points and repeatedly fails
+   prepare_kswapd_sleep(), blocking the process from running and removing
+   itself from the waitqueue, which would let kswapd sleep.
+
+So, the source of the problem is that we prevent kswapd from going to
+sleep until there are processes waiting on the pfmemalloc_wait queue,
+and a process waiting on a queue is guaranteed to be removed from the
+queue only when it gets scheduled.  This was done to make sure that no
+process is left sleeping on pfmemalloc_wait when kswapd itself goes to
+sleep.
+
+However, it isn't necessary to postpone kswapd sleep until the
+pfmemalloc_wait queue actually empties.  To prevent processes from being
+left sleeping, it's actually enough to guarantee that all processes
+waiting on pfmemalloc_wait queue have been woken up by the time we put
+kswapd to sleep.
+
+This patch therefore fixes this issue by substituting 'wake_up' with
+'wake_up_all' and removing 'return false' in the code snippet from
+prepare_kswapd_sleep() above.  Note that if any process puts itself in
+the queue after this waitqueue_active() check, or after the wake up
+itself, it means that the process will also wake up kswapd - and since
+we are under prepare_to_wait(), the wake up won't be missed.  Also we
+update the comment prepare_kswapd_sleep() to hopefully more clearly
+describe the races it is preventing.
+
+Fixes: 5515061d22f0 ("mm: throttle direct reclaimers if PF_MEMALLOC reserves are low and swap is backed by network storage")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Michal Hocko <mhocko@suse.cz>
+Acked-by: Rik van Riel <riel@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/vmscan.c |   24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2904,18 +2904,20 @@ static bool prepare_kswapd_sleep(pg_data
+               return false;
+       /*
+-       * There is a potential race between when kswapd checks its watermarks
+-       * and a process gets throttled. There is also a potential race if
+-       * processes get throttled, kswapd wakes, a large process exits therby
+-       * balancing the zones that causes kswapd to miss a wakeup. If kswapd
+-       * is going to sleep, no process should be sleeping on pfmemalloc_wait
+-       * so wake them now if necessary. If necessary, processes will wake
+-       * kswapd and get throttled again
++       * The throttled processes are normally woken up in balance_pgdat() as
++       * soon as pfmemalloc_watermark_ok() is true. But there is a potential
++       * race between when kswapd checks the watermarks and a process gets
++       * throttled. There is also a potential race if processes get
++       * throttled, kswapd wakes, a large process exits thereby balancing the
++       * zones, which causes kswapd to exit balance_pgdat() before reaching
++       * the wake up checks. If kswapd is going to sleep, no process should
++       * be sleeping on pfmemalloc_wait, so wake them now if necessary. If
++       * the wake up is premature, processes will wake kswapd and get
++       * throttled again. The difference from wake ups in balance_pgdat() is
++       * that here we are under prepare_to_wait().
+        */
+-      if (waitqueue_active(&pgdat->pfmemalloc_wait)) {
+-              wake_up(&pgdat->pfmemalloc_wait);
+-              return false;
+-      }
++      if (waitqueue_active(&pgdat->pfmemalloc_wait))
++              wake_up_all(&pgdat->pfmemalloc_wait);
+       return pgdat_balanced(pgdat, order, classzone_idx);
+ }
diff --git a/queue-3.18/mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch b/queue-3.18/mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch
new file mode 100644 (file)
index 0000000..1c2dfdf
--- /dev/null
@@ -0,0 +1,98 @@
+From 2836766a9d0bd02c66073f8dd44796e6cc23848d Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Date: Mon, 5 Jan 2015 10:50:15 +0100
+Subject: mmc: sdhci: Fix sleep in atomic after inserting SD card
+
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+
+commit 2836766a9d0bd02c66073f8dd44796e6cc23848d upstream.
+
+Sleep in atomic context happened on Trats2 board after inserting or
+removing SD card because mmc_gpio_get_cd() was called under spin lock.
+
+Fix this by moving card detection earlier, before acquiring spin lock.
+The mmc_gpio_get_cd() call does not have to be protected by spin lock
+because it does not access any sdhci internal data.
+The sdhci_do_get_cd() call access host flags (SDHCI_DEVICE_DEAD). After
+moving it out side of spin lock it could theoretically race with driver
+removal but still there is no actual protection against manual card
+eject.
+
+Dmesg after inserting SD card:
+[   41.663414] BUG: sleeping function called from invalid context at drivers/gpio/gpiolib.c:1511
+[   41.670469] in_atomic(): 1, irqs_disabled(): 128, pid: 30, name: kworker/u8:1
+[   41.677580] INFO: lockdep is turned off.
+[   41.681486] irq event stamp: 61972
+[   41.684872] hardirqs last  enabled at (61971): [<c0490ee0>] _raw_spin_unlock_irq+0x24/0x5c
+[   41.693118] hardirqs last disabled at (61972): [<c04907ac>] _raw_spin_lock_irq+0x18/0x54
+[   41.701190] softirqs last  enabled at (61648): [<c0026fd4>] __do_softirq+0x234/0x2c8
+[   41.708914] softirqs last disabled at (61631): [<c00273a0>] irq_exit+0xd0/0x114
+[   41.716206] Preemption disabled at:[<  (null)>]   (null)
+[   41.721500]
+[   41.722985] CPU: 3 PID: 30 Comm: kworker/u8:1 Tainted: G        W      3.18.0-rc5-next-20141121 #883
+[   41.732111] Workqueue: kmmcd mmc_rescan
+[   41.735945] [<c0014d2c>] (unwind_backtrace) from [<c0011c80>] (show_stack+0x10/0x14)
+[   41.743661] [<c0011c80>] (show_stack) from [<c0489d14>] (dump_stack+0x70/0xbc)
+[   41.750867] [<c0489d14>] (dump_stack) from [<c0228b74>] (gpiod_get_raw_value_cansleep+0x18/0x30)
+[   41.759628] [<c0228b74>] (gpiod_get_raw_value_cansleep) from [<c03646e8>] (mmc_gpio_get_cd+0x38/0x58)
+[   41.768821] [<c03646e8>] (mmc_gpio_get_cd) from [<c036d378>] (sdhci_request+0x50/0x1a4)
+[   41.776808] [<c036d378>] (sdhci_request) from [<c0357934>] (mmc_start_request+0x138/0x268)
+[   41.785051] [<c0357934>] (mmc_start_request) from [<c0357cc8>] (mmc_wait_for_req+0x58/0x1a0)
+[   41.793469] [<c0357cc8>] (mmc_wait_for_req) from [<c0357e68>] (mmc_wait_for_cmd+0x58/0x78)
+[   41.801714] [<c0357e68>] (mmc_wait_for_cmd) from [<c0361c00>] (mmc_io_rw_direct_host+0x98/0x124)
+[   41.810480] [<c0361c00>] (mmc_io_rw_direct_host) from [<c03620f8>] (sdio_reset+0x2c/0x64)
+[   41.818641] [<c03620f8>] (sdio_reset) from [<c035a3d8>] (mmc_rescan+0x254/0x2e4)
+[   41.826028] [<c035a3d8>] (mmc_rescan) from [<c003a0e0>] (process_one_work+0x180/0x3f4)
+[   41.833920] [<c003a0e0>] (process_one_work) from [<c003a3bc>] (worker_thread+0x34/0x4b0)
+[   41.841991] [<c003a3bc>] (worker_thread) from [<c003fed8>] (kthread+0xe4/0x104)
+[   41.849285] [<c003fed8>] (kthread) from [<c000f268>] (ret_from_fork+0x14/0x2c)
+[   42.038276] mmc0: new high speed SDHC card at address 1234
+
+Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Fixes: 94144a465dd0 ("mmc: sdhci: add get_cd() implementation")
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/mmc/host/sdhci.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/mmc/host/sdhci.c
++++ b/drivers/mmc/host/sdhci.c
+@@ -1319,6 +1319,8 @@ static void sdhci_request(struct mmc_hos
+       sdhci_runtime_pm_get(host);
++      present = mmc_gpio_get_cd(host->mmc);
++
+       spin_lock_irqsave(&host->lock, flags);
+       WARN_ON(host->mrq != NULL);
+@@ -1347,7 +1349,6 @@ static void sdhci_request(struct mmc_hos
+        *     zero: cd-gpio is used, and card is removed
+        *     one: cd-gpio is used, and card is present
+        */
+-      present = mmc_gpio_get_cd(host->mmc);
+       if (present < 0) {
+               /* If polling, assume that the card is always present. */
+               if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
+@@ -2072,15 +2073,18 @@ static void sdhci_card_event(struct mmc_
+ {
+       struct sdhci_host *host = mmc_priv(mmc);
+       unsigned long flags;
++      int present;
+       /* First check if client has provided their own card event */
+       if (host->ops->card_event)
+               host->ops->card_event(host);
++      present = sdhci_do_get_cd(host);
++
+       spin_lock_irqsave(&host->lock, flags);
+       /* Check host->mrq first in case we are runtime suspended */
+-      if (host->mrq && !sdhci_do_get_cd(host)) {
++      if (host->mrq && !present) {
+               pr_err("%s: Card removed during transfer!\n",
+                       mmc_hostname(host->mmc));
+               pr_err("%s: Resetting controller.\n",
diff --git a/queue-3.18/nouveau-bring-back-legacy-mmap-handler.patch b/queue-3.18/nouveau-bring-back-legacy-mmap-handler.patch
new file mode 100644 (file)
index 0000000..459aa1b
--- /dev/null
@@ -0,0 +1,45 @@
+From 2036eaa74031b11028ee8fc1f44f128fdc871dda Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Tue, 16 Dec 2014 16:33:09 +1000
+Subject: nouveau: bring back legacy mmap handler
+
+From: Dave Airlie <airlied@redhat.com>
+
+commit 2036eaa74031b11028ee8fc1f44f128fdc871dda upstream.
+
+nouveau userspace back at 1.0.1 used to call the X server
+DRIOpenDRMMaster interface even for DRI2 (doh!), this attempts
+to map the sarea and fails if it can't.
+
+Since 884c6dabb0eafe7227f099c9e78e514191efaf13 from Daniel,
+this fails, but only ancient drivers would see it.
+
+Revert the nouveau bits of that fix.
+
+Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/nouveau/nouveau_ttm.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
++++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
+@@ -28,6 +28,7 @@
+ #include "nouveau_ttm.h"
+ #include "nouveau_gem.h"
++#include "drm_legacy.h"
+ static int
+ nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
+ {
+@@ -281,7 +282,7 @@ nouveau_ttm_mmap(struct file *filp, stru
+       struct nouveau_drm *drm = nouveau_drm(file_priv->minor->dev);
+       if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
+-              return -EINVAL;
++              return drm_legacy_mmap(filp, vma);
+       return ttm_bo_mmap(filp, vma, &drm->ttm.bdev);
+ }
diff --git a/queue-3.18/regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch b/queue-3.18/regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch
new file mode 100644 (file)
index 0000000..fd2eb1b
--- /dev/null
@@ -0,0 +1,78 @@
+From 1222d8fe578cd28a6c7f5e4e6c6b664c56abfdc0 Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Date: Thu, 11 Dec 2014 14:40:21 +0100
+Subject: regulator: s2mps11: Fix dw_mmc failure on Gear 2
+
+From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+
+commit 1222d8fe578cd28a6c7f5e4e6c6b664c56abfdc0 upstream.
+
+Invalid buck4 configuration for linear mapping of voltage in S2MPS14
+regulators caused boot failure on Gear 2 (dw_mmc-exynos):
+
+[    3.569137] EXT4-fs (mmcblk0p15): mounted filesystem with ordered data mode. Opts: (null)
+[    3.571716] VFS: Mounted root (ext4 filesystem) readonly on device 179:15.
+[    3.629842] mmcblk0: error -110 sending status command, retrying
+[    3.630244] mmcblk0: error -110 sending status command, retrying
+[    3.636292] mmcblk0: error -110 sending status command, aborting
+
+Buck4 voltage regulator has different minimal voltage value than other
+bucks. Commit merging multiple regulator description macros caused to
+use linear_min_sel from buck[1235] regulators as value for buck4. This
+lead to lower voltage of buck4 than required.
+
+Output of the buck4 is used internally as power source for
+LDO{3,4,7,11,19,20,21,23}. On Gear 2 board LDO11 is used as MMC
+regulator (V_EMMC_1.8V).
+
+Fixes: 5a867cf28893 ("regulator: s2mps11: Optimize the regulator description macro")
+Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/regulator/s2mps11.c |   19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/drivers/regulator/s2mps11.c
++++ b/drivers/regulator/s2mps11.c
+@@ -479,7 +479,7 @@ static struct regulator_ops s2mps14_reg_
+       .enable_mask    = S2MPS14_ENABLE_MASK           \
+ }
+-#define regulator_desc_s2mps14_buck(num, min, step) {         \
++#define regulator_desc_s2mps14_buck(num, min, step, min_sel) {        \
+       .name           = "BUCK"#num,                           \
+       .id             = S2MPS14_BUCK##num,                    \
+       .ops            = &s2mps14_reg_ops,                     \
+@@ -488,7 +488,7 @@ static struct regulator_ops s2mps14_reg_
+       .min_uV         = min,                                  \
+       .uV_step        = step,                                 \
+       .n_voltages     = S2MPS14_BUCK_N_VOLTAGES,              \
+-      .linear_min_sel = S2MPS14_BUCK1235_START_SEL,           \
++      .linear_min_sel = min_sel,                              \
+       .ramp_delay     = S2MPS14_BUCK_RAMP_DELAY,              \
+       .vsel_reg       = S2MPS14_REG_B1CTRL2 + (num - 1) * 2,  \
+       .vsel_mask      = S2MPS14_BUCK_VSEL_MASK,               \
+@@ -522,11 +522,16 @@ static const struct regulator_desc s2mps
+       regulator_desc_s2mps14_ldo(23, MIN_800_MV, STEP_25_MV),
+       regulator_desc_s2mps14_ldo(24, MIN_1800_MV, STEP_25_MV),
+       regulator_desc_s2mps14_ldo(25, MIN_1800_MV, STEP_25_MV),
+-      regulator_desc_s2mps14_buck(1, MIN_600_MV, STEP_6_25_MV),
+-      regulator_desc_s2mps14_buck(2, MIN_600_MV, STEP_6_25_MV),
+-      regulator_desc_s2mps14_buck(3, MIN_600_MV, STEP_6_25_MV),
+-      regulator_desc_s2mps14_buck(4, MIN_1400_MV, STEP_12_5_MV),
+-      regulator_desc_s2mps14_buck(5, MIN_600_MV, STEP_6_25_MV),
++      regulator_desc_s2mps14_buck(1, MIN_600_MV, STEP_6_25_MV,
++                                  S2MPS14_BUCK1235_START_SEL),
++      regulator_desc_s2mps14_buck(2, MIN_600_MV, STEP_6_25_MV,
++                                  S2MPS14_BUCK1235_START_SEL),
++      regulator_desc_s2mps14_buck(3, MIN_600_MV, STEP_6_25_MV,
++                                  S2MPS14_BUCK1235_START_SEL),
++      regulator_desc_s2mps14_buck(4, MIN_1400_MV, STEP_12_5_MV,
++                                  S2MPS14_BUCK4_START_SEL),
++      regulator_desc_s2mps14_buck(5, MIN_600_MV, STEP_6_25_MV,
++                                  S2MPS14_BUCK1235_START_SEL),
+ };
+ static int s2mps14_pmic_enable_ext_control(struct s2mps11_info *s2mps11,
index 8151e69ae55d87a0f5e0e1d68aa5628dea680202..4da0f542fee50c2efe0113bb4fff51f94090d2d3 100644 (file)
@@ -138,3 +138,13 @@ perf-x86-intel-uncore-make-sure-only-uncore-events-are-collected.patch
 perf-fix-events-installation-during-moving-group.patch
 perf-x86-uncore-hsw-ep-handle-systems-with-only-two-sboxes.patch
 perf-session-do-not-fail-on-processing-out-of-order-event.patch
+spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch
+drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch
+nouveau-bring-back-legacy-mmap-handler.patch
+regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch
+mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch
+exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch
+mm-protect-set_page_dirty-from-ongoing-truncation.patch
+mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch
+mm-propagate-error-from-stack-expansion-even-for-guard-page.patch
+mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch
diff --git a/queue-3.18/spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch b/queue-3.18/spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch
new file mode 100644 (file)
index 0000000..70bb872
--- /dev/null
@@ -0,0 +1,44 @@
+From 015760563ec77bf17cec712fa94afdf53b285287 Mon Sep 17 00:00:00 2001
+From: Hisashi Nakamura <hisashi.nakamura.ak@renesas.com>
+Date: Mon, 15 Dec 2014 23:01:11 +0900
+Subject: spi: sh-msiof: Add runtime PM lock in initializing
+
+From: Hisashi Nakamura <hisashi.nakamura.ak@renesas.com>
+
+commit 015760563ec77bf17cec712fa94afdf53b285287 upstream.
+
+SH-MSIOF driver is enabled autosuspend API of spi framework.
+But autosuspend framework doesn't work during initializing.
+So runtime PM lock is added in SH-MSIOF driver initializing.
+
+Fixes: e2a0ba547ba31c (spi: sh-msiof: Convert to spi core auto_runtime_pm framework)
+Signed-off-by: Hisashi Nakamura <hisashi.nakamura.ak@renesas.com>
+Signed-off-by: Yoshihiro Kaneko <ykaneko0929@gmail.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/spi/spi-sh-msiof.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/spi/spi-sh-msiof.c
++++ b/drivers/spi/spi-sh-msiof.c
+@@ -480,6 +480,8 @@ static int sh_msiof_spi_setup(struct spi
+       struct device_node      *np = spi->master->dev.of_node;
+       struct sh_msiof_spi_priv *p = spi_master_get_devdata(spi->master);
++      pm_runtime_get_sync(&p->pdev->dev);
++
+       if (!np) {
+               /*
+                * Use spi->controller_data for CS (same strategy as spi_gpio),
+@@ -498,6 +500,9 @@ static int sh_msiof_spi_setup(struct spi
+       if (spi->cs_gpio >= 0)
+               gpio_set_value(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
++
++      pm_runtime_put_sync(&p->pdev->dev);
++
+       return 0;
+ }