From 99fd615686000d2f49199030f43284dd79587e50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 13 Jan 2015 22:55:11 -0800 Subject: [PATCH] 3.18-stable patches added patches: drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch mm-propagate-error-from-stack-expansion-even-for-guard-page.patch mm-protect-set_page_dirty-from-ongoing-truncation.patch mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch nouveau-bring-back-legacy-mmap-handler.patch regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch --- ...u-do-not-bug_on-spin_is_locked-on-up.patch | 61 ++++++ ...t_consider_task-and-wait_task_zombie.patch | 71 +++++++ ...tack-guard-page-towards-rlimit_stack.patch | 54 ++++++ ...-stack-expansion-even-for-guard-page.patch | 70 +++++++ ...t_page_dirty-from-ongoing-truncation.patch | 175 ++++++++++++++++++ ...alloc-throttled-process-being-killed.patch | 109 +++++++++++ ...ep-in-atomic-after-inserting-sd-card.patch | 98 ++++++++++ ...uveau-bring-back-legacy-mmap-handler.patch | 45 +++++ ...s2mps11-fix-dw_mmc-failure-on-gear-2.patch | 78 ++++++++ queue-3.18/series | 10 + ...-add-runtime-pm-lock-in-initializing.patch | 44 +++++ 11 files changed, 815 insertions(+) create mode 100644 queue-3.18/drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch create mode 100644 queue-3.18/exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch create mode 100644 queue-3.18/mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch create mode 100644 queue-3.18/mm-propagate-error-from-stack-expansion-even-for-guard-page.patch create mode 100644 queue-3.18/mm-protect-set_page_dirty-from-ongoing-truncation.patch create mode 100644 
queue-3.18/mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch create mode 100644 queue-3.18/mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch create mode 100644 queue-3.18/nouveau-bring-back-legacy-mmap-handler.patch create mode 100644 queue-3.18/regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch create mode 100644 queue-3.18/spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch diff --git a/queue-3.18/drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch b/queue-3.18/drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch new file mode 100644 index 00000000000..b9930a614b5 --- /dev/null +++ b/queue-3.18/drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch @@ -0,0 +1,61 @@ +From ff4c0d5213b015e60aa87c1352604f10ba9c3e12 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= +Date: Sun, 21 Dec 2014 17:43:31 +0100 +Subject: drm/nouveau/nouveau: Do not BUG_ON(!spin_is_locked()) on UP +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= + +commit ff4c0d5213b015e60aa87c1352604f10ba9c3e12 upstream. + +On !SMP systems spinlocks do not exist. Thus checking if they +are active will always fail. + +Use + assert_spin_locked(lock); +instead of + BUG_ON(!spin_is_locked(lock)); +to not BUG() on all UP systems. 
+ +Signed-off-by: Bruno Prémont +Signed-off-by: Ben Skeggs +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/nouveau/core/core/event.c | 4 ++-- + drivers/gpu/drm/nouveau/core/core/notify.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/nouveau/core/core/event.c ++++ b/drivers/gpu/drm/nouveau/core/core/event.c +@@ -26,7 +26,7 @@ + void + nvkm_event_put(struct nvkm_event *event, u32 types, int index) + { +- BUG_ON(!spin_is_locked(&event->refs_lock)); ++ assert_spin_locked(&event->refs_lock); + while (types) { + int type = __ffs(types); types &= ~(1 << type); + if (--event->refs[index * event->types_nr + type] == 0) { +@@ -39,7 +39,7 @@ nvkm_event_put(struct nvkm_event *event, + void + nvkm_event_get(struct nvkm_event *event, u32 types, int index) + { +- BUG_ON(!spin_is_locked(&event->refs_lock)); ++ assert_spin_locked(&event->refs_lock); + while (types) { + int type = __ffs(types); types &= ~(1 << type); + if (++event->refs[index * event->types_nr + type] == 1) { +--- a/drivers/gpu/drm/nouveau/core/core/notify.c ++++ b/drivers/gpu/drm/nouveau/core/core/notify.c +@@ -98,7 +98,7 @@ nvkm_notify_send(struct nvkm_notify *not + struct nvkm_event *event = notify->event; + unsigned long flags; + +- BUG_ON(!spin_is_locked(&event->list_lock)); ++ assert_spin_locked(&event->list_lock); + BUG_ON(size != notify->size); + + spin_lock_irqsave(&event->refs_lock, flags); diff --git a/queue-3.18/exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch b/queue-3.18/exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch new file mode 100644 index 00000000000..e51b5a5cefd --- /dev/null +++ b/queue-3.18/exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch @@ -0,0 +1,71 @@ +From 3245d6acab981a2388ffb877c7ecc97e763c59d4 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Thu, 8 Jan 2015 14:32:12 -0800 +Subject: exit: fix race between wait_consider_task() and wait_task_zombie() + +From: Oleg Nesterov + 
+commit 3245d6acab981a2388ffb877c7ecc97e763c59d4 upstream. + +wait_consider_task() checks EXIT_ZOMBIE after EXIT_DEAD/EXIT_TRACE and +both checks can fail if we race with EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE +change in between, gcc needs to reload p->exit_state after +security_task_wait(). In this case ->notask_error will be wrongly +cleared and do_wait() can hang forever if it was the last eligible +child. + +Many thanks to Arne who carefully investigated the problem. + +Note: this bug is very old but it was purely theoretical until commit +b3ab03160dfa ("wait: completely ignore the EXIT_DEAD tasks"). Before +this commit "-O2" was probably enough to guarantee that the compiler won't +read ->exit_state twice. + +Signed-off-by: Oleg Nesterov +Reported-by: Arne Goedeke +Tested-by: Arne Goedeke +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/exit.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -1302,9 +1302,15 @@ static int wait_task_continued(struct wa + static int wait_consider_task(struct wait_opts *wo, int ptrace, + struct task_struct *p) + { ++ /* ++ * We can race with wait_task_zombie() from another thread. ++ * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition ++ * can't confuse the checks below. ++ */ ++ int exit_state = ACCESS_ONCE(p->exit_state); + int ret; + +- if (unlikely(p->exit_state == EXIT_DEAD)) ++ if (unlikely(exit_state == EXIT_DEAD)) + return 0; + + ret = eligible_child(wo, p); +@@ -1325,7 +1331,7 @@ static int wait_consider_task(struct wai + return 0; + } + +- if (unlikely(p->exit_state == EXIT_TRACE)) { ++ if (unlikely(exit_state == EXIT_TRACE)) { + /* + * ptrace == 0 means we are the natural parent. In this case + * we should clear notask_error, debugger will notify us. +@@ -1352,7 +1358,7 @@ static int wait_consider_task(struct wai + } + + /* slay zombie? 
*/ +- if (p->exit_state == EXIT_ZOMBIE) { ++ if (exit_state == EXIT_ZOMBIE) { + /* we don't reap group leaders with subthreads */ + if (!delay_group_leader(p)) { + /* diff --git a/queue-3.18/mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch b/queue-3.18/mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch new file mode 100644 index 00000000000..e4c2a4a0848 --- /dev/null +++ b/queue-3.18/mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch @@ -0,0 +1,54 @@ +From 690eac53daff34169a4d74fc7bfbd388c4896abb Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sun, 11 Jan 2015 11:33:57 -0800 +Subject: mm: Don't count the stack guard page towards RLIMIT_STACK + +From: Linus Torvalds + +commit 690eac53daff34169a4d74fc7bfbd388c4896abb upstream. + +Commit fee7e49d4514 ("mm: propagate error from stack expansion even for +guard page") made sure that we return the error properly for stack +growth conditions. It also theorized that counting the guard page +towards the stack limit might break something, but also said "Let's see +if anybody notices". + +Somebody did notice. Apparently android-x86 sets the stack limit very +close to the limit indeed, and including the guard page in the rlimit +check causes the android 'zygote' process problems. + +So this adds the (fairly trivial) code to make the stack rlimit check be +against the actual real stack size, rather than the size of the vma that +includes the guard page. 
+ +Reported-and-tested-by: Chih-Wei Huang +Cc: Jay Foad +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/mmap.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2099,14 +2099,17 @@ static int acct_stack_growth(struct vm_a + { + struct mm_struct *mm = vma->vm_mm; + struct rlimit *rlim = current->signal->rlim; +- unsigned long new_start; ++ unsigned long new_start, actual_size; + + /* address space limit tests */ + if (!may_expand_vm(mm, grow)) + return -ENOMEM; + + /* Stack limit test */ +- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) ++ actual_size = size; ++ if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN))) ++ actual_size -= PAGE_SIZE; ++ if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) + return -ENOMEM; + + /* mlock limit tests */ diff --git a/queue-3.18/mm-propagate-error-from-stack-expansion-even-for-guard-page.patch b/queue-3.18/mm-propagate-error-from-stack-expansion-even-for-guard-page.patch new file mode 100644 index 00000000000..4ce03c7b61f --- /dev/null +++ b/queue-3.18/mm-propagate-error-from-stack-expansion-even-for-guard-page.patch @@ -0,0 +1,70 @@ +From fee7e49d45149fba60156f5b59014f764d3e3728 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 6 Jan 2015 13:00:05 -0800 +Subject: mm: propagate error from stack expansion even for guard page + +From: Linus Torvalds + +commit fee7e49d45149fba60156f5b59014f764d3e3728 upstream. + +Jay Foad reports that the address sanitizer test (asan) sometimes gets +confused by a stack pointer that ends up being outside the stack vma +that is reported by /proc/maps. + +This happens due to an interaction between RLIMIT_STACK and the guard +page: when we do the guard page check, we ignore the potential error +from the stack expansion, which effectively results in a missing guard +page, since the expected stack expansion won't have been done. 
+ +And since /proc/maps explicitly ignores the guard page (commit +d7824370e263: "mm: fix up some user-visible effects of the stack guard +page"), the stack pointer ends up being outside the reported stack area. + +This is the minimal patch: it just propagates the error. It also +effectively makes the guard page part of the stack limit, which in turn +means that the actual real stack is one page less than the stack limit. + +Let's see if anybody notices. We could teach acct_stack_growth() to +allow an extra page for a grow-up/grow-down stack in the rlimit test, +but I don't want to add more complexity if it isn't needed. + +Reported-and-tested-by: Jay Foad +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mm.h | 2 +- + mm/memory.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1936,7 +1936,7 @@ extern int expand_downwards(struct vm_ar + #if VM_GROWSUP + extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); + #else +- #define expand_upwards(vma, address) do { } while (0) ++ #define expand_upwards(vma, address) (0) + #endif + + /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2613,7 +2613,7 @@ static inline int check_stack_guard_page + if (prev && prev->vm_end == address) + return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; + +- expand_downwards(vma, address - PAGE_SIZE); ++ return expand_downwards(vma, address - PAGE_SIZE); + } + if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { + struct vm_area_struct *next = vma->vm_next; +@@ -2622,7 +2622,7 @@ static inline int check_stack_guard_page + if (next && next->vm_start == address + PAGE_SIZE) + return next->vm_flags & VM_GROWSUP ? 
0 : -ENOMEM; + +- expand_upwards(vma, address + PAGE_SIZE); ++ return expand_upwards(vma, address + PAGE_SIZE); + } + return 0; + } diff --git a/queue-3.18/mm-protect-set_page_dirty-from-ongoing-truncation.patch b/queue-3.18/mm-protect-set_page_dirty-from-ongoing-truncation.patch new file mode 100644 index 00000000000..1b0e00d072f --- /dev/null +++ b/queue-3.18/mm-protect-set_page_dirty-from-ongoing-truncation.patch @@ -0,0 +1,175 @@ +From 2d6d7f98284648c5ed113fe22a132148950b140f Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Thu, 8 Jan 2015 14:32:18 -0800 +Subject: mm: protect set_page_dirty() from ongoing truncation + +From: Johannes Weiner + +commit 2d6d7f98284648c5ed113fe22a132148950b140f upstream. + +Tejun, while reviewing the code, spotted the following race condition +between the dirtying and truncation of a page: + +__set_page_dirty_nobuffers() __delete_from_page_cache() + if (TestSetPageDirty(page)) + page->mapping = NULL + if (PageDirty()) + dec_zone_page_state(page, NR_FILE_DIRTY); + dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + if (page->mapping) + account_page_dirtied(page) + __inc_zone_page_state(page, NR_FILE_DIRTY); + __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + +which results in an imbalance of NR_FILE_DIRTY and BDI_RECLAIMABLE. + +Dirtiers usually lock out truncation, either by holding the page lock +directly, or in case of zap_pte_range(), by pinning the mapcount with +the page table lock held. The notable exception to this rule, though, +is do_wp_page(), for which this race exists. However, do_wp_page() +already waits for a locked page to unlock before setting the dirty bit, +in order to prevent a race where clear_page_dirty() misses the page bit +in the presence of dirty ptes. Upgrade that wait to a fully locked +set_page_dirty() to also cover the situation explained above. + +Afterwards, the code in set_page_dirty() dealing with a truncation race +is no longer needed. Remove it. 
+ +Reported-by: Tejun Heo +Signed-off-by: Johannes Weiner +Acked-by: Kirill A. Shutemov +Reviewed-by: Jan Kara +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/writeback.h | 1 - + mm/memory.c | 27 +++++++++++++++++---------- + mm/page-writeback.c | 43 ++++++++++++------------------------------- + 3 files changed, 29 insertions(+), 42 deletions(-) + +--- a/include/linux/writeback.h ++++ b/include/linux/writeback.h +@@ -177,7 +177,6 @@ int write_cache_pages(struct address_spa + struct writeback_control *wbc, writepage_t writepage, + void *data); + int do_writepages(struct address_space *mapping, struct writeback_control *wbc); +-void set_page_dirty_balance(struct page *page); + void writeback_set_ratelimit(void); + void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2150,17 +2150,24 @@ reuse: + if (!dirty_page) + return ret; + +- /* +- * Yes, Virginia, this is actually required to prevent a race +- * with clear_page_dirty_for_io() from clearing the page dirty +- * bit after it clear all dirty ptes, but before a racing +- * do_wp_page installs a dirty pte. +- * +- * do_shared_fault is protected similarly. 
+- */ + if (!page_mkwrite) { +- wait_on_page_locked(dirty_page); +- set_page_dirty_balance(dirty_page); ++ struct address_space *mapping; ++ int dirtied; ++ ++ lock_page(dirty_page); ++ dirtied = set_page_dirty(dirty_page); ++ VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page); ++ mapping = dirty_page->mapping; ++ unlock_page(dirty_page); ++ ++ if (dirtied && mapping) { ++ /* ++ * Some device drivers do not set page.mapping ++ * but still dirty their pages ++ */ ++ balance_dirty_pages_ratelimited(mapping); ++ } ++ + /* file_update_time outside page_lock */ + if (vma->vm_file) + file_update_time(vma->vm_file); +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -1541,16 +1541,6 @@ pause: + bdi_start_background_writeback(bdi); + } + +-void set_page_dirty_balance(struct page *page) +-{ +- if (set_page_dirty(page)) { +- struct address_space *mapping = page_mapping(page); +- +- if (mapping) +- balance_dirty_pages_ratelimited(mapping); +- } +-} +- + static DEFINE_PER_CPU(int, bdp_ratelimits); + + /* +@@ -2123,32 +2113,25 @@ EXPORT_SYMBOL(account_page_dirtied); + * page dirty in that case, but not all the buffers. This is a "bottom-up" + * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. + * +- * Most callers have locked the page, which pins the address_space in memory. +- * But zap_pte_range() does not lock the page, however in that case the +- * mapping is pinned by the vma's ->vm_file reference. +- * +- * We take care to handle the case where the page was truncated from the +- * mapping by re-checking page_mapping() inside tree_lock. ++ * The caller must ensure this doesn't race with truncation. Most will simply ++ * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and ++ * the pte lock held, which also locks out truncation. 
+ */ + int __set_page_dirty_nobuffers(struct page *page) + { + if (!TestSetPageDirty(page)) { + struct address_space *mapping = page_mapping(page); +- struct address_space *mapping2; + unsigned long flags; + + if (!mapping) + return 1; + + spin_lock_irqsave(&mapping->tree_lock, flags); +- mapping2 = page_mapping(page); +- if (mapping2) { /* Race with truncate? */ +- BUG_ON(mapping2 != mapping); +- WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); +- account_page_dirtied(page, mapping); +- radix_tree_tag_set(&mapping->page_tree, +- page_index(page), PAGECACHE_TAG_DIRTY); +- } ++ BUG_ON(page_mapping(page) != mapping); ++ WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); ++ account_page_dirtied(page, mapping); ++ radix_tree_tag_set(&mapping->page_tree, page_index(page), ++ PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + if (mapping->host) { + /* !PageAnon && !swapper_space */ +@@ -2305,12 +2288,10 @@ int clear_page_dirty_for_io(struct page + /* + * We carefully synchronise fault handlers against + * installing a dirty pte and marking the page dirty +- * at this point. We do this by having them hold the +- * page lock at some point after installing their +- * pte, but before marking the page dirty. +- * Pages are always locked coming in here, so we get +- * the desired exclusion. See mm/memory.c:do_wp_page() +- * for more comments. ++ * at this point. We do this by having them hold the ++ * page lock while dirtying the page, and pages are ++ * always locked coming in here, so we get the desired ++ * exclusion. 
+ */ + if (TestClearPageDirty(page)) { + dec_zone_page_state(page, NR_FILE_DIRTY); diff --git a/queue-3.18/mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch b/queue-3.18/mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch new file mode 100644 index 00000000000..79657747e8e --- /dev/null +++ b/queue-3.18/mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch @@ -0,0 +1,109 @@ +From 9e5e3661727eaf960d3480213f8e87c8d67b6956 Mon Sep 17 00:00:00 2001 +From: Vlastimil Babka +Date: Thu, 8 Jan 2015 14:32:40 -0800 +Subject: mm, vmscan: prevent kswapd livelock due to pfmemalloc-throttled process being killed + +From: Vlastimil Babka + +commit 9e5e3661727eaf960d3480213f8e87c8d67b6956 upstream. + +Charles Shirron and Paul Cassella from Cray Inc have reported kswapd +stuck in a busy loop with nothing left to balance, but +kswapd_try_to_sleep() failing to sleep. Their analysis found the cause +to be a combination of several factors: + +1. A process is waiting in throttle_direct_reclaim() on pgdat->pfmemalloc_wait + +2. The process has been killed (by OOM in this case), but has not yet been + scheduled to remove itself from the waitqueue and die. + +3. kswapd checks for throttled processes in prepare_kswapd_sleep(): + + if (waitqueue_active(&pgdat->pfmemalloc_wait)) { + wake_up(&pgdat->pfmemalloc_wait); + return false; // kswapd will not go to sleep + } + + However, for a process that was already killed, wake_up() does not remove + the process from the waitqueue, since try_to_wake_up() checks its state + first and returns false when the process is no longer waiting. + +4. kswapd is running on the same CPU as the only CPU that the process is + allowed to run on (through cpus_allowed, or possibly single-cpu system). + +5. CONFIG_PREEMPT_NONE=y kernel is used. 
If there's nothing to balance, kswapd + encounters no voluntary preemption points and repeatedly fails + prepare_kswapd_sleep(), blocking the process from running and removing + itself from the waitqueue, which would let kswapd sleep. + +So, the source of the problem is that we prevent kswapd from going to +sleep until there are processes waiting on the pfmemalloc_wait queue, +and a process waiting on a queue is guaranteed to be removed from the +queue only when it gets scheduled. This was done to make sure that no +process is left sleeping on pfmemalloc_wait when kswapd itself goes to +sleep. + +However, it isn't necessary to postpone kswapd sleep until the +pfmemalloc_wait queue actually empties. To prevent processes from being +left sleeping, it's actually enough to guarantee that all processes +waiting on pfmemalloc_wait queue have been woken up by the time we put +kswapd to sleep. + +This patch therefore fixes this issue by substituting 'wake_up' with +'wake_up_all' and removing 'return false' in the code snippet from +prepare_kswapd_sleep() above. Note that if any process puts itself in +the queue after this waitqueue_active() check, or after the wake up +itself, it means that the process will also wake up kswapd - and since +we are under prepare_to_wait(), the wake up won't be missed. Also we +update the comment prepare_kswapd_sleep() to hopefully more clearly +describe the races it is preventing. 
+ +Fixes: 5515061d22f0 ("mm: throttle direct reclaimers if PF_MEMALLOC reserves are low and swap is backed by network storage") +Signed-off-by: Vlastimil Babka +Signed-off-by: Vladimir Davydov +Cc: Mel Gorman +Cc: Johannes Weiner +Acked-by: Michal Hocko +Acked-by: Rik van Riel +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/vmscan.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2904,18 +2904,20 @@ static bool prepare_kswapd_sleep(pg_data + return false; + + /* +- * There is a potential race between when kswapd checks its watermarks +- * and a process gets throttled. There is also a potential race if +- * processes get throttled, kswapd wakes, a large process exits therby +- * balancing the zones that causes kswapd to miss a wakeup. If kswapd +- * is going to sleep, no process should be sleeping on pfmemalloc_wait +- * so wake them now if necessary. If necessary, processes will wake +- * kswapd and get throttled again ++ * The throttled processes are normally woken up in balance_pgdat() as ++ * soon as pfmemalloc_watermark_ok() is true. But there is a potential ++ * race between when kswapd checks the watermarks and a process gets ++ * throttled. There is also a potential race if processes get ++ * throttled, kswapd wakes, a large process exits thereby balancing the ++ * zones, which causes kswapd to exit balance_pgdat() before reaching ++ * the wake up checks. If kswapd is going to sleep, no process should ++ * be sleeping on pfmemalloc_wait, so wake them now if necessary. If ++ * the wake up is premature, processes will wake kswapd and get ++ * throttled again. The difference from wake ups in balance_pgdat() is ++ * that here we are under prepare_to_wait(). 
+ */ +- if (waitqueue_active(&pgdat->pfmemalloc_wait)) { +- wake_up(&pgdat->pfmemalloc_wait); +- return false; +- } ++ if (waitqueue_active(&pgdat->pfmemalloc_wait)) ++ wake_up_all(&pgdat->pfmemalloc_wait); + + return pgdat_balanced(pgdat, order, classzone_idx); + } diff --git a/queue-3.18/mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch b/queue-3.18/mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch new file mode 100644 index 00000000000..1c2dfdfad9c --- /dev/null +++ b/queue-3.18/mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch @@ -0,0 +1,98 @@ +From 2836766a9d0bd02c66073f8dd44796e6cc23848d Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Mon, 5 Jan 2015 10:50:15 +0100 +Subject: mmc: sdhci: Fix sleep in atomic after inserting SD card + +From: Krzysztof Kozlowski + +commit 2836766a9d0bd02c66073f8dd44796e6cc23848d upstream. + +Sleep in atomic context happened on Trats2 board after inserting or +removing SD card because mmc_gpio_get_cd() was called under spin lock. + +Fix this by moving card detection earlier, before acquiring spin lock. +The mmc_gpio_get_cd() call does not have to be protected by spin lock +because it does not access any sdhci internal data. +The sdhci_do_get_cd() call accesses host flags (SDHCI_DEVICE_DEAD). After +moving it outside of the spin lock it could theoretically race with driver +removal but still there is no actual protection against manual card +eject. + +Dmesg after inserting SD card: +[ 41.663414] BUG: sleeping function called from invalid context at drivers/gpio/gpiolib.c:1511 +[ 41.670469] in_atomic(): 1, irqs_disabled(): 128, pid: 30, name: kworker/u8:1 +[ 41.677580] INFO: lockdep is turned off. 
+[ 41.681486] irq event stamp: 61972 +[ 41.684872] hardirqs last enabled at (61971): [] _raw_spin_unlock_irq+0x24/0x5c +[ 41.693118] hardirqs last disabled at (61972): [] _raw_spin_lock_irq+0x18/0x54 +[ 41.701190] softirqs last enabled at (61648): [] __do_softirq+0x234/0x2c8 +[ 41.708914] softirqs last disabled at (61631): [] irq_exit+0xd0/0x114 +[ 41.716206] Preemption disabled at:[< (null)>] (null) +[ 41.721500] +[ 41.722985] CPU: 3 PID: 30 Comm: kworker/u8:1 Tainted: G W 3.18.0-rc5-next-20141121 #883 +[ 41.732111] Workqueue: kmmcd mmc_rescan +[ 41.735945] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) +[ 41.743661] [] (show_stack) from [] (dump_stack+0x70/0xbc) +[ 41.750867] [] (dump_stack) from [] (gpiod_get_raw_value_cansleep+0x18/0x30) +[ 41.759628] [] (gpiod_get_raw_value_cansleep) from [] (mmc_gpio_get_cd+0x38/0x58) +[ 41.768821] [] (mmc_gpio_get_cd) from [] (sdhci_request+0x50/0x1a4) +[ 41.776808] [] (sdhci_request) from [] (mmc_start_request+0x138/0x268) +[ 41.785051] [] (mmc_start_request) from [] (mmc_wait_for_req+0x58/0x1a0) +[ 41.793469] [] (mmc_wait_for_req) from [] (mmc_wait_for_cmd+0x58/0x78) +[ 41.801714] [] (mmc_wait_for_cmd) from [] (mmc_io_rw_direct_host+0x98/0x124) +[ 41.810480] [] (mmc_io_rw_direct_host) from [] (sdio_reset+0x2c/0x64) +[ 41.818641] [] (sdio_reset) from [] (mmc_rescan+0x254/0x2e4) +[ 41.826028] [] (mmc_rescan) from [] (process_one_work+0x180/0x3f4) +[ 41.833920] [] (process_one_work) from [] (worker_thread+0x34/0x4b0) +[ 41.841991] [] (worker_thread) from [] (kthread+0xe4/0x104) +[ 41.849285] [] (kthread) from [] (ret_from_fork+0x14/0x2c) +[ 42.038276] mmc0: new high speed SDHC card at address 1234 + +Signed-off-by: Krzysztof Kozlowski +Fixes: 94144a465dd0 ("mmc: sdhci: add get_cd() implementation") +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/host/sdhci.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/mmc/host/sdhci.c ++++ b/drivers/mmc/host/sdhci.c 
+@@ -1319,6 +1319,8 @@ static void sdhci_request(struct mmc_hos + + sdhci_runtime_pm_get(host); + ++ present = mmc_gpio_get_cd(host->mmc); ++ + spin_lock_irqsave(&host->lock, flags); + + WARN_ON(host->mrq != NULL); +@@ -1347,7 +1349,6 @@ static void sdhci_request(struct mmc_hos + * zero: cd-gpio is used, and card is removed + * one: cd-gpio is used, and card is present + */ +- present = mmc_gpio_get_cd(host->mmc); + if (present < 0) { + /* If polling, assume that the card is always present. */ + if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) +@@ -2072,15 +2073,18 @@ static void sdhci_card_event(struct mmc_ + { + struct sdhci_host *host = mmc_priv(mmc); + unsigned long flags; ++ int present; + + /* First check if client has provided their own card event */ + if (host->ops->card_event) + host->ops->card_event(host); + ++ present = sdhci_do_get_cd(host); ++ + spin_lock_irqsave(&host->lock, flags); + + /* Check host->mrq first in case we are runtime suspended */ +- if (host->mrq && !sdhci_do_get_cd(host)) { ++ if (host->mrq && !present) { + pr_err("%s: Card removed during transfer!\n", + mmc_hostname(host->mmc)); + pr_err("%s: Resetting controller.\n", diff --git a/queue-3.18/nouveau-bring-back-legacy-mmap-handler.patch b/queue-3.18/nouveau-bring-back-legacy-mmap-handler.patch new file mode 100644 index 00000000000..459aa1b6a91 --- /dev/null +++ b/queue-3.18/nouveau-bring-back-legacy-mmap-handler.patch @@ -0,0 +1,45 @@ +From 2036eaa74031b11028ee8fc1f44f128fdc871dda Mon Sep 17 00:00:00 2001 +From: Dave Airlie +Date: Tue, 16 Dec 2014 16:33:09 +1000 +Subject: nouveau: bring back legacy mmap handler + +From: Dave Airlie + +commit 2036eaa74031b11028ee8fc1f44f128fdc871dda upstream. + +nouveau userspace back at 1.0.1 used to call the X server +DRIOpenDRMMaster interface even for DRI2 (doh!), this attempts +to map the sarea and fails if it can't. + +Since 884c6dabb0eafe7227f099c9e78e514191efaf13 from Daniel, +this fails, but only ancient drivers would see it. 
+ +Revert the nouveau bits of that fix. + +Acked-by: Daniel Vetter +Signed-off-by: Dave Airlie +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/nouveau/nouveau_ttm.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c +@@ -28,6 +28,7 @@ + #include "nouveau_ttm.h" + #include "nouveau_gem.h" + ++#include "drm_legacy.h" + static int + nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize) + { +@@ -281,7 +282,7 @@ nouveau_ttm_mmap(struct file *filp, stru + struct nouveau_drm *drm = nouveau_drm(file_priv->minor->dev); + + if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) +- return -EINVAL; ++ return drm_legacy_mmap(filp, vma); + + return ttm_bo_mmap(filp, vma, &drm->ttm.bdev); + } diff --git a/queue-3.18/regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch b/queue-3.18/regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch new file mode 100644 index 00000000000..fd2eb1b7bfc --- /dev/null +++ b/queue-3.18/regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch @@ -0,0 +1,78 @@ +From 1222d8fe578cd28a6c7f5e4e6c6b664c56abfdc0 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Thu, 11 Dec 2014 14:40:21 +0100 +Subject: regulator: s2mps11: Fix dw_mmc failure on Gear 2 + +From: Krzysztof Kozlowski + +commit 1222d8fe578cd28a6c7f5e4e6c6b664c56abfdc0 upstream. + +Invalid buck4 configuration for linear mapping of voltage in S2MPS14 +regulators caused boot failure on Gear 2 (dw_mmc-exynos): + +[ 3.569137] EXT4-fs (mmcblk0p15): mounted filesystem with ordered data mode. Opts: (null) +[ 3.571716] VFS: Mounted root (ext4 filesystem) readonly on device 179:15. +[ 3.629842] mmcblk0: error -110 sending status command, retrying +[ 3.630244] mmcblk0: error -110 sending status command, retrying +[ 3.636292] mmcblk0: error -110 sending status command, aborting + +Buck4 voltage regulator has different minimal voltage value than other +bucks. 
Commit merging multiple regulator description macros caused +linear_min_sel from buck[1235] regulators to be used as value for buck4. This +led to lower voltage of buck4 than required. + +Output of the buck4 is used internally as power source for +LDO{3,4,7,11,19,20,21,23}. On Gear 2 board LDO11 is used as MMC +regulator (V_EMMC_1.8V). + +Fixes: 5a867cf28893 ("regulator: s2mps11: Optimize the regulator description macro") +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/regulator/s2mps11.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +--- a/drivers/regulator/s2mps11.c ++++ b/drivers/regulator/s2mps11.c +@@ -479,7 +479,7 @@ static struct regulator_ops s2mps14_reg_ + .enable_mask = S2MPS14_ENABLE_MASK \ + } + +-#define regulator_desc_s2mps14_buck(num, min, step) { \ ++#define regulator_desc_s2mps14_buck(num, min, step, min_sel) { \ + .name = "BUCK"#num, \ + .id = S2MPS14_BUCK##num, \ + .ops = &s2mps14_reg_ops, \ +@@ -488,7 +488,7 @@ static struct regulator_ops s2mps14_reg_ + .min_uV = min, \ + .uV_step = step, \ + .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ +- .linear_min_sel = S2MPS14_BUCK1235_START_SEL, \ ++ .linear_min_sel = min_sel, \ + .ramp_delay = S2MPS14_BUCK_RAMP_DELAY, \ + .vsel_reg = S2MPS14_REG_B1CTRL2 + (num - 1) * 2, \ + .vsel_mask = S2MPS14_BUCK_VSEL_MASK, \ +@@ -522,11 +522,16 @@ static const struct regulator_desc s2mps + regulator_desc_s2mps14_ldo(23, MIN_800_MV, STEP_25_MV), + regulator_desc_s2mps14_ldo(24, MIN_1800_MV, STEP_25_MV), + regulator_desc_s2mps14_ldo(25, MIN_1800_MV, STEP_25_MV), +- regulator_desc_s2mps14_buck(1, MIN_600_MV, STEP_6_25_MV), +- regulator_desc_s2mps14_buck(2, MIN_600_MV, STEP_6_25_MV), +- regulator_desc_s2mps14_buck(3, MIN_600_MV, STEP_6_25_MV), +- regulator_desc_s2mps14_buck(4, MIN_1400_MV, STEP_12_5_MV), +- regulator_desc_s2mps14_buck(5, MIN_600_MV, STEP_6_25_MV), ++ regulator_desc_s2mps14_buck(1, MIN_600_MV, STEP_6_25_MV, ++ 
S2MPS14_BUCK1235_START_SEL), ++ regulator_desc_s2mps14_buck(2, MIN_600_MV, STEP_6_25_MV, ++ S2MPS14_BUCK1235_START_SEL), ++ regulator_desc_s2mps14_buck(3, MIN_600_MV, STEP_6_25_MV, ++ S2MPS14_BUCK1235_START_SEL), ++ regulator_desc_s2mps14_buck(4, MIN_1400_MV, STEP_12_5_MV, ++ S2MPS14_BUCK4_START_SEL), ++ regulator_desc_s2mps14_buck(5, MIN_600_MV, STEP_6_25_MV, ++ S2MPS14_BUCK1235_START_SEL), + }; + + static int s2mps14_pmic_enable_ext_control(struct s2mps11_info *s2mps11, diff --git a/queue-3.18/series b/queue-3.18/series index 8151e69ae55..4da0f542fee 100644 --- a/queue-3.18/series +++ b/queue-3.18/series @@ -138,3 +138,13 @@ perf-x86-intel-uncore-make-sure-only-uncore-events-are-collected.patch perf-fix-events-installation-during-moving-group.patch perf-x86-uncore-hsw-ep-handle-systems-with-only-two-sboxes.patch perf-session-do-not-fail-on-processing-out-of-order-event.patch +spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch +drm-nouveau-nouveau-do-not-bug_on-spin_is_locked-on-up.patch +nouveau-bring-back-legacy-mmap-handler.patch +regulator-s2mps11-fix-dw_mmc-failure-on-gear-2.patch +mmc-sdhci-fix-sleep-in-atomic-after-inserting-sd-card.patch +exit-fix-race-between-wait_consider_task-and-wait_task_zombie.patch +mm-protect-set_page_dirty-from-ongoing-truncation.patch +mm-vmscan-prevent-kswapd-livelock-due-to-pfmemalloc-throttled-process-being-killed.patch +mm-propagate-error-from-stack-expansion-even-for-guard-page.patch +mm-don-t-count-the-stack-guard-page-towards-rlimit_stack.patch diff --git a/queue-3.18/spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch b/queue-3.18/spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch new file mode 100644 index 00000000000..70bb8727669 --- /dev/null +++ b/queue-3.18/spi-sh-msiof-add-runtime-pm-lock-in-initializing.patch @@ -0,0 +1,44 @@ +From 015760563ec77bf17cec712fa94afdf53b285287 Mon Sep 17 00:00:00 2001 +From: Hisashi Nakamura +Date: Mon, 15 Dec 2014 23:01:11 +0900 +Subject: spi: sh-msiof: Add runtime PM lock 
in initializing + +From: Hisashi Nakamura + +commit 015760563ec77bf17cec712fa94afdf53b285287 upstream. + +SH-MSIOF driver is enabled autosuspend API of spi framework. +But autosuspend framework doesn't work during initializing. +So runtime PM lock is added in SH-MSIOF driver initializing. + +Fixes: e2a0ba547ba31c (spi: sh-msiof: Convert to spi core auto_runtime_pm framework) +Signed-off-by: Hisashi Nakamura +Signed-off-by: Yoshihiro Kaneko +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/spi/spi-sh-msiof.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/spi/spi-sh-msiof.c ++++ b/drivers/spi/spi-sh-msiof.c +@@ -480,6 +480,8 @@ static int sh_msiof_spi_setup(struct spi + struct device_node *np = spi->master->dev.of_node; + struct sh_msiof_spi_priv *p = spi_master_get_devdata(spi->master); + ++ pm_runtime_get_sync(&p->pdev->dev); ++ + if (!np) { + /* + * Use spi->controller_data for CS (same strategy as spi_gpio), +@@ -498,6 +500,9 @@ static int sh_msiof_spi_setup(struct spi + if (spi->cs_gpio >= 0) + gpio_set_value(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); + ++ ++ pm_runtime_put_sync(&p->pdev->dev); ++ + return 0; + } + -- 2.47.3