From f3f3f9cccb3fe516ba276586bbdf6ecc4a8c6419 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 13 May 2018 11:17:44 +0200 Subject: [PATCH] 4.9-stable patches added patches: mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v3.patch thermal-exynos-propagate-error-value-from-tmu_read.patch thermal-exynos-reading-temperature-makes-sense-only-when-tmu-is-turned-on.patch --- ...rent-munlock-and-oom-reaper-unmap-v3.patch | 246 ++++++++++++++++++ queue-4.9/series | 3 + ...-propagate-error-value-from-tmu_read.patch | 53 ++++ ...kes-sense-only-when-tmu-is-turned-on.patch | 63 +++++ 4 files changed, 365 insertions(+) create mode 100644 queue-4.9/mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v3.patch create mode 100644 queue-4.9/thermal-exynos-propagate-error-value-from-tmu_read.patch create mode 100644 queue-4.9/thermal-exynos-reading-temperature-makes-sense-only-when-tmu-is-turned-on.patch diff --git a/queue-4.9/mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v3.patch b/queue-4.9/mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v3.patch new file mode 100644 index 00000000000..49b852b5afd --- /dev/null +++ b/queue-4.9/mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v3.patch @@ -0,0 +1,246 @@ +From 27ae357fa82be5ab73b2ef8d39dcb8ca2563483a Mon Sep 17 00:00:00 2001 +From: David Rientjes +Date: Fri, 11 May 2018 16:02:04 -0700 +Subject: mm, oom: fix concurrent munlock and oom reaper unmap, v3 + +From: David Rientjes + +commit 27ae357fa82be5ab73b2ef8d39dcb8ca2563483a upstream. + +Since exit_mmap() is done without the protection of mm->mmap_sem, it is +possible for the oom reaper to concurrently operate on an mm until +MMF_OOM_SKIP is set. + +This allows munlock_vma_pages_all() to concurrently run while the oom +reaper is operating on a vma. 
Since munlock_vma_pages_range() depends +on clearing VM_LOCKED from vm_flags before actually doing the munlock to +determine if any other vmas are locking the same memory, the check for +VM_LOCKED in the oom reaper is racy. + +This is especially noticeable on architectures such as powerpc where +clearing a huge pmd requires serialize_against_pte_lookup(). If the pmd +is zapped by the oom reaper during follow_page_mask() after the check +for pmd_none() is bypassed, this ends up dereferencing a NULL ptl or a +kernel oops. + +Fix this by manually freeing all possible memory from the mm before +doing the munlock and then setting MMF_OOM_SKIP. The oom reaper can not +run on the mm anymore so the munlock is safe to do in exit_mmap(). It +also matches the logic that the oom reaper currently uses for +determining when to set MMF_OOM_SKIP itself, so there's no new risk of +excessive oom killing. + +This issue fixes CVE-2018-1000200. + +Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1804241526320.238665@chino.kir.corp.google.com +Fixes: 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently") +Signed-off-by: David Rientjes +Suggested-by: Tetsuo Handa +Acked-by: Michal Hocko +Cc: Andrea Arcangeli +Cc: [4.14+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/oom.h | 2 + + mm/mmap.c | 44 ++++++++++++++++------------ + mm/oom_kill.c | 81 +++++++++++++++++++++++++++------------------------- + 3 files changed, 71 insertions(+), 56 deletions(-) + +--- a/include/linux/oom.h ++++ b/include/linux/oom.h +@@ -95,6 +95,8 @@ static inline int check_stable_address_s + return 0; + } + ++void __oom_reap_task_mm(struct mm_struct *mm); ++ + extern unsigned long oom_badness(struct task_struct *p, + struct mem_cgroup *memcg, const nodemask_t *nodemask, + unsigned long totalpages); +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -2997,6 +2997,32 @@ void exit_mmap(struct mm_struct *mm) + /* mm's last user has gone, and its 
about to be pulled down */ + mmu_notifier_release(mm); + ++ if (unlikely(mm_is_oom_victim(mm))) { ++ /* ++ * Manually reap the mm to free as much memory as possible. ++ * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard ++ * this mm from further consideration. Taking mm->mmap_sem for ++ * write after setting MMF_OOM_SKIP will guarantee that the oom ++ * reaper will not run on this mm again after mmap_sem is ++ * dropped. ++ * ++ * Nothing can be holding mm->mmap_sem here and the above call ++ * to mmu_notifier_release(mm) ensures mmu notifier callbacks in ++ * __oom_reap_task_mm() will not block. ++ * ++ * This needs to be done before calling munlock_vma_pages_all(), ++ * which clears VM_LOCKED, otherwise the oom reaper cannot ++ * reliably test it. ++ */ ++ mutex_lock(&oom_lock); ++ __oom_reap_task_mm(mm); ++ mutex_unlock(&oom_lock); ++ ++ set_bit(MMF_OOM_SKIP, &mm->flags); ++ down_write(&mm->mmap_sem); ++ up_write(&mm->mmap_sem); ++ } ++ + if (mm->locked_vm) { + vma = mm->mmap; + while (vma) { +@@ -3018,24 +3044,6 @@ void exit_mmap(struct mm_struct *mm) + /* update_hiwater_rss(mm) here? but nobody should be looking */ + /* Use -1 here to ensure all VMAs in the mm are unmapped */ + unmap_vmas(&tlb, vma, 0, -1); +- +- if (unlikely(mm_is_oom_victim(mm))) { +- /* +- * Wait for oom_reap_task() to stop working on this +- * mm. Because MMF_OOM_SKIP is already set before +- * calling down_read(), oom_reap_task() will not run +- * on this "mm" post up_write(). +- * +- * mm_is_oom_victim() cannot be set from under us +- * either because victim->mm is already set to NULL +- * under task_lock before calling mmput and oom_mm is +- * set not NULL by the OOM killer only if victim->mm +- * is found not NULL while holding the task_lock. 
+- */ +- set_bit(MMF_OOM_SKIP, &mm->flags); +- down_write(&mm->mmap_sem); +- up_write(&mm->mmap_sem); +- } + free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); + tlb_finish_mmu(&tlb, 0, -1); + +--- a/mm/oom_kill.c ++++ b/mm/oom_kill.c +@@ -474,7 +474,6 @@ bool process_shares_mm(struct task_struc + return false; + } + +- + #ifdef CONFIG_MMU + /* + * OOM Reaper kernel thread which tries to reap the memory used by the OOM +@@ -485,16 +484,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reape + static struct task_struct *oom_reaper_list; + static DEFINE_SPINLOCK(oom_reaper_lock); + +-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) ++void __oom_reap_task_mm(struct mm_struct *mm) + { +- struct mmu_gather tlb; + struct vm_area_struct *vma; ++ ++ /* ++ * Tell all users of get_user/copy_from_user etc... that the content ++ * is no longer stable. No barriers really needed because unmapping ++ * should imply barriers already and the reader would hit a page fault ++ * if it stumbled over a reaped memory. ++ */ ++ set_bit(MMF_UNSTABLE, &mm->flags); ++ ++ for (vma = mm->mmap ; vma; vma = vma->vm_next) { ++ if (!can_madv_dontneed_vma(vma)) ++ continue; ++ ++ /* ++ * Only anonymous pages have a good chance to be dropped ++ * without additional steps which we cannot afford as we ++ * are OOM already. ++ * ++ * We do not even care about fs backed pages because all ++ * which are reclaimable have already been reclaimed and ++ * we do not want to block exit_mmap by keeping mm ref ++ * count elevated without a good reason. 
++ */ ++ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { ++ const unsigned long start = vma->vm_start; ++ const unsigned long end = vma->vm_end; ++ struct mmu_gather tlb; ++ ++ tlb_gather_mmu(&tlb, mm, start, end); ++ mmu_notifier_invalidate_range_start(mm, start, end); ++ unmap_page_range(&tlb, vma, start, end, NULL); ++ mmu_notifier_invalidate_range_end(mm, start, end); ++ tlb_finish_mmu(&tlb, start, end); ++ } ++ } ++} ++ ++static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) ++{ + bool ret = true; + + /* + * We have to make sure to not race with the victim exit path + * and cause premature new oom victim selection: +- * __oom_reap_task_mm exit_mm ++ * oom_reap_task_mm exit_mm + * mmget_not_zero + * mmput + * atomic_dec_and_test +@@ -539,39 +576,8 @@ static bool __oom_reap_task_mm(struct ta + + trace_start_task_reaping(tsk->pid); + +- /* +- * Tell all users of get_user/copy_from_user etc... that the content +- * is no longer stable. No barriers really needed because unmapping +- * should imply barriers already and the reader would hit a page fault +- * if it stumbled over a reaped memory. +- */ +- set_bit(MMF_UNSTABLE, &mm->flags); +- +- for (vma = mm->mmap ; vma; vma = vma->vm_next) { +- if (!can_madv_dontneed_vma(vma)) +- continue; ++ __oom_reap_task_mm(mm); + +- /* +- * Only anonymous pages have a good chance to be dropped +- * without additional steps which we cannot afford as we +- * are OOM already. +- * +- * We do not even care about fs backed pages because all +- * which are reclaimable have already been reclaimed and +- * we do not want to block exit_mmap by keeping mm ref +- * count elevated without a good reason. 
+- */ +- if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { +- const unsigned long start = vma->vm_start; +- const unsigned long end = vma->vm_end; +- +- tlb_gather_mmu(&tlb, mm, start, end); +- mmu_notifier_invalidate_range_start(mm, start, end); +- unmap_page_range(&tlb, vma, start, end, NULL); +- mmu_notifier_invalidate_range_end(mm, start, end); +- tlb_finish_mmu(&tlb, start, end); +- } +- } + pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", + task_pid_nr(tsk), tsk->comm, + K(get_mm_counter(mm, MM_ANONPAGES)), +@@ -592,13 +598,12 @@ static void oom_reap_task(struct task_st + struct mm_struct *mm = tsk->signal->oom_mm; + + /* Retry the down_read_trylock(mmap_sem) a few times */ +- while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) ++ while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) + schedule_timeout_idle(HZ/10); + + if (attempts <= MAX_OOM_REAP_RETRIES) + goto done; + +- + pr_info("oom_reaper: unable to reap pid:%d (%s)\n", + task_pid_nr(tsk), tsk->comm); + debug_show_all_locks(); diff --git a/queue-4.9/series b/queue-4.9/series index 5c1abe43069..52fee582bfb 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -26,3 +26,6 @@ drm-i915-fix-drm-intel_enable_lvds-error-message-in-kernel-log.patch net-atm-fix-potential-spectre-v1.patch atm-zatm-fix-potential-spectre-v1.patch revert-bluetooth-btusb-fix-quirk-for-atheros-1525-qca6174.patch +thermal-exynos-reading-temperature-makes-sense-only-when-tmu-is-turned-on.patch +thermal-exynos-propagate-error-value-from-tmu_read.patch +mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap-v3.patch diff --git a/queue-4.9/thermal-exynos-propagate-error-value-from-tmu_read.patch b/queue-4.9/thermal-exynos-propagate-error-value-from-tmu_read.patch new file mode 100644 index 00000000000..9b7c8d031ff --- /dev/null +++ b/queue-4.9/thermal-exynos-propagate-error-value-from-tmu_read.patch @@ -0,0 +1,53 @@ +From 
c8da6cdef57b459ac0fd5d9d348f8460a575ae90 Mon Sep 17 00:00:00 2001 +From: Marek Szyprowski +Date: Mon, 16 Apr 2018 12:11:53 +0200 +Subject: thermal: exynos: Propagate error value from tmu_read() + +From: Marek Szyprowski + +commit c8da6cdef57b459ac0fd5d9d348f8460a575ae90 upstream. + +tmu_read() in case of Exynos4210 might return error for out of bound +values. Current code ignores such a value, which leads to reporting a critical +temperature value. Add proper error code propagation to exynos_get_temp() +function. + +Signed-off-by: Marek Szyprowski +CC: stable@vger.kernel.org # v4.6+ +Signed-off-by: Bartlomiej Zolnierkiewicz +Signed-off-by: Eduardo Valentin +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/thermal/samsung/exynos_tmu.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/drivers/thermal/samsung/exynos_tmu.c ++++ b/drivers/thermal/samsung/exynos_tmu.c +@@ -892,6 +892,7 @@ static void exynos7_tmu_control(struct p + static int exynos_get_temp(void *p, int *temp) + { + struct exynos_tmu_data *data = p; ++ int value, ret = 0; + + if (!data || !data->tmu_read || !data->enabled) + return -EINVAL; +@@ -899,12 +900,16 @@ static int exynos_get_temp(void *p, int + mutex_lock(&data->lock); + clk_enable(data->clk); + +- *temp = code_to_temp(data, data->tmu_read(data)) * MCELSIUS; ++ value = data->tmu_read(data); ++ if (value < 0) ++ ret = value; ++ else ++ *temp = code_to_temp(data, value) * MCELSIUS; + + clk_disable(data->clk); + mutex_unlock(&data->lock); + +- return 0; ++ return ret; + } + + #ifdef CONFIG_THERMAL_EMULATION diff --git a/queue-4.9/thermal-exynos-reading-temperature-makes-sense-only-when-tmu-is-turned-on.patch b/queue-4.9/thermal-exynos-reading-temperature-makes-sense-only-when-tmu-is-turned-on.patch new file mode 100644 index 00000000000..d2c94855c3b --- /dev/null +++ b/queue-4.9/thermal-exynos-reading-temperature-makes-sense-only-when-tmu-is-turned-on.patch @@ -0,0 +1,63 @@ +From 88fc6f73fddf64eb507b04f7b2bd01d7291db514 Mon Sep 
17 00:00:00 2001 +From: Marek Szyprowski +Date: Mon, 16 Apr 2018 12:11:52 +0200 +Subject: thermal: exynos: Reading temperature makes sense only when TMU is turned on + +From: Marek Szyprowski + +commit 88fc6f73fddf64eb507b04f7b2bd01d7291db514 upstream. + +When thermal sensor is not yet enabled, reading temperature might return +random value. This might even result in stopping system booting when such +temperature is higher than the critical value. Fix this by checking if TMU +has been actually enabled before reading the temperature. + +This change fixes booting of Exynos4210-based board with TMU enabled (for +example Samsung Trats board), which was broken since v4.4 kernel release. + +Signed-off-by: Marek Szyprowski +Fixes: 9e4249b40340 ("thermal: exynos: Fix first temperature read after registering sensor") +CC: stable@vger.kernel.org # v4.6+ +Signed-off-by: Bartlomiej Zolnierkiewicz +Signed-off-by: Eduardo Valentin +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/thermal/samsung/exynos_tmu.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/thermal/samsung/exynos_tmu.c ++++ b/drivers/thermal/samsung/exynos_tmu.c +@@ -185,6 +185,7 @@ + * @regulator: pointer to the TMU regulator structure. + * @reg_conf: pointer to structure to register with core thermal. + * @ntrip: number of supported trip points. 
++ * @enabled: current status of TMU device + * @tmu_initialize: SoC specific TMU initialization method + * @tmu_control: SoC specific TMU control method + * @tmu_read: SoC specific TMU temperature read method +@@ -205,6 +206,7 @@ struct exynos_tmu_data { + struct regulator *regulator; + struct thermal_zone_device *tzd; + unsigned int ntrip; ++ bool enabled; + + int (*tmu_initialize)(struct platform_device *pdev); + void (*tmu_control)(struct platform_device *pdev, bool on); +@@ -398,6 +400,7 @@ static void exynos_tmu_control(struct pl + mutex_lock(&data->lock); + clk_enable(data->clk); + data->tmu_control(pdev, on); ++ data->enabled = on; + clk_disable(data->clk); + mutex_unlock(&data->lock); + } +@@ -890,7 +893,7 @@ static int exynos_get_temp(void *p, int + { + struct exynos_tmu_data *data = p; + +- if (!data || !data->tmu_read) ++ if (!data || !data->tmu_read || !data->enabled) + return -EINVAL; + + mutex_lock(&data->lock); -- 2.47.3